- Update to 2.6.34-rc4.
author Jeff Mahoney <jeffm@suse.de>
Wed, 14 Apr 2010 20:24:58 +0000 (16:24 -0400)
committer Jeff Mahoney <jeffm@suse.de>
Wed, 14 Apr 2010 20:24:58 +0000 (16:24 -0400)
  - Eliminated 3 patches.

suse-commit: d9cc9fa000db75f9468d1b8b866157ca4e5641c3

245 files changed:
MAINTAINERS
Makefile
arch/ia64/kernel/acpi.c
arch/ia64/kernel/mca.c
arch/ia64/sn/pci/tioca_provider.c
arch/powerpc/kernel/of_platform.c
arch/powerpc/kernel/vio.c
arch/powerpc/kvm/book3s.c
arch/powerpc/platforms/52xx/mpc52xx_gpio.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/powerpc/platforms/82xx/ep8248e.c
arch/powerpc/platforms/cell/axon_msi.c
arch/powerpc/platforms/chrp/setup.c
arch/powerpc/platforms/pasemi/gpio_mdio.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/fsl_msi.c
arch/powerpc/sysdev/fsl_rio.c
arch/powerpc/sysdev/pmi.c
arch/sparc/kernel/central.c
arch/x86/Kconfig
arch/x86/kdb/kdba_bt.c
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/cpu/mcheck/mce-inject.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/dumpstack.h
arch/x86/kernel/e820.c
arch/x86/kernel/hpet.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/setup.c
arch/x86/kvm/svm.c
arch/x86/kvm/x86.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/pageattr.c
drivers/acpi/numa.c
drivers/acpi/osl.c
drivers/acpi/pci_root.c
drivers/acpi/processor_driver.c
drivers/acpi/processor_idle.c
drivers/acpi/processor_perflib.c
drivers/acpi/scan.c
drivers/acpi/thermal.c
drivers/ata/libata-core.c
drivers/ata/sata_fsl.c
drivers/base/cpu.c
drivers/block/xen-blkfront.c
drivers/char/agp/amd-k7-agp.c
drivers/char/agp/generic.c
drivers/char/agp/intel-agp.c
drivers/char/mem.c
drivers/char/raw.c
drivers/char/tty_io.c
drivers/connector/cn_proc.c
drivers/crypto/amcc/crypto4xx_core.c
drivers/crypto/talitos.c
drivers/dma/fsldma.c
drivers/firmware/dcdbas.c
drivers/firmware/dell_rbu.c
drivers/firmware/dmi_scan.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/vmwgfx/Kconfig
drivers/hid/hid-apple.c
drivers/ieee1394/sbp2.c
drivers/input/misc/sparcspkr.c
drivers/input/mouse/synaptics.c
drivers/input/mouse/touchkit_ps2.c
drivers/input/serio/xilinx_ps2.c
drivers/input/xen-kbdfront.c
drivers/macintosh/smu.c
drivers/macintosh/therm_pm72.c
drivers/macintosh/therm_windtunnel.c
drivers/md/dm-memcache.c
drivers/md/dm-raid45.c
drivers/md/dm-region-hash.c
drivers/media/video/uvc/uvc_ctrl.c
drivers/media/video/uvc/uvc_driver.c
drivers/mfd/ezx-pcap.c
drivers/mfd/twl4030-irq.c
drivers/mtd/maps/physmap_of.c
drivers/mtd/maps/sun_uflash.c
drivers/mtd/nand/fsl_upm.c
drivers/mtd/nand/ndfc.c
drivers/net/Kconfig
drivers/net/Makefile
drivers/net/e1000e/netdev.c
drivers/net/ehea/ehea_main.c
drivers/net/fec_mpc52xx_phy.c
drivers/net/gianfar.c
drivers/net/ibm_newemac/core.c
drivers/net/ibm_newemac/mal.c
drivers/net/ibm_newemac/rgmii.c
drivers/net/ibm_newemac/zmii.c
drivers/net/igb/igb_main.c
drivers/net/ixgbe/ixgbe_main.c
drivers/net/myri_sbus.c
drivers/net/niu.c
drivers/net/sky2.c
drivers/net/sunbmac.c
drivers/net/sunlance.c
drivers/net/tulip/tulip_core.c
drivers/net/xen-netfront.c
drivers/oprofile/buffer_sync.c
drivers/pci/dmar.c
drivers/pci/pci.c
drivers/pcmcia/electra_cf.c
drivers/pcmcia/m8xx_pcmcia.c
drivers/platform/x86/acer-wmi.c
drivers/sbus/char/display7seg.c
drivers/sbus/char/envctrl.c
drivers/sbus/char/flash.c
drivers/scsi/device_handler/scsi_dh.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/device_handler/scsi_dh_emc.c
drivers/scsi/device_handler/scsi_dh_hp_sw.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/ibmvscsi/ibmvscsi.c
drivers/scsi/megaraid/megaraid_mbox.c
drivers/scsi/qla4xxx/ql4_os.c
drivers/scsi/qlogicpti.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_netlink.c
drivers/scsi/scsi_proc.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/sd.c
drivers/scsi/sun_esp.c
drivers/serial/8250.c
drivers/serial/sunsu.c
drivers/spi/spi_mpc8xxx.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/fhci-hcd.c
drivers/usb/host/ohci-q.c
drivers/video/aty/radeon_monitor.c
drivers/video/bw2.c
drivers/video/cg14.c
drivers/video/cg3.c
drivers/video/cg6.c
drivers/video/console/bitblit.c
drivers/video/ffb.c
drivers/video/leo.c
drivers/video/p9100.c
drivers/video/platinumfb.c
drivers/video/tcx.c
drivers/video/xen-fbfront.c
drivers/watchdog/cpwd.c
drivers/watchdog/riowd.c
drivers/xen/sys-hypervisor.c
drivers/xen/xenbus/xenbus_probe.c
fs/btrfs/extent_io.c
fs/btrfs/super.c
fs/compat_ioctl.c
fs/mpage.c
fs/nfs/dns_resolve.c
fs/nfs/file.c
fs/nfs/inode.c
fs/nfs4acl_base.c
fs/nfs4acl_compat.c
fs/nfsd/export.c
fs/nfsd/nfs4idmap.c
fs/nfsd/vfs.c
fs/ocfs2/inode.c
fs/partitions/check.c
fs/proc/array.c
fs/proc/base.c
fs/proc/kcore.c
fs/reiserfs/dir.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_super.c
include/linux/blkdev.h
include/linux/fs.h
include/linux/genhd.h
include/linux/jbd.h
include/linux/kernel.h
include/linux/libata.h
include/linux/mm.h
include/linux/module.h
include/linux/security.h
include/linux/slab.h
include/net/sock.h
include/xen/xenbus.h
init/do_mounts.c
init/main.c
kdb/kdbdereference.c
kdb/kdbmain.c
kdb/kdbsupport.c
kdb/modules/kdbm_pg.c
kdb/modules/kdbm_task.c
kdb/modules/kdbm_vm.c
kernel/async.c
kernel/audit.c
kernel/exit.c
kernel/irq/manage.c
kernel/irq/proc.c
kernel/kallsyms.c
kernel/module.c
kernel/sched.c
kernel/sys.c
kernel/sysctl_binary.c
kernel/taskstats.c
kernel/unwind.c
lib/Kconfig.debug
mm/Makefile
mm/filemap.c
mm/hugetlb.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/mmap.c
mm/mprotect.c
mm/page_io.c
mm/slab.c
mm/slub.c
mm/swap_state.c
mm/truncate.c
mm/vmscan.c
mm/vmstat.c
net/bridge/br_if.c
net/core/dev.c
net/core/filter.c
net/ipv4/inet_fragment.c
net/ipv4/ip_fragment.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv6/addrconf.c
net/ipv6/reassembly.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/netfilter/core.c
net/netfilter/nf_conntrack_acct.c
net/netfilter/nf_conntrack_netlink.c
net/sctp/ulpevent.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcauth_unix.c
sound/sparc/cs4231.c
sound/sparc/dbri.c
virt/kvm/ioapic.c

diff --cc MAINTAINERS
Simple merge
diff --cc Makefile
Simple merge
diff --cc arch/ia64/kernel/acpi.c
Simple merge
diff --cc arch/ia64/kernel/mca.c
  #include <linux/cpumask.h>
  #include <linux/kdebug.h>
  #include <linux/cpu.h>
+ #include <linux/gfp.h>
 +#ifdef CONFIG_KDB
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>  /* for switch state wrappers */
 +#endif /* CONFIG_KDB */
  
  #include <asm/delay.h>
  #include <asm/machvec.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc arch/x86/kdb/kdba_bt.c
index 136268c,0000000..6d973f1
mode 100644,000000..100644
--- /dev/null
@@@ -1,5760 -1,0 +1,5761 @@@
 +/*
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (c) 2006, 2007-2009 Silicon Graphics, Inc.  All Rights Reserved.
 + *
 + * Common code for doing accurate backtraces on i386 and x86_64, including
 + * printing the values of arguments.
 + */
 +
++#include <linux/slab.h>
 +#include <linux/init.h>
 +#include <linux/kallsyms.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/ctype.h>
 +#include <linux/string.h>
 +#include <linux/stringify.h>
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
 +#include <linux/nmi.h>
 +#include <asm/asm-offsets.h>
 +#include <asm/system.h>
 +
 +#define KDB_DEBUG_BB(fmt, ...)                                                        \
 +      {if (KDB_DEBUG(BB)) kdb_printf(fmt, ## __VA_ARGS__);}
 +#define KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix)                    \
 +      kdb_printf(prefix "%c0x%x" suffix,                                      \
 +                 offset >= 0 ? '+' : '-',                                     \
 +                 offset >= 0 ? offset : -offset)
 +#define KDB_DEBUG_BB_OFFSET(offset, prefix, suffix)                           \
 +      {if (KDB_DEBUG(BB)) KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix);}
 +
 +#define       BB_CHECK(expr, val, ret)                                                \
 +({                                                                            \
 +      if (unlikely(expr)) {                                                   \
 +              kdb_printf("%s, line %d: BB_CHECK(" #expr ") failed "           \
 +                      #val "=%lx\n",                                          \
 +                      __func__, __LINE__, (long)val);                         \
 +              bb_giveup = 1;                                                  \
 +              return ret;                                                     \
 +      }                                                                       \
 +})
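 +
 +/* Illustrative usage (editor's addition; the call below is made up):
 + *
 + *   BB_CHECK(reg > BBRG_R15, reg, BBRG_UNDEFINED);
 + *
 + * prints a diagnostic with the function, line and offending value,
 + * sets bb_giveup and returns BBRG_UNDEFINED from the enclosing
 + * function when the check fires.
 + */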
 +
 +static int bb_giveup;
 +
 +/* Use BBRG_Rxx for both i386 and x86_64.  RAX through R15 must be at the end,
 + * starting with RAX.  Some of these codes do not reflect actual registers,
 + * such codes are special cases when parsing the record of register changes.
 + * When updating BBRG_ entries, update bbrg_name as well.
 + */
 +
 +enum bb_reg_code
 +{
 +      BBRG_UNDEFINED = 0,     /* Register contents are undefined */
 +      BBRG_OSP,               /* original stack pointer on entry to function */
 +      BBRG_RAX,
 +      BBRG_RBX,
 +      BBRG_RCX,
 +      BBRG_RDX,
 +      BBRG_RDI,
 +      BBRG_RSI,
 +      BBRG_RBP,
 +      BBRG_RSP,
 +      BBRG_R8,
 +      BBRG_R9,
 +      BBRG_R10,
 +      BBRG_R11,
 +      BBRG_R12,
 +      BBRG_R13,
 +      BBRG_R14,
 +      BBRG_R15,
 +};
 +
 +static const char *bbrg_name[] = {
 +      [BBRG_UNDEFINED]   = "undefined",
 +      [BBRG_OSP]         = "osp",
 +      [BBRG_RAX]         = "rax",
 +      [BBRG_RBX]         = "rbx",
 +      [BBRG_RCX]         = "rcx",
 +      [BBRG_RDX]         = "rdx",
 +      [BBRG_RDI]         = "rdi",
 +      [BBRG_RSI]         = "rsi",
 +      [BBRG_RBP]         = "rbp",
 +      [BBRG_RSP]         = "rsp",
 +      [BBRG_R8]          = "r8",
 +      [BBRG_R9]          = "r9",
 +      [BBRG_R10]         = "r10",
 +      [BBRG_R11]         = "r11",
 +      [BBRG_R12]         = "r12",
 +      [BBRG_R13]         = "r13",
 +      [BBRG_R14]         = "r14",
 +      [BBRG_R15]         = "r15",
 +};
 +
 +/* Map a register name to its register code.  This includes the sub-register
 + * addressable fields, e.g. parts of rax can be addressed as ax, al, ah, eax.
 + * The list is sorted so it can be binary chopped; an illustrative
 + * lookup sketch follows the table.  Sort command is:
 + *   LANG=C sort -t '"' -k2
 + */
 +
 +struct bb_reg_code_map {
 +      enum bb_reg_code reg;
 +      const char *name;
 +};
 +
 +static const struct bb_reg_code_map
 +bb_reg_code_map[] = {
 +      { BBRG_RAX, "ah" },
 +      { BBRG_RAX, "al" },
 +      { BBRG_RAX, "ax" },
 +      { BBRG_RBX, "bh" },
 +      { BBRG_RBX, "bl" },
 +      { BBRG_RBP, "bp" },
 +      { BBRG_RBP, "bpl" },
 +      { BBRG_RBX, "bx" },
 +      { BBRG_RCX, "ch" },
 +      { BBRG_RCX, "cl" },
 +      { BBRG_RCX, "cx" },
 +      { BBRG_RDX, "dh" },
 +      { BBRG_RDI, "di" },
 +      { BBRG_RDI, "dil" },
 +      { BBRG_RDX, "dl" },
 +      { BBRG_RDX, "dx" },
 +      { BBRG_RAX, "eax" },
 +      { BBRG_RBP, "ebp" },
 +      { BBRG_RBX, "ebx" },
 +      { BBRG_RCX, "ecx" },
 +      { BBRG_RDI, "edi" },
 +      { BBRG_RDX, "edx" },
 +      { BBRG_RSI, "esi" },
 +      { BBRG_RSP, "esp" },
 +      { BBRG_R10, "r10" },
 +      { BBRG_R10, "r10d" },
 +      { BBRG_R10, "r10l" },
 +      { BBRG_R10, "r10w" },
 +      { BBRG_R11, "r11" },
 +      { BBRG_R11, "r11d" },
 +      { BBRG_R11, "r11l" },
 +      { BBRG_R11, "r11w" },
 +      { BBRG_R12, "r12" },
 +      { BBRG_R12, "r12d" },
 +      { BBRG_R12, "r12l" },
 +      { BBRG_R12, "r12w" },
 +      { BBRG_R13, "r13" },
 +      { BBRG_R13, "r13d" },
 +      { BBRG_R13, "r13l" },
 +      { BBRG_R13, "r13w" },
 +      { BBRG_R14, "r14" },
 +      { BBRG_R14, "r14d" },
 +      { BBRG_R14, "r14l" },
 +      { BBRG_R14, "r14w" },
 +      { BBRG_R15, "r15" },
 +      { BBRG_R15, "r15d" },
 +      { BBRG_R15, "r15l" },
 +      { BBRG_R15, "r15w" },
 +      { BBRG_R8,  "r8" },
 +      { BBRG_R8,  "r8d" },
 +      { BBRG_R8,  "r8l" },
 +      { BBRG_R8,  "r8w" },
 +      { BBRG_R9,  "r9" },
 +      { BBRG_R9,  "r9d" },
 +      { BBRG_R9,  "r9l" },
 +      { BBRG_R9,  "r9w" },
 +      { BBRG_RAX, "rax" },
 +      { BBRG_RBP, "rbp" },
 +      { BBRG_RBX, "rbx" },
 +      { BBRG_RCX, "rcx" },
 +      { BBRG_RDI, "rdi" },
 +      { BBRG_RDX, "rdx" },
 +      { BBRG_RSI, "rsi" },
 +      { BBRG_RSP, "rsp" },
 +      { BBRG_RSI, "si" },
 +      { BBRG_RSI, "sil" },
 +      { BBRG_RSP, "sp" },
 +      { BBRG_RSP, "spl" },
 +};
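 +
 +/* Editor's illustration, not part of the original patch: a minimal
 + * sketch of how the sorted table above can be binary chopped.  The
 + * lookup routine actually used appears later in this file and may
 + * differ; the function name here is made up.
 + */
 +static enum bb_reg_code
 +bb_reg_code_lookup_sketch(const char *name)
 +{
 +      int low = 0, high = ARRAY_SIZE(bb_reg_code_map) - 1;
 +      while (low <= high) {
 +              int mid = (low + high) / 2;
 +              int i = strcmp(name, bb_reg_code_map[mid].name);
 +              if (i == 0)
 +                      return bb_reg_code_map[mid].reg;
 +              if (i < 0)
 +                      high = mid - 1;         /* name sorts before mid */
 +              else
 +                      low = mid + 1;          /* name sorts after mid */
 +      }
 +      return BBRG_UNDEFINED;
 +}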
 +
 +/* Record register contents in terms of the values that were passed to this
 + * function, IOW track which registers contain an input value.  A register's
 + * contents can be undefined, it can contain an input register value or it can
 + * contain an offset from the original stack pointer.
 + *
 + * This structure is used to represent the current contents of the integer
 + * registers, it is held in an array that is indexed by BBRG_xxx.  The element
 + * for BBRG_xxx indicates what input value is currently in BBRG_xxx.  When
 + * 'value' is BBRG_OSP then register BBRG_xxx contains a stack pointer,
 + * pointing at 'offset' from the original stack pointer on entry to the
 + * function.  When 'value' is not BBRG_OSP then element BBRG_xxx contains the
 + * original contents of an input register and offset is ignored.
 + *
 + * An input register 'value' can be stored in more than one register and/or in
 + * more than one memory location.
 + */
 +
 +struct bb_reg_contains
 +{
 +      enum bb_reg_code value: 8;
 +      short offset;
 +};
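 +
 +/* Worked example (editor's addition): after a prologue of
 + *
 + *   push %rbp
 + *   mov  %rsp,%rbp
 + *
 + * the tracker holds contains[BBRG_RBP] = { BBRG_OSP, -0x8 } (rbp is a
 + * stack pointer 8 bytes below the original rsp) while an untouched
 + * register still reads as itself, e.g. contains[BBRG_RBX] =
 + * { BBRG_RBX, 0 }.
 + */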
 +
 +/* Note: the offsets in struct bb_mem_contains in this code are _NOT_ offsets
 + * from OSP, they are offsets from current RSP.  It fits better with the way
 + * that struct pt_regs is built, some code pushes extra data before pt_regs so
 + * working with OSP relative offsets gets messy.  struct bb_mem_contains
 + * entries must be in descending order of RSP offset.
 + */
 +
 +typedef struct { DECLARE_BITMAP(bits, BBRG_R15+1); } bbrgmask_t;
 +#define BB_SKIP(reg) (1 << (BBRG_ ## reg))
 +struct bb_mem_contains {
 +      short offset_address;
 +      enum bb_reg_code value: 8;
 +};
 +
 +/* Transfer of control to a label outside the current function.  If the
 + * transfer is to a known common restore path that expects known registers
 + * and/or a known memory state (e.g. struct pt_regs) then do a sanity check on
 + * the state at this point.
 + */
 +
 +struct bb_name_state {
 +      const char *name;                       /* target function */
 +      bfd_vma address;                        /* Address of target function */
 +      const char *fname;                      /* optional from function name */
 +      const struct bb_mem_contains *mem;      /* expected memory state */
 +      const struct bb_reg_contains *regs;     /* expected register state */
 +      const unsigned short mem_size;          /* ARRAY_SIZE(mem) */
 +      const unsigned short regs_size;         /* ARRAY_SIZE(regs) */
 +      const short osp_offset;                 /* RSP in regs == OSP+osp_offset */
 +      const bbrgmask_t skip_mem;              /* Some slots in mem may be undefined */
 +      const bbrgmask_t skip_regs;             /* Some slots in regs may be undefined */
 +};
 +
 +/* NS (NAME_STATE) macros define the register and memory state when we transfer
 + * control to or start decoding a special case name.  Use NS when the target
 + * label always has the same state.  Use NS_FROM and specify the source label
 + * if the target state is slightly different depending on where it is branched
 + * from.  This gives better state checking, by isolating the special cases.
 + *
 + * Note: for the same target label, NS_FROM entries must be followed by a
 + * single NS entry.
 + */
 +
 +#define       NS_FROM(iname, ifname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
 +      { \
 +              .name = iname, \
 +              .fname = ifname, \
 +              .mem = imem, \
 +              .regs = iregs, \
 +              .mem_size = ARRAY_SIZE(imem), \
 +              .regs_size = ARRAY_SIZE(iregs), \
 +              .skip_mem.bits[0] = iskip_mem, \
 +              .skip_regs.bits[0] = iskip_regs, \
 +              .osp_offset = iosp_offset, \
 +              .address = 0 \
 +      }
 +
 +/* Shorter forms for the common cases */
 +#define       NS(iname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
 +        NS_FROM(iname, NULL, imem, iregs, iskip_mem, iskip_regs, iosp_offset)
 +#define       NS_MEM(iname, imem, iskip_mem) \
 +        NS_FROM(iname, NULL, imem, no_regs, iskip_mem, 0, 0)
 +#define       NS_MEM_FROM(iname, ifname, imem, iskip_mem) \
 +        NS_FROM(iname, ifname, imem, no_regs, iskip_mem, 0, 0)
 +#define       NS_REG(iname, iregs, iskip_regs) \
 +        NS_FROM(iname, NULL, no_memory, iregs, 0, iskip_regs, 0)
 +#define       NS_REG_FROM(iname, ifname, iregs, iskip_regs) \
 +        NS_FROM(iname, ifname, no_memory, iregs, 0, iskip_regs, 0)
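 +
 +/* Illustrative expansion (editor's addition): an entry such as
 + *
 + *   NS_MEM("ia32_sysret", partial_pt_regs, 0)
 + *
 + * builds a bb_name_state with .name = "ia32_sysret", the expected
 + * memory layout of partial_pt_regs, no expected register state
 + * (no_regs) and nothing skipped, so every partial pt_regs slot must
 + * match when control transfers to that label.
 + */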
 +
 +static void
 +bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src);
 +
 +static const char *bb_mod_name, *bb_func_name;
 +
 +static int
 +bb_noret(const char *name)
 +{
 +      if (strcmp(name, "panic") == 0 ||
 +          strcmp(name, "do_exit") == 0 ||
 +          strcmp(name, "do_group_exit") == 0 ||
 +          strcmp(name, "complete_and_exit") == 0)
 +              return 1;
 +      return 0;
 +}
 +
 +/*============================================================================*/
 +/*                                                                            */
 +/* Most of the basic block code and data is common to x86_64 and i386.  This  */
 +/* large ifdef  contains almost all of the differences between the two        */
 +/* architectures.                                                             */
 +/*                                                                            */
 +/* Make sure you update the correct section of this ifdef.                    */
 +/*                                                                            */
 +/*============================================================================*/
 +
 +#ifdef        CONFIG_X86_64
 +
 +/* Registers that can be used to pass parameters, in the order that parameters
 + * are passed.
 + */
 +
 +static const enum bb_reg_code
 +bb_param_reg[] = {
 +      BBRG_RDI,
 +      BBRG_RSI,
 +      BBRG_RDX,
 +      BBRG_RCX,
 +      BBRG_R8,
 +      BBRG_R9,
 +};
 +
 +static const enum bb_reg_code
 +bb_preserved_reg[] = {
 +      BBRG_RBX,
 +      BBRG_RBP,
 +      BBRG_RSP,
 +      BBRG_R12,
 +      BBRG_R13,
 +      BBRG_R14,
 +      BBRG_R15,
 +};
 +
 +static const struct bb_mem_contains full_pt_regs[] = {
 +      { 0x70, BBRG_RDI },
 +      { 0x68, BBRG_RSI },
 +      { 0x60, BBRG_RDX },
 +      { 0x58, BBRG_RCX },
 +      { 0x50, BBRG_RAX },
 +      { 0x48, BBRG_R8  },
 +      { 0x40, BBRG_R9  },
 +      { 0x38, BBRG_R10 },
 +      { 0x30, BBRG_R11 },
 +      { 0x28, BBRG_RBX },
 +      { 0x20, BBRG_RBP },
 +      { 0x18, BBRG_R12 },
 +      { 0x10, BBRG_R13 },
 +      { 0x08, BBRG_R14 },
 +      { 0x00, BBRG_R15 },
 +};
 +static const struct bb_mem_contains full_pt_regs_plus_1[] = {
 +      { 0x78, BBRG_RDI },
 +      { 0x70, BBRG_RSI },
 +      { 0x68, BBRG_RDX },
 +      { 0x60, BBRG_RCX },
 +      { 0x58, BBRG_RAX },
 +      { 0x50, BBRG_R8  },
 +      { 0x48, BBRG_R9  },
 +      { 0x40, BBRG_R10 },
 +      { 0x38, BBRG_R11 },
 +      { 0x30, BBRG_RBX },
 +      { 0x28, BBRG_RBP },
 +      { 0x20, BBRG_R12 },
 +      { 0x18, BBRG_R13 },
 +      { 0x10, BBRG_R14 },
 +      { 0x08, BBRG_R15 },
 +};
 +/*
 + * Going into error_exit we have the hardware pushed error_code on the stack
 + * plus a full pt_regs
 + */
 +static const struct bb_mem_contains error_code_full_pt_regs[] = {
 +      { 0x78, BBRG_UNDEFINED },
 +      { 0x70, BBRG_RDI },
 +      { 0x68, BBRG_RSI },
 +      { 0x60, BBRG_RDX },
 +      { 0x58, BBRG_RCX },
 +      { 0x50, BBRG_RAX },
 +      { 0x48, BBRG_R8  },
 +      { 0x40, BBRG_R9  },
 +      { 0x38, BBRG_R10 },
 +      { 0x30, BBRG_R11 },
 +      { 0x28, BBRG_RBX },
 +      { 0x20, BBRG_RBP },
 +      { 0x18, BBRG_R12 },
 +      { 0x10, BBRG_R13 },
 +      { 0x08, BBRG_R14 },
 +      { 0x00, BBRG_R15 },
 +};
 +static const struct bb_mem_contains partial_pt_regs[] = {
 +      { 0x40, BBRG_RDI },
 +      { 0x38, BBRG_RSI },
 +      { 0x30, BBRG_RDX },
 +      { 0x28, BBRG_RCX },
 +      { 0x20, BBRG_RAX },
 +      { 0x18, BBRG_R8  },
 +      { 0x10, BBRG_R9  },
 +      { 0x08, BBRG_R10 },
 +      { 0x00, BBRG_R11 },
 +};
 +static const struct bb_mem_contains partial_pt_regs_plus_1[] = {
 +      { 0x48, BBRG_RDI },
 +      { 0x40, BBRG_RSI },
 +      { 0x38, BBRG_RDX },
 +      { 0x30, BBRG_RCX },
 +      { 0x28, BBRG_RAX },
 +      { 0x20, BBRG_R8  },
 +      { 0x18, BBRG_R9  },
 +      { 0x10, BBRG_R10 },
 +      { 0x08, BBRG_R11 },
 +};
 +static const struct bb_mem_contains partial_pt_regs_plus_2[] = {
 +      { 0x50, BBRG_RDI },
 +      { 0x48, BBRG_RSI },
 +      { 0x40, BBRG_RDX },
 +      { 0x38, BBRG_RCX },
 +      { 0x30, BBRG_RAX },
 +      { 0x28, BBRG_R8  },
 +      { 0x20, BBRG_R9  },
 +      { 0x18, BBRG_R10 },
 +      { 0x10, BBRG_R11 },
 +};
 +static const struct bb_mem_contains no_memory[] = {
 +};
 +/* Hardware has already pushed an error_code on the stack.  Use undefined just
 + * to set the initial stack offset.
 + */
 +static const struct bb_mem_contains error_code[] = {
 +      { 0x0, BBRG_UNDEFINED },
 +};
 +/* error_code plus original rax */
 +static const struct bb_mem_contains error_code_rax[] = {
 +      { 0x8, BBRG_UNDEFINED },
 +      { 0x0, BBRG_RAX },
 +};
 +
 +static const struct bb_reg_contains all_regs[] = {
 +      [BBRG_RAX] = { BBRG_RAX, 0 },
 +      [BBRG_RBX] = { BBRG_RBX, 0 },
 +      [BBRG_RCX] = { BBRG_RCX, 0 },
 +      [BBRG_RDX] = { BBRG_RDX, 0 },
 +      [BBRG_RDI] = { BBRG_RDI, 0 },
 +      [BBRG_RSI] = { BBRG_RSI, 0 },
 +      [BBRG_RBP] = { BBRG_RBP, 0 },
 +      [BBRG_RSP] = { BBRG_OSP, 0 },
 +      [BBRG_R8 ] = { BBRG_R8,  0 },
 +      [BBRG_R9 ] = { BBRG_R9,  0 },
 +      [BBRG_R10] = { BBRG_R10, 0 },
 +      [BBRG_R11] = { BBRG_R11, 0 },
 +      [BBRG_R12] = { BBRG_R12, 0 },
 +      [BBRG_R13] = { BBRG_R13, 0 },
 +      [BBRG_R14] = { BBRG_R14, 0 },
 +      [BBRG_R15] = { BBRG_R15, 0 },
 +};
 +static const struct bb_reg_contains no_regs[] = {
 +};
 +
 +static struct bb_name_state bb_special_cases[] = {
 +
 +      /* First the cases that pass data only in memory.  We do not check any
 +       * register state for these cases.
 +       */
 +
 +      /* Simple cases, no exceptions */
 +      NS_MEM("ia32_ptregs_common", partial_pt_regs_plus_1, 0),
 +      NS_MEM("ia32_sysret", partial_pt_regs, 0),
 +      NS_MEM("int_careful", partial_pt_regs, 0),
 +      NS_MEM("ia32_badarg", partial_pt_regs, 0),
 +      NS_MEM("int_restore_rest", full_pt_regs, 0),
 +      NS_MEM("int_signal", full_pt_regs, 0),
 +      NS_MEM("int_very_careful", partial_pt_regs, 0),
 +      NS_MEM("ptregscall_common", full_pt_regs_plus_1, 0),
 +      NS_MEM("ret_from_intr", partial_pt_regs_plus_2, 0),
 +      NS_MEM("stub32_clone", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_execve", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_fork", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_iopl", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_rt_sigreturn", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_sigaltstack", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_sigreturn", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub32_vfork", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_clone", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_execve", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_fork", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_iopl", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_rt_sigreturn", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_sigaltstack", partial_pt_regs_plus_1, 0),
 +      NS_MEM("stub_vfork", partial_pt_regs_plus_1, 0),
 +      NS_MEM("sysenter_auditsys", partial_pt_regs,
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
 +
 +      NS_MEM("paranoid_exit", error_code_full_pt_regs, 0),
 +
 +      NS_MEM_FROM("ia32_badsys", "ia32_sysenter_target",
 +              partial_pt_regs,
 +              /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
 +               * some paths.  It also stomps on RAX.
 +               */
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX)),
 +      NS_MEM_FROM("ia32_badsys", "ia32_cstar_target",
 +              partial_pt_regs,
 +              /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
 +               * paths.  It also stomps on RAX.  Even more confusing, instead
 +               * of storing RCX it stores RBP.  WTF?
 +               */
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +      NS_MEM_FROM("ia32_badsys", "ia32_syscall",
 +              partial_pt_regs,
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11)),
 +      NS_MEM("ia32_badsys", partial_pt_regs, 0),
 +
 +#ifdef CONFIG_AUDITSYSCALL
 +      NS_MEM_FROM("int_with_check", "sysexit_audit", partial_pt_regs,
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX)),
 +      NS_MEM_FROM("int_with_check", "ia32_cstar_target", partial_pt_regs,
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +#endif
 +      NS_MEM("int_with_check", no_memory, 0),
 +
 +      /* Various bits of code branch to int_ret_from_sys_call, with slightly
 +       * different missing values in pt_regs.
 +       */
 +      NS_MEM_FROM("int_ret_from_sys_call", "ret_from_fork",
 +              partial_pt_regs,
 +              BB_SKIP(R11)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "stub_execve",
 +              partial_pt_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "stub_rt_sigreturn",
 +              partial_pt_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "kernel_execve",
 +              partial_pt_regs,
 +              BB_SKIP(RAX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "ia32_syscall",
 +              partial_pt_regs,
 +              /* ia32_syscall only saves RDI through RCX. */
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "ia32_sysenter_target",
 +              partial_pt_regs,
 +              /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
 +              * some paths.  It also stomps on RAX.
 +              */
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "ia32_cstar_target",
 +              partial_pt_regs,
 +              /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
 +               * paths.  It also stomps on RAX.  Even more confusing, instead
 +               * of storing RCX it stores RBP.  WTF?
 +               */
 +              BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +      NS_MEM_FROM("int_ret_from_sys_call", "ia32_badsys",
 +              partial_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM("int_ret_from_sys_call", partial_pt_regs, 0),
 +
 +#ifdef        CONFIG_PREEMPT
 +      NS_MEM("retint_kernel", partial_pt_regs, BB_SKIP(RAX)),
 +#endif        /* CONFIG_PREEMPT */
 +
 +      NS_MEM("retint_careful", partial_pt_regs, BB_SKIP(RAX)),
 +
 +      /* Horrible hack: For a brand new x86_64 task, switch_to() branches to
 +       * ret_from_fork with a totally different stack state from all the
 +       * other tasks that come out of switch_to().  This non-standard state
 +       * cannot be represented so just ignore the branch from switch_to() to
 +       * ret_from_fork.  Due to inlining and linker labels, switch_to() can
 +       * appear as several different function labels, including schedule,
 +       * context_switch and __sched_text_start.
 +       */
 +      NS_MEM_FROM("ret_from_fork", "schedule", no_memory, 0),
 +      NS_MEM_FROM("ret_from_fork", "__schedule", no_memory, 0),
 +      NS_MEM_FROM("ret_from_fork", "__sched_text_start", no_memory, 0),
 +      NS_MEM_FROM("ret_from_fork", "context_switch", no_memory, 0),
 +      NS_MEM("ret_from_fork", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("ret_from_sys_call", "ret_from_fork",
 +              partial_pt_regs,
 +              BB_SKIP(R11)),
 +      NS_MEM("ret_from_sys_call", partial_pt_regs, 0),
 +
 +      NS_MEM("retint_restore_args",
 +              partial_pt_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +
 +      NS_MEM("retint_swapgs",
 +              partial_pt_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +
 +      /* Now the cases that pass data in registers.  We do not check any
 +       * memory state for these cases.
 +       */
 +
 +      NS_REG("bad_put_user",
 +              all_regs, BB_SKIP(RBX)),
 +
 +      NS_REG("bad_get_user",
 +              all_regs, BB_SKIP(RAX) | BB_SKIP(RDX)),
 +
 +      NS_REG("bad_to_user",
 +              all_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +
 +      NS_REG("ia32_ptregs_common",
 +              all_regs,
 +              0),
 +
 +      NS_REG("copy_user_generic_unrolled",
 +              all_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +
 +      NS_REG("copy_user_generic_string",
 +              all_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RCX)),
 +
 +      NS_REG("irq_return",
 +              all_regs,
 +              0),
 +
 +      /* Finally the cases that pass data in both registers and memory.
 +       */
 +
 +      NS("invalid_TSS", error_code, all_regs, 0, 0, 0),
 +      NS("segment_not_present", error_code, all_regs, 0, 0, 0),
 +      NS("alignment_check", error_code, all_regs, 0, 0, 0),
 +      NS("page_fault", error_code, all_regs, 0, 0, 0),
 +      NS("general_protection", error_code, all_regs, 0, 0, 0),
 +      NS("error_entry", error_code_rax, all_regs, 0, BB_SKIP(RAX), -0x10),
 +      NS("error_exit", error_code_full_pt_regs, no_regs, 0, 0, 0x30),
 +      NS("common_interrupt", error_code, all_regs, 0, 0, -0x8),
 +      NS("save_args", error_code, all_regs, 0, 0, -0x50),
 +      NS("int3", no_memory, all_regs, 0, 0, -0x80),
 +};
 +
 +static const char *bb_spurious[] = {
 +                              /* schedule */
 +      "thread_return",
 +                              /* system_call */
 +      "system_call_after_swapgs",
 +      "system_call_fastpath",
 +      "ret_from_sys_call",
 +      "sysret_check",
 +      "sysret_careful",
 +      "sysret_signal",
 +      "badsys",
 +#ifdef CONFIG_AUDITSYSCALL
 +      "auditsys",
 +      "sysret_audit",
 +#endif
 +      "tracesys",
 +      "int_ret_from_sys_call",
 +      "int_with_check",
 +      "int_careful",
 +      "int_very_careful",
 +      "int_signal",
 +      "int_restore_rest",
 +                              /* common_interrupt */
 +      "ret_from_intr",
 +      "exit_intr",
 +      "retint_with_reschedule",
 +      "retint_check",
 +      "retint_swapgs",
 +      "retint_restore_args",
 +      "restore_args",
 +      "irq_return",
 +      "bad_iret",
 +      "retint_careful",
 +      "retint_signal",
 +#ifdef        CONFIG_PREEMPT
 +      "retint_kernel",
 +#endif        /* CONFIG_PREEMPT */
 +                              /* paranoid_exit */
 +      "paranoid_swapgs",
 +      "paranoid_restore",
 +      "paranoid_userspace",
 +      "paranoid_schedule",
 +                              /* error_entry */
 +      "error_swapgs",
 +      "error_sti",
 +      "error_kernelspace",
 +                              /* nmi */
 +#ifdef CONFIG_TRACE_IRQFLAGS
 +      "nmi_swapgs",
 +      "nmi_restore",
 +      "nmi_userspace",
 +      "nmi_schedule",
 +#endif
 +                              /* load_gs_index */
 +      "gs_change",
 +      "bad_gs",
 +                              /* ia32_sysenter_target */
 +      "sysenter_do_call",
 +      "sysenter_dispatch",
 +      "sysexit_from_sys_call",
 +#ifdef CONFIG_AUDITSYSCALL
 +      "sysenter_auditsys",
 +      "sysexit_audit",
 +#endif
 +      "sysenter_tracesys",
 +                              /* ia32_cstar_target */
 +      "cstar_do_call",
 +      "cstar_dispatch",
 +      "sysretl_from_sys_call",
 +#ifdef CONFIG_AUDITSYSCALL
 +      "cstar_auditsys",
 +      "sysretl_audit",
 +#endif
 +      "cstar_tracesys",
 +                              /* ia32_syscall */
 +      "ia32_do_call",
 +      "ia32_sysret",
 +      "ia32_tracesys",
 +#ifdef        CONFIG_HIBERNATION
 +                              /* restore_image */
 +      "loop",
 +      "done",
 +#endif        /* CONFIG_HIBERNATION */
 +#ifdef        CONFIG_KPROBES
 +                              /* jprobe_return */
 +      "jprobe_return_end",
 +                              /* kretprobe_trampoline_holder */
 +      "kretprobe_trampoline",
 +#endif        /* CONFIG_KPROBES */
 +#ifdef        CONFIG_KEXEC
 +                              /* relocate_kernel */
 +      "relocate_new_kernel",
 +#endif        /* CONFIG_KEXEC */
 +#ifdef        CONFIG_PARAVIRT_XEN
 +                              /* arch/i386/xen/xen-asm.S */
 +      "xen_irq_enable_direct_end",
 +      "xen_irq_disable_direct_end",
 +      "xen_save_fl_direct_end",
 +      "xen_restore_fl_direct_end",
 +      "xen_iret_start_crit",
 +      "iret_restore_end",
 +      "xen_iret_end_crit",
 +      "hyper_iret",
 +#endif        /* CONFIG_PARAVIRT_XEN */
 +};
 +
 +static const char *bb_hardware_handlers[] = {
 +      "system_call",
 +      "common_interrupt",
 +      "error_entry",
 +      "debug",
 +      "nmi",
 +      "int3",
 +      "double_fault",
 +      "stack_segment",
 +      "machine_check",
 +      "kdb_call",
 +};
 +
 +static int
 +bb_hardware_pushed_arch(kdb_machreg_t rsp,
 +                      const struct kdb_activation_record *ar)
 +{
 +      /* x86_64 interrupt stacks are 16 byte aligned and you must get the
 +       * next rsp from stack, it cannot be statically calculated.  Do not
 +       * include the word at rsp, it is pushed by hardware but is treated as
 +       * a normal software return value.
 +       *
 +       * When an IST switch occurs (e.g. NMI) then the saved rsp points to
 +       * another stack entirely.  Assume that the IST stack is 16 byte
 +       * aligned and just return the size of the hardware data on this stack.
 +       * The stack unwind code will take care of the stack switch.
 +       */
 +      kdb_machreg_t saved_rsp = *((kdb_machreg_t *)rsp + 3);
 +      int hardware_pushed = saved_rsp - rsp - KDB_WORD_SIZE;
 +      if (hardware_pushed < 4 * KDB_WORD_SIZE ||
 +          saved_rsp < ar->stack.logical_start ||
 +          saved_rsp >= ar->stack.logical_end)
 +              return 4 * KDB_WORD_SIZE;
 +      else
 +              return hardware_pushed;
 +}
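 +
 +/* Worked example (editor's addition): with KDB_WORD_SIZE == 8, an
 + * interrupt frame with no alignment padding stores saved_rsp ==
 + * rsp + 0x28 at rsp + 3*8, giving hardware_pushed = 0x28 - 8 = 0x20,
 + * i.e. the ss, rsp, rflags and cs words; the rip word at rsp is
 + * excluded because it is treated as a software return value.  A
 + * saved_rsp outside the logical stack (an IST switch, e.g. NMI)
 + * falls back to 4 * KDB_WORD_SIZE.
 + */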
 +
 +static void
 +bb_start_block0(void)
 +{
 +      bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
 +      bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
 +      bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
 +      bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
 +      bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
 +      bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
 +      bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
 +      bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
 +      bb_reg_code_set_value(BBRG_R8, BBRG_R8);
 +      bb_reg_code_set_value(BBRG_R9, BBRG_R9);
 +      bb_reg_code_set_value(BBRG_R10, BBRG_R10);
 +      bb_reg_code_set_value(BBRG_R11, BBRG_R11);
 +      bb_reg_code_set_value(BBRG_R12, BBRG_R12);
 +      bb_reg_code_set_value(BBRG_R13, BBRG_R13);
 +      bb_reg_code_set_value(BBRG_R14, BBRG_R14);
 +      bb_reg_code_set_value(BBRG_R15, BBRG_R15);
 +}
 +
 +/* x86_64 does not have a special case for __switch_to */
 +
 +static void
 +bb_fixup_switch_to(char *p)
 +{
 +}
 +
 +static int
 +bb_asmlinkage_arch(void)
 +{
 +      return strncmp(bb_func_name, "__down", 6) == 0 ||
 +             strncmp(bb_func_name, "__up", 4) == 0 ||
 +             strncmp(bb_func_name, "stub_", 5) == 0 ||
 +             strcmp(bb_func_name, "ret_from_fork") == 0 ||
 +             strcmp(bb_func_name, "ptregscall_common") == 0;
 +}
 +
 +#else /* !CONFIG_X86_64 */
 +
 +/* Registers that can be used to pass parameters, in the order that parameters
 + * are passed.
 + */
 +
 +static const enum bb_reg_code
 +bb_param_reg[] = {
 +      BBRG_RAX,
 +      BBRG_RDX,
 +      BBRG_RCX,
 +};
 +
 +static const enum bb_reg_code
 +bb_preserved_reg[] = {
 +      BBRG_RBX,
 +      BBRG_RBP,
 +      BBRG_RSP,
 +      BBRG_RSI,
 +      BBRG_RDI,
 +};
 +
 +static const struct bb_mem_contains full_pt_regs[] = {
 +      { 0x18, BBRG_RAX },
 +      { 0x14, BBRG_RBP },
 +      { 0x10, BBRG_RDI },
 +      { 0x0c, BBRG_RSI },
 +      { 0x08, BBRG_RDX },
 +      { 0x04, BBRG_RCX },
 +      { 0x00, BBRG_RBX },
 +};
 +static const struct bb_mem_contains no_memory[] = {
 +};
 +/* Hardware has already pushed an error_code on the stack.  Use undefined just
 + * to set the initial stack offset.
 + */
 +static const struct bb_mem_contains error_code[] = {
 +      { 0x0, BBRG_UNDEFINED },
 +};
 +/* rbx already pushed */
 +static const struct bb_mem_contains rbx_pushed[] = {
 +      { 0x0, BBRG_RBX },
 +};
 +#ifdef        CONFIG_MATH_EMULATION
 +static const struct bb_mem_contains mem_fpu_reg_round[] = {
 +      { 0xc, BBRG_RBP },
 +      { 0x8, BBRG_RSI },
 +      { 0x4, BBRG_RDI },
 +      { 0x0, BBRG_RBX },
 +};
 +#endif        /* CONFIG_MATH_EMULATION */
 +
 +static const struct bb_reg_contains all_regs[] = {
 +      [BBRG_RAX] = { BBRG_RAX, 0 },
 +      [BBRG_RBX] = { BBRG_RBX, 0 },
 +      [BBRG_RCX] = { BBRG_RCX, 0 },
 +      [BBRG_RDX] = { BBRG_RDX, 0 },
 +      [BBRG_RDI] = { BBRG_RDI, 0 },
 +      [BBRG_RSI] = { BBRG_RSI, 0 },
 +      [BBRG_RBP] = { BBRG_RBP, 0 },
 +      [BBRG_RSP] = { BBRG_OSP, 0 },
 +};
 +static const struct bb_reg_contains no_regs[] = {
 +};
 +#ifdef        CONFIG_MATH_EMULATION
 +static const struct bb_reg_contains reg_fpu_reg_round[] = {
 +      [BBRG_RBP] = { BBRG_OSP, -0x4 },
 +      [BBRG_RSP] = { BBRG_OSP, -0x10 },
 +};
 +#endif        /* CONFIG_MATH_EMULATION */
 +
 +static struct bb_name_state bb_special_cases[] = {
 +
 +      /* First the cases that pass data only in memory.  We do not check any
 +       * register state for these cases.
 +       */
 +
 +      /* Simple cases, no exceptions */
 +      NS_MEM("check_userspace", full_pt_regs, 0),
 +      NS_MEM("device_not_available_emulate", full_pt_regs, 0),
 +      NS_MEM("ldt_ss", full_pt_regs, 0),
 +      NS_MEM("no_singlestep", full_pt_regs, 0),
 +      NS_MEM("restore_all", full_pt_regs, 0),
 +      NS_MEM("restore_nocheck", full_pt_regs, 0),
 +      NS_MEM("restore_nocheck_notrace", full_pt_regs, 0),
 +      NS_MEM("ret_from_exception", full_pt_regs, 0),
 +      NS_MEM("ret_from_fork", full_pt_regs, 0),
 +      NS_MEM("ret_from_intr", full_pt_regs, 0),
 +      NS_MEM("work_notifysig", full_pt_regs, 0),
 +      NS_MEM("work_pending", full_pt_regs, 0),
 +
 +#ifdef        CONFIG_PREEMPT
 +      NS_MEM("resume_kernel", full_pt_regs, 0),
 +#endif        /* CONFIG_PREEMPT */
 +
 +      NS_MEM("common_interrupt", error_code, 0),
 +      NS_MEM("error_code", error_code, 0),
 +
 +      NS_MEM("bad_put_user", rbx_pushed, 0),
 +
 +      NS_MEM_FROM("resume_userspace", "syscall_badsys",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM_FROM("resume_userspace", "syscall_fault",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM_FROM("resume_userspace", "syscall_trace_entry",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      /* Too difficult to trace through the various vm86 functions for now.
 +       * They are C functions that start off with some memory state, fiddle
 +       * the registers then jmp directly to resume_userspace.  For the
 +       * moment, just assume that they are valid and do no checks.
 +       */
 +      NS_FROM("resume_userspace", "do_int",
 +              no_memory, no_regs, 0, 0, 0),
 +      NS_FROM("resume_userspace", "do_sys_vm86",
 +              no_memory, no_regs, 0, 0, 0),
 +      NS_FROM("resume_userspace", "handle_vm86_fault",
 +              no_memory, no_regs, 0, 0, 0),
 +      NS_FROM("resume_userspace", "handle_vm86_trap",
 +              no_memory, no_regs, 0, 0, 0),
 +      NS_MEM("resume_userspace", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("syscall_badsys", "ia32_sysenter_target",
 +              full_pt_regs, BB_SKIP(RBP)),
 +      NS_MEM("syscall_badsys", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("syscall_call", "syscall_trace_entry",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM("syscall_call", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("syscall_exit", "syscall_trace_entry",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM("syscall_exit", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("syscall_exit_work", "ia32_sysenter_target",
 +              full_pt_regs, BB_SKIP(RAX) | BB_SKIP(RBP)),
 +      NS_MEM_FROM("syscall_exit_work", "system_call",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM("syscall_exit_work", full_pt_regs, 0),
 +
 +      NS_MEM_FROM("syscall_trace_entry", "ia32_sysenter_target",
 +              full_pt_regs, BB_SKIP(RBP)),
 +      NS_MEM_FROM("syscall_trace_entry", "system_call",
 +              full_pt_regs, BB_SKIP(RAX)),
 +      NS_MEM("syscall_trace_entry", full_pt_regs, 0),
 +
 +      /* Now the cases that pass data in registers.  We do not check any
 +       * memory state for these cases.
 +       */
 +
 +      NS_REG("syscall_fault", all_regs, 0),
 +
 +      NS_REG("bad_get_user", all_regs,
 +              BB_SKIP(RAX) | BB_SKIP(RDX)),
 +
 +      /* Finally the cases that pass data in both registers and memory.
 +      */
 +
 +      /* This entry is redundant now because bb_fixup_switch_to() hides the
 +       * jmp __switch_to case, however the entry is left here as
 +       * documentation.
 +       *
 +       * NS("__switch_to", no_memory, no_regs, 0, 0, 0),
 +       */
 +
 +      NS("iret_exc", no_memory, all_regs, 0, 0, 0x20),
 +
 +#ifdef        CONFIG_MATH_EMULATION
 +      NS("fpu_reg_round", mem_fpu_reg_round, reg_fpu_reg_round, 0, 0, 0),
 +#endif        /* CONFIG_MATH_EMULATION */
 +};
 +
 +static const char *bb_spurious[] = {
 +                              /* ret_from_exception */
 +      "ret_from_intr",
 +      "check_userspace",
 +      "resume_userspace",
 +                              /* resume_kernel */
 +#ifdef        CONFIG_PREEMPT
 +      "need_resched",
 +#endif        /* CONFIG_PREEMPT */
 +                              /* ia32_sysenter_target */
 +      "sysenter_past_esp",
 +                              /* system_call */
 +      "no_singlestep",
 +      "syscall_call",
 +      "syscall_exit",
 +      "restore_all",
 +      "restore_nocheck",
 +      "restore_nocheck_notrace",
 +      "ldt_ss",
 +      /* do not include iret_exc, it is in a .fixup section */
 +                              /* work_pending */
 +      "work_resched",
 +      "work_notifysig",
 +#ifdef        CONFIG_VM86
 +      "work_notifysig_v86",
 +#endif        /* CONFIG_VM86 */
 +                              /* page_fault */
 +      "error_code",
 +                              /* device_not_available */
 +      "device_not_available_emulate",
 +                              /* debug */
 +      "debug_esp_fix_insn",
 +      "debug_stack_correct",
 +                              /* nmi */
 +      "nmi_stack_correct",
 +      "nmi_stack_fixup",
 +      "nmi_debug_stack_check",
 +      "nmi_espfix_stack",
 +#ifdef        CONFIG_HIBERNATION
 +                              /* restore_image */
 +      "copy_loop",
 +      "done",
 +#endif        /* CONFIG_HIBERNATION */
 +#ifdef        CONFIG_KPROBES
 +                              /* jprobe_return */
 +      "jprobe_return_end",
 +#endif        /* CONFIG_KPROBES */
 +#ifdef        CONFIG_KEXEC
 +                              /* relocate_kernel */
 +      "relocate_new_kernel",
 +#endif        /* CONFIG_KEXEC */
 +#ifdef        CONFIG_MATH_EMULATION
 +                              /* assorted *.S files in arch/i386/math_emu */
 +      "Denorm_done",
 +      "Denorm_shift_more_than_32",
 +      "Denorm_shift_more_than_63",
 +      "Denorm_shift_more_than_64",
 +      "Do_unmasked_underflow",
 +      "Exp_not_underflow",
 +      "fpu_Arith_exit",
 +      "fpu_reg_round",
 +      "fpu_reg_round_signed_special_exit",
 +      "fpu_reg_round_special_exit",
 +      "L_accum_done",
 +      "L_accum_loaded",
 +      "L_accum_loop",
 +      "L_arg1_larger",
 +      "L_bugged",
 +      "L_bugged_1",
 +      "L_bugged_2",
 +      "L_bugged_3",
 +      "L_bugged_4",
 +      "L_bugged_denorm_486",
 +      "L_bugged_round24",
 +      "L_bugged_round53",
 +      "L_bugged_round64",
 +      "LCheck_24_round_up",
 +      "LCheck_53_round_up",
 +      "LCheck_Round_Overflow",
 +      "LCheck_truncate_24",
 +      "LCheck_truncate_53",
 +      "LCheck_truncate_64",
 +      "LDenormal_adj_exponent",
 +      "L_deNormalised",
 +      "LDo_24_round_up",
 +      "LDo_2nd_32_bits",
 +      "LDo_2nd_div",
 +      "LDo_3rd_32_bits",
 +      "LDo_3rd_div",
 +      "LDo_53_round_up",
 +      "LDo_64_round_up",
 +      "L_done",
 +      "LDo_truncate_24",
 +      "LDown_24",
 +      "LDown_53",
 +      "LDown_64",
 +      "L_entry_bugged",
 +      "L_error_exit",
 +      "L_exactly_32",
 +      "L_exception_exit",
 +      "L_exit",
 +      "L_exit_nuo_valid",
 +      "L_exit_nuo_zero",
 +      "L_exit_valid",
 +      "L_extent_zero",
 +      "LFirst_div_done",
 +      "LFirst_div_not_1",
 +      "L_Full_Division",
 +      "LGreater_Half_24",
 +      "LGreater_Half_53",
 +      "LGreater_than_1",
 +      "LLess_than_1",
 +      "L_Make_denorm",
 +      "L_more_31_no_low",
 +      "L_more_63_no_low",
 +      "L_more_than_31",
 +      "L_more_than_63",
 +      "L_more_than_64",
 +      "L_more_than_65",
 +      "L_more_than_95",
 +      "L_must_be_zero",
 +      "L_n_exit",
 +      "L_no_adjust",
 +      "L_no_bit_lost",
 +      "L_no_overflow",
 +      "L_no_precision_loss",
 +      "L_Normalised",
 +      "L_norm_bugged",
 +      "L_n_shift_1",
 +      "L_nuo_shift_1",
 +      "L_overflow",
 +      "L_precision_lost_down",
 +      "L_precision_lost_up",
 +      "LPrevent_2nd_overflow",
 +      "LPrevent_3rd_overflow",
 +      "LPseudoDenormal",
 +      "L_Re_normalise",
 +      "LResult_Normalised",
 +      "L_round",
 +      "LRound_large",
 +      "LRound_nearest_24",
 +      "LRound_nearest_53",
 +      "LRound_nearest_64",
 +      "LRound_not_small",
 +      "LRound_ovfl",
 +      "LRound_precision",
 +      "LRound_prep",
 +      "L_round_the_result",
 +      "LRound_To_24",
 +      "LRound_To_53",
 +      "LRound_To_64",
 +      "LSecond_div_done",
 +      "LSecond_div_not_1",
 +      "L_shift_1",
 +      "L_shift_32",
 +      "L_shift_65_nc",
 +      "L_shift_done",
 +      "Ls_less_than_32",
 +      "Ls_more_than_63",
 +      "Ls_more_than_95",
 +      "L_Store_significand",
 +      "L_subtr",
 +      "LTest_over",
 +      "LTruncate_53",
 +      "LTruncate_64",
 +      "L_underflow",
 +      "L_underflow_to_zero",
 +      "LUp_24",
 +      "LUp_53",
 +      "LUp_64",
 +      "L_zero",
 +      "Normalise_result",
 +      "Signal_underflow",
 +      "sqrt_arg_ge_2",
 +      "sqrt_get_more_precision",
 +      "sqrt_more_prec_large",
 +      "sqrt_more_prec_ok",
 +      "sqrt_more_prec_small",
 +      "sqrt_near_exact",
 +      "sqrt_near_exact_large",
 +      "sqrt_near_exact_ok",
 +      "sqrt_near_exact_small",
 +      "sqrt_near_exact_x",
 +      "sqrt_prelim_no_adjust",
 +      "sqrt_round_result",
 +      "sqrt_stage_2_done",
 +      "sqrt_stage_2_error",
 +      "sqrt_stage_2_finish",
 +      "sqrt_stage_2_positive",
 +      "sqrt_stage_3_error",
 +      "sqrt_stage_3_finished",
 +      "sqrt_stage_3_no_error",
 +      "sqrt_stage_3_positive",
 +      "Unmasked_underflow",
 +      "xExp_not_underflow",
 +#endif        /* CONFIG_MATH_EMULATION */
 +};
 +
 +static const char *bb_hardware_handlers[] = {
 +      "ret_from_exception",
 +      "system_call",
 +      "work_pending",
 +      "syscall_fault",
 +      "page_fault",
 +      "coprocessor_error",
 +      "simd_coprocessor_error",
 +      "device_not_available",
 +      "debug",
 +      "nmi",
 +      "int3",
 +      "overflow",
 +      "bounds",
 +      "invalid_op",
 +      "coprocessor_segment_overrun",
 +      "invalid_TSS",
 +      "segment_not_present",
 +      "stack_segment",
 +      "general_protection",
 +      "alignment_check",
 +      "kdb_call",
 +      "divide_error",
 +      "machine_check",
 +      "spurious_interrupt_bug",
 +};
 +
 +static int
 +bb_hardware_pushed_arch(kdb_machreg_t rsp,
 +                      const struct kdb_activation_record *ar)
 +{
 +      return (2 * KDB_WORD_SIZE);
 +}
 +
 +static void
 +bb_start_block0(void)
 +{
 +      bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
 +      bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
 +      bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
 +      bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
 +      bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
 +      bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
 +      bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
 +      bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
 +}
 +
 +/* The i386 code that switches stack in a context switch is an extremely
 + * special case.  It saves the rip pointing to a label that is not otherwise
 + * referenced, saves the current rsp then pushes a word.  The magic code that
 + * resumes the new task picks up the saved rip and rsp, effectively referencing
 + * a label that otherwise is not used and ignoring the pushed word.
 + *
 + * The simplest way to handle this very strange case is to recognise jmp
 + * address <__switch_to> and treat it as a popfl instruction.  This avoids
 + * terminating the block on this jmp and removes one word from the stack state,
 + * which is the end effect of all the magic code.
 + *
 + * Called with the instruction line, starting after the first ':'.
 + */
 +
 +static void
 +bb_fixup_switch_to(char *p)
 +{
 +      char *p1 = p;
 +      p += strspn(p, " \t");          /* start of instruction */
 +      if (strncmp(p, "jmp", 3))
 +              return;
 +      p += strcspn(p, " \t");         /* end of instruction */
 +      p += strspn(p, " \t");          /* start of address */
 +      p += strcspn(p, " \t");         /* end of address */
 +      p += strspn(p, " \t");          /* start of comment */
 +      if (strcmp(p, "<__switch_to>") == 0)
 +              strcpy(p1, "popfl");
 +}
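 +
 +/* Illustrative before/after (editor's addition; the address below is
 + * made up): given the text after the first ':' of a disassembly line
 + *
 + *   jmp    c011d5c0 <__switch_to>
 + *
 + * the strcpy above rewrites the buffer in place to "popfl", so the
 + * decoder neither ends the block on the jmp nor follows it, and one
 + * word of stack state is dropped, matching the real context switch.
 + */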
 +
 +static int
 +bb_asmlinkage_arch(void)
 +{
 +      return strcmp(bb_func_name, "ret_from_exception") == 0 ||
 +             strcmp(bb_func_name, "syscall_trace_entry") == 0;
 +}
 +
 +#endif        /* CONFIG_X86_64 */
 +
 +
 +/*============================================================================*/
 +/*                                                                            */
 +/* Common code and data.                                                      */
 +/*                                                                            */
 +/*============================================================================*/
 +
 +
 +/* Tracking registers by decoding the instructions is quite a bit harder than
 + * doing the same tracking using compiler generated information.  Register
 + * contents can remain in the same register, they can be copied to other
 + * registers, they can be stored on stack or they can be modified/overwritten.
 + * At any one time, there are 0 or more copies of the original value that was
 + * supplied in each register on input to the current function.  If a register
 + * exists in multiple places, one copy of that register is the master version,
 + * the others are temporary copies which may or may not be destroyed before the
 + * end of the function.
 + *
 + * The compiler knows which copy of a register is the master and which are
 + * temporary copies, which makes it relatively easy to track register contents
 + * as they are saved and restored.  Without that compiler based knowledge, this
 + * code has to track _every_ possible copy of each register, simply because we
 + * do not know which is the master copy and which are temporary copies which
 + * may be destroyed later.
 + *
 + * It gets worse: registers that contain parameters can be copied to other
 + * registers which are then saved on stack in a lower level function.  Also the
 + * stack pointer may be held in multiple registers (typically RSP and RBP)
 + * which contain different offsets from the base of the stack on entry to this
 + * function.  All of which means that we have to track _all_ register
 + * movements, or at least as much as possible.
 + *
 + * Start with the basic block that contains the start of the function, by
 + * definition all registers contain their initial value.  Track each
 + * instruction's effect on register contents, this includes reading from a
 + * parameter register before any write to that register, IOW the register
 + * really does contain a parameter.  The register state is represented by a
 + * dynamically sized array with each entry containing :-
 + *
 + *   Register name
 + *   Location it is copied to (another register or stack + offset)
 + *
 + * Besides the register tracking array, we track which parameter registers are
 + * read before being written, to determine how many parameters are passed in
 + * registers.  We also track which registers contain stack pointers, including
 + * their offset from the original stack pointer on entry to the function.
 + *
 + * At each exit from the current basic block (via JMP instruction or drop
 + * through), the register state is cloned to form the state on input to the
 + * target basic block and the target is marked for processing using this state.
 + * When there are multiple ways to enter a basic block (e.g. several JMP
 + * instructions referencing the same target) then there will be multiple sets
 + * of register state to form the "input" for that basic block, there is no
 + * guarantee that all paths to that block will have the same register state.
 + *
 + * As each target block is processed, all the known sets of register state are
 + * merged to form a suitable subset of the state which agrees with all the
 + * inputs.  The most common case is where one path to this block copies a
 + * register to another register but another path does not, therefore the copy
 + * is only a temporary and should not be propagated into this block.
 + *
 + * If the target block already has an input state from the current transfer
 + * point and the new input state is identical to the previous input state then
 + * we have reached a steady state for the arc from the current location to the
 + * target block.  Therefore there is no need to process the target block again.
 + *
 + * The steps of "process a block, create state for target block(s), pick a new
 + * target block, merge state for target block, process target block" will
 + * continue until all the state changes have propagated all the way down the
 + * basic block tree, including round any cycles in the tree.  The merge step
 + * only deletes tracking entries from the input state(s), it never adds a
 + * tracking entry.  Therefore the overall algorithm is guaranteed to converge
 + * to a steady state, the worst possible case is that every tracking entry into
 + * a block is deleted, which will result in an empty output state.
 + *
 + * As each instruction is decoded, it is checked to see if this is the point at
 + * which execution left this function.  This can be a call to another function
 + * (actually the return address to this function) or is the instruction which
 + * was about to be executed when an interrupt occurred (including an oops).
 + * Save the register state at this point.
 + *
 + * We always know what the registers contain when execution left this function.
 + * For an interrupt, the registers are in struct pt_regs.  For a call to
 + * another function, we have already deduced the register state on entry to the
 + * other function by unwinding to the start of that function.  Given the
 + * register state on exit from this function plus the known register contents
 + * on entry to the next function, we can determine the stack pointer value on
 + * input to this function.  That in turn lets us calculate the address of input
 + * registers that have been stored on stack, giving us the input parameters.
 + * Finally the stack pointer gives us the return address, which is the exit
 + * point from the calling function; repeat the unwind process on that function.
 + *
 + * The data that tracks which registers contain input parameters is function
 + * global, not local to any basic block.  To determine which input registers
 + * contain parameters, we have to decode the entire function.  Otherwise an
 + * exit early in the function might not have read any parameters yet.
 + */
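 +
 +/* Illustrative walk-through (the addresses, registers and offsets here are
 + * invented for this example; they are not part of the algorithm):
 + *
 + *   push   %rbp            rsp = osp-0x8, *(osp-0x8) = rbp
 + *   mov    %rsp,%rbp       rbp = osp-0x8
 + *   sub    $0x10,%rsp      rsp = osp-0x18
 + *   mov    %rdi,-0x8(%rbp)                *(osp-0x10) = rdi
 + *
 + * After these instructions the state records that rbp and rsp both hold
 + * known offsets from the original stack pointer (osp), that parameter
 + * register rdi was read before being written (so the function takes at least
 + * one register parameter) and that rdi's input value can be recovered from
 + * the word at osp-0x10.
 + */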
 +
 +/* Record memory contents in terms of the values that were passed to this
 + * function, IOW track which memory locations contain an input value.  A memory
 + * location's contents can be undefined, can contain an input register value,
 + * or can contain an offset from the original stack pointer.
 + *
 + * This structure is used to record register contents that have been stored in
 + * memory.  Location (BBRG_OSP + 'offset_address') contains the input value
 + * from register 'value'.  When 'value' is BBRG_OSP then offset_value contains
 + * the offset from the original stack pointer that was stored in this memory
 + * location.  When 'value' is not BBRG_OSP then the memory location contains
 + * the original contents of an input register and offset_value is ignored.
 + *
 + * An input register 'value' can be stored in more than one register and/or in
 + * more than one memory location.
 + */
 +
 +struct bb_memory_contains
 +{
 +      short offset_address;
 +      enum bb_reg_code value: 8;
 +      short offset_value;
 +};
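 +
 +/* For example (hypothetical state): after "push %rbp" as the first
 + * instruction of a function, the slot would be { .offset_address = -8,
 + * .value = BBRG_RBP }.  If rbp itself contained osp-8 at the time of a later
 + * "push %rbp", the new slot would instead be { .offset_address = -16,
 + * .value = BBRG_OSP, .offset_value = -8 }.
 + */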
 +
 +/* Track the register state in each basic block. */
 +
 +struct bb_reg_state
 +{
 +      /* Indexed by register value 'reg - BBRG_RAX' */
 +      struct bb_reg_contains contains[KDB_INT_REGISTERS];
 +      int ref_count;
 +      int mem_count;
 +      /* dynamic size for memory locations, see mem_count */
 +      struct bb_memory_contains memory[0];
 +};
 +
 +static struct bb_reg_state *bb_reg_state, *bb_exit_state;
 +static int bb_reg_state_max, bb_reg_params, bb_memory_params;
 +
 +struct bb_actual
 +{
 +      bfd_vma value;
 +      int valid;
 +};
 +
 +/* Contains the actual hex value of a register, plus a valid bit.  Indexed by
 + * register value 'reg - BBRG_RAX'
 + */
 +static struct bb_actual bb_actual[KDB_INT_REGISTERS];
 +
 +static bfd_vma bb_func_start, bb_func_end;
 +static bfd_vma bb_common_interrupt, bb_error_entry, bb_ret_from_intr,
 +             bb_thread_return, bb_sync_regs, bb_save_v86_state,
 +             bb__sched_text_start, bb__sched_text_end,
 +             bb_save_args, bb_save_rest, bb_save_paranoid;
 +
 +/* Record jmp instructions, both conditional and unconditional.  These form the
 + * arcs between the basic blocks.  This is also used to record the state when
 + * one block drops through into the next.
 + *
 + * A bb can have multiple associated bb_jmp entries, one for each jcc
 + * instruction plus at most one bb_jmp for the drop through case.  If a bb
 + * drops through to the next bb then the drop through bb_jmp entry will be the
 + * last entry in the set of bb_jmp's that are associated with the bb.  This is
 + * enforced by the fact that jcc entries are added during the disassembly phase
 + * of pass 1, the drop through entries are added near the end of pass 1.
 + *
 + * At address 'from' in this block, we have a jump to address 'to'.  The
 + * register state at 'from' is copied to the target block.
 + */
 +
 +struct bb_jmp
 +{
 +      bfd_vma from;
 +      bfd_vma to;
 +      struct bb_reg_state *state;
 +      unsigned int drop_through: 1;
 +};
 +
 +struct bb
 +{
 +      bfd_vma start;
 +      /* The end address of a basic block is sloppy.  It can be the first
 +       * byte of the last instruction in the block or it can be the last byte
 +       * of the block.
 +       */
 +      bfd_vma end;
 +      unsigned int changed: 1;
 +      unsigned int drop_through: 1;
 +};
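 +
 +/* A schematic example (addresses invented) of the bb and bb_jmp data for a
 + * function with one conditional branch:
 + *
 + *   0x100:  cmp  $0x0,%rdi
 + *   0x104:  jz   0x108          jcc arc: bb_jmp {from 0x104, to 0x108}
 + *   0x106:  xor  %eax,%eax
 + *   0x108:  ret                 ret ends the block
 + *
 + * Pass 1 produces bb[0] = {start 0x100, end 0x107, drop_through 1} plus a
 + * drop through arc bb_jmp {from 0x107, to 0x108, drop_through 1}, and
 + * bb[1] = {start 0x108, end 0x108}, reachable both via the jz arc and via
 + * the drop through.
 + */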
 +
 +static struct bb **bb_list, *bb_curr;
 +static int bb_max, bb_count;
 +
 +static struct bb_jmp *bb_jmp_list;
 +static int bb_jmp_max, bb_jmp_count;
 +
 +/* Add a new bb entry to the list.  This does an insert sort. */
 +
 +static struct bb *
 +bb_new(bfd_vma order)
 +{
 +      int i, j;
 +      struct bb *bb, *p;
 +      if (bb_giveup)
 +              return NULL;
 +      if (bb_count == bb_max) {
 +              struct bb **bb_list_new;
 +              bb_max += 10;
 +              bb_list_new = debug_kmalloc(bb_max*sizeof(*bb_list_new),
 +                                          GFP_ATOMIC);
 +              if (!bb_list_new) {
 +                      kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +                      bb_giveup = 1;
 +                      return NULL;
 +              }
 +              memcpy(bb_list_new, bb_list, bb_count*sizeof(*bb_list));
 +              debug_kfree(bb_list);
 +              bb_list = bb_list_new;
 +      }
 +      bb = debug_kmalloc(sizeof(*bb), GFP_ATOMIC);
 +      if (!bb) {
 +              kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +              bb_giveup = 1;
 +              return NULL;
 +      }
 +      memset(bb, 0, sizeof(*bb));
 +      for (i = 0; i < bb_count; ++i) {
 +              p = bb_list[i];
 +              if ((p->start && p->start > order) ||
 +                  (p->end && p->end > order))
 +                      break;
 +      }
 +      for (j = bb_count-1; j >= i; --j)
 +              bb_list[j+1] = bb_list[j];
 +      bb_list[i] = bb;
 +      ++bb_count;
 +      return bb;
 +}
 +
 +/* Add a new bb_jmp entry to the list.  This list is not sorted. */
 +
 +static struct bb_jmp *
 +bb_jmp_new(bfd_vma from, bfd_vma to, unsigned int drop_through)
 +{
 +      struct bb_jmp *bb_jmp;
 +      if (bb_giveup)
 +              return NULL;
 +      if (bb_jmp_count == bb_jmp_max) {
 +              struct bb_jmp *bb_jmp_list_new;
 +              bb_jmp_max += 10;
 +              bb_jmp_list_new =
 +                      debug_kmalloc(bb_jmp_max*sizeof(*bb_jmp_list_new),
 +                                    GFP_ATOMIC);
 +              if (!bb_jmp_list_new) {
 +                      kdb_printf("\n\n%s: out of debug_kmalloc\n",
 +                                 __FUNCTION__);
 +                      bb_giveup = 1;
 +                      return NULL;
 +              }
 +              memcpy(bb_jmp_list_new, bb_jmp_list,
 +                     bb_jmp_count*sizeof(*bb_jmp_list));
 +              debug_kfree(bb_jmp_list);
 +              bb_jmp_list = bb_jmp_list_new;
 +      }
 +      bb_jmp = bb_jmp_list + bb_jmp_count++;
 +      bb_jmp->from = from;
 +      bb_jmp->to = to;
 +      bb_jmp->drop_through = drop_through;
 +      bb_jmp->state = NULL;
 +      return bb_jmp;
 +}
 +
 +static void
 +bb_delete(int i)
 +{
 +      struct bb *bb = bb_list[i];
 +      memcpy(bb_list+i, bb_list+i+1, (bb_count-i-1)*sizeof(*bb_list));
 +      bb_list[--bb_count] = NULL;
 +      debug_kfree(bb);
 +}
 +
 +static struct bb *
 +bb_add(bfd_vma start, bfd_vma end)
 +{
 +      int i;
 +      struct bb *bb;
 +      /* Ignore basic blocks whose start address is outside the current
 +       * function.  These occur for call instructions and for tail recursion.
 +       */
 +      if (start &&
 +          (start < bb_func_start || start >= bb_func_end))
 +                     return NULL;
 +      for (i = 0; i < bb_count; ++i) {
 +              bb = bb_list[i];
 +              if ((start && bb->start == start) ||
 +                  (end && bb->end == end))
 +                      return bb;
 +      }
 +      bb = bb_new(start ? start : end);
 +      if (bb) {
 +              bb->start = start;
 +              bb->end = end;
 +      }
 +      return bb;
 +}
 +
 +static struct bb_jmp *
 +bb_jmp_add(bfd_vma from, bfd_vma to, unsigned int drop_through)
 +{
 +      int i;
 +      struct bb_jmp *bb_jmp;
 +      for (i = 0, bb_jmp = bb_jmp_list; i < bb_jmp_count; ++i, ++bb_jmp) {
 +              if (bb_jmp->from == from &&
 +                  bb_jmp->to == to &&
 +                  bb_jmp->drop_through == drop_through)
 +                      return bb_jmp;
 +      }
 +      bb_jmp = bb_jmp_new(from, to, drop_through);
 +      return bb_jmp;
 +}
 +
 +static unsigned long bb_curr_addr, bb_exit_addr;
 +static char bb_buffer[256];   /* A bit too big to go on stack */
 +
 +/* Computed jmp uses 'jmp *addr(,%reg,[48])' where 'addr' is the start of a
 + * table of addresses that point into the current function.  Walk the table and
 + * generate bb starts for each target address plus a bb_jmp from this address
 + * to the target address.
 + *
 + * Only called for 'jmp' instructions, with the pointer starting at 'jmp'.
 + */
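 +
 +/* For example (schematic address), disassembly such as
 + *
 + *   jmp    *0xffffffff8040e380(,%rax,8)
 + *
 + * parses as table 0xffffffff8040e380 with scale 8 (KDB_WORD_SIZE on x86_64).
 + * Each word in the table that points inside the current function becomes a
 + * basic block start, with an arc from this jmp to that target.
 + */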
 +
 +static void
 +bb_pass1_computed_jmp(char *p)
 +{
 +      unsigned long table, scale;
 +      kdb_machreg_t addr;
 +      struct bb *bb;
 +      p += strcspn(p, " \t");         /* end of instruction */
 +      p += strspn(p, " \t");          /* start of address */
 +      if (*p++ != '*')
 +              return;
 +      table = simple_strtoul(p, &p, 0);
 +      if (strncmp(p, "(,%", 3) != 0)
 +              return;
 +      p += 3;
 +      p += strcspn(p, ",");           /* end of reg */
 +      if (*p++ != ',')
 +              return;
 +      scale = simple_strtoul(p, &p, 0);
 +      if (scale != KDB_WORD_SIZE || strcmp(p, ")"))
 +              return;
 +      while (!bb_giveup) {
 +              if (kdb_getword(&addr, table, sizeof(addr)))
 +                      return;
 +              if (addr < bb_func_start || addr >= bb_func_end)
 +                      return;
 +              bb = bb_add(addr, 0);
 +              if (bb)
 +                      bb_jmp_add(bb_curr_addr, addr, 0);
 +              table += KDB_WORD_SIZE;
 +      }
 +}
 +
 +/* Pass 1, identify the start and end of each basic block */
 +
 +static int
 +bb_dis_pass1(PTR file, const char *fmt, ...)
 +{
 +      int l = strlen(bb_buffer);
 +      char *p;
 +      va_list ap;
 +      va_start(ap, fmt);
 +      vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
 +      va_end(ap);
 +      if ((p = strchr(bb_buffer, '\n'))) {
 +              *p = '\0';
 +              /* ret[q], iret[q], sysexit, sysret, ud2a or jmp[q] end a
 +               * block, as does a call to a function marked noret.
 +               */
 +              p = bb_buffer;
 +              p += strcspn(p, ":");
 +              if (*p++ == ':') {
 +                      bb_fixup_switch_to(p);
 +                      p += strspn(p, " \t");  /* start of instruction */
 +                      if (strncmp(p, "ret", 3) == 0 ||
 +                          strncmp(p, "iret", 4) == 0 ||
 +                          strncmp(p, "sysexit", 7) == 0 ||
 +                          strncmp(p, "sysret", 6) == 0 ||
 +                          strncmp(p, "ud2a", 4) == 0 ||
 +                          strncmp(p, "jmp", 3) == 0) {
 +                              if (strncmp(p, "jmp", 3) == 0)
 +                                      bb_pass1_computed_jmp(p);
 +                              bb_add(0, bb_curr_addr);
 +                      }
 +                      if (strncmp(p, "call", 4) == 0) {
 +                              strsep(&p, " \t");      /* end of opcode */
 +                              if (p)
 +                                      p += strspn(p, " \t");  /* operand(s) */
 +                              if (p && strchr(p, '<')) {
 +                                      p = strchr(p, '<') + 1;
 +                                      *strchr(p, '>') = '\0';
 +                                      if (bb_noret(p))
 +                                              bb_add(0, bb_curr_addr);
 +                              }
 +                      }
 +              }
 +              bb_buffer[0] = '\0';
 +      }
 +      return 0;
 +}
 +
 +static void
 +bb_printaddr_pass1(bfd_vma addr, disassemble_info *dip)
 +{
 +      kdb_symtab_t symtab;
 +      unsigned int offset;
 +      struct bb *bb;
 +      /* disasm only calls the printaddr routine for the target of jmp, loop
 +       * or call instructions, i.e. the start of a basic block.  call is
 +       * ignored by bb_add because the target address is outside the current
 +       * function.
 +       */
 +      dip->fprintf_func(dip->stream, "0x%lx", addr);
 +      kdbnearsym(addr, &symtab);
 +      if (symtab.sym_name) {
 +              dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
 +              if ((offset = addr - symtab.sym_start))
 +                      dip->fprintf_func(dip->stream, "+0x%x", offset);
 +              dip->fprintf_func(dip->stream, ">");
 +      }
 +      bb = bb_add(addr, 0);
 +      if (bb)
 +              bb_jmp_add(bb_curr_addr, addr, 0);
 +}
 +
 +static void
 +bb_pass1(void)
 +{
 +      int i;
 +      unsigned long addr;
 +      struct bb *bb;
 +      struct bb_jmp *bb_jmp;
 +
 +      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +              kdb_printf("%s: func_name %s func_start " kdb_bfd_vma_fmt0
 +                         " func_end " kdb_bfd_vma_fmt0 "\n",
 +                         __FUNCTION__,
 +                         bb_func_name,
 +                         bb_func_start,
 +                         bb_func_end);
 +      kdb_di.fprintf_func = bb_dis_pass1;
 +      kdb_di.print_address_func = bb_printaddr_pass1;
 +
 +      bb_add(bb_func_start, 0);
 +      for (bb_curr_addr = bb_func_start;
 +           bb_curr_addr < bb_func_end;
 +           ++bb_curr_addr) {
 +              unsigned char c;
 +              if (kdb_getarea(c, bb_curr_addr)) {
 +                      kdb_printf("%s: unreadable function code at ",
 +                                 __FUNCTION__);
 +                      kdb_symbol_print(bb_curr_addr, NULL, KDB_SP_DEFAULT);
 +                      kdb_printf(", giving up\n");
 +                      bb_giveup = 1;
 +                      return;
 +              }
 +      }
 +      for (addr = bb_func_start; addr < bb_func_end; ) {
 +              bb_curr_addr = addr;
 +              addr += kdba_id_printinsn(addr, &kdb_di);
 +              kdb_di.fprintf_func(NULL, "\n");
 +      }
 +      if (bb_giveup)
 +              goto out;
 +
 +      /* Special case: a block consisting of a single instruction which is
 +       * both the target of a jmp and an ending instruction.  We then add
 +       * two blocks using the same address, one as a start and one as an
 +       * end, in no guaranteed order; swap them if necessary so that the end
 +       * is ordered after the start.
 +       */
 +      for (i = 0; i < bb_count-1; ++i) {
 +              struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
 +              if (bb1->end && bb1->end == bb2->start) {
 +                      bb = bb_list[i+1];
 +                      bb_list[i+1] = bb_list[i];
 +                      bb_list[i] = bb;
 +              }
 +      }
 +
 +      /* Some bb have a start address, some have an end address.  Collapse
 +       * them into entries that have both start and end addresses.  The first
 +       * entry is guaranteed to have a start address.
 +       */
 +      for (i = 0; i < bb_count-1; ++i) {
 +              struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
 +              if (bb1->end)
 +                      continue;
 +              if (bb2->start) {
 +                      bb1->end = bb2->start - 1;
 +                      bb1->drop_through = 1;
 +                      bb_jmp_add(bb1->end, bb2->start, 1);
 +              } else {
 +                      bb1->end = bb2->end;
 +                      bb_delete(i+1);
 +              }
 +      }
 +      bb = bb_list[bb_count-1];
 +      if (!bb->end)
 +              bb->end = bb_func_end - 1;
 +
 +      /* It would be nice to check that all bb have a valid start and end
 +       * address but there is just too much garbage code in the kernel to do
 +       * that check.  Aligned functions in assembler code mean that there is
 +       * space between the end of one function and the start of the next and
 +       * that space contains previous code from the assembler's buffers.  It
 +       * looks like dead code with nothing that branches to it, so no start
 +       * address.  do_sys_vm86() ends with 'jmp resume_userspace' which the C
 +       * compiler does not know about, so gcc appends the normal exit code;
 +       * again, nothing branches to this dangling code.
 +       *
 +       * The best we can do is delete bb entries with no start address.
 +       */
 +      for (i = 0; i < bb_count; ++i) {
 +              struct bb *bb = bb_list[i];
 +              if (!bb->start)
 +                      bb_delete(i--);
 +      }
 +      for (i = 0; i < bb_count; ++i) {
 +              struct bb *bb = bb_list[i];
 +              if (!bb->end) {
 +                      kdb_printf("%s: incomplete bb state\n", __FUNCTION__);
 +                      bb_giveup = 1;
 +                      goto debug;
 +              }
 +      }
 +
 +out:
 +      if (!KDB_DEBUG(BB))
 +              return;
 +debug:
 +      kdb_printf("%s: end\n", __FUNCTION__);
 +      for (i = 0; i < bb_count; ++i) {
 +              bb = bb_list[i];
 +              kdb_printf("  bb[%d] start "
 +                         kdb_bfd_vma_fmt0
 +                         " end " kdb_bfd_vma_fmt0
 +                         " drop_through %d",
 +                         i, bb->start, bb->end, bb->drop_through);
 +              kdb_printf("\n");
 +      }
 +      for (i = 0; i < bb_jmp_count; ++i) {
 +              bb_jmp = bb_jmp_list + i;
 +              kdb_printf("  bb_jmp[%d] from "
 +                         kdb_bfd_vma_fmt0
 +                         " to " kdb_bfd_vma_fmt0
 +                         " drop_through %d\n",
 +                         i, bb_jmp->from, bb_jmp->to, bb_jmp->drop_through);
 +      }
 +}
 +
 +/* Pass 2, record register changes in each basic block */
 +
 +/* For each opcode that we care about, indicate how it uses its operands.  Most
 + * opcodes can be handled generically because they completely specify their
 + * operands in the instruction; however, many opcodes have side effects such as
 + * reading or writing rax or updating rsp.  Instructions that change registers
 + * that are not listed in the operands must be handled as special cases.  In
 + * addition, instructions that copy registers while preserving their contents
 + * (push, pop, mov) or change the contents in a well defined way (add with an
 + * immediate, lea) must be handled as special cases in order to track the
 + * register contents.
 + *
 + * The tables below only list opcodes that are actually used in the Linux
 + * kernel, so they omit most of the floating point and all of the SSE type
 + * instructions.  The operand usage entries only cater for accesses to memory
 + * and to the integer registers, accesses to floating point registers and flags
 + * are not relevant for kernel backtraces.
 + */
 +
 +enum bb_operand_usage {
 +      BBOU_UNKNOWN = 0,
 +              /* generic entries.  Because xchg can do any combination of
 +               * read src, write src, read dst and write dst, we need to
 +               * define all 16 possibilities.  These are ordered by rs = 1,
 +               * rd = 2, ws = 4, wd = 8, bb_usage_x*() functions rely on this
 +               * order.
 +               */
 +      BBOU_RS = 1,    /* read src */          /*  1 */
 +      BBOU_RD,        /* read dst */          /*  2 */
 +      BBOU_RSRD,                              /*  3 */
 +      BBOU_WS,        /* write src */         /*  4 */
 +      BBOU_RSWS,                              /*  5 */
 +      BBOU_RDWS,                              /*  6 */
 +      BBOU_RSRDWS,                            /*  7 */
 +      BBOU_WD,        /* write dst */         /*  8 */
 +      BBOU_RSWD,                              /*  9 */
 +      BBOU_RDWD,                              /* 10 */
 +      BBOU_RSRDWD,                            /* 11 */
 +      BBOU_WSWD,                              /* 12 */
 +      BBOU_RSWSWD,                            /* 13 */
 +      BBOU_RDWSWD,                            /* 14 */
 +      BBOU_RSRDWSWD,                          /* 15 */
 +              /* opcode specific entries */
 +      BBOU_ADD,
 +      BBOU_AND,
 +      BBOU_CALL,
 +      BBOU_CBW,
 +      BBOU_CMOV,
 +      BBOU_CMPXCHG,
 +      BBOU_CMPXCHGD,
 +      BBOU_CPUID,
 +      BBOU_CWD,
 +      BBOU_DIV,
 +      BBOU_IDIV,
 +      BBOU_IMUL,
 +      BBOU_IRET,
 +      BBOU_JMP,
 +      BBOU_LAHF,
 +      BBOU_LEA,
 +      BBOU_LEAVE,
 +      BBOU_LODS,
 +      BBOU_LOOP,
 +      BBOU_LSS,
 +      BBOU_MONITOR,
 +      BBOU_MOV,
 +      BBOU_MOVS,
 +      BBOU_MUL,
 +      BBOU_MWAIT,
 +      BBOU_NOP,
 +      BBOU_OUTS,
 +      BBOU_POP,
 +      BBOU_POPF,
 +      BBOU_PUSH,
 +      BBOU_PUSHF,
 +      BBOU_RDMSR,
 +      BBOU_RDTSC,
 +      BBOU_RET,
 +      BBOU_SAHF,
 +      BBOU_SCAS,
 +      BBOU_SUB,
 +      BBOU_SYSEXIT,
 +      BBOU_SYSRET,
 +      BBOU_WRMSR,
 +      BBOU_XADD,
 +      BBOU_XCHG,
 +      BBOU_XOR,
 +};
 +
 +struct bb_opcode_usage {
 +      int length;
 +      enum bb_operand_usage usage;
 +      const char *opcode;
 +};
 +
 +/* This table is sorted in alphabetical order of opcode, except that the
 + * trailing '"' is treated as a high value.  For example, 'in' sorts after
 + * 'inc', 'bt' after 'btc'.  This modified sort order ensures that shorter
 + * opcodes come after long ones.  A normal sort would put 'in' first, so 'in'
 + * would match both 'inc' and 'in'.  When adding any new entries to this table,
 + * be careful to put shorter entries last in their group.
 + *
 + * To automatically sort the table (in vi)
 + *   Mark the first and last opcode line with 'a and 'b
 + *   'a
 + *   !'bsed -e 's/"}/}}/' | LANG=C sort -t '"' -k2 | sed -e 's/}}/"}/'
 + *
 + * If a new instruction has to be added, first consider if it affects registers
 + * other than those listed in the operands.  Also consider if you want to track
 + * the results of issuing the instruction, IOW can you extract useful
 + * information by looking in detail at the modified registers or memory.  If
 + * either test is true then you need a special case to handle the instruction.
 + *
 + * The generic entries at the start of enum bb_operand_usage all have one thing
 + * in common, if a register or memory location is updated then that location
 + * becomes undefined, i.e. we lose track of anything that was previously saved
 + * in that location.  So only use a generic BBOU_* value when the result of the
 + * instruction cannot be calculated exactly _and_ when all the affected
 + * registers are listed in the operands.
 + *
 + * Examples:
 + *
 + * 'call' does not generate a known result, but as a side effect of call,
 + * several scratch registers become undefined, so it needs a special BBOU_CALL
 + * entry.
 + *
 + * 'adc' generates a variable result, it depends on the carry flag, so 'adc'
 + * gets a generic entry.  'add' can generate an exact result (add with
 + * immediate on a register that points to the stack) or it can generate an
 + * unknown result (add a variable, or add immediate to a register that does not
 + * contain a stack pointer) so 'add' has its own BBOU_ADD entry.
 + */
 +
 +static const struct bb_opcode_usage
 +bb_opcode_usage_all[] = {
 +      {3, BBOU_RSRDWD,  "adc"},
 +      {3, BBOU_ADD,     "add"},
 +      {3, BBOU_AND,     "and"},
 +      {3, BBOU_RSWD,    "bsf"},
 +      {3, BBOU_RSWD,    "bsr"},
 +      {5, BBOU_RSWS,    "bswap"},
 +      {3, BBOU_RSRDWD,  "btc"},
 +      {3, BBOU_RSRDWD,  "btr"},
 +      {3, BBOU_RSRDWD,  "bts"},
 +      {2, BBOU_RSRD,    "bt"},
 +      {4, BBOU_CALL,    "call"},
 +      {4, BBOU_CBW,     "cbtw"},      /* Intel cbw */
 +      {3, BBOU_NOP,     "clc"},
 +      {3, BBOU_NOP,     "cld"},
 +      {7, BBOU_RS,      "clflush"},
 +      {4, BBOU_NOP,     "clgi"},
 +      {3, BBOU_NOP,     "cli"},
 +      {4, BBOU_CWD,     "cltd"},      /* Intel cdq */
 +      {4, BBOU_CBW,     "cltq"},      /* Intel cdqe */
 +      {4, BBOU_NOP,     "clts"},
 +      {4, BBOU_CMOV,    "cmov"},
 +      {9, BBOU_CMPXCHGD,"cmpxchg16"},
 +      {8, BBOU_CMPXCHGD,"cmpxchg8"},
 +      {7, BBOU_CMPXCHG, "cmpxchg"},
 +      {3, BBOU_RSRD,    "cmp"},
 +      {5, BBOU_CPUID,   "cpuid"},
 +      {4, BBOU_CWD,     "cqto"},      /* Intel cqo */
 +      {4, BBOU_CWD,     "cwtd"},      /* Intel cwd */
 +      {4, BBOU_CBW,     "cwtl"},      /* Intel cwde */
 +      {4, BBOU_NOP,     "data"},      /* alternative ASM_NOP<n> generates data16 on x86_64 */
 +      {3, BBOU_RSWS,    "dec"},
 +      {3, BBOU_DIV,     "div"},
 +      {5, BBOU_RS,      "fdivl"},
 +      {5, BBOU_NOP,     "finit"},
 +      {6, BBOU_RS,      "fistpl"},
 +      {4, BBOU_RS,      "fldl"},
 +      {4, BBOU_RS,      "fmul"},
 +      {6, BBOU_NOP,     "fnclex"},
 +      {6, BBOU_NOP,     "fninit"},
 +      {6, BBOU_RS,      "fnsave"},
 +      {7, BBOU_NOP,     "fnsetpm"},
 +      {6, BBOU_RS,      "frstor"},
 +      {5, BBOU_WS,      "fstsw"},
 +      {5, BBOU_RS,      "fsubp"},
 +      {5, BBOU_NOP,     "fwait"},
 +      {7, BBOU_RS,      "fxrstor"},
 +      {6, BBOU_RS,      "fxsave"},
 +      {3, BBOU_NOP,     "hlt"},
 +      {4, BBOU_IDIV,    "idiv"},
 +      {4, BBOU_IMUL,    "imul"},
 +      {3, BBOU_RSWS,    "inc"},
 +      {3, BBOU_NOP,     "int"},
 +      {7, BBOU_RSRD,    "invlpga"},
 +      {6, BBOU_RS,      "invlpg"},
 +      {2, BBOU_RSWD,    "in"},
 +      {4, BBOU_IRET,    "iret"},
 +      {1, BBOU_JMP,     "j"},
 +      {4, BBOU_LAHF,    "lahf"},
 +      {3, BBOU_RSWD,    "lar"},
 +      {5, BBOU_RS,      "lcall"},
 +      {5, BBOU_LEAVE,   "leave"},
 +      {3, BBOU_LEA,     "lea"},
 +      {6, BBOU_NOP,     "lfence"},
 +      {4, BBOU_RS,      "lgdt"},
 +      {4, BBOU_RS,      "lidt"},
 +      {4, BBOU_RS,      "ljmp"},
 +      {4, BBOU_RS,      "lldt"},
 +      {4, BBOU_RS,      "lmsw"},
 +      {4, BBOU_LODS,    "lods"},
 +      {4, BBOU_LOOP,    "loop"},
 +      {4, BBOU_NOP,     "lret"},
 +      {3, BBOU_RSWD,    "lsl"},
 +      {3, BBOU_LSS,     "lss"},
 +      {3, BBOU_RS,      "ltr"},
 +      {6, BBOU_NOP,     "mfence"},
 +      {7, BBOU_MONITOR, "monitor"},
 +      {4, BBOU_MOVS,    "movs"},
 +      {3, BBOU_MOV,     "mov"},
 +      {3, BBOU_MUL,     "mul"},
 +      {5, BBOU_MWAIT,   "mwait"},
 +      {3, BBOU_RSWS,    "neg"},
 +      {3, BBOU_NOP,     "nop"},
 +      {3, BBOU_RSWS,    "not"},
 +      {2, BBOU_RSRDWD,  "or"},
 +      {4, BBOU_OUTS,    "outs"},
 +      {3, BBOU_RSRD,    "out"},
 +      {5, BBOU_NOP,     "pause"},
 +      {4, BBOU_POPF,    "popf"},
 +      {3, BBOU_POP,     "pop"},
 +      {8, BBOU_RS,      "prefetch"},
 +      {5, BBOU_PUSHF,   "pushf"},
 +      {4, BBOU_PUSH,    "push"},
 +      {3, BBOU_RSRDWD,  "rcl"},
 +      {3, BBOU_RSRDWD,  "rcr"},
 +      {5, BBOU_RDMSR,   "rdmsr"},
 +      {5, BBOU_RDMSR,   "rdpmc"},     /* same side effects as rdmsr */
 +      {5, BBOU_RDTSC,   "rdtsc"},
 +      {3, BBOU_RET,     "ret"},
 +      {3, BBOU_RSRDWD,  "rol"},
 +      {3, BBOU_RSRDWD,  "ror"},
 +      {4, BBOU_SAHF,    "sahf"},
 +      {3, BBOU_RSRDWD,  "sar"},
 +      {3, BBOU_RSRDWD,  "sbb"},
 +      {4, BBOU_SCAS,    "scas"},
 +      {3, BBOU_WS,      "set"},
 +      {6, BBOU_NOP,     "sfence"},
 +      {4, BBOU_WS,      "sgdt"},
 +      {3, BBOU_RSRDWD,  "shl"},
 +      {3, BBOU_RSRDWD,  "shr"},
 +      {4, BBOU_WS,      "sidt"},
 +      {4, BBOU_WS,      "sldt"},
 +      {3, BBOU_NOP,     "stc"},
 +      {3, BBOU_NOP,     "std"},
 +      {4, BBOU_NOP,     "stgi"},
 +      {3, BBOU_NOP,     "sti"},
 +      {4, BBOU_SCAS,    "stos"},
 +      {4, BBOU_WS,      "strl"},
 +      {3, BBOU_WS,      "str"},
 +      {3, BBOU_SUB,     "sub"},
 +      {6, BBOU_NOP,     "swapgs"},
 +      {7, BBOU_SYSEXIT, "sysexit"},
 +      {6, BBOU_SYSRET,  "sysret"},
 +      {4, BBOU_NOP,     "test"},
 +      {4, BBOU_NOP,     "ud2a"},
 +      {7, BBOU_RS,      "vmclear"},
 +      {8, BBOU_NOP,     "vmlaunch"},
 +      {6, BBOU_RS,      "vmload"},
 +      {7, BBOU_RS,      "vmptrld"},
 +      {6, BBOU_WD,      "vmread"},    /* vmread src is an encoding, not a register */
 +      {8, BBOU_NOP,     "vmresume"},
 +      {5, BBOU_RS,      "vmrun"},
 +      {6, BBOU_RS,      "vmsave"},
 +      {7, BBOU_WD,      "vmwrite"},   /* vmwrite src is an encoding, not a register */
 +      {3, BBOU_NOP,     "vmxoff"},
 +      {6, BBOU_NOP,     "wbinvd"},
 +      {5, BBOU_WRMSR,   "wrmsr"},
 +      {4, BBOU_XADD,    "xadd"},
 +      {4, BBOU_XCHG,    "xchg"},
 +      {3, BBOU_XOR,     "xor"},
 +      {4, BBOU_NOP,     "xrstor"},
 +      {4, BBOU_NOP,     "xsave"},
 +      {10, BBOU_WS,     "xstore-rng"},
 +};
 +
 +/* To speed up searching, index bb_opcode_usage_all by the first letter of each
 + * opcode.
 + */
 +static struct {
 +      const struct bb_opcode_usage *opcode;
 +      int size;
 +} bb_opcode_usage[26];
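 +
 +/* The index is filled in during initialization (elsewhere in this file); a
 + * minimal sketch of the idea, relying on bb_opcode_usage_all being sorted by
 + * first letter:
 + *
 + *   for (i = 0; i < ARRAY_SIZE(bb_opcode_usage_all); ++i) {
 + *           int c = bb_opcode_usage_all[i].opcode[0] - 'a';
 + *           if (!bb_opcode_usage[c].opcode)
 + *                   bb_opcode_usage[c].opcode = bb_opcode_usage_all + i;
 + *           ++bb_opcode_usage[c].size;
 + *   }
 + */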
 +
 +struct bb_operand {
 +      char *base;
 +      char *index;
 +      char *segment;
 +      long disp;
 +      unsigned int scale;
 +      enum bb_reg_code base_rc;               /* UNDEFINED or RAX through R15 */
 +      enum bb_reg_code index_rc;              /* UNDEFINED or RAX through R15 */
 +      unsigned int present            :1;
 +      unsigned int disp_present       :1;
 +      unsigned int indirect           :1;     /* must be combined with reg or memory */
 +      unsigned int immediate          :1;     /* exactly one of these 3 must be set */
 +      unsigned int reg                :1;
 +      unsigned int memory             :1;
 +};
 +
 +struct bb_decode {
 +      char *prefix;
 +      char *opcode;
 +      const struct bb_opcode_usage *match;
 +      struct bb_operand src;
 +      struct bb_operand dst;
 +      struct bb_operand dst2;
 +};
 +
 +static struct bb_decode bb_decode;
 +
 +static enum bb_reg_code
 +bb_reg_map(const char *reg)
 +{
 +      int lo, hi, c;
 +      const struct bb_reg_code_map *p;
 +      lo = 0;
 +      hi = ARRAY_SIZE(bb_reg_code_map) - 1;
 +      while (lo <= hi) {
 +              int mid = (hi + lo) / 2;
 +              p = bb_reg_code_map + mid;
 +              c = strcmp(p->name, reg+1);
 +              if (c == 0)
 +                      return p->reg;
 +              else if (c > 0)
 +                      hi = mid - 1;
 +              else
 +                      lo = mid + 1;
 +      }
 +      return BBRG_UNDEFINED;
 +}
 +
 +static void
 +bb_parse_operand(char *str, struct bb_operand *operand)
 +{
 +      char *p = str;
 +      int sign = 1;
 +      operand->present = 1;
 +      /* extract any segment prefix */
 +      if (p[0] == '%' && p[1] && p[2] == 's' && p[3] == ':') {
 +              operand->memory = 1;
 +              operand->segment = p;
 +              p[3] = '\0';
 +              p += 4;
 +      }
 +      /* extract displacement, base, index, scale */
 +      if (*p == '*') {
 +              /* jmp/call *disp(%reg), *%reg or *0xnnn */
 +              operand->indirect = 1;
 +              ++p;
 +      }
 +      if (*p == '-') {
 +              sign = -1;
 +              ++p;
 +      }
 +      if (*p == '$') {
 +              operand->immediate = 1;
 +              operand->disp_present = 1;
 +              operand->disp = simple_strtoul(p+1, &p, 0);
 +      } else if (isdigit(*p)) {
 +              operand->memory = 1;
 +              operand->disp_present = 1;
 +              operand->disp = simple_strtoul(p, &p, 0) * sign;
 +      }
 +      if (*p == '%') {
 +              operand->reg = 1;
 +              operand->base = p;
 +      } else if (*p == '(') {
 +              operand->memory = 1;
 +              operand->base = ++p;
 +              p += strcspn(p, ",)");
 +              if (p == operand->base)
 +                      operand->base = NULL;
 +              if (*p == ',') {
 +                      *p = '\0';
 +                      operand->index = ++p;
 +                      p += strcspn(p, ",)");
 +                      if (p == operand->index)
 +                              operand->index = NULL;
 +              }
 +              if (*p == ',') {
 +                      *p = '\0';
 +                      operand->scale = simple_strtoul(p+1, &p, 0);
 +              }
 +              *p = '\0';
 +      } else if (*p) {
 +              kdb_printf("%s: unexpected token '%c' after disp '%s'\n",
 +                         __FUNCTION__, *p, str);
 +              bb_giveup = 1;
 +      }
 +      if ((operand->immediate + operand->reg + operand->memory != 1) ||
 +          (operand->indirect && operand->immediate)) {
 +              kdb_printf("%s: incorrect decode '%s' N %d I %d R %d M %d\n",
 +                         __FUNCTION__, str,
 +                         operand->indirect, operand->immediate, operand->reg,
 +                         operand->memory);
 +              bb_giveup = 1;
 +      }
 +      if (operand->base)
 +              operand->base_rc = bb_reg_map(operand->base);
 +      if (operand->index)
 +              operand->index_rc = bb_reg_map(operand->index);
 +}
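 +
 +/* A worked example (hypothetical operand text): parsing "-0x8(%rbp,%rax,8)"
 + * sets memory = 1, disp_present = 1, disp = -0x8, base = "%rbp",
 + * index = "%rax", scale = 8, and bb_reg_map() then fills in
 + * base_rc = BBRG_RBP and index_rc = BBRG_RAX.
 + */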
 +
 +static void
 +bb_print_operand(const char *type, const struct bb_operand *operand)
 +{
 +      if (!operand->present)
 +              return;
 +      kdb_printf("  %s %c%c: ",
 +                 type,
 +                 operand->indirect ? 'N' : ' ',
 +                 operand->immediate ? 'I' :
 +                   operand->reg ? 'R' :
 +                   operand->memory ? 'M' :
 +                   '?'
 +                 );
 +      if (operand->segment)
 +              kdb_printf("%s:", operand->segment);
 +      if (operand->immediate) {
 +              kdb_printf("$0x%lx", operand->disp);
 +      } else if (operand->reg) {
 +              if (operand->indirect)
 +                      kdb_printf("*");
 +              kdb_printf("%s", operand->base);
 +      } else if (operand->memory) {
 +              if (operand->indirect && (operand->base || operand->index))
 +                      kdb_printf("*");
 +              if (operand->disp_present) {
 +                      kdb_printf("0x%lx", operand->disp);
 +              }
 +              if (operand->base || operand->index || operand->scale) {
 +                      kdb_printf("(");
 +                      if (operand->base)
 +                              kdb_printf("%s", operand->base);
 +                      if (operand->index || operand->scale)
 +                              kdb_printf(",");
 +                      if (operand->index)
 +                              kdb_printf("%s", operand->index);
 +                      if (operand->scale)
 +                              kdb_printf(",%d", operand->scale);
 +                      kdb_printf(")");
 +              }
 +      }
 +      if (operand->base_rc)
 +              kdb_printf(" base_rc %d (%s)",
 +                         operand->base_rc, bbrg_name[operand->base_rc]);
 +      if (operand->index_rc)
 +              kdb_printf(" index_rc %d (%s)",
 +                         operand->index_rc,
 +                         bbrg_name[operand->index_rc]);
 +      kdb_printf("\n");
 +}
 +
 +static void
 +bb_print_opcode(void)
 +{
 +      const struct bb_opcode_usage *o = bb_decode.match;
 +      kdb_printf("  ");
 +      if (bb_decode.prefix)
 +              kdb_printf("%s ", bb_decode.prefix);
 +      kdb_printf("opcode '%s' matched by '%s', usage %d\n",
 +                 bb_decode.opcode, o->opcode, o->usage);
 +}
 +
 +static int
 +bb_parse_opcode(void)
 +{
 +      int c, i;
 +      const struct bb_opcode_usage *o;
 +      static int bb_parse_opcode_error_limit = 5;
 +      c = bb_decode.opcode[0] - 'a';
 +      if (c < 0 || c >= ARRAY_SIZE(bb_opcode_usage))
 +              goto nomatch;
 +      o = bb_opcode_usage[c].opcode;
 +      if (!o)
 +              goto nomatch;
 +      for (i = 0; i < bb_opcode_usage[c].size; ++i, ++o) {
 +              if (strncmp(bb_decode.opcode, o->opcode, o->length) == 0) {
 +                      bb_decode.match = o;
 +                      if (KDB_DEBUG(BB))
 +                              bb_print_opcode();
 +                      return 0;
 +              }
 +      }
 +nomatch:
 +      if (!bb_parse_opcode_error_limit)
 +              return 1;
 +      --bb_parse_opcode_error_limit;
 +      kdb_printf("%s: no match at [%s]%s " kdb_bfd_vma_fmt0 " - '%s'\n",
 +                 __FUNCTION__,
 +                 bb_mod_name, bb_func_name, bb_curr_addr,
 +                 bb_decode.opcode);
 +      return 1;
 +}
 +
 +static bool
 +bb_is_int_reg(enum bb_reg_code reg)
 +{
 +      return reg >= BBRG_RAX && reg < (BBRG_RAX + KDB_INT_REGISTERS);
 +}
 +
 +static bool
 +bb_is_simple_memory(const struct bb_operand *operand)
 +{
 +      return operand->memory &&
 +             bb_is_int_reg(operand->base_rc) &&
 +             !operand->index_rc &&
 +             operand->scale == 0 &&
 +             !operand->segment;
 +}
 +
 +static bool
 +bb_is_static_disp(const struct bb_operand *operand)
 +{
 +      return operand->memory &&
 +             !operand->base_rc &&
 +             !operand->index_rc &&
 +             operand->scale == 0 &&
 +             !operand->segment &&
 +             !operand->indirect;
 +}
 +
 +static enum bb_reg_code
 +bb_reg_code_value(enum bb_reg_code reg)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, 0);
 +      return bb_reg_state->contains[reg - BBRG_RAX].value;
 +}
 +
 +static short
 +bb_reg_code_offset(enum bb_reg_code reg)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, 0);
 +      return bb_reg_state->contains[reg - BBRG_RAX].offset;
 +}
 +
 +static void
 +bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src)
 +{
 +      BB_CHECK(!bb_is_int_reg(dst), dst, );
 +      bb_reg_state->contains[dst - BBRG_RAX].value = src;
 +}
 +
 +static void
 +bb_reg_code_set_offset(enum bb_reg_code dst, short offset)
 +{
 +      BB_CHECK(!bb_is_int_reg(dst), dst, );
 +      bb_reg_state->contains[dst - BBRG_RAX].offset = offset;
 +}
 +
 +static bool
 +bb_is_osp_defined(enum bb_reg_code reg)
 +{
 +      if (bb_is_int_reg(reg))
 +              return bb_reg_code_value(reg) == BBRG_OSP;
 +      else
 +              return 0;
 +}
 +
 +static bfd_vma
 +bb_actual_value(enum bb_reg_code reg)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, 0);
 +      return bb_actual[reg - BBRG_RAX].value;
 +}
 +
 +static int
 +bb_actual_valid(enum bb_reg_code reg)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, 0);
 +      return bb_actual[reg - BBRG_RAX].valid;
 +}
 +
 +static void
 +bb_actual_set_value(enum bb_reg_code reg, bfd_vma value)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, );
 +      bb_actual[reg - BBRG_RAX].value = value;
 +}
 +
 +static void
 +bb_actual_set_valid(enum bb_reg_code reg, int valid)
 +{
 +      BB_CHECK(!bb_is_int_reg(reg), reg, );
 +      bb_actual[reg - BBRG_RAX].valid = valid;
 +}
 +
 +/* The scheduler code switches RSP and then does PUSH; it is not an error for
 + * RSP to be undefined in this area of the code.
 + */
 +static bool
 +bb_is_scheduler_address(void)
 +{
 +      return bb_curr_addr >= bb__sched_text_start &&
 +             bb_curr_addr < bb__sched_text_end;
 +}
 +
 +static void
 +bb_reg_read(enum bb_reg_code reg)
 +{
 +      int i, r = 0;
 +      if (!bb_is_int_reg(reg) ||
 +          bb_reg_code_value(reg) != reg)
 +              return;
 +      for (i = 0;
 +           i < min_t(unsigned int, REGPARM, ARRAY_SIZE(bb_param_reg));
 +           ++i) {
 +              if (reg == bb_param_reg[i]) {
 +                      r = i + 1;
 +                      break;
 +              }
 +      }
 +      bb_reg_params = max(bb_reg_params, r);
 +}
 +
 +static void
 +bb_do_reg_state_print(const struct bb_reg_state *s)
 +{
 +      int i, offset_address, offset_value;
 +      const struct bb_memory_contains *c;
 +      enum bb_reg_code value;
 +      kdb_printf("  bb_reg_state %p\n", s);
 +      for (i = 0; i < ARRAY_SIZE(s->contains); ++i) {
 +              value = s->contains[i].value;
 +              offset_value = s->contains[i].offset;
 +              kdb_printf("    %s = %s",
 +                         bbrg_name[i + BBRG_RAX], bbrg_name[value]);
 +              if (value == BBRG_OSP)
 +                      KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
 +              kdb_printf("\n");
 +      }
 +      for (i = 0, c = s->memory; i < s->mem_count; ++i, ++c) {
 +              offset_address = c->offset_address;
 +              value = c->value;
 +              offset_value = c->offset_value;
 +              kdb_printf("    slot %d offset_address %c0x%x %s",
 +                         i,
 +                         offset_address >= 0 ? '+' : '-',
 +                         offset_address >= 0 ? offset_address : -offset_address,
 +                         bbrg_name[value]);
 +              if (value == BBRG_OSP)
 +                      KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
 +              kdb_printf("\n");
 +      }
 +}
 +
 +static void
 +bb_reg_state_print(const struct bb_reg_state *s)
 +{
 +      if (KDB_DEBUG(BB))
 +              bb_do_reg_state_print(s);
 +}
 +
 +/* Set register 'dst' to contain the value from 'src'.  This includes reading
 + * from 'src' and writing to 'dst'.  The offset value is copied iff 'src'
 + * contains a stack pointer.
 + *
 + * Be very careful about the context here.  'dst' and 'src' reflect integer
 + * registers by name, _not_ by the value of their contents.  "mov %rax,%rsi"
 + * will call this function as bb_reg_set_reg(BBRG_RSI, BBRG_RAX), which
 + * reflects what the assembler code is doing.  However we need to track the
 + * _values_ in the registers, not their names.  IOW, we really care about "what
 + * value does rax contain when it is copied into rsi?", so we can record the
 + * fact that we now have two copies of that value, one in rax and one in rsi.
 + */
 +
 +static void
 +bb_reg_set_reg(enum bb_reg_code dst, enum bb_reg_code src)
 +{
 +      enum bb_reg_code src_value = BBRG_UNDEFINED;
 +      short offset_value = 0;
 +      KDB_DEBUG_BB("  %s = %s", bbrg_name[dst], bbrg_name[src]);
 +      if (bb_is_int_reg(src)) {
 +              bb_reg_read(src);
 +              src_value = bb_reg_code_value(src);
 +              KDB_DEBUG_BB(" (%s", bbrg_name[src_value]);
 +              if (bb_is_osp_defined(src)) {
 +                      offset_value = bb_reg_code_offset(src);
 +                      KDB_DEBUG_BB_OFFSET(offset_value, "", "");
 +              }
 +              KDB_DEBUG_BB(")");
 +      }
 +      if (bb_is_int_reg(dst)) {
 +              bb_reg_code_set_value(dst, src_value);
 +              bb_reg_code_set_offset(dst, offset_value);
 +      }
 +      KDB_DEBUG_BB("\n");
 +}
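 +
 +/* Continuing the "mov %rax,%rsi" example from the comment above: if rax
 + * still holds its input value (its slot contains BBRG_RAX), then afterwards
 + * both the rax and rsi slots record BBRG_RAX, so the unwinder knows two
 + * places from which that input value can be recovered.
 + */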
 +
 +static void
 +bb_reg_set_undef(enum bb_reg_code dst)
 +{
 +      bb_reg_set_reg(dst, BBRG_UNDEFINED);
 +}
 +
 +/* Delete any record of a stored register held in osp + 'offset' */
 +
 +static void
 +bb_delete_memory(short offset)
 +{
 +      int i;
 +      struct bb_memory_contains *c;
 +      for (i = 0, c = bb_reg_state->memory;
 +           i < bb_reg_state->mem_count;
 +           ++i, ++c) {
 +              if (c->offset_address == offset &&
 +                  c->value != BBRG_UNDEFINED) {
 +                      KDB_DEBUG_BB("  delete %s from ",
 +                                   bbrg_name[c->value]);
 +                      KDB_DEBUG_BB_OFFSET(offset, "osp", "");
 +                      KDB_DEBUG_BB(" slot %d\n",
 +                                   (int)(c - bb_reg_state->memory));
 +                      memset(c, BBRG_UNDEFINED, sizeof(*c));
 +                      if (i == bb_reg_state->mem_count - 1)
 +                              --bb_reg_state->mem_count;
 +              }
 +      }
 +}
 +
 +/* Set memory location *('dst' + 'offset_address') to contain the supplied
 + * value and offset.  'dst' is assumed to be a register that contains a stack
 + * pointer.
 + */
 +
 +static void
 +bb_memory_set_reg_value(enum bb_reg_code dst, short offset_address,
 +                      enum bb_reg_code value, short offset_value)
 +{
 +      int i;
 +      struct bb_memory_contains *c, *free = NULL;
 +      BB_CHECK(!bb_is_osp_defined(dst), dst, );
 +      KDB_DEBUG_BB("  *(%s", bbrg_name[dst]);
 +      KDB_DEBUG_BB_OFFSET(offset_address, "", "");
 +      offset_address += bb_reg_code_offset(dst);
 +      KDB_DEBUG_BB_OFFSET(offset_address, " osp", ") = ");
 +      KDB_DEBUG_BB("%s", bbrg_name[value]);
 +      if (value == BBRG_OSP)
 +              KDB_DEBUG_BB_OFFSET(offset_value, "", "");
 +      for (i = 0, c = bb_reg_state->memory;
 +           i < bb_reg_state_max;
 +           ++i, ++c) {
 +              if (c->offset_address == offset_address)
 +                      free = c;
 +              else if (c->value == BBRG_UNDEFINED && !free)
 +                      free = c;
 +      }
 +      if (!free) {
 +              struct bb_reg_state *new, *old = bb_reg_state;
 +              size_t old_size, new_size;
 +              int slot;
 +              old_size = sizeof(*old) + bb_reg_state_max *
 +                                sizeof(old->memory[0]);
 +              slot = bb_reg_state_max;
 +              bb_reg_state_max += 5;
 +              new_size = sizeof(*new) + bb_reg_state_max *
 +                                sizeof(new->memory[0]);
 +              new = debug_kmalloc(new_size, GFP_ATOMIC);
 +              if (!new) {
 +                      kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +                      bb_giveup = 1;
 +              } else {
 +                      memcpy(new, old, old_size);
 +                      memset((char *)new + old_size, BBRG_UNDEFINED,
 +                             new_size - old_size);
 +                      bb_reg_state = new;
 +                      debug_kfree(old);
 +                      free = bb_reg_state->memory + slot;
 +              }
 +      }
 +      if (free) {
 +              int slot = free - bb_reg_state->memory;
 +              free->offset_address = offset_address;
 +              free->value = value;
 +              free->offset_value = offset_value;
 +              KDB_DEBUG_BB(" slot %d", slot);
 +              bb_reg_state->mem_count = max(bb_reg_state->mem_count, slot+1);
 +      }
 +      KDB_DEBUG_BB("\n");
 +}
 +
 +/* Set memory location *('dst' + 'offset') to contain the value from register
 + * 'src'.  'dst' is assumed to be a register that contains a stack pointer.
 + * This differs from bb_memory_set_reg_value because it takes a src register
 + * which contains a value and possibly an offset, bb_memory_set_reg_value is
 + * passed the value and offset directly.
 + */
 +
 +static void
 +bb_memory_set_reg(enum bb_reg_code dst, enum bb_reg_code src,
 +                short offset_address)
 +{
 +      int offset_value;
 +      enum bb_reg_code value;
 +      BB_CHECK(!bb_is_osp_defined(dst), dst, );
 +      if (!bb_is_int_reg(src))
 +              return;
 +      value = bb_reg_code_value(src);
 +      if (value == BBRG_UNDEFINED) {
 +              bb_delete_memory(offset_address + bb_reg_code_offset(dst));
 +              return;
 +      }
 +      offset_value = bb_reg_code_offset(src);
 +      bb_reg_read(src);
 +      bb_memory_set_reg_value(dst, offset_address, value, offset_value);
 +}
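 +
 +/* For illustration (the real push/pop handling is in the opcode usage code
 + * later in this file): a "push %rbp" would be modelled roughly as
 + *
 + *   bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
 + *   bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0);
 + *
 + * i.e. move the tracked stack pointer down one word, then record that the
 + * word at the new *rsp holds whatever value rbp contained.
 + */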
 +
 +/* Set register 'dst' to contain the value from memory *('src' + offset_address).
 + * 'src' is assumed to be a register that contains a stack pointer.
 + */
 +
 +static void
 +bb_reg_set_memory(enum bb_reg_code dst, enum bb_reg_code src, short offset_address)
 +{
 +      int i, defined = 0;
 +      struct bb_memory_contains *s;
 +      BB_CHECK(!bb_is_osp_defined(src), src, );
 +      KDB_DEBUG_BB("  %s = *(%s",
 +                   bbrg_name[dst], bbrg_name[src]);
 +      KDB_DEBUG_BB_OFFSET(offset_address, "", ")");
 +      offset_address += bb_reg_code_offset(src);
 +      KDB_DEBUG_BB_OFFSET(offset_address, " (osp", ")");
 +      for (i = 0, s = bb_reg_state->memory;
 +           i < bb_reg_state->mem_count;
 +           ++i, ++s) {
 +              if (s->offset_address == offset_address && bb_is_int_reg(dst)) {
 +                      bb_reg_code_set_value(dst, s->value);
 +                      KDB_DEBUG_BB(" value %s", bbrg_name[s->value]);
 +                      if (s->value == BBRG_OSP) {
 +                              bb_reg_code_set_offset(dst, s->offset_value);
 +                              KDB_DEBUG_BB_OFFSET(s->offset_value, "", "");
 +                      } else {
 +                              bb_reg_code_set_offset(dst, 0);
 +                      }
 +                      defined = 1;
 +              }
 +      }
 +      if (!defined)
 +              bb_reg_set_reg(dst, BBRG_UNDEFINED);
 +      else
 +              KDB_DEBUG_BB("\n");
 +}
 +
 +/* A generic read from an operand. */
 +
 +static void
 +bb_read_operand(const struct bb_operand *operand)
 +{
 +      int m = 0;
 +      if (operand->base_rc)
 +              bb_reg_read(operand->base_rc);
 +      if (operand->index_rc)
 +              bb_reg_read(operand->index_rc);
 +      if (bb_is_simple_memory(operand) &&
 +          bb_is_osp_defined(operand->base_rc) &&
 +          bb_decode.match->usage != BBOU_LEA) {
 +              m = (bb_reg_code_offset(operand->base_rc) + operand->disp +
 +                   KDB_WORD_SIZE - 1) / KDB_WORD_SIZE;
 +              bb_memory_params = max(bb_memory_params, m);
 +      }
 +}
 +
 +/* A generic write to an operand, resulting in an undefined value in that
 + * location.  All well defined operands are handled separately, this function
 + * only handles the opcodes where the result is undefined.
 + */
 +
 +static void
 +bb_write_operand(const struct bb_operand *operand)
 +{
 +      enum bb_reg_code base_rc = operand->base_rc;
 +      if (operand->memory) {
 +              if (base_rc)
 +                      bb_reg_read(base_rc);
 +              if (operand->index_rc)
 +                      bb_reg_read(operand->index_rc);
 +      } else if (operand->reg && base_rc) {
 +              bb_reg_set_undef(base_rc);
 +      }
 +      if (bb_is_simple_memory(operand) && bb_is_osp_defined(base_rc)) {
 +              int offset;
 +              offset = bb_reg_code_offset(base_rc) + operand->disp;
 +              offset = ALIGN(offset - KDB_WORD_SIZE + 1, KDB_WORD_SIZE);
 +              bb_delete_memory(offset);
 +      }
 +}
 +
 +/* Adjust a register that contains a stack pointer */
 +
 +static void
 +bb_adjust_osp(enum bb_reg_code reg, int adjust)
 +{
 +      int offset = bb_reg_code_offset(reg), old_offset = offset;
 +      KDB_DEBUG_BB("  %s osp offset ", bbrg_name[reg]);
 +      KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", " -> ");
 +      offset += adjust;
 +      bb_reg_code_set_offset(reg, offset);
 +      KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", "\n");
 +      /* When RSP is adjusted upwards, it invalidates any memory
 +       * stored between the old and current stack offsets.
 +       */
 +      if (reg == BBRG_RSP) {
 +              while (old_offset < bb_reg_code_offset(reg)) {
 +                      bb_delete_memory(old_offset);
 +                      old_offset += KDB_WORD_SIZE;
 +              }
 +      }
 +}
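 +
 +/* Example (hypothetical offsets): if rsp currently contains osp-0x20 and the
 + * instruction is "add $0x10,%rsp", the new offset is osp-0x10 and the values
 + * recorded at osp-0x20 and osp-0x18 are deleted; that area is now below rsp
 + * and may be overwritten at any time.
 + */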
 +
 +/* The current instruction adjusts a register that contains a stack pointer.
 + * Direction is 1 or -1, depending on whether the instruction is add/lea or
 + * sub.
 + */
 +
 +static void
 +bb_adjust_osp_instruction(int direction)
 +{
 +      enum bb_reg_code dst_reg = bb_decode.dst.base_rc;
 +      if (bb_decode.src.immediate ||
 +          bb_decode.match->usage == BBOU_LEA /* lea has its own checks */) {
 +              int adjust = direction * bb_decode.src.disp;
 +              bb_adjust_osp(dst_reg, adjust);
 +      } else {
 +              /* variable stack adjustment, osp offset is not well defined */
 +              KDB_DEBUG_BB("  %s osp offset ", bbrg_name[dst_reg]);
 +              KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(dst_reg), "", " -> undefined\n");
 +              bb_reg_code_set_value(dst_reg, BBRG_UNDEFINED);
 +              bb_reg_code_set_offset(dst_reg, 0);
 +      }
 +}
 +
 +/* Some instructions using memory have an explicit length suffix (b, w, l, q).
 + * The equivalent instructions using a register imply the length from the
 + * register name.  Deduce the operand length.
 + */
 +
 +static int
 +bb_operand_length(const struct bb_operand *operand, char opcode_suffix)
 +{
 +      int l = 0;
 +      switch (opcode_suffix) {
 +      case 'b':
 +              l = 8;
 +              break;
 +      case 'w':
 +              l = 16;
 +              break;
 +      case 'l':
 +              l = 32;
 +              break;
 +      case 'q':
 +              l = 64;
 +              break;
 +      }
 +      if (l == 0 && operand->reg) {
 +              switch (strlen(operand->base)) {
 +              case 3:
 +                      switch (operand->base[2]) {
 +                      case 'h':
 +                      case 'l':
 +                              l = 8;
 +                              break;
 +                      default:
 +                              l = 16;
 +                              break;
 +                      }
 +                      break;
 +              case 4:
 +                      if (operand->base[1] == 'r')
 +                              l = 64;
 +                      else
 +                              l = 32;
 +                      break;
 +              }
 +      }
 +      return l;
 +}
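 +
 +/* Examples: "movb" has explicit suffix 'b', so length 8.  With no suffix the
 + * register name decides: "%al" -> 8, "%ax" -> 16, "%eax" -> 32,
 + * "%rax" -> 64.
 + */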
 +
 +static int
 +bb_reg_state_size(const struct bb_reg_state *state)
 +{
 +      return sizeof(*state) +
 +             state->mem_count * sizeof(state->memory[0]);
 +}
 +
 +/* Canonicalize the current bb_reg_state so it can be compared against
 + * previously created states.  Sort the memory entries in descending order of
 + * offset_address (stack grows down).  Empty slots are moved to the end of the
 + * list and trimmed.
 + */
 +
 +static void
 +bb_reg_state_canonicalize(void)
 +{
 +      int i, order, changed;
 +      struct bb_memory_contains *p1, *p2, temp;
 +      do {
 +              changed = 0;
 +              for (i = 0, p1 = bb_reg_state->memory;
 +                   i < bb_reg_state->mem_count-1;
 +                   ++i, ++p1) {
 +                      p2 = p1 + 1;
 +                      if (p2->value == BBRG_UNDEFINED) {
 +                              order = 0;
 +                      } else if (p1->value == BBRG_UNDEFINED) {
 +                              order = 1;
 +                      } else if (p1->offset_address < p2->offset_address) {
 +                              order = 1;
 +                      } else if (p1->offset_address > p2->offset_address) {
 +                              order = -1;
 +                      } else {
 +                              order = 0;
 +                      }
 +                      if (order > 0) {
 +                              temp = *p2;
 +                              *p2 = *p1;
 +                              *p1 = temp;
 +                              changed = 1;
 +                      }
 +              }
 +      } while (changed);
 +      for (i = 0, p1 = bb_reg_state->memory;
 +           i < bb_reg_state_max;
 +           ++i, ++p1) {
 +              if (p1->value != BBRG_UNDEFINED)
 +                      bb_reg_state->mem_count = i + 1;
 +      }
 +      bb_reg_state_print(bb_reg_state);
 +}
 +
 +static int
 +bb_special_case(bfd_vma to)
 +{
 +      int i, j, rsp_offset, expect_offset, offset, errors = 0, max_errors = 40;
 +      enum bb_reg_code reg, expect_value, value;
 +      struct bb_name_state *r;
 +
 +      for (i = 0, r = bb_special_cases;
 +           i < ARRAY_SIZE(bb_special_cases);
 +           ++i, ++r) {
 +              if (to == r->address &&
 +                  (r->fname == NULL || strcmp(bb_func_name, r->fname) == 0))
 +                      goto match;
 +      }
 +      /* Some inline assembler code has jumps to .fixup sections which result
 +       * in out-of-line transfers with undefined state; ignore them.
 +       */
 +      if (strcmp(bb_func_name, "strnlen_user") == 0 ||
 +          strcmp(bb_func_name, "copy_from_user") == 0)
 +              return 1;
 +      return 0;
 +
 +match:
 +      /* Check the running registers match */
 +      for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
 +              expect_value = r->regs[reg].value;
 +              if (test_bit(expect_value, r->skip_regs.bits)) {
 +                      /* this regs entry is not defined for this label */
 +                      continue;
 +              }
 +              if (expect_value == BBRG_UNDEFINED)
 +                      continue;
 +              expect_offset = r->regs[reg].offset;
 +              value = bb_reg_code_value(reg);
 +              offset = bb_reg_code_offset(reg);
 +              if (expect_value == value &&
 +                  (value != BBRG_OSP || r->osp_offset == offset))
 +                      continue;
 +              kdb_printf("%s: Expected %s to contain %s",
 +                         __FUNCTION__,
 +                         bbrg_name[reg],
 +                         bbrg_name[expect_value]);
 +              if (r->osp_offset)
 +                      KDB_DEBUG_BB_OFFSET_PRINTF(r->osp_offset, "", "");
 +              kdb_printf(".  It actually contains %s", bbrg_name[value]);
 +              if (offset)
 +                      KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
 +              kdb_printf("\n");
 +              ++errors;
 +              if (max_errors-- == 0)
 +                      goto fail;
 +      }
 +      /* Check that any memory data on stack matches */
 +      i = j = 0;
 +      while (i < bb_reg_state->mem_count &&
 +             j < r->mem_size) {
 +              expect_value = r->mem[j].value;
 +              if (test_bit(expect_value, r->skip_mem.bits) ||
 +                  expect_value == BBRG_UNDEFINED) {
 +                      /* this memory slot is not defined for this label */
 +                      ++j;
 +                      continue;
 +              }
 +              rsp_offset = bb_reg_state->memory[i].offset_address -
 +                      bb_reg_code_offset(BBRG_RSP);
 +              if (rsp_offset >
 +                  r->mem[j].offset_address) {
 +                      /* extra slots in memory are OK */
 +                      ++i;
 +              } else if (rsp_offset <
 +                         r->mem[j].offset_address) {
 +                      /* Required memory slot is missing */
 +                      kdb_printf("%s: Invalid bb_reg_state.memory, "
 +                                 "missing memory entry[%d] %s\n",
 +                         __FUNCTION__, j, bbrg_name[expect_value]);
 +                      ++errors;
 +                      if (max_errors-- == 0)
 +                              goto fail;
 +                      ++j;
 +              } else {
 +                      if (bb_reg_state->memory[i].offset_value ||
 +                          bb_reg_state->memory[i].value != expect_value) {
 +                              /* memory slot is present but contains wrong
 +                               * value.
 +                               */
 +                              kdb_printf("%s: Invalid bb_reg_state.memory, "
 +                                          "wrong value in slot %d, "
 +                                          "should be %s, it is %s\n",
 +                                 __FUNCTION__, i,
 +                                 bbrg_name[expect_value],
 +                                 bbrg_name[bb_reg_state->memory[i].value]);
 +                              ++errors;
 +                              if (max_errors-- == 0)
 +                                      goto fail;
 +                      }
 +                      ++i;
 +                      ++j;
 +              }
 +      }
 +      while (j < r->mem_size) {
 +              expect_value = r->mem[j].value;
 +              if (test_bit(expect_value, r->skip_mem.bits) ||
 +                  expect_value == BBRG_UNDEFINED)
 +                      ++j;
 +              else
 +                      break;
 +      }
 +      if (j != r->mem_size) {
 +              /* Hit end of memory before testing all the pt_regs slots */
 +              kdb_printf("%s: Invalid bb_reg_state.memory, "
 +                          "missing trailing entries\n",
 +                 __FUNCTION__);
 +              ++errors;
 +              if (max_errors-- == 0)
 +                      goto fail;
 +      }
 +      if (errors)
 +              goto fail;
 +      return 1;
 +fail:
 +      kdb_printf("%s: on transfer to %s\n", __FUNCTION__, r->name);
 +      bb_giveup = 1;
 +      return 1;
 +}
 +
 +/* Transfer of control to a label outside the current function.  If the
 + * transfer is to a known common code path then do a sanity check on the state
 + * at this point.
 + */
 +
 +static void
 +bb_sanity_check(int type)
 +{
 +      enum bb_reg_code expect, actual;
 +      int i, offset, error = 0;
 +
 +      for (i = 0; i < ARRAY_SIZE(bb_preserved_reg); ++i) {
 +              expect = bb_preserved_reg[i];
 +              actual = bb_reg_code_value(expect);
 +              offset = bb_reg_code_offset(expect);
 +              if (expect == actual)
 +                      continue;
 +              /* type == 1 is sysret/sysexit, ignore RSP */
 +              if (type && expect == BBRG_RSP)
 +                      continue;
 +              /* type == 1 is sysret/sysexit, ignore RBP for i386 */
 +              /* We used to have "#ifndef CONFIG_X86_64" for the type=1 RBP
 +               * test; however, x86_64 can run in ia32-compatible mode and
 +               * hit this problem.  Perform the following test anyway!
 +               */
 +              if (type && expect == BBRG_RBP)
 +                      continue;
 +              /* RSP should contain OSP+0.  The exceptions are
 +               * ptregscall_common and ia32_ptregs_common; they get a
 +               * partial pt_regs, fudge the stack to make it a full pt_regs,
 +               * then reverse the effect on exit, so the offset is -0x50 on
 +               * exit.
 +               */
 +              if (expect == BBRG_RSP &&
 +                  bb_is_osp_defined(expect) &&
 +                  (offset == 0 ||
 +                   (offset == -0x50 &&
 +                    (strcmp(bb_func_name, "ptregscall_common") == 0 ||
 +                     strcmp(bb_func_name, "ia32_ptregs_common") == 0))))
 +                      continue;
 +              /* The put_user and save_paranoid functions are special.
 +               * %rbx gets clobbered */
 +              if (expect == BBRG_RBX &&
 +                      (strncmp(bb_func_name, "__put_user_", 11) == 0 ||
 +                       strcmp(bb_func_name, "save_paranoid") == 0))
 +                      continue;
 +              /* Ignore rbx and rsp for error_entry */
 +              if ((strcmp(bb_func_name, "error_entry") == 0) &&
 +                  (expect == BBRG_RBX ||
 +                   (expect == BBRG_RSP && bb_is_osp_defined(expect) && offset == -0x10)))
 +                      continue;
 +              kdb_printf("%s: Expected %s, got %s",
 +                         __FUNCTION__,
 +                         bbrg_name[expect], bbrg_name[actual]);
 +              if (offset)
 +                      KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
 +              kdb_printf("\n");
 +              error = 1;
 +      }
 +      BB_CHECK(error, error, );
 +}
 +
 +/* Transfer of control.  Follow the arc and save the current state as input to
 + * another basic block.
 + */
 +
 +static void
 +bb_transfer(bfd_vma from, bfd_vma to, unsigned int drop_through)
 +{
 +      int i, found;
 +      size_t size;
 +      struct bb *bb = NULL;   /* stupid gcc */
 +      struct bb_jmp *bb_jmp;
 +      struct bb_reg_state *state;
 +      bb_reg_state_canonicalize();
 +      found = 0;
 +      for (i = 0; i < bb_jmp_count; ++i) {
 +              bb_jmp = bb_jmp_list + i;
 +              if (bb_jmp->from == from &&
 +                  bb_jmp->to == to &&
 +                  bb_jmp->drop_through == drop_through) {
 +                      found = 1;
 +                      break;
 +              }
 +      }
 +      if (!found) {
 +              /* Transfer outside the current function.  Check the special
 +               * cases (mainly in entry.S) first.  If it is not a known
 +               * special case then check if the target address is the start
 +               * of a function or not.  If it is the start of a function then
 +               * assume tail recursion and require that the state be the same
 +               * as on entry.  Otherwise assume out of line code (e.g.
 +               * spinlock contention path) and ignore it, the state can be
 +               * anything.
 +               */
 +              kdb_symtab_t symtab;
 +              if (bb_special_case(to))
 +                      return;
 +              kdbnearsym(to, &symtab);
 +              if (symtab.sym_start != to)
 +                      return;
 +              bb_sanity_check(0);
 +              if (bb_giveup)
 +                      return;
 +#ifdef        NO_SIBLINGS
 +              /* Only print this message when the kernel is compiled with
 +               * -fno-optimize-sibling-calls.  Otherwise it would print a
 +               * message for every tail recursion call.  If you see the
 +               * message below then you probably have an assembler label that
 +               * is not listed in the special cases.
 +               */
 +              kdb_printf("  not matched: from "
 +                         kdb_bfd_vma_fmt0
 +                         " to " kdb_bfd_vma_fmt0
 +                         " drop_through %d bb_jmp[%d]\n",
 +                         from, to, drop_through, i);
 +#endif        /* NO_SIBLINGS */
 +              return;
 +      }
 +      KDB_DEBUG_BB("  matched: from " kdb_bfd_vma_fmt0
 +                   " to " kdb_bfd_vma_fmt0
 +                   " drop_through %d bb_jmp[%d]\n",
 +                   from, to, drop_through, i);
 +      found = 0;
 +      for (i = 0; i < bb_count; ++i) {
 +              bb = bb_list[i];
 +              if (bb->start == to) {
 +                      found = 1;
 +                      break;
 +              }
 +      }
 +      BB_CHECK(!found, to, );
 +      /* If the register state for this arc has already been set (we are
 +       * rescanning the block that originates the arc) and the state is the
 +       * same as the previous state for this arc then this input to the
 +       * target block is the same as last time, so there is no need to rescan
 +       * the target block.
 +       */
 +      state = bb_jmp->state;
 +      size = bb_reg_state_size(bb_reg_state);
 +      if (state) {
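 +              /* Copy ref_count across before the memcmp so the comparison
 +               * only sees differences in the actual register and memory
 +               * state, not in the reference count field itself.
 +               */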
 +              bb_reg_state->ref_count = state->ref_count;
 +              if (memcmp(state, bb_reg_state, size) == 0) {
 +                      KDB_DEBUG_BB("  no state change\n");
 +                      return;
 +              }
 +              if (--state->ref_count == 0)
 +                      debug_kfree(state);
 +              bb_jmp->state = NULL;
 +      }
 +      /* New input state is required.  To save space, check if any other arcs
 +       * have the same state and reuse them where possible.  The overall set
 +       * of inputs to the target block is now different so the target block
 +       * must be rescanned.
 +       */
 +      bb->changed = 1;
 +      for (i = 0; i < bb_jmp_count; ++i) {
 +              state = bb_jmp_list[i].state;
 +              if (!state)
 +                      continue;
 +              bb_reg_state->ref_count = state->ref_count;
 +              if (memcmp(state, bb_reg_state, size) == 0) {
 +                      KDB_DEBUG_BB("  reuse bb_jmp[%d]\n", i);
 +                      bb_jmp->state = state;
 +                      ++state->ref_count;
 +                      return;
 +              }
 +      }
 +      state = debug_kmalloc(size, GFP_ATOMIC);
 +      if (!state) {
 +              kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      memcpy(state, bb_reg_state, size);
 +      state->ref_count = 1;
 +      bb_jmp->state = state;
 +      KDB_DEBUG_BB("  new state %p\n", state);
 +}
 +
 +/* Isolate the processing for 'mov' so it can be used for 'xadd'/'xchg' as
 + * well.
 + *
 + * xadd/xchg expect this function to return BBOU_NOP for special cases,
 + * otherwise it returns BBOU_RSWD.  All special cases must be handled entirely
 + * within this function, including doing bb_read_operand or bb_write_operand
 + * where necessary.
 + */
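 +/* For example (illustrative only): "mov %rbx,%rbp" records that RBP now
 + * holds the same contents as RBX and returns BBOU_NOP, while "mov $0,%eax"
 + * matches no special case and falls through to the generic BBOU_RSWD
 + * handling (read src, write dst).
 + */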
 +
 +static enum bb_operand_usage
 +bb_usage_mov(const struct bb_operand *src, const struct bb_operand *dst, int l)
 +{
 +      int full_register_src, full_register_dst;
 +      full_register_src = bb_operand_length(src, bb_decode.opcode[l])
 +                          == KDB_WORD_SIZE * 8;
 +      full_register_dst = bb_operand_length(dst, bb_decode.opcode[l])
 +                          == KDB_WORD_SIZE * 8;
 +      /* If both src and dst are full integer registers then record the
 +       * register change.
 +       */
 +      if (src->reg &&
 +          bb_is_int_reg(src->base_rc) &&
 +          dst->reg &&
 +          bb_is_int_reg(dst->base_rc) &&
 +          full_register_src &&
 +          full_register_dst) {
 +              /* Special case for the code that switches stacks in
 +               * jprobe_return.  That code must modify RSP but it does it in
 +               * a well defined manner.  Do not invalidate RSP.
 +               */
 +              if (src->base_rc == BBRG_RBX &&
 +                  dst->base_rc == BBRG_RSP &&
 +                  strcmp(bb_func_name, "jprobe_return") == 0) {
 +                      bb_read_operand(src);
 +                      return BBOU_NOP;
 +              }
 +              /* math_abort takes the equivalent of a longjmp structure and
 +               * resets the stack.  Ignore this, it leaves RSP well defined.
 +               */
 +              if (dst->base_rc == BBRG_RSP &&
 +                  strcmp(bb_func_name, "math_abort") == 0) {
 +                      bb_read_operand(src);
 +                      return BBOU_NOP;
 +              }
 +              bb_reg_set_reg(dst->base_rc, src->base_rc);
 +              return BBOU_NOP;
 +      }
 +      /* If the move is from a full integer register to stack then record it.
 +       */
 +      if (src->reg &&
 +          bb_is_simple_memory(dst) &&
 +          bb_is_osp_defined(dst->base_rc) &&
 +          full_register_src) {
 +              /* Ugly special case.  Initializing list heads on stack causes
 +               * false references to stack variables when the list head is
 +               * used.  Static code analysis cannot detect that the list head
 +               * has been changed by a previous execution loop and that a
 +               * basic block is only executed after the list head has been
 +               * changed.
 +               *
 +               * These false references can result in valid stack variables
 +               * being incorrectly cleared on some logic paths.  Ignore
 +               * stores to stack variables which point to themselves or to
 +               * the previous word so the list head initialization is not
 +               * recorded.
 +               */
 +              if (bb_is_osp_defined(src->base_rc)) {
 +                      int stack1 = bb_reg_code_offset(src->base_rc);
 +                      int stack2 = bb_reg_code_offset(dst->base_rc) +
 +                                   dst->disp;
 +                      if (stack1 == stack2 ||
 +                          stack1 == stack2 - KDB_WORD_SIZE)
 +                              return BBOU_NOP;
 +              }
 +              bb_memory_set_reg(dst->base_rc, src->base_rc, dst->disp);
 +              return BBOU_NOP;
 +      }
 +      /* If the move is from stack to a full integer register then record it.
 +       */
 +      if (bb_is_simple_memory(src) &&
 +          bb_is_osp_defined(src->base_rc) &&
 +          dst->reg &&
 +          bb_is_int_reg(dst->base_rc) &&
 +          full_register_dst) {
 +#ifdef        CONFIG_X86_32
 +#ifndef TSS_sysenter_sp0
 +#define TSS_sysenter_sp0 SYSENTER_stack_sp0
 +#endif
 +              /* A mov from TSS_sysenter_sp0+offset to esp fixes up the
 +               * sysenter stack; it leaves esp well defined.  mov
 +               * TSS_sysenter_sp0+offset(%esp),%esp is followed by up to 5
 +               * push instructions to mimic the hardware stack push.  If
 +               * the disp is offset from TSS_sysenter_sp0 then only 3 words
 +               * will be pushed.
 +               */
 +              if (dst->base_rc == BBRG_RSP &&
 +                  src->disp >= TSS_sysenter_sp0 &&
 +                  bb_is_osp_defined(BBRG_RSP)) {
 +                      int pushes;
 +                      pushes = src->disp == TSS_sysenter_sp0 ? 5 : 3;
 +                      bb_reg_code_set_offset(BBRG_RSP,
 +                              bb_reg_code_offset(BBRG_RSP) +
 +                                      pushes * KDB_WORD_SIZE);
 +                      KDB_DEBUG_BB_OFFSET(
 +                              bb_reg_code_offset(BBRG_RSP),
 +                              "  sysenter fixup, RSP",
 +                             "\n");
 +                      return BBOU_NOP;
 +              }
 +#endif        /* CONFIG_X86_32 */
 +              bb_read_operand(src);
 +              bb_reg_set_memory(dst->base_rc, src->base_rc, src->disp);
 +              return BBOU_NOP;
 +      }
 +      /* mov %gs:0x<nn>,%rsp is used to unconditionally switch to another
 +       * stack.  Ignore this special case; it is handled by the stack
 +       * unwinding code.
 +       */
 +      if (src->segment &&
 +          strcmp(src->segment, "%gs") == 0 &&
 +          dst->reg &&
 +          dst->base_rc == BBRG_RSP)
 +              return BBOU_NOP;
 +      /* mov %reg,%reg is a nop */
 +      if (src->reg &&
 +          dst->reg &&
 +          !src->segment &&
 +          !dst->segment &&
 +          strcmp(src->base, dst->base) == 0)
 +              return BBOU_NOP;
 +      /* Special case for the code that switches stacks in the scheduler
 +       * (switch_to()).  That code must modify RSP but it does it in a well
 +       * defined manner.  Do not invalidate RSP.
 +       */
 +      if (dst->reg &&
 +          dst->base_rc == BBRG_RSP &&
 +          full_register_dst &&
 +          bb_is_scheduler_address()) {
 +              bb_read_operand(src);
 +              return BBOU_NOP;
 +      }
 +      /* Special case for the code that switches stacks in resume from
 +       * hibernation code.  That code must modify RSP but it does it in a
 +       * well defined manner.  Do not invalidate RSP.
 +       */
 +      if (src->memory &&
 +          dst->reg &&
 +          dst->base_rc == BBRG_RSP &&
 +          full_register_dst &&
 +          strcmp(bb_func_name, "restore_image") == 0) {
 +              bb_read_operand(src);
 +              return BBOU_NOP;
 +      }
 +      return BBOU_RSWD;
 +}
 +
 +static enum bb_operand_usage
 +bb_usage_xadd(const struct bb_operand *src, const struct bb_operand *dst)
 +{
 +      /* Simulate xadd as a series of instructions including mov; that way we
 +       * get the benefit of all the special cases already handled by
 +       * BBOU_MOV.
 +       *
 +       * tmp = src + dst, src = dst, dst = tmp.
 +       *
 +       * For tmp, pick a register that is undefined.  If all registers are
 +       * defined then pick one that is not being used by xadd.
 +       */
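 +      /* Illustrative expansion: "xadd %rax,(%rsp)" is treated as
 +       *   tmp = %rax + (%rsp); %rax = (%rsp); (%rsp) = tmp;
 +       * with tmp held in a register that xadd itself does not use.
 +       */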
 +      enum bb_reg_code reg = BBRG_UNDEFINED;
 +      struct bb_operand tmp;
 +      struct bb_reg_contains save_tmp;
 +      enum bb_operand_usage usage;
 +      int undefined = 0;
 +      for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
 +              if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
 +                      undefined = 1;
 +                      break;
 +              }
 +      }
 +      if (!undefined) {
 +              for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
 +                      if (reg != src->base_rc &&
 +                          reg != src->index_rc &&
 +                          reg != dst->base_rc &&
 +                          reg != dst->index_rc &&
 +                          reg != BBRG_RSP)
 +                              break;
 +              }
 +      }
 +      KDB_DEBUG_BB("  %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
 +      save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
 +      bb_reg_set_undef(reg);
 +      memset(&tmp, 0, sizeof(tmp));
 +      tmp.present = 1;
 +      tmp.reg = 1;
 +      tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
 +      if (tmp.base) {
 +              tmp.base[0] = '%';
 +              strcpy(tmp.base + 1, bbrg_name[reg]);
 +      }
 +      tmp.base_rc = reg;
 +      bb_read_operand(src);
 +      bb_read_operand(dst);
 +      if (bb_usage_mov(src, dst, sizeof("xadd")-1) == BBOU_NOP)
 +              usage = BBOU_RSRD;
 +      else
 +              usage = BBOU_RSRDWS;
 +      bb_usage_mov(&tmp, dst, sizeof("xadd")-1);
 +      KDB_DEBUG_BB("  %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
 +      bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
 +      debug_kfree(tmp.base);
 +      return usage;
 +}
 +
 +static enum bb_operand_usage
 +bb_usage_xchg(const struct bb_operand *src, const struct bb_operand *dst)
 +{
 +      /* Simulate xchg as a series of mov instructions; that way we get the
 +       * benefit of all the special cases already handled by BBOU_MOV.
 +       *
 +       * mov dst,tmp; mov src,dst; mov tmp,src;
 +       *
 +       * For tmp, pick a register that is undefined.  If all registers are
 +       * defined then pick one that is not being used by xchg.
 +       */
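 +      /* Illustrative expansion: "xchg %rbx,(%rsp)" is treated as
 +       *   mov (%rsp),tmp; mov %rbx,(%rsp); mov tmp,%rbx;
 +       */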
 +      enum bb_reg_code reg = BBRG_UNDEFINED;
 +      int rs = BBOU_RS, rd = BBOU_RD, ws = BBOU_WS, wd = BBOU_WD;
 +      struct bb_operand tmp;
 +      struct bb_reg_contains save_tmp;
 +      int undefined = 0;
 +      for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
 +              if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
 +                      undefined = 1;
 +                      break;
 +              }
 +      }
 +      if (!undefined) {
 +              for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
 +                      if (reg != src->base_rc &&
 +                          reg != src->index_rc &&
 +                          reg != dst->base_rc &&
 +                          reg != dst->index_rc &&
 +                          reg != BBRG_RSP)
 +                              break;
 +              }
 +      }
 +      KDB_DEBUG_BB("  %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
 +      save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
 +      memset(&tmp, 0, sizeof(tmp));
 +      tmp.present = 1;
 +      tmp.reg = 1;
 +      tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
 +      if (tmp.base) {
 +              tmp.base[0] = '%';
 +              strcpy(tmp.base + 1, bbrg_name[reg]);
 +      }
 +      tmp.base_rc = reg;
 +      if (bb_usage_mov(dst, &tmp, sizeof("xchg")-1) == BBOU_NOP)
 +              rd = 0;
 +      if (bb_usage_mov(src, dst, sizeof("xchg")-1) == BBOU_NOP) {
 +              rs = 0;
 +              wd = 0;
 +      }
 +      if (bb_usage_mov(&tmp, src, sizeof("xchg")-1) == BBOU_NOP)
 +              ws = 0;
 +      KDB_DEBUG_BB("  %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
 +      bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
 +      debug_kfree(tmp.base);
 +      return rs | rd | ws | wd;
 +}
 +
 +/* Invalidate all the scratch registers */
 +
 +static void
 +bb_invalidate_scratch_reg(void)
 +{
 +      int i, j;
 +      for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
 +              for (j = 0; j < ARRAY_SIZE(bb_preserved_reg); ++j) {
 +                      if (i == bb_preserved_reg[j])
 +                              goto preserved;
 +              }
 +              bb_reg_set_undef(i);
 +preserved:
 +              continue;
 +      }
 +}
 +
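 +/* Follow a computed jmp through its jump table.  Read successive words
 + * starting at the table address and treat each address that falls inside
 + * the current function as a transfer target; the first word outside the
 + * function is assumed to terminate the table.
 + */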
 +static void
 +bb_pass2_computed_jmp(const struct bb_operand *src)
 +{
 +      unsigned long table = src->disp;
 +      kdb_machreg_t addr;
 +      while (!bb_giveup) {
 +              if (kdb_getword(&addr, table, sizeof(addr)))
 +                      return;
 +              if (addr < bb_func_start || addr >= bb_func_end)
 +                      return;
 +              bb_transfer(bb_curr_addr, addr, 0);
 +              table += KDB_WORD_SIZE;
 +      }
 +}
 +
 +/* The current instruction has been decoded and all the information is in
 + * bb_decode.  Based on the opcode, track any operand usage that we care about.
 + */
 +
 +static void
 +bb_usage(void)
 +{
 +      enum bb_operand_usage usage = bb_decode.match->usage;
 +      struct bb_operand *src = &bb_decode.src;
 +      struct bb_operand *dst = &bb_decode.dst;
 +      struct bb_operand *dst2 = &bb_decode.dst2;
 +      int opcode_suffix, operand_length;
 +
 +      /* First handle all the special usage cases, and map them to a generic
 +       * case after catering for the side effects.
 +       */
 +
 +      if (usage == BBOU_IMUL &&
 +          src->present && !dst->present && !dst2->present) {
 +              /* single operand imul, same effects as mul */
 +              usage = BBOU_MUL;
 +      }
 +
 +      /* AT&T syntax uses movs<l1><l2> for move with sign extension, instead
 +       * of the Intel movsx.  The AT&T syntax causes problems for the opcode
 +       * mapping; movs with sign extension needs to be treated as a generic
 +       * read src, write dst, but instead it falls under the movs string
 +       * instruction.  Fix it.
 +       */
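 +      /* Illustrative example: "movswl %ax,%ecx" (opcode longer than five
 +       * characters) is sign extension and is remapped to BBOU_RSWD, while
 +       * plain "movsl" really is the string move and keeps BBOU_MOVS.
 +       */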
 +      if (usage == BBOU_MOVS && strlen(bb_decode.opcode) > 5)
 +              usage = BBOU_RSWD;
 +
 +      /* This switch statement deliberately does not use 'default' at the top
 +       * level.  That way the compiler will complain if a new BBOU_ enum is
 +       * added above and not explicitly handled here.
 +       */
 +      switch (usage) {
 +      case BBOU_UNKNOWN:      /* drop through */
 +      case BBOU_RS:           /* drop through */
 +      case BBOU_RD:           /* drop through */
 +      case BBOU_RSRD:         /* drop through */
 +      case BBOU_WS:           /* drop through */
 +      case BBOU_RSWS:         /* drop through */
 +      case BBOU_RDWS:         /* drop through */
 +      case BBOU_RSRDWS:       /* drop through */
 +      case BBOU_WD:           /* drop through */
 +      case BBOU_RSWD:         /* drop through */
 +      case BBOU_RDWD:         /* drop through */
 +      case BBOU_RSRDWD:       /* drop through */
 +      case BBOU_WSWD:         /* drop through */
 +      case BBOU_RSWSWD:       /* drop through */
 +      case BBOU_RDWSWD:       /* drop through */
 +      case BBOU_RSRDWSWD:
 +              break;          /* ignore generic usage for now */
 +      case BBOU_ADD:
 +              /* Special case for add instructions that adjust registers
 +               * which are mapping the stack.
 +               */
 +              if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
 +                      bb_adjust_osp_instruction(1);
 +                      usage = BBOU_RS;
 +              } else {
 +                      usage = BBOU_RSRDWD;
 +              }
 +              break;
 +      case BBOU_AND:
 +              /* Special case when rounding the stack pointer to achieve the
 +               * alignment required by efi_call.
 +               */
 +              if (dst->reg && dst->base_rc == BBRG_RSP &&
 +                  src->immediate && strncmp(bb_func_name, "efi_call", 8) == 0) {
 +                      usage = BBOU_NOP;
 +              } else {
 +                      usage = BBOU_RSRDWD;
 +              }
 +              break;
 +      case BBOU_CALL:
 +              bb_reg_state_print(bb_reg_state);
 +              usage = BBOU_NOP;
 +              if (bb_is_static_disp(src)) {
 +                      /* save_args is special.  It saves
 +                       * a partial pt_regs onto the stack and switches
 +                       * to the interrupt stack.
 +                       */
 +                      if (src->disp == bb_save_args) {
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x48);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x40);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x38);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x30);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x28);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R8,  0x20);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R9,  0x18);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x10);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x08);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0);
 +                              /* This is actually on the interrupt stack,
 +                               * but we fudge it so the unwind works.
 +                               */
 +                              bb_memory_set_reg_value(BBRG_RSP, -0x8, BBRG_RBP, 0);
 +                              bb_reg_set_reg(BBRG_RBP, BBRG_RSP);
 +                              bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
 +                      }
 +                      /* save_rest juggles the stack frame to append the
 +                       * rest of the pt_regs onto a stack where SAVE_ARGS
 +                       * or save_args has already been done.
 +                       */
 +                      else if (src->disp == bb_save_rest) {
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x30);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x28);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x20);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x18);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x10);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0x08);
 +                      }
 +                      /* error_entry and save_paranoid save a full pt_regs.
 +                       * Break out so the scratch registers aren't invalidated.
 +                       */
 +                      else if (src->disp == bb_error_entry || src->disp == bb_save_paranoid) {
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RDI, 0x70);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RSI, 0x68);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RDX, 0x60);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RCX, 0x58);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RAX, 0x50);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R8,  0x48);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R9,  0x40);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R10, 0x38);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R11, 0x30);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RBX, 0x28);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_RBP, 0x20);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R12, 0x18);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R13, 0x10);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R14, 0x08);
 +                              bb_memory_set_reg(BBRG_RSP, BBRG_R15, 0);
 +                              break;
 +                      }
 +              }
 +              /* Invalidate the scratch registers */
 +              bb_invalidate_scratch_reg();
 +
 +              /* These special cases need scratch registers invalidated first */
 +              if (bb_is_static_disp(src)) {
 +                      /* Functions sync_regs and save_v86_state are special.
 +                       * Their return value is the new stack pointer.
 +                       */
 +                      if (src->disp == bb_sync_regs) {
 +                              bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
 +                      } else if (src->disp == bb_save_v86_state) {
 +                              bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
 +                              bb_adjust_osp(BBRG_RAX, +KDB_WORD_SIZE);
 +                      }
 +              }
 +              break;
 +      case BBOU_CBW:
 +              /* Convert word in RAX.  Read RAX, write RAX */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_CMOV:
 +              /* cmove %gs:0x<nn>,%rsp is used to conditionally switch to
 +               * another stack.  Ignore this special case, it is handled by
 +               * the stack unwinding code.
 +               */
 +              if (src->segment &&
 +                  strcmp(src->segment, "%gs") == 0 &&
 +                  dst->reg &&
 +                  dst->base_rc == BBRG_RSP)
 +                      usage = BBOU_NOP;
 +              else
 +                      usage = BBOU_RSWD;
 +              break;
 +      case BBOU_CMPXCHG:
 +              /* Read RAX, write RAX plus src read, dst write */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              usage = BBOU_RSWD;
 +              break;
 +      case BBOU_CMPXCHGD:
 +              /* Read RAX, RBX, RCX, RDX, write RAX, RDX plus src read/write */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_read(BBRG_RBX);
 +              bb_reg_read(BBRG_RCX);
 +              bb_reg_read(BBRG_RDX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_RSWS;
 +              break;
 +      case BBOU_CPUID:
 +              /* Read RAX, write RAX, RBX, RCX, RDX */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RBX);
 +              bb_reg_set_undef(BBRG_RCX);
 +              bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_CWD:
 +              /* Convert word in RAX, RDX.  Read RAX, write RDX */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_DIV:  /* drop through */
 +      case BBOU_IDIV:
 +              /* The 8 bit variants only affect RAX, the 16, 32 and 64 bit
 +               * variants affect RDX as well.
 +               */
 +              switch (usage) {
 +              case BBOU_DIV:
 +                      opcode_suffix = bb_decode.opcode[3];
 +                      break;
 +              case BBOU_IDIV:
 +                      opcode_suffix = bb_decode.opcode[4];
 +                      break;
 +              default:
 +                      opcode_suffix = 'q';
 +                      break;
 +              }
 +              operand_length = bb_operand_length(src, opcode_suffix);
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              if (operand_length != 8) {
 +                      bb_reg_read(BBRG_RDX);
 +                      bb_reg_set_undef(BBRG_RDX);
 +              }
 +              usage = BBOU_RS;
 +              break;
 +      case BBOU_IMUL:
 +              /* Only the two and three operand forms get here.  The one
 +               * operand form is treated as mul.
 +               */
 +              if (dst2->present) {
 +                      /* The three operand form is a special case, read the first two
 +                       * operands, write the third.
 +                       */
 +                      bb_read_operand(src);
 +                      bb_read_operand(dst);
 +                      bb_write_operand(dst2);
 +                      usage = BBOU_NOP;
 +              } else {
 +                      usage = BBOU_RSRDWD;
 +              }
 +              break;
 +      case BBOU_IRET:
 +              bb_sanity_check(0);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_JMP:
 +              if (bb_is_static_disp(src))
 +                      bb_transfer(bb_curr_addr, src->disp, 0);
 +              else if (src->indirect &&
 +                       src->disp &&
 +                       src->base == NULL &&
 +                       src->index &&
 +                       src->scale == KDB_WORD_SIZE)
 +                      bb_pass2_computed_jmp(src);
 +              usage = BBOU_RS;
 +              break;
 +      case BBOU_LAHF:
 +              /* Write RAX */
 +              bb_reg_set_undef(BBRG_RAX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_LEA:
 +              /* dst = src + disp.  Often used to calculate offsets into the
 +               * stack, so check if it uses a stack pointer.
 +               */
 +              usage = BBOU_RSWD;
 +              if (bb_is_simple_memory(src)) {
 +                      if (bb_is_osp_defined(src->base_rc)) {
 +                              bb_reg_set_reg(dst->base_rc, src->base_rc);
 +                              bb_adjust_osp_instruction(1);
 +                              usage = BBOU_RS;
 +                      } else if (src->disp == 0 &&
 +                                 src->base_rc == dst->base_rc) {
 +                              /* lea 0(%reg),%reg is generated by i386
 +                               * GENERIC_NOP7.
 +                               */
 +                              usage = BBOU_NOP;
 +                      } else if (src->disp == 4096 &&
 +                                 (src->base_rc == BBRG_R8 ||
 +                                  src->base_rc == BBRG_RDI) &&
 +                                 strcmp(bb_func_name, "relocate_kernel") == 0) {
 +                              /* relocate_kernel: setup a new stack at the
 +                               * end of the physical control page, using
 +                               * (x86_64) lea 4096(%r8),%rsp or (i386) lea
 +                               * 4096(%edi),%esp
 +                               */
 +                              usage = BBOU_NOP;
 +                      }
 +              }
 +              break;
 +      case BBOU_LEAVE:
 +              /* RSP = RBP; RBP = *(RSP); RSP += KDB_WORD_SIZE; */
 +              bb_reg_set_reg(BBRG_RSP, BBRG_RBP);
 +              if (bb_is_osp_defined(BBRG_RSP))
 +                      bb_reg_set_memory(BBRG_RBP, BBRG_RSP, 0);
 +              else
 +                      bb_reg_set_undef(BBRG_RBP);
 +              if (bb_is_osp_defined(BBRG_RSP))
 +                      bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
 +              /* common_interrupt uses leave in a non-standard manner */
 +              if (strcmp(bb_func_name, "common_interrupt") != 0)
 +                      bb_sanity_check(0);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_LODS:
 +              /* Read RSI, write RAX, RSI */
 +              bb_reg_read(BBRG_RSI);
 +              bb_reg_set_undef(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RSI);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_LOOP:
 +              /* Read and write RCX */
 +              bb_reg_read(BBRG_RCX);
 +              bb_reg_set_undef(BBRG_RCX);
 +              if (bb_is_static_disp(src))
 +                      bb_transfer(bb_curr_addr, src->disp, 0);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_LSS:
 +              /* lss offset(%esp),%esp leaves esp well defined */
 +              if (dst->reg &&
 +                  dst->base_rc == BBRG_RSP &&
 +                  bb_is_simple_memory(src) &&
 +                  src->base_rc == BBRG_RSP) {
 +                      bb_adjust_osp(BBRG_RSP, 2*KDB_WORD_SIZE + src->disp);
 +                      usage = BBOU_NOP;
 +              } else {
 +                      usage = BBOU_RSWD;
 +              }
 +              break;
 +      case BBOU_MONITOR:
 +              /* Read RAX, RCX, RDX */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_read(BBRG_RCX);
 +              bb_reg_read(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_MOV:
 +              usage = bb_usage_mov(src, dst, sizeof("mov")-1);
 +              break;
 +      case BBOU_MOVS:
 +              /* Read RSI, RDI, write RSI, RDI */
 +              bb_reg_read(BBRG_RSI);
 +              bb_reg_read(BBRG_RDI);
 +              bb_reg_set_undef(BBRG_RSI);
 +              bb_reg_set_undef(BBRG_RDI);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_MUL:
 +              /* imul (one operand form only) or mul.  Read RAX.  If the
 +               * operand length is not 8 then write RDX.
 +               */
 +              if (bb_decode.opcode[0] == 'i')
 +                      opcode_suffix = bb_decode.opcode[4];
 +              else
 +                      opcode_suffix = bb_decode.opcode[3];
 +              operand_length = bb_operand_length(src, opcode_suffix);
 +              bb_reg_read(BBRG_RAX);
 +              if (operand_length != 8)
 +                      bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_MWAIT:
 +              /* Read RAX, RCX */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_read(BBRG_RCX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_NOP:
 +              break;
 +      case BBOU_OUTS:
 +              /* Read RSI, RDX, write RSI */
 +              bb_reg_read(BBRG_RSI);
 +              bb_reg_read(BBRG_RDX);
 +              bb_reg_set_undef(BBRG_RSI);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_POP:
 +              /* Complicated by the fact that you can pop from top of stack
 +               * to a stack location; for this case the destination location
 +               * is calculated after adjusting RSP.  Analysis of the kernel
 +               * code shows that gcc only uses this strange format to get the
 +               * flags into a local variable, e.g. pushf; popl 0x10(%esp); so
 +               * I am going to ignore this special case.
 +               */
 +              usage = BBOU_WS;
 +              if (!bb_is_osp_defined(BBRG_RSP)) {
 +                      if (!bb_is_scheduler_address()) {
 +                              kdb_printf("pop when BBRG_RSP is undefined?\n");
 +                              bb_giveup = 1;
 +                      }
 +              } else {
 +                      if (src->reg) {
 +                              bb_reg_set_memory(src->base_rc, BBRG_RSP, 0);
 +                              usage = BBOU_NOP;
 +                      }
 +                      /* pop %rsp does not adjust rsp */
 +                      if (!src->reg ||
 +                          src->base_rc != BBRG_RSP)
 +                              bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
 +              }
 +              break;
 +      case BBOU_POPF:
 +              /* Do not care about flags, just adjust RSP */
 +              if (!bb_is_osp_defined(BBRG_RSP)) {
 +                      if (!bb_is_scheduler_address()) {
 +                              kdb_printf("popf when BBRG_RSP is undefined?\n");
 +                              bb_giveup = 1;
 +                      }
 +              } else {
 +                      bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
 +              }
 +              usage = BBOU_WS;
 +              break;
 +      case BBOU_PUSH:
 +              /* Complicated by the fact that you can push from a stack
 +               * location to top of stack; the source location is calculated
 +               * before adjusting RSP.  Analysis of the kernel code shows
 +               * that gcc only uses this strange format to restore the flags
 +               * from a local variable, e.g. pushl 0x10(%esp); popf; so I am
 +               * going to ignore this special case.
 +               */
 +              usage = BBOU_RS;
 +              if (!bb_is_osp_defined(BBRG_RSP)) {
 +                      if (!bb_is_scheduler_address()) {
 +                              kdb_printf("push when BBRG_RSP is undefined?\n");
 +                              bb_giveup = 1;
 +                      }
 +              } else {
 +                      bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
 +                      if (src->reg &&
 +                          bb_reg_code_offset(BBRG_RSP) <= 0)
 +                              bb_memory_set_reg(BBRG_RSP, src->base_rc, 0);
 +              }
 +              break;
 +      case BBOU_PUSHF:
 +              /* Do not care about flags, just adjust RSP */
 +              if (!bb_is_osp_defined(BBRG_RSP)) {
 +                      if (!bb_is_scheduler_address()) {
 +                              kdb_printf("pushf when BBRG_RSP is undefined?\n");
 +                              bb_giveup = 1;
 +                      }
 +              } else {
 +                      bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
 +              }
 +              usage = BBOU_WS;
 +              break;
 +      case BBOU_RDMSR:
 +              /* Read RCX, write RAX, RDX */
 +              bb_reg_read(BBRG_RCX);
 +              bb_reg_set_undef(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_RDTSC:
 +              /* Write RAX, RDX */
 +              bb_reg_set_undef(BBRG_RAX);
 +              bb_reg_set_undef(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_RET:
 +              usage = BBOU_NOP;
 +              if (src->immediate && bb_is_osp_defined(BBRG_RSP)) {
 +                      bb_adjust_osp(BBRG_RSP, src->disp);
 +              }
 +              /* Functions that restore state which was saved by another
 +               * function or build new kernel stacks.  We cannot verify what
 +               * is being restored so skip the sanity check.
 +               */
 +              if (strcmp(bb_func_name, "restore_image") == 0 ||
 +                  strcmp(bb_func_name, "relocate_kernel") == 0 ||
 +                  strcmp(bb_func_name, "identity_mapped") == 0 ||
 +                  strcmp(bb_func_name, "xen_iret_crit_fixup") == 0 ||
 +                  strcmp(bb_func_name, "math_abort") == 0 ||
 +                  strcmp(bb_func_name, "save_args") == 0 ||
 +                  strcmp(bb_func_name, "kretprobe_trampoline_holder") == 0)
 +                      break;
 +              bb_sanity_check(0);
 +              break;
 +      case BBOU_SAHF:
 +              /* Read RAX */
 +              bb_reg_read(BBRG_RAX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_SCAS:
 +              /* Read RAX, RDI, write RDI */
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_read(BBRG_RDI);
 +              bb_reg_set_undef(BBRG_RDI);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_SUB:
 +              /* Special case for sub instructions that adjust registers
 +               * which are mapping the stack.
 +               */
 +              if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
 +                      bb_adjust_osp_instruction(-1);
 +                      usage = BBOU_RS;
 +              } else {
 +                      usage = BBOU_RSRDWD;
 +              }
 +              break;
 +      case BBOU_SYSEXIT:
 +              bb_sanity_check(1);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_SYSRET:
 +              bb_sanity_check(1);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_WRMSR:
 +              /* Read RCX, RAX, RDX */
 +              bb_reg_read(BBRG_RCX);
 +              bb_reg_read(BBRG_RAX);
 +              bb_reg_read(BBRG_RDX);
 +              usage = BBOU_NOP;
 +              break;
 +      case BBOU_XADD:
 +              usage = bb_usage_xadd(src, dst);
 +              break;
 +      case BBOU_XCHG:
 +              /* i386 do_IRQ with 4K stacks does xchg %ebx,%esp; call
 +               * irq_handler; mov %ebx,%esp; to switch stacks.  Ignore this
 +               * stack switch when tracking registers, it is handled by
 +               * higher level backtrace code.  Convert xchg %ebx,%esp to mov
 +               * %esp,%ebx so the later mov %ebx,%esp becomes a NOP and the
 +               * stack remains defined so we can backtrace through do_IRQ's
 +               * stack switch.
 +               *
 +               * Ditto for do_softirq.
 +               */
 +              if (src->reg &&
 +                  dst->reg &&
 +                  src->base_rc == BBRG_RBX &&
 +                  dst->base_rc == BBRG_RSP &&
 +                  (strcmp(bb_func_name, "do_IRQ") == 0 ||
 +                   strcmp(bb_func_name, "do_softirq") == 0)) {
 +                      strcpy(bb_decode.opcode, "mov");
 +                      usage = bb_usage_mov(dst, src, sizeof("mov")-1);
 +              } else {
 +                      usage = bb_usage_xchg(src, dst);
 +              }
 +              break;
 +      case BBOU_XOR:
 +              /* xor %reg,%reg only counts as a register write; the original
 +               * contents of reg are irrelevant.
 +               */
 +              if (src->reg && dst->reg && src->base_rc == dst->base_rc)
 +                      usage = BBOU_WS;
 +              else
 +                      usage = BBOU_RSRDWD;
 +              break;
 +      }
 +
 +      /* The switch statement above handled all the special cases.  Every
 +       * opcode should now have a usage of NOP or one of the generic cases.
 +       */
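 +      /* The generic BBOU_R*W* values combine as bitmasks, e.g. BBOU_RSWD is
 +       * BBOU_RS|BBOU_WD: read the src operand and write the dst operand.
 +       */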
 +      if (usage == BBOU_UNKNOWN || usage == BBOU_NOP) {
 +              /* nothing to do */
 +      } else if (usage >= BBOU_RS && usage <= BBOU_RSRDWSWD) {
 +              if (usage & BBOU_RS)
 +                      bb_read_operand(src);
 +              if (usage & BBOU_RD)
 +                      bb_read_operand(dst);
 +              if (usage & BBOU_WS)
 +                      bb_write_operand(src);
 +              if (usage & BBOU_WD)
 +                      bb_write_operand(dst);
 +      } else {
 +              kdb_printf("%s: opcode not fully handled\n", __FUNCTION__);
 +              if (!KDB_DEBUG(BB)) {
 +                      bb_print_opcode();
 +                      if (bb_decode.src.present)
 +                              bb_print_operand("src", &bb_decode.src);
 +                      if (bb_decode.dst.present)
 +                              bb_print_operand("dst", &bb_decode.dst);
 +                      if (bb_decode.dst2.present)
 +                              bb_print_operand("dst2", &bb_decode.dst2);
 +              }
 +              bb_giveup = 1;
 +      }
 +}
 +
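 +/* Parse one complete line of disassembler output that has accumulated in
 + * bb_buffer.  The expected shape is (illustrative):
 + *   <address> <symbol+offset>: [prefix] opcode [src[,dst[,dst2]]]
 + * Commas inside '(' ')' do not separate operands.  The decoded pieces are
 + * stored in bb_decode and handed to bb_usage().
 + */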
 +static void
 +bb_parse_buffer(void)
 +{
 +      char *p, *src, *dst = NULL, *dst2 = NULL;
 +      int paren = 0;
 +      p = bb_buffer;
 +      memset(&bb_decode, 0, sizeof(bb_decode));
 +      KDB_DEBUG_BB(" '%s'\n", p);
 +      p += strcspn(p, ":");   /* skip address and function name+offset: */
 +      if (*p++ != ':') {
 +              kdb_printf("%s: cannot find ':' in buffer '%s'\n",
 +                         __FUNCTION__, bb_buffer);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      p += strspn(p, " \t");  /* step to opcode */
 +      if (strncmp(p, "(bad)", 5) == 0)
 +              strcpy(p, "nop");
 +      /* separate any opcode prefix */
 +      if (strncmp(p, "lock", 4) == 0 ||
 +          strncmp(p, "rep", 3) == 0 ||
 +          strncmp(p, "rex", 3) == 0 ||
 +          strncmp(p, "addr", 4) == 0) {
 +              bb_decode.prefix = p;
 +              p += strcspn(p, " \t");
 +              *p++ = '\0';
 +              p += strspn(p, " \t");
 +      }
 +      bb_decode.opcode = p;
 +      strsep(&p, " \t");      /* step to end of opcode */
 +      if (bb_parse_opcode())
 +              return;
 +      if (!p)
 +              goto no_operands;
 +      p += strspn(p, " \t");  /* step to operand(s) */
 +      if (!*p)
 +              goto no_operands;
 +      src = p;
 +      p = strsep(&p, " \t");  /* strip comments after operands */
 +      /* split 'src','dst' but ignore ',' inside '(' ')' */
 +      while (*p) {
 +              if (*p == '(') {
 +                      ++paren;
 +              } else if (*p == ')') {
 +                      --paren;
 +              } else if (*p == ',' && paren == 0) {
 +                      *p = '\0';
 +                      if (dst)
 +                              dst2 = p+1;
 +                      else
 +                              dst = p+1;
 +              }
 +              ++p;
 +      }
 +      bb_parse_operand(src, &bb_decode.src);
 +      if (KDB_DEBUG(BB))
 +              bb_print_operand("src", &bb_decode.src);
 +      if (dst && !bb_giveup) {
 +              bb_parse_operand(dst, &bb_decode.dst);
 +              if (KDB_DEBUG(BB))
 +                      bb_print_operand("dst", &bb_decode.dst);
 +      }
 +      if (dst2 && !bb_giveup) {
 +              bb_parse_operand(dst2, &bb_decode.dst2);
 +              if (KDB_DEBUG(BB))
 +                      bb_print_operand("dst2", &bb_decode.dst2);
 +      }
 +no_operands:
 +      if (!bb_giveup)
 +              bb_usage();
 +}
 +
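 +/* fprintf style callback for the disassembler during pass 2.  Output can
 + * arrive in fragments; accumulate it in bb_buffer and only parse once a
 + * complete line, terminated by '\n', has been assembled.
 + */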
 +static int
 +bb_dis_pass2(PTR file, const char *fmt, ...)
 +{
 +      char *p;
 +      int l = strlen(bb_buffer);
 +      va_list ap;
 +      va_start(ap, fmt);
 +      vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
 +      va_end(ap);
 +      if ((p = strchr(bb_buffer, '\n'))) {
 +              *p = '\0';
 +              p = bb_buffer;
 +              p += strcspn(p, ":");
 +              if (*p++ == ':')
 +                      bb_fixup_switch_to(p);
 +              bb_parse_buffer();
 +              bb_buffer[0] = '\0';
 +      }
 +      return 0;
 +}
 +
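 +/* Address printing callback for pass 2.  Print the address and, when a
 + * nearby symbol is known, append it as <name+0xoffset>.
 + */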
 +static void
 +bb_printaddr_pass2(bfd_vma addr, disassemble_info *dip)
 +{
 +      kdb_symtab_t symtab;
 +      unsigned int offset;
 +      dip->fprintf_func(dip->stream, "0x%lx", addr);
 +      kdbnearsym(addr, &symtab);
 +      if (symtab.sym_name) {
 +              dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
 +              if ((offset = addr - symtab.sym_start))
 +                      dip->fprintf_func(dip->stream, "+0x%x", offset);
 +              dip->fprintf_func(dip->stream, ">");
 +      }
 +}
 +
 +/* Set the starting register and memory state for the current bb */
 +
 +static void
 +bb_start_block0_special(void)
 +{
 +      int i;
 +      short offset_address;
 +      enum bb_reg_code reg, value;
 +      struct bb_name_state *r;
 +      for (i = 0, r = bb_special_cases;
 +           i < ARRAY_SIZE(bb_special_cases);
 +           ++i, ++r) {
 +              if (bb_func_start == r->address && r->fname == NULL)
 +                      goto match;
 +      }
 +      return;
 +match:
 +      /* Set the running registers */
 +      for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
 +              value = r->regs[reg].value;
 +              if (test_bit(value, r->skip_regs.bits)) {
 +                      /* this regs entry is not defined for this label */
 +                      continue;
 +              }
 +              bb_reg_code_set_value(reg, value);
 +              bb_reg_code_set_offset(reg, r->regs[reg].offset);
 +      }
 +      /* Set any memory contents, e.g. pt_regs.  Adjust RSP as required. */
 +      offset_address = 0;
 +      for (i = 0; i < r->mem_size; ++i) {
 +              offset_address = max_t(int,
 +                              r->mem[i].offset_address + KDB_WORD_SIZE,
 +                              offset_address);
 +      }
 +      if (bb_reg_code_offset(BBRG_RSP) > -offset_address)
 +              bb_adjust_osp(BBRG_RSP, -offset_address - bb_reg_code_offset(BBRG_RSP));
 +      for (i = 0; i < r->mem_size; ++i) {
 +              value = r->mem[i].value;
 +              if (test_bit(value, r->skip_mem.bits)) {
 +                      /* this memory entry is not defined for this label */
 +                      continue;
 +              }
 +              bb_memory_set_reg_value(BBRG_RSP, r->mem[i].offset_address,
 +                                      value, 0);
 +              bb_reg_set_undef(value);
 +      }
 +      return;
 +}
 +
 +static void
 +bb_pass2_start_block(int number)
 +{
 +      int i, j, k, first, changed;
 +      size_t size;
 +      struct bb_jmp *bb_jmp;
 +      struct bb_reg_state *state;
 +      struct bb_memory_contains *c1, *c2;
 +      bb_reg_state->mem_count = bb_reg_state_max;
 +      size = bb_reg_state_size(bb_reg_state);
 +      memset(bb_reg_state, 0, size);
 +
 +      if (number == 0) {
 +              /* The first block is assumed to have well defined inputs */
 +              bb_start_block0();
 +              /* Some assembler labels have non-standard entry
 +               * states.
 +               */
 +              bb_start_block0_special();
 +              bb_reg_state_print(bb_reg_state);
 +              return;
 +      }
 +
 +      /* Merge all the input states for the current bb together */
 +      first = 1;
 +      changed = 0;
 +      for (i = 0; i < bb_jmp_count; ++i) {
 +              bb_jmp = bb_jmp_list + i;
 +              if (bb_jmp->to != bb_curr->start)
 +                      continue;
 +              state = bb_jmp->state;
 +              if (!state)
 +                      continue;
 +              if (first) {
 +                      size = bb_reg_state_size(state);
 +                      memcpy(bb_reg_state, state, size);
 +                      KDB_DEBUG_BB("  first state %p\n", state);
 +                      bb_reg_state_print(bb_reg_state);
 +                      first = 0;
 +                      continue;
 +              }
 +
 +              KDB_DEBUG_BB("  merging state %p\n", state);
 +              /* Merge the register states */
 +              for (j = 0; j < ARRAY_SIZE(state->contains); ++j) {
 +                      if (memcmp(bb_reg_state->contains + j,
 +                                 state->contains + j,
 +                                 sizeof(bb_reg_state->contains[0]))) {
 +                              /* Different states for this register from two
 +                               * or more inputs, make it undefined.
 +                               */
 +                              if (bb_reg_state->contains[j].value ==
 +                                  BBRG_UNDEFINED) {
 +                                      KDB_DEBUG_BB("  ignoring %s\n",
 +                                                  bbrg_name[j + BBRG_RAX]);
 +                              } else {
 +                                      bb_reg_set_undef(BBRG_RAX + j);
 +                                      changed = 1;
 +                              }
 +                      }
 +              }
 +
 +              /* Merge the memory states.  This relies on both
 +               * bb_reg_state->memory and state->memory being sorted in
 +               * descending order, with undefined entries at the end.
 +               */
 +              c1 = bb_reg_state->memory;
 +              c2 = state->memory;
 +              j = k = 0;
 +              while (j < bb_reg_state->mem_count &&
 +                     k < state->mem_count) {
 +                      if (c1->offset_address < c2->offset_address) {
 +                              KDB_DEBUG_BB_OFFSET(c2->offset_address,
 +                                                  "  ignoring c2->offset_address ",
 +                                                  "\n");
 +                              ++c2;
 +                              ++k;
 +                              continue;
 +                      }
 +                      if (c1->offset_address > c2->offset_address) {
 +                              /* Memory location is not in all input states,
 +                               * delete the memory location.
 +                               */
 +                              bb_delete_memory(c1->offset_address);
 +                              changed = 1;
 +                              ++c1;
 +                              ++j;
 +                              continue;
 +                      }
 +                      if (memcmp(c1, c2, sizeof(*c1))) {
 +                              /* Same location, different contents, delete
 +                               * the memory location.
 +                               */
 +                              bb_delete_memory(c1->offset_address);
 +                              KDB_DEBUG_BB_OFFSET(c2->offset_address,
 +                                                  "  ignoring c2->offset_address ",
 +                                                  "\n");
 +                              changed = 1;
 +                      }
 +                      ++c1;
 +                      ++c2;
 +                      ++j;
 +                      ++k;
 +              }
 +              while (j < bb_reg_state->mem_count) {
 +                      bb_delete_memory(c1->offset_address);
 +                      changed = 1;
 +                      ++c1;
 +                      ++j;
 +              }
 +      }
 +      if (changed) {
 +              KDB_DEBUG_BB("  final state\n");
 +              bb_reg_state_print(bb_reg_state);
 +      }
 +}
 +
 +/* We have reached the exit point from the current function, either a call to
 + * the next function or the instruction that was about to be executed when an
 + * interrupt occurred.  Save the current register state in bb_exit_state.
 + */
 +
 +static void
 +bb_save_exit_state(void)
 +{
 +      size_t size;
 +      debug_kfree(bb_exit_state);
 +      bb_exit_state = NULL;
 +      bb_reg_state_canonicalize();
 +      size = bb_reg_state_size(bb_reg_state);
 +      bb_exit_state = debug_kmalloc(size, GFP_ATOMIC);
 +      if (!bb_exit_state) {
 +              kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      memcpy(bb_exit_state, bb_reg_state, size);
 +}
 +
 +static int
 +bb_pass2_do_changed_blocks(int allow_missing)
 +{
 +      int i, j, missing, changed, maxloops;
 +      unsigned long addr;
 +      struct bb_jmp *bb_jmp;
 +      KDB_DEBUG_BB("\n  %s: allow_missing %d\n", __FUNCTION__, allow_missing);
 +      /* Absolute worst case is we have to iterate over all the basic blocks
 +       * in an "out of order" state, each iteration losing one register or
 +       * memory state.  Any more loops than that is a bug.  "out of order"
 +       * means that the layout of blocks in memory does not match the logic
 +       * flow through those blocks so (for example) block 27 comes before
 +       * block 2.  To allow for out of order blocks, multiply maxloops by the
 +       * number of blocks.
 +       */
 +      maxloops = (KDB_INT_REGISTERS + bb_reg_state_max) * bb_count;
 +      do {
 +              changed = 0;
 +              for (i = 0; i < bb_count; ++i) {
 +                      bb_curr = bb_list[i];
 +                      if (!bb_curr->changed)
 +                              continue;
 +                      missing = 0;
 +                      for (j = 0, bb_jmp = bb_jmp_list;
 +                           j < bb_jmp_count;
 +                           ++j, ++bb_jmp) {
 +                              if (bb_jmp->to == bb_curr->start &&
 +                                  !bb_jmp->state)
 +                                      ++missing;
 +                      }
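 +                      /* This block still has more undefined inputs than
 +                       * this pass allows, leave it for a later pass with a
 +                       * larger allow_missing.
 +                       */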
 +                      if (missing > allow_missing)
 +                              continue;
 +                      bb_curr->changed = 0;
 +                      changed = 1;
 +                      KDB_DEBUG_BB("\n  bb[%d]\n", i);
 +                      bb_pass2_start_block(i);
 +                      for (addr = bb_curr->start;
 +                           addr <= bb_curr->end; ) {
 +                              bb_curr_addr = addr;
 +                              if (addr == bb_exit_addr)
 +                                      bb_save_exit_state();
 +                              addr += kdba_id_printinsn(addr, &kdb_di);
 +                              kdb_di.fprintf_func(NULL, "\n");
 +                              if (bb_giveup)
 +                                      goto done;
 +                      }
 +                      if (!bb_exit_state) {
 +                              /* ATTRIB_NORET functions are a problem with
 +                               * the current gcc.  Allow the trailing address
 +                               * a bit of leeway.
 +                               */
 +                              if (addr == bb_exit_addr ||
 +                                  addr == bb_exit_addr + 1)
 +                                      bb_save_exit_state();
 +                      }
 +                      if (bb_curr->drop_through)
 +                              bb_transfer(bb_curr->end,
 +                                          bb_list[i+1]->start, 1);
 +              }
 +              if (maxloops-- == 0) {
 +                      kdb_printf("\n\n%s maxloops reached\n",
 +                                 __FUNCTION__);
 +                      bb_giveup = 1;
 +                      goto done;
 +              }
 +      } while(changed);
 +done:
 +      for (i = 0; i < bb_count; ++i) {
 +              bb_curr = bb_list[i];
 +              if (bb_curr->changed)
 +                      return 1;       /* more to do, increase allow_missing */
 +      }
 +      return 0;       /* all blocks done */
 +}
 +
 +/* Assume that the current function is a pass through function that does not
 + * refer to its register parameters.  Exclude known asmlinkage functions and
 + * assume the other functions actually use their registers.
 + */
 +
 +static void
 +bb_assume_pass_through(void)
 +{
 +      static int first_time = 1;
 +      if (strncmp(bb_func_name, "sys_", 4) == 0 ||
 +          strncmp(bb_func_name, "compat_sys_", 11) == 0 ||
 +          strcmp(bb_func_name, "schedule") == 0 ||
 +          strcmp(bb_func_name, "do_softirq") == 0 ||
 +          strcmp(bb_func_name, "printk") == 0 ||
 +          strcmp(bb_func_name, "vprintk") == 0 ||
 +          strcmp(bb_func_name, "preempt_schedule") == 0 ||
 +          strcmp(bb_func_name, "start_kernel") == 0 ||
 +          strcmp(bb_func_name, "csum_partial") == 0 ||
 +          strcmp(bb_func_name, "csum_partial_copy_generic") == 0 ||
 +          strcmp(bb_func_name, "math_state_restore") == 0 ||
 +          strcmp(bb_func_name, "panic") == 0 ||
 +          strcmp(bb_func_name, "kdb_printf") == 0 ||
 +          strcmp(bb_func_name, "kdb_interrupt") == 0)
 +              return;
 +      if (bb_asmlinkage_arch())
 +              return;
 +      bb_reg_params = REGPARM;
 +      if (first_time) {
 +              kdb_printf("  %s has memory parameters but no register "
 +                         "parameters.\n  Assuming it is a 'pass "
 +                         "through' function that does not refer to "
 +                         "its register\n  parameters and setting %d "
 +                         "register parameters\n",
 +                         bb_func_name, REGPARM);
 +              first_time = 0;
 +              return;
 +      }
 +      kdb_printf("  Assuming %s is 'pass through' with %d register "
 +                 "parameters\n",
 +                 bb_func_name, REGPARM);
 +}
 +
 +static void
 +bb_pass2(void)
 +{
 +      int allow_missing;
 +      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +              kdb_printf("%s: start\n", __FUNCTION__);
 +
 +      kdb_di.fprintf_func = bb_dis_pass2;
 +      kdb_di.print_address_func = bb_printaddr_pass2;
 +
 +      bb_reg_state = debug_kmalloc(sizeof(*bb_reg_state), GFP_ATOMIC);
 +      if (!bb_reg_state) {
 +              kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      bb_list[0]->changed = 1;
 +
 +      /* If a block does not have all its input states available then it is
 +       * possible for a register to initially appear to hold a known value,
 +       * but when other inputs are available then it becomes a variable
 +       * value.  The initial false state of "known" can generate false values
 +       * for other registers and can even make it look like stack locations
 +       * are being changed.
 +       *
 +       * To avoid these false positives, only process blocks which have all
 +       * their inputs defined.  That gives a clean depth first traversal of
 +       * the tree, except for loops.  If there are any loops, then start
 +       * processing blocks with one missing input, then two missing inputs
 +       * etc.
 +       *
 +       * Absolute worst case is we have to iterate over all the jmp entries,
 +       * each iteration allowing one more missing input.  Any more loops than
 +       * that is a bug.  Watch out for the corner case of 0 jmp entries.
 +       */
 +      for (allow_missing = 0; allow_missing <= bb_jmp_count; ++allow_missing) {
 +              if (!bb_pass2_do_changed_blocks(allow_missing))
 +                      break;
 +              if (bb_giveup)
 +                      break;
 +      }
 +      if (allow_missing > bb_jmp_count) {
 +              kdb_printf("\n\n%s maxloops reached\n",
 +                         __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +
 +      if (bb_memory_params && bb_reg_params)
 +              bb_reg_params = REGPARM;
 +      if (REGPARM &&
 +          bb_memory_params &&
 +          !bb_reg_params)
 +              bb_assume_pass_through();
 +      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
 +              kdb_printf("%s: end bb_reg_params %d bb_memory_params %d\n",
 +                         __FUNCTION__, bb_reg_params, bb_memory_params);
 +              if (bb_exit_state) {
 +                      kdb_printf("%s: bb_exit_state at " kdb_bfd_vma_fmt0 "\n",
 +                                 __FUNCTION__, bb_exit_addr);
 +                      bb_do_reg_state_print(bb_exit_state);
 +              }
 +      }
 +}
 +
 +static void
 +bb_cleanup(void)
 +{
 +      int i;
 +      struct bb* bb;
 +      struct bb_reg_state *state;
 +      while (bb_count) {
 +              bb = bb_list[0];
 +              bb_delete(0);
 +      }
 +      debug_kfree(bb_list);
 +      bb_list = NULL;
 +      bb_count = bb_max = 0;
 +      for (i = 0; i < bb_jmp_count; ++i) {
 +              state = bb_jmp_list[i].state;
 +              if (state && --state->ref_count == 0)
 +                      debug_kfree(state);
 +      }
 +      debug_kfree(bb_jmp_list);
 +      bb_jmp_list = NULL;
 +      bb_jmp_count = bb_jmp_max = 0;
 +      debug_kfree(bb_reg_state);
 +      bb_reg_state = NULL;
 +      bb_reg_state_max = 0;
 +      debug_kfree(bb_exit_state);
 +      bb_exit_state = NULL;
 +      bb_reg_params = bb_memory_params = 0;
 +      bb_giveup = 0;
 +}
 +
 +static int
 +bb_spurious_global_label(const char *func_name)
 +{
 +      int i;
 +      for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
 +              if (strcmp(bb_spurious[i], func_name) == 0)
 +                      return 1;
 +      }
 +      return 0;
 +}
 +
 +/* Given the current actual register contents plus the exit state deduced from
 + * a basic block analysis of the current function, roll back the actual register
 + * contents to the values they had on entry to this function.
 + */
 +
 +static void
 +bb_actual_rollback(const struct kdb_activation_record *ar)
 +{
 +      int i, offset_address;
 +      struct bb_memory_contains *c;
 +      enum bb_reg_code reg;
 +      unsigned long address, osp = 0;
 +      struct bb_actual new[ARRAY_SIZE(bb_actual)];
 +
 +      if (!bb_exit_state) {
 +              kdb_printf("%s: no bb_exit_state, cannot rollback\n",
 +                         __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      memcpy(bb_reg_state, bb_exit_state, bb_reg_state_size(bb_exit_state));
 +      memset(new, 0, sizeof(new));
 +
 +      /* The most important register for obtaining saved state is rsp so get
 +       * its new value first.  Prefer rsp if it is valid, then other
 +       * registers.  Saved values of rsp in memory are unusable without a
 +       * register that points to memory.
 +       */
 +      if (!bb_actual_valid(BBRG_RSP)) {
 +              kdb_printf("%s: no starting value for RSP, cannot rollback\n",
 +                         __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +              kdb_printf("%s: rsp " kdb_bfd_vma_fmt0,
 +                         __FUNCTION__, bb_actual_value(BBRG_RSP));
 +      i = BBRG_RSP;
 +      if (!bb_is_osp_defined(i)) {
 +              for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
 +                      if (bb_is_osp_defined(i) && bb_actual_valid(i))
 +                              break;
 +              }
 +      }
 +      if (bb_is_osp_defined(i) && bb_actual_valid(i)) {
 +              osp = new[BBRG_RSP - BBRG_RAX].value =
 +                    bb_actual_value(i) - bb_reg_code_offset(i);
 +              new[BBRG_RSP - BBRG_RAX].valid = 1;
 +              if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +                      kdb_printf(" -> osp " kdb_bfd_vma_fmt0 "\n", osp);
 +      } else {
 +              bb_actual_set_valid(BBRG_RSP, 0);
 +              if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +                      kdb_printf(" -> undefined\n");
 +              kdb_printf("%s: no ending value for RSP, cannot rollback\n",
 +                         __FUNCTION__);
 +              bb_giveup = 1;
 +              return;
 +      }
 +
 +      /* Now the other registers.  First look at register values that have
 +       * been copied to other registers.
 +       */
 +      for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
 +              reg = bb_reg_code_value(i);
 +              if (bb_is_int_reg(reg)) {
 +                      new[reg - BBRG_RAX] = bb_actual[i - BBRG_RAX];
 +                      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
 +                              kdb_printf("%s: %s is in %s ",
 +                                          __FUNCTION__,
 +                                          bbrg_name[reg],
 +                                          bbrg_name[i]);
 +                              if (bb_actual_valid(i))
 +                                      kdb_printf(" -> " kdb_bfd_vma_fmt0 "\n",
 +                                                  bb_actual_value(i));
 +                              else
 +                                      kdb_printf("(invalid)\n");
 +                      }
 +              }
 +      }
 +
 +      /* Finally register values that have been saved on stack */
 +      for (i = 0, c = bb_reg_state->memory;
 +           i < bb_reg_state->mem_count;
 +           ++i, ++c) {
 +              offset_address = c->offset_address;
 +              reg = c->value;
 +              if (!bb_is_int_reg(reg))
 +                      continue;
 +              address = osp + offset_address;
 +              if (address < ar->stack.logical_start ||
 +                  address >= ar->stack.logical_end) {
 +                      new[reg - BBRG_RAX].value = 0;
 +                      new[reg - BBRG_RAX].valid = 0;
 +                      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +                              kdb_printf("%s: %s -> undefined\n",
 +                                         __FUNCTION__,
 +                                         bbrg_name[reg]);
 +              } else {
 +                      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
 +                              kdb_printf("%s: %s -> *(osp",
 +                                         __FUNCTION__,
 +                                         bbrg_name[reg]);
 +                              KDB_DEBUG_BB_OFFSET_PRINTF(offset_address, "", " ");
 +                              kdb_printf(kdb_bfd_vma_fmt0, address);
 +                      }
 +                      new[reg - BBRG_RAX].value = *(bfd_vma *)address;
 +                      new[reg - BBRG_RAX].valid = 1;
 +                      if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
 +                              kdb_printf(") = " kdb_bfd_vma_fmt0 "\n",
 +                                         new[reg - BBRG_RAX].value);
 +              }
 +      }
 +
 +      memcpy(bb_actual, new, sizeof(bb_actual));
 +}
 +
 +/* Return true if the current function is an interrupt handler */
 +
 +static bool
 +bb_interrupt_handler(kdb_machreg_t rip)
 +{
 +      unsigned long disp8, disp32, target, addr = (unsigned long)rip;
 +      unsigned char code[5];
 +      int i;
 +
 +      for (i = 0; i < ARRAY_SIZE(bb_hardware_handlers); ++i)
 +              if (strcmp(bb_func_name, bb_hardware_handlers[i]) == 0)
 +                      return 1;
 +
 +      /* Given the large number of interrupt handlers, it is easiest to look
 +       * at the next instruction and see if it is a jmp to the common exit
 +       * routines.
 +       */
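 +      /* 0xe9 is "jmp rel32" (5 byte instruction), 0xeb is "jmp rel8"
 +       * (2 byte instruction); either way the target is the address of the
 +       * following instruction plus the sign-extended displacement, which is
 +       * what the arithmetic below computes.
 +       */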
 +      if (kdb_getarea(code, addr) ||
 +          kdb_getword(&disp32, addr+1, 4) ||
 +          kdb_getword(&disp8, addr+1, 1))
 +              return 0;       /* not a valid code address */
 +      if (code[0] == 0xe9) {
 +              target = addr + (s32) disp32 + 5;       /* jmp disp32 */
 +              if (target == bb_ret_from_intr ||
 +                  target == bb_common_interrupt ||
 +                  target == bb_error_entry)
 +                      return 1;
 +      }
 +      if (code[0] == 0xeb) {
 +              target = addr + (s8) disp8 + 2;         /* jmp disp8 */
 +              if (target == bb_ret_from_intr ||
 +                  target == bb_common_interrupt ||
 +                  target == bb_error_entry)
 +                      return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Copy argument information that was deduced by the basic block analysis and
 + * rollback into the kdb stack activation record.
 + */
 +
 +static void
 +bb_arguments(struct kdb_activation_record *ar)
 +{
 +      int i;
 +      enum bb_reg_code reg;
 +      kdb_machreg_t rsp;
 +      ar->args = bb_reg_params + bb_memory_params;
 +      bitmap_zero(ar->valid.bits, KDBA_MAXARGS);
 +      for (i = 0; i < bb_reg_params; ++i) {
 +              reg = bb_param_reg[i];
 +              if (bb_actual_valid(reg)) {
 +                      ar->arg[i] = bb_actual_value(reg);
 +                      set_bit(i, ar->valid.bits);
 +              }
 +      }
 +      if (!bb_actual_valid(BBRG_RSP))
 +              return;
 +      rsp = bb_actual_value(BBRG_RSP);
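 +      /* After rollback, RSP holds its value on entry to the function, where
 +       * *rsp is the return address, so memory parameters start one word
 +       * above rsp.
 +       */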
 +      for (i = bb_reg_params; i < ar->args; ++i) {
 +              rsp += KDB_WORD_SIZE;
 +              if (kdb_getarea(ar->arg[i], rsp) == 0)
 +                      set_bit(i, ar->valid.bits);
 +      }
 +}
 +
 +/* Given an exit address from a function, decompose the entire function into
 + * basic blocks and determine the register state at the exit point.
 + */
 +
 +static void
 +kdb_bb(unsigned long exit)
 +{
 +      kdb_symtab_t symtab;
 +      if (!kdbnearsym(exit, &symtab)) {
 +              kdb_printf("%s: address " kdb_bfd_vma_fmt0 " not recognised\n",
 +                         __FUNCTION__, exit);
 +              bb_giveup = 1;
 +              return;
 +      }
 +      bb_exit_addr = exit;
 +      bb_mod_name = symtab.mod_name;
 +      bb_func_name = symtab.sym_name;
 +      bb_func_start = symtab.sym_start;
 +      bb_func_end = symtab.sym_end;
 +      /* Various global labels exist in the middle of assembler code and have
 +       * a non-standard state.  Ignore these labels and use the start of the
 +       * previous label instead.
 +       */
 +      while (bb_spurious_global_label(symtab.sym_name)) {
 +              if (!kdbnearsym(symtab.sym_start - 1, &symtab))
 +                      break;
 +              bb_func_start = symtab.sym_start;
 +      }
 +      bb_mod_name = symtab.mod_name;
 +      bb_func_name = symtab.sym_name;
 +      bb_func_start = symtab.sym_start;
 +      /* Ignore spurious labels past this point and use the next non-spurious
 +       * label as the end point.
 +       */
 +      if (kdbnearsym(bb_func_end, &symtab)) {
 +              while (bb_spurious_global_label(symtab.sym_name)) {
 +                      bb_func_end = symtab.sym_end;
 +                      if (!kdbnearsym(symtab.sym_end + 1, &symtab))
 +                              break;
 +              }
 +      }
 +      bb_pass1();
 +      if (!bb_giveup)
 +              bb_pass2();
 +      if (bb_giveup)
 +              kdb_printf("%s: " kdb_bfd_vma_fmt0
 +                         " [%s]%s failed at " kdb_bfd_vma_fmt0 "\n\n",
 +                         __FUNCTION__, exit,
 +                         bb_mod_name, bb_func_name, bb_curr_addr);
 +}
 +
 +static int
 +kdb_bb1(int argc, const char **argv)
 +{
 +      int diag, nextarg = 1;
 +      kdb_machreg_t addr;
 +      unsigned long offset;
 +
 +      bb_cleanup();   /* in case previous command was interrupted */
 +      kdba_id_init(&kdb_di);
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +      diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +      if (diag)
 +              return diag;
 +      if (!addr)
 +              return KDB_BADADDR;
 +      kdb_save_flags();
 +      kdb_flags |= KDB_DEBUG_FLAG_BB << KDB_DEBUG_FLAG_SHIFT;
 +      kdb_bb(addr);
 +      bb_cleanup();
 +      kdb_restore_flags();
 +      kdbnearsym_cleanup();
 +      return 0;
 +}
 +
 +/* Run a basic block analysis on every function in the base kernel.  Used as a
 + * global sanity check to find errors in the basic block code.
 + */
 +
 +static int
 +kdb_bb_all(int argc, const char **argv)
 +{
 +      loff_t pos = 0;
 +      const char *symname;
 +      unsigned long addr;
 +      int i, max_errors = 20;
 +      struct bb_name_state *r;
 +      kdb_printf("%s: build variables:"
 +                 " CCVERSION \"" __stringify(CCVERSION) "\""
 +#ifdef        CONFIG_X86_64
 +                 " CONFIG_X86_64"
 +#endif
 +#ifdef        CONFIG_4KSTACKS
 +                 " CONFIG_4KSTACKS"
 +#endif
 +#ifdef        CONFIG_PREEMPT
 +                 " CONFIG_PREEMPT"
 +#endif
 +#ifdef        CONFIG_VM86
 +                 " CONFIG_VM86"
 +#endif
 +#ifdef        CONFIG_FRAME_POINTER
 +                 " CONFIG_FRAME_POINTER"
 +#endif
 +#ifdef        CONFIG_TRACE_IRQFLAGS
 +                 " CONFIG_TRACE_IRQFLAGS"
 +#endif
 +#ifdef        CONFIG_HIBERNATION
 +                 " CONFIG_HIBERNATION"
 +#endif
 +#ifdef        CONFIG_KPROBES
 +                 " CONFIG_KPROBES"
 +#endif
 +#ifdef        CONFIG_KEXEC
 +                 " CONFIG_KEXEC"
 +#endif
 +#ifdef        CONFIG_MATH_EMULATION
 +                 " CONFIG_MATH_EMULATION"
 +#endif
 +#ifdef        CONFIG_PARAVIRT_XEN
 +                 " CONFIG_XEN"
 +#endif
 +#ifdef        CONFIG_DEBUG_INFO
 +                 " CONFIG_DEBUG_INFO"
 +#endif
 +#ifdef        NO_SIBLINGS
 +                 " NO_SIBLINGS"
 +#endif
 +                 " REGPARM=" __stringify(REGPARM)
 +                 "\n\n", __FUNCTION__);
 +      for (i = 0, r = bb_special_cases;
 +           i < ARRAY_SIZE(bb_special_cases);
 +           ++i, ++r) {
 +              if (!r->address)
 +                      kdb_printf("%s: cannot find special_case name %s\n",
 +                                 __FUNCTION__, r->name);
 +      }
 +      for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
 +              if (!kallsyms_lookup_name(bb_spurious[i]))
 +                      kdb_printf("%s: cannot find spurious label %s\n",
 +                                 __FUNCTION__, bb_spurious[i]);
 +      }
 +      while ((symname = kdb_walk_kallsyms(&pos))) {
 +              if (strcmp(symname, "_stext") == 0 ||
 +                  strcmp(symname, "stext") == 0)
 +                      break;
 +      }
 +      if (!symname) {
 +              kdb_printf("%s: cannot find _stext\n", __FUNCTION__);
 +              return 0;
 +      }
 +      kdba_id_init(&kdb_di);
 +      i = 0;
 +      while ((symname = kdb_walk_kallsyms(&pos))) {
 +              if (strcmp(symname, "_etext") == 0)
 +                      break;
 +              if (i++ % 100 == 0)
 +                      kdb_printf(".");
 +              /* x86_64 has some 16-bit functions that appear between stext
 +               * and _etext.  Skip them.
 +               */
 +              if (strcmp(symname, "verify_cpu") == 0 ||
 +                  strcmp(symname, "verify_cpu_noamd") == 0 ||
 +                  strcmp(symname, "verify_cpu_sse_test") == 0 ||
 +                  strcmp(symname, "verify_cpu_no_longmode") == 0 ||
 +                  strcmp(symname, "verify_cpu_sse_ok") == 0 ||
 +                  strcmp(symname, "mode_seta") == 0 ||
 +                  strcmp(symname, "bad_address") == 0 ||
 +                  strcmp(symname, "wakeup_code") == 0 ||
 +                  strcmp(symname, "wakeup_code_start") == 0 ||
 +                  strcmp(symname, "wakeup_start") == 0 ||
 +                  strcmp(symname, "wakeup_32_vector") == 0 ||
 +                  strcmp(symname, "wakeup_32") == 0 ||
 +                  strcmp(symname, "wakeup_long64_vector") == 0 ||
 +                  strcmp(symname, "wakeup_long64") == 0 ||
 +                  strcmp(symname, "gdta") == 0 ||
 +                  strcmp(symname, "idt_48a") == 0 ||
 +                  strcmp(symname, "gdt_48a") == 0 ||
 +                  strcmp(symname, "bogus_real_magic") == 0 ||
 +                  strcmp(symname, "bogus_64_magic") == 0 ||
 +                  strcmp(symname, "no_longmode") == 0 ||
 +                  strcmp(symname, "mode_set") == 0 ||
 +                  strcmp(symname, "setbada") == 0 ||
 +                  strcmp(symname, "check_vesa") == 0 ||
 +                  strcmp(symname, "check_vesaa") == 0 ||
 +                  strcmp(symname, "_setbada") == 0 ||
 +                  strcmp(symname, "wakeup_stack_begin") == 0 ||
 +                  strcmp(symname, "wakeup_stack") == 0 ||
 +                  strcmp(symname, "wakeup_level4_pgt") == 0 ||
 +                  strcmp(symname, "acpi_copy_wakeup_routine") == 0 ||
 +                  strcmp(symname, "wakeup_end") == 0 ||
 +                  strcmp(symname, "do_suspend_lowlevel_s4bios") == 0 ||
 +                  strcmp(symname, "do_suspend_lowlevel") == 0 ||
 +                  strcmp(symname, "wakeup_pmode_return") == 0 ||
 +                  strcmp(symname, "restore_registers") == 0)
 +                      continue;
 +              /* __kprobes_text_end contains branches to the middle of code,
 +               * with undefined states.
 +               */
 +              if (strcmp(symname, "__kprobes_text_end") == 0)
 +                      continue;
 +              /* Data in the middle of the text segment :( */
 +              if (strcmp(symname, "level2_kernel_pgt") == 0 ||
 +                  strcmp(symname, "level3_kernel_pgt") == 0)
 +                      continue;
 +              if (bb_spurious_global_label(symname))
 +                      continue;
 +              if ((addr = kallsyms_lookup_name(symname)) == 0)
 +                      continue;
 +              // kdb_printf("BB " kdb_bfd_vma_fmt0 " %s\n", addr, symname);
 +              bb_cleanup();   /* in case previous command was interrupted */
 +              kdbnearsym_cleanup();
 +              kdb_bb(addr);
 +              touch_nmi_watchdog();
 +              if (bb_giveup) {
 +                      if (max_errors-- == 0) {
 +                              kdb_printf("%s: max_errors reached, giving up\n",
 +                                         __FUNCTION__);
 +                              break;
 +                      } else {
 +                              bb_giveup = 0;
 +                      }
 +              }
 +      }
 +      kdb_printf("\n");
 +      bb_cleanup();
 +      kdbnearsym_cleanup();
 +      return 0;
 +}
 +
 +/*
 + *=============================================================================
 + *
 + * Everything above this line is doing basic block analysis, function by
 + * function.  Everything below this line uses the basic block data to do a
 + * complete backtrace over all functions that are used by a process.
 + *
 + *=============================================================================
 + */
 +
 +
 +/*============================================================================*/
 +/*                                                                            */
 +/* Most of the backtrace code and data is common to x86_64 and i386.  This    */
 +/* large ifdef contains all of the differences between the two architectures. */
 +/*                                                                            */
 +/* Make sure you update the correct section of this ifdef.                    */
 +/*                                                                            */
 +/*============================================================================*/
 +#define XCS "cs"
 +#define RSP "sp"
 +#define RIP "ip"
 +#define ARCH_RSP sp
 +#define ARCH_RIP ip
 +
 +#ifdef        CONFIG_X86_64
 +
 +#define ARCH_NORMAL_PADDING (16 * 8)
 +
 +/* x86_64 has multiple alternate stacks, with different sizes and different
 + * offsets to get the link from one stack to the next.  All of the stacks are
 + * in the per_cpu area: either in the orig_ist or irq_stack_ptr. Debug events
 + * can even have multiple nested stacks within the single physical stack,
 + * each nested stack has its own link and some of those links are wrong.
 + *
 + * Consistent it's not!
 + *
 + * Do not assume that these stacks are aligned on their size.
 + */
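 +/* Index the per-cpu interrupt stack as one more entry after the hardware
 + * exception stacks.
 + */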
 +#define INTERRUPT_STACK (N_EXCEPTION_STACKS + 1)
 +void
 +kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
 +                            struct kdb_activation_record *ar)
 +{
 +      static struct {
 +              const char *id;
 +              unsigned int total_size;
 +              unsigned int nested_size;
 +              unsigned int next;
 +      } *sdp, stack_data[] = {
 +              [STACKFAULT_STACK - 1] =  { "stackfault",    EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
 +              [DOUBLEFAULT_STACK - 1] = { "doublefault",   EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
 +              [NMI_STACK - 1] =         { "nmi",           EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
 +              [DEBUG_STACK - 1] =       { "debug",         DEBUG_STKSZ,     EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
 +              [MCE_STACK - 1] =         { "machine check", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
 +              [INTERRUPT_STACK - 1] =   { "interrupt",     IRQ_STACK_SIZE,  IRQ_STACK_SIZE,  IRQ_STACK_SIZE  -   sizeof(void *) },
 +      };
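 +      /* total_size is the whole stack, nested_size is one nested frame
 +       * within it and next is the offset within a nested frame of the word
 +       * that links back to the previous stack.
 +       */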
 +      unsigned long total_start = 0, total_size, total_end;
 +      int sd, found = 0;
 +      extern unsigned long kdba_orig_ist(int, int);
 +
 +      for (sd = 0, sdp = stack_data;
 +           sd < ARRAY_SIZE(stack_data);
 +           ++sd, ++sdp) {
 +              total_size = sdp->total_size;
 +              if (!total_size)
 +                      continue;       /* in case stack_data[] has any holes */
 +              if (cpu < 0) {
 +                      /* Arbitrary address which can be on any cpu, see if it
 +                       * falls within any of the alternate stacks
 +                       */
 +                      int c;
 +                      for_each_online_cpu(c) {
 +                              if (sd == INTERRUPT_STACK - 1)
 +                                      total_end = (unsigned long)per_cpu(irq_stack_ptr, c);
 +                              else
 +                                      total_end = per_cpu(orig_ist, c).ist[sd];
 +                              total_start = total_end - total_size;
 +                              if (addr >= total_start && addr < total_end) {
 +                                      found = 1;
 +                                      cpu = c;
 +                                      break;
 +                              }
 +                      }
 +                      if (!found)
 +                              continue;
 +              }
 +              /* Only check the supplied or found cpu */
 +              if (sd == INTERRUPT_STACK - 1)
 +                      total_end = (unsigned long)per_cpu(irq_stack_ptr, cpu);
 +              else
 +                      total_end = per_cpu(orig_ist, cpu).ist[sd];
 +              total_start = total_end - total_size;
 +              if (addr >= total_start && addr < total_end) {
 +                      found = 1;
 +                      break;
 +              }
 +      }
 +      if (!found)
 +              return;
 +      /* find which nested stack the address is in */
 +      while (addr > total_start + sdp->nested_size)
 +              total_start += sdp->nested_size;
 +      ar->stack.physical_start = total_start;
 +      ar->stack.physical_end = total_start + sdp->nested_size;
 +      ar->stack.logical_start = total_start;
 +      ar->stack.logical_end = total_start + sdp->next;
 +      ar->stack.next = *(unsigned long *)ar->stack.logical_end;
 +      ar->stack.id = sdp->id;
 +
 +      /* Nasty: when switching to the interrupt stack, the stack state of the
 +       * caller is split over two stacks, the original stack and the
 +       * interrupt stack.  One word (the previous frame pointer) is stored on
 +       * the interrupt stack, the rest of the interrupt data is in the old
 +       * frame.  To make the interrupted stack state look as though it is
 +       * contiguous, copy the missing word from the interrupt stack to the
 +       * original stack and adjust the new stack pointer accordingly.
 +       */
 +
 +      if (sd == INTERRUPT_STACK - 1) {
 +              *(unsigned long *)(ar->stack.next - KDB_WORD_SIZE) =
 +                      ar->stack.next;
 +              ar->stack.next -= KDB_WORD_SIZE;
 +      }
 +}
 +
 +/* rip is not in the thread struct for x86_64.  We know that the stack value
 + * was saved in schedule near the label thread_return.  Setting rip to
 + * thread_return lets the stack trace find that we are in schedule and
 + * correctly decode its prologue.
 + */
 +
 +static kdb_machreg_t
 +kdba_bt_stack_rip(const struct task_struct *p)
 +{
 +      return bb_thread_return;
 +}
 +
 +#else /* !CONFIG_X86_64 */
 +
 +#define ARCH_NORMAL_PADDING (19 * 4)
 +
 +#ifdef        CONFIG_4KSTACKS
 +static struct thread_info **kdba_hardirq_ctx, **kdba_softirq_ctx;
 +#endif        /* CONFIG_4KSTACKS */
 +
 +/* On a 4K stack kernel, hardirq_ctx and softirq_ctx are [NR_CPUS] arrays.  The
 + * first element of each per-cpu stack is a struct thread_info.
 + */
 +void
 +kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
 +                            struct kdb_activation_record *ar)
 +{
 +#ifdef        CONFIG_4KSTACKS
 +      struct thread_info *tinfo;
 +      tinfo = (struct thread_info *)(addr & -THREAD_SIZE);
 +      if (cpu < 0) {
 +              /* Arbitrary address, see if it falls within any of the irq
 +               * stacks
 +               */
 +              int found = 0;
 +              for_each_online_cpu(cpu) {
 +                      if (tinfo == kdba_hardirq_ctx[cpu] ||
 +                          tinfo == kdba_softirq_ctx[cpu]) {
 +                              found = 1;
 +                              break;
 +                      }
 +              }
 +              if (!found)
 +                      return;
 +      }
 +      if (tinfo == kdba_hardirq_ctx[cpu] ||
 +          tinfo == kdba_softirq_ctx[cpu]) {
 +              ar->stack.physical_start = (kdb_machreg_t)tinfo;
 +              ar->stack.physical_end = ar->stack.physical_start + THREAD_SIZE;
 +              ar->stack.logical_start = ar->stack.physical_start +
 +                                        sizeof(struct thread_info);
 +              ar->stack.logical_end = ar->stack.physical_end;
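 +              /* thread_info at the base of an irq stack records the
 +               * interrupted stack pointer in previous_esp.
 +               */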
 +              ar->stack.next = tinfo->previous_esp;
 +              if (tinfo == kdba_hardirq_ctx[cpu])
 +                      ar->stack.id = "hardirq_ctx";
 +              else
 +                      ar->stack.id = "softirq_ctx";
 +      }
 +#endif        /* CONFIG_4KSTACKS */
 +}
 +
 +/* rip is in the thread struct for i386 */
 +
 +static kdb_machreg_t
 +kdba_bt_stack_rip(const struct task_struct *p)
 +{
 +      return p->thread.ip;
 +}
 +
 +#endif        /* CONFIG_X86_64 */
 +
 +/* Given an address which claims to be on a stack, an optional cpu number and
 + * an optional task address, get information about the stack.
 + *
 + * t == NULL, cpu < 0 indicates an arbitrary stack address with no associated
 + * struct task, the address can be in an alternate stack or any task's normal
 + * stack.
 + *
 + * t != NULL, cpu >= 0 indicates a running task, the address can be in an
 + * alternate stack or that task's normal stack.
 + *
 + * t != NULL, cpu < 0 indicates a blocked task, the address can only be in that
 + * task's normal stack.
 + *
 + * t == NULL, cpu >= 0 is not a valid combination.
 + */
 +
 +static void
 +kdba_get_stack_info(kdb_machreg_t rsp, int cpu,
 +                  struct kdb_activation_record *ar,
 +                  const struct task_struct *t)
 +{
 +      struct thread_info *tinfo;
 +      struct task_struct *g, *p;
 +      memset(&ar->stack, 0, sizeof(ar->stack));
 +      if (KDB_DEBUG(ARA))
 +              kdb_printf("%s: " RSP "=0x%lx cpu=%d task=%p\n",
 +                         __FUNCTION__, rsp, cpu, t);
 +      if (t == NULL || cpu >= 0) {
 +              kdba_get_stack_info_alternate(rsp, cpu, ar);
 +              if (ar->stack.logical_start)
 +                      goto out;
 +      }
 +      rsp &= -THREAD_SIZE;
 +      tinfo = (struct thread_info *)rsp;
 +      if (t == NULL) {
 +              /* Arbitrary stack address without an associated task, see if
 +               * it falls within any normal process stack, including the idle
 +               * tasks.
 +               */
 +              kdb_do_each_thread(g, p) {
 +                      if (tinfo == task_thread_info(p)) {
 +                              t = p;
 +                              goto found;
 +                      }
 +              } kdb_while_each_thread(g, p);
 +              for_each_online_cpu(cpu) {
 +                      p = idle_task(cpu);
 +                      if (tinfo == task_thread_info(p)) {
 +                              t = p;
 +                              goto found;
 +                      }
 +              }
 +      found:
 +              if (KDB_DEBUG(ARA))
 +                      kdb_printf("%s: found task %p\n", __FUNCTION__, t);
 +      } else if (cpu >= 0) {
 +              /* running task */
 +              struct kdb_running_process *krp = kdb_running_process + cpu;
 +              if (krp->p != t || tinfo != task_thread_info(t))
 +                      t = NULL;
 +              if (KDB_DEBUG(ARA))
 +                      kdb_printf("%s: running task %p\n", __FUNCTION__, t);
 +      } else {
 +              /* blocked task */
 +              if (tinfo != task_thread_info(t))
 +                      t = NULL;
 +              if (KDB_DEBUG(ARA))
 +                      kdb_printf("%s: blocked task %p\n", __FUNCTION__, t);
 +      }
 +      if (t) {
 +              ar->stack.physical_start = rsp;
 +              ar->stack.physical_end = rsp + THREAD_SIZE;
 +              ar->stack.logical_start = rsp + sizeof(struct thread_info);
 +              ar->stack.logical_end = ar->stack.physical_end - ARCH_NORMAL_PADDING;
 +              ar->stack.next = 0;
 +              ar->stack.id = "normal";
 +      }
 +out:
 +      if (ar->stack.physical_start && KDB_DEBUG(ARA)) {
 +              kdb_printf("%s: ar->stack\n", __FUNCTION__);
 +              kdb_printf("    physical_start=0x%lx\n", ar->stack.physical_start);
 +              kdb_printf("    physical_end=0x%lx\n", ar->stack.physical_end);
 +              kdb_printf("    logical_start=0x%lx\n", ar->stack.logical_start);
 +              kdb_printf("    logical_end=0x%lx\n", ar->stack.logical_end);
 +              kdb_printf("    next=0x%lx\n", ar->stack.next);
 +              kdb_printf("    id=%s\n", ar->stack.id);
 +              kdb_printf("    set MDCOUNT %ld\n",
 +                         (ar->stack.physical_end - ar->stack.physical_start) /
 +                         KDB_WORD_SIZE);
 +              kdb_printf("    mds " kdb_machreg_fmt0 "\n",
 +                         ar->stack.physical_start);
 +      }
 +}
 +
 +static void
 +bt_print_one(kdb_machreg_t rip, kdb_machreg_t rsp,
 +            const struct kdb_activation_record *ar,
 +            const kdb_symtab_t *symtab, int argcount)
 +{
 +      int btsymarg = 0;
 +      int nosect = 0;
 +
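 +      /* BTSYMARG and NOSECT are kdb environment variables */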
 +      kdbgetintenv("BTSYMARG", &btsymarg);
 +      kdbgetintenv("NOSECT", &nosect);
 +
 +      kdb_printf(kdb_machreg_fmt0, rsp);
 +      kdb_symbol_print(rip, symtab,
 +                       KDB_SP_SPACEB|KDB_SP_VALUE);
 +      if (argcount && ar->args) {
 +              int i, argc = ar->args;
 +              kdb_printf(" (");
 +              if (argc > argcount)
 +                      argc = argcount;
 +              for (i = 0; i < argc; i++) {
 +                      if (i)
 +                              kdb_printf(", ");
 +                      if (test_bit(i, ar->valid.bits))
 +                              kdb_printf("0x%lx", ar->arg[i]);
 +                      else
 +                              kdb_printf("invalid");
 +              }
 +              kdb_printf(")");
 +      }
 +      kdb_printf("\n");
 +      if (symtab->sym_name) {
 +              if (!nosect) {
 +                      kdb_printf("                               %s",
 +                                 symtab->mod_name);
 +                      if (symtab->sec_name && symtab->sec_start)
 +                              kdb_printf(" 0x%lx 0x%lx",
 +                                         symtab->sec_start, symtab->sec_end);
 +                      kdb_printf(" 0x%lx 0x%lx\n",
 +                                 symtab->sym_start, symtab->sym_end);
 +              }
 +      }
 +      if (argcount && ar->args && btsymarg) {
 +              int i, argc = ar->args;
 +              kdb_symtab_t arg_symtab;
 +              for (i = 0; i < argc; i++) {
 +                      kdb_machreg_t arg = ar->arg[i];
 +                      if (test_bit(i, ar->valid.bits) &&
 +                          kdbnearsym(arg, &arg_symtab)) {
 +                              kdb_printf("                       ARG %2d ", i);
 +                              kdb_symbol_print(arg, &arg_symtab,
 +                                               KDB_SP_DEFAULT|KDB_SP_NEWLINE);
 +                      }
 +              }
 +      }
 +}
 +
 +static void
 +kdba_bt_new_stack(struct kdb_activation_record *ar, kdb_machreg_t *rsp,
 +                 int *count, int *suppress)
 +{
 +      /* Nasty: save_args builds a partial pt_regs, with r15 through
 +       * rbx not being filled in.  It passes struct pt_regs* to do_IRQ (in
 +       * rdi) but the stack pointer is not adjusted to account for r15
 +       * through rbx.  This has two effects :-
 +       *
 +       * (1) struct pt_regs on an external interrupt actually overlaps with
 +       *     the local stack area used by do_IRQ.  Not only are r15-rbx
 +       *     undefined, the area that claims to hold their values can even
 +       *     change as the irq is processed.
 +       *
 +       * (2) The back stack pointer saved for the new frame is not pointing
 +       *     at pt_regs, it is pointing at rbx within the pt_regs passed to
 +       *     do_IRQ.
 +       *
 +       * There is nothing that I can do about (1) but I have to fix (2)
 +       * because kdb backtrace looks for the "start" address of pt_regs as it
 +       * walks back through the stacks.  When switching from the interrupt
 +       * stack to another stack, we have to assume that pt_regs has been
 +       * seen and turn off backtrace suppression.
 +       */
 +      int probable_pt_regs = strcmp(ar->stack.id, "interrupt") == 0;
 +      *rsp = ar->stack.next;
 +      if (KDB_DEBUG(ARA))
 +              kdb_printf("new " RSP "=" kdb_machreg_fmt0 "\n", *rsp);
 +      bb_actual_set_value(BBRG_RSP, *rsp);
 +      kdba_get_stack_info(*rsp, -1, ar, NULL);
 +      if (!ar->stack.physical_start) {
 +              kdb_printf("+++ Cannot resolve next stack\n");
 +      } else if (!*suppress) {
 +              kdb_printf(" ======================= <%s>\n",
 +                         ar->stack.id);
 +              ++*count;
 +      }
 +      if (probable_pt_regs)
 +              *suppress = 0;
 +}
 +
 +/*
 + * kdba_bt_stack
 + *
 + * Inputs:
 + *    addr    Address provided to 'bt' command, if any.
 + *    argcount
 + *    p       Pointer to task for 'btp' command.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    Ultimately all the bt* commands come through this routine.  If
 + *    old_style is 0 then it uses the basic block analysis to get an accurate
 + *    backtrace with arguments, otherwise it falls back to the old method of
 + *    printing anything on the stack that looks like a kernel address.
 + *
 + *    Allowing for the stack data pushed by the hardware is tricky.  We
 + *    deduce the presence of hardware pushed data by looking for interrupt
 + *    handlers, either by name or by the code that they contain.  This
 + *    information must be applied to the next function up the stack, because
 + *    the hardware data is above the saved rip for the interrupted (next)
 + *    function.
 + *
 + *    To make things worse, the amount of data pushed is arch specific and
 + *    may depend on the rsp for the next function, not the current function.
 + *    The number of bytes pushed by hardware cannot be calculated until we
 + *    are actually processing the stack for the interrupted function and have
 + *    its rsp.
 + *
 + *    It is also possible for an interrupt to occur in user space and for the
 + *    interrupt handler to also be interrupted.  Check the code selector
 + *    whenever the previous function is an interrupt handler and stop
 + *    backtracing if the interrupt was not in kernel space.
 + */
 +
 +static int
 +kdba_bt_stack(kdb_machreg_t addr, int argcount, const struct task_struct *p,
 +             int old_style)
 +{
 +      struct kdb_activation_record ar;
 +      kdb_machreg_t rip = 0, rsp = 0, prev_rsp, cs;
 +      kdb_symtab_t symtab;
 +      int rip_at_rsp = 0, count = 0, btsp = 0, suppress,
 +          interrupt_handler = 0, prev_interrupt_handler = 0, hardware_pushed,
 +          prev_noret = 0;
 +      struct pt_regs *regs = NULL;
 +
 +      kdbgetintenv("BTSP", &btsp);
 +      suppress = !btsp;
 +      memset(&ar, 0, sizeof(ar));
 +      if (old_style)
 +              kdb_printf("Using old style backtrace, unreliable and without argument information\n");
 +
 +      /*
 +       * The caller may have supplied an address at which the stack traceback
 +       * operation should begin.  This address is assumed by this code to
 +       * point to a return address on the stack to be traced back.
 +       *
 +       * Warning: type in the wrong address and you will get garbage in the
 +       * backtrace.
 +       */
 +      if (addr) {
 +              rsp = addr;
 +              kdb_getword(&rip, rsp, sizeof(rip));
 +              rip_at_rsp = 1;
 +              suppress = 0;
 +              kdba_get_stack_info(rsp, -1, &ar, NULL);
 +      } else {
 +              if (task_curr(p)) {
 +                      struct kdb_running_process *krp =
 +                          kdb_running_process + task_cpu(p);
 +                      kdb_machreg_t cs;
 +                      regs = krp->regs;
 +                      if (krp->seqno &&
 +                          krp->p == p &&
 +                          krp->seqno >= kdb_seqno - 1 &&
 +                          !KDB_NULL_REGS(regs)) {
 +                              /* valid saved state, continue processing */
 +                      } else {
 +                              kdb_printf
 +                                  ("Process did not save state, cannot backtrace\n");
 +                              kdb_ps1(p);
 +                              return 0;
 +                      }
 +                      kdba_getregcontents(XCS, regs, &cs);
 +                      if ((cs & 0xffff) != __KERNEL_CS) {
 +                              kdb_printf("Stack is not in kernel space, backtrace not available\n");
 +                              return 0;
 +                      }
 +                      rip = krp->arch.ARCH_RIP;
 +                      rsp = krp->arch.ARCH_RSP;
 +                      kdba_get_stack_info(rsp, kdb_process_cpu(p), &ar, p);
 +              } else {
 +                      /* Not on cpu, assume blocked.  Blocked tasks do not
 +                       * have pt_regs.  p->thread contains some data, alas
 +                       * what it contains differs between i386 and x86_64.
 +                       */
 +                      rip = kdba_bt_stack_rip(p);
 +                      rsp = p->thread.sp;
 +                      suppress = 0;
 +                      kdba_get_stack_info(rsp, -1, &ar, p);
 +              }
 +      }
 +      if (!ar.stack.physical_start) {
 +              kdb_printf(RSP "=0x%lx is not in a valid kernel stack, backtrace not available\n",
 +                         rsp);
 +              return 0;
 +      }
 +      memset(&bb_actual, 0, sizeof(bb_actual));
 +      bb_actual_set_value(BBRG_RSP, rsp);
 +      bb_actual_set_valid(BBRG_RSP, 1);
 +
 +      kdb_printf(RSP "%*s" RIP "%*sFunction (args)\n",
 +                 2*KDB_WORD_SIZE, " ",
 +                 2*KDB_WORD_SIZE, " ");
 +      if (ar.stack.next && !suppress)
 +              kdb_printf(" ======================= <%s>\n",
 +                         ar.stack.id);
 +
 +      bb_cleanup();
 +      /* Run through all the stacks */
 +      while (ar.stack.physical_start) {
 +              if (rip_at_rsp) {
 +                      rip = *(kdb_machreg_t *)rsp;
 +                      /* I wish that gcc was fixed to include a nop
 +                       * instruction after ATTRIB_NORET functions.  The lack
 +                       * of a nop means that the return address points to the
 +                       * start of next function, so fudge it to point to one
 +                       * byte previous.
 +                       *
 +                       * No, we cannot just decrement all rip values.
 +                       * Sometimes an rip legally points to the start of a
 +                       * function, e.g. interrupted code or hand crafted
 +                       * assembler.
 +                       */
 +                      if (prev_noret) {
 +                              kdbnearsym(rip, &symtab);
 +                              if (rip == symtab.sym_start) {
 +                                      --rip;
 +                                      if (KDB_DEBUG(ARA))
 +                                              kdb_printf("\tprev_noret, " RIP
 +                                                         "=0x%lx\n", rip);
 +                              }
 +                      }
 +              }
 +              kdbnearsym(rip, &symtab);
 +              if (old_style) {
 +                      if (__kernel_text_address(rip) && !suppress) {
 +                              bt_print_one(rip, rsp, &ar, &symtab, 0);
 +                              ++count;
 +                      }
 +                      if (rsp == (unsigned long)regs) {
 +                              if (ar.stack.next && suppress)
 +                                      kdb_printf(" ======================= <%s>\n",
 +                                                 ar.stack.id);
 +                              ++count;
 +                              suppress = 0;
 +                      }
 +                      rsp += sizeof(rip);
 +                      rip_at_rsp = 1;
 +                      if (rsp >= ar.stack.logical_end) {
 +                              if (!ar.stack.next)
 +                                      break;
 +                              kdba_bt_new_stack(&ar, &rsp, &count, &suppress);
 +                              rip_at_rsp = 0;
 +                              continue;
 +                      }
 +              } else {
 +                      /* Start each analysis with no dynamic data from the
 +                       * previous kdb_bb() run.
 +                       */
 +                      bb_cleanup();
 +                      kdb_bb(rip);
 +                      if (bb_giveup)
 +                              break;
 +                      prev_interrupt_handler = interrupt_handler;
 +                      interrupt_handler = bb_interrupt_handler(rip);
 +                      prev_rsp = rsp;
 +                      if (rip_at_rsp) {
 +                              if (prev_interrupt_handler) {
 +                                      cs = *((kdb_machreg_t *)rsp + 1) & 0xffff;
 +                                      hardware_pushed =
 +                                              bb_hardware_pushed_arch(rsp, &ar);
 +                              } else {
 +                                      cs = __KERNEL_CS;
 +                                      hardware_pushed = 0;
 +                              }
 +                              rsp += sizeof(rip) + hardware_pushed;
 +                              if (KDB_DEBUG(ARA))
 +                                      kdb_printf("%s: " RSP " "
 +                                                 kdb_machreg_fmt0
 +                                                 " -> " kdb_machreg_fmt0
 +                                                 " hardware_pushed %d"
 +                                                 " prev_interrupt_handler %d"
 +                                                 " cs 0x%lx\n",
 +                                                 __func__,
 +                                                 prev_rsp,
 +                                                 rsp,
 +                                                 hardware_pushed,
 +                                                 prev_interrupt_handler,
 +                                                 cs);
 +                              if (rsp >= ar.stack.logical_end &&
 +                                  ar.stack.next) {
 +                                      kdba_bt_new_stack(&ar, &rsp, &count,
 +                                                         &suppress);
 +                                      rip_at_rsp = 0;
 +                                      continue;
 +                              }
 +                              bb_actual_set_value(BBRG_RSP, rsp);
 +                      } else {
 +                              cs = __KERNEL_CS;
 +                      }
 +                      rip_at_rsp = 1;
 +                      bb_actual_rollback(&ar);
 +                      if (bb_giveup)
 +                              break;
 +                      if (bb_actual_value(BBRG_RSP) < rsp) {
 +                              kdb_printf("%s: " RSP " is going backwards, "
 +                                         kdb_machreg_fmt0 " -> "
 +                                         kdb_machreg_fmt0 "\n",
 +                                         __func__,
 +                                         rsp,
 +                                         bb_actual_value(BBRG_RSP));
 +                              bb_giveup = 1;
 +                              break;
 +                      }
 +                      bb_arguments(&ar);
 +                      if (!suppress) {
 +                              bt_print_one(rip, prev_rsp, &ar, &symtab, argcount);
 +                              ++count;
 +                      }
 +                      /* Functions that terminate the backtrace */
 +                      if (strcmp(bb_func_name, "cpu_idle") == 0 ||
 +                          strcmp(bb_func_name, "child_rip") == 0)
 +                              break;
 +                      if (rsp >= ar.stack.logical_end &&
 +                          !ar.stack.next)
 +                              break;
 +                      if (rsp <= (unsigned long)regs &&
 +                          bb_actual_value(BBRG_RSP) > (unsigned long)regs) {
 +                              if (ar.stack.next && suppress)
 +                                      kdb_printf(" ======================= <%s>\n",
 +                                                 ar.stack.id);
 +                              ++count;
 +                              suppress = 0;
 +                      }
 +                      if (cs != __KERNEL_CS) {
 +                              kdb_printf("Reached user space\n");
 +                              break;
 +                      }
 +                      rsp = bb_actual_value(BBRG_RSP);
 +              }
 +              prev_noret = bb_noret(bb_func_name);
 +              if (count > 200)
 +                      break;
 +      }
 +      if (bb_giveup)
 +              return 1;
 +      bb_cleanup();
 +      kdbnearsym_cleanup();
 +
 +      if (count > 200) {
 +              kdb_printf("bt truncated, count limit reached\n");
 +              return 1;
 +      } else if (suppress) {
 +		kdb_printf("bt did not find pt_regs - no trace produced."
 +			   "  Suggest 'set BTSP 1'\n");
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
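 +/*
 + * Worked example (illustrative only, not from the patch) of the
 + * prev_noret adjustment above.  Assume a hypothetical code layout in
 + * which a noreturn function is the last call in foo():
 + *
 + *	0x0100 foo:	...
 + *	0x01f8		call panic	; pushes 0x0200 as return address
 + *	0x0200 bar:	...
 + *
 + * The saved return address 0x0200 is also the start of bar(), so
 + * kdbnearsym(0x0200) would blame bar; kdbnearsym(0x01ff) lands inside
 + * foo and attributes the frame to the real caller.
 + */
 +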
 +/*
 + * kdba_bt_address
 + *
 + *    Do a backtrace starting at a specified stack address.  Use this if the
 + *    heuristics get the stack decode wrong.
 + *
 + * Inputs:
 + *    addr    Address provided to 'bt' command.
 + *    argcount
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    mds %rsp comes in handy when examining the stack to do a manual
 + *    traceback.
 + */
 +
 +int kdba_bt_address(kdb_machreg_t addr, int argcount)
 +{
 +      int ret;
 +      kdba_id_init(&kdb_di);                  /* kdb_bb needs this done once */
 +      ret = kdba_bt_stack(addr, argcount, NULL, 0);
 +      if (ret == 1)
 +              ret = kdba_bt_stack(addr, argcount, NULL, 1);
 +      return ret;
 +}
 +
 +/*
 + * kdba_bt_process
 + *
 + *    Do a backtrace for a specified process.
 + *
 + * Inputs:
 + *    p       Struct task pointer extracted by 'bt' command.
 + *    argcount
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + */
 +
 +int kdba_bt_process(const struct task_struct *p, int argcount)
 +{
 +      int ret;
 +      kdba_id_init(&kdb_di);                  /* kdb_bb needs this done once */
 +      ret = kdba_bt_stack(0, argcount, p, 0);
 +      if (ret == 1)
 +              ret = kdba_bt_stack(0, argcount, p, 1);
 +      return ret;
 +}
 +
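 +#if 0	/* Illustrative only -- a minimal sketch, not part of the patch. */
 +/*
 + * How a 'bt' command handler might drive the two entry points above.
 + * KDB_ARGCOUNT is assumed here as the usual kdb diagnostic for bad
 + * arguments; both helpers already retry internally with the old-style
 + * stack scan when the basic-block analyser gives up.
 + */
 +static int example_bt_dispatch(const struct task_struct *p,
 +			       kdb_machreg_t addr, int argcount)
 +{
 +	if (p)
 +		return kdba_bt_process(p, argcount);	/* bt on a task */
 +	if (addr)
 +		return kdba_bt_address(addr, argcount);	/* bt on an address */
 +	return KDB_ARGCOUNT;
 +}
 +#endif
 +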
 +static int __init kdba_bt_x86_init(void)
 +{
 +      int i, c, cp = -1;
 +      struct bb_name_state *r;
 +
 +      kdb_register_repeat("bb1", kdb_bb1, "<vaddr>",  "Analyse one basic block", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("bb_all", kdb_bb_all, "",   "Backtrace check on all built in functions", 0, KDB_REPEAT_NONE);
 +
 +      /* Split the opcode usage table by the first letter of each set of
 +       * opcodes, for faster mapping of opcode to its operand usage.
 +       */
 +      for (i = 0; i < ARRAY_SIZE(bb_opcode_usage_all); ++i) {
 +              c = bb_opcode_usage_all[i].opcode[0] - 'a';
 +              if (c != cp) {
 +                      cp = c;
 +                      bb_opcode_usage[c].opcode = bb_opcode_usage_all + i;
 +              }
 +              ++bb_opcode_usage[c].size;
 +      }
 +
 +      bb_common_interrupt = kallsyms_lookup_name("common_interrupt");
 +      bb_error_entry = kallsyms_lookup_name("error_entry");
 +      bb_ret_from_intr = kallsyms_lookup_name("ret_from_intr");
 +      bb_thread_return = kallsyms_lookup_name("thread_return");
 +      bb_sync_regs = kallsyms_lookup_name("sync_regs");
 +      bb_save_v86_state = kallsyms_lookup_name("save_v86_state");
 +      bb__sched_text_start = kallsyms_lookup_name("__sched_text_start");
 +      bb__sched_text_end = kallsyms_lookup_name("__sched_text_end");
 +      bb_save_args = kallsyms_lookup_name("save_args");
 +      bb_save_rest = kallsyms_lookup_name("save_rest");
 +      bb_save_paranoid = kallsyms_lookup_name("save_paranoid");
 +      for (i = 0, r = bb_special_cases;
 +           i < ARRAY_SIZE(bb_special_cases);
 +           ++i, ++r) {
 +              r->address = kallsyms_lookup_name(r->name);
 +      }
 +
 +#ifdef        CONFIG_4KSTACKS
 +      kdba_hardirq_ctx = (struct thread_info **)kallsyms_lookup_name("hardirq_ctx");
 +      kdba_softirq_ctx = (struct thread_info **)kallsyms_lookup_name("softirq_ctx");
 +#endif        /* CONFIG_4KSTACKS */
 +
 +      return 0;
 +}
 +
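 +#if 0	/* Illustrative only -- a minimal sketch, not part of the patch. */
 +/*
 + * The lookup side of the first-letter split built in kdba_bt_x86_init()
 + * above.  The field names (.opcode, .size) are taken from the
 + * initialisation loop; the entry type is referenced via typeof() since
 + * its definition is not shown in this hunk.
 + */
 +static const typeof(*bb_opcode_usage_all) *
 +example_find_opcode_usage(const char *opcode)
 +{
 +	int i, c = opcode[0] - 'a';
 +	const typeof(*bb_opcode_usage_all) *u;
 +
 +	if (c < 0 || c >= 26)			/* split covers 'a'..'z' only */
 +		return NULL;
 +	u = bb_opcode_usage[c].opcode;		/* first entry for this letter */
 +	for (i = 0; i < bb_opcode_usage[c].size; ++i, ++u)
 +		if (strcmp(u->opcode, opcode) == 0)
 +			return u;
 +	return NULL;
 +}
 +#endif
 +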
 +static void __exit kdba_bt_x86_exit(void)
 +{
 +      kdb_unregister("bb1");
 +      kdb_unregister("bb_all");
 +}
 +
 +module_init(kdba_bt_x86_init)
 +module_exit(kdba_bt_x86_exit)
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -608,6 -607,16 +607,18 @@@ static int __init setup_elfcorehdr(cha
  early_param("elfcorehdr", setup_elfcorehdr);
  #endif
  
+ static __init void reserve_ibft_region(void)
+ {
+       unsigned long addr, size = 0;
+       addr = find_ibft_region(&size);
++#ifndef CONFIG_XEN
+       if (size)
+               reserve_early_overlap_ok(addr, addr + size, "ibft");
++#endif
+ }
  #ifdef CONFIG_X86_RESERVE_LOW_64K
  static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
  {
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -20,7 -20,7 +20,8 @@@
  #include <linux/device.h>
  #include <linux/mutex.h>
  #include <linux/smp_lock.h>
+ #include <linux/gfp.h>
 +#include <linux/vmalloc.h>
  
  #include <asm/uaccess.h>
  
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1,6 -1,6 +1,6 @@@
  config DRM_VMWGFX
        tristate "DRM driver for VMware Virtual GPU"
-       depends on DRM && PCI && !XEN
 -      depends on DRM && PCI && FB
++      depends on DRM && PCI && FB && !XEN
        select FB_DEFERRED_IO
        select FB_CFB_FILLRECT
        select FB_CFB_COPYAREA
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 4e3731c,0000000..1b15859
mode 100644,000000..100644
--- /dev/null
@@@ -1,301 -1,0 +1,302 @@@
 +/*
 + * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
 + *
 + * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
 + *
 + * Device-mapper memory object handling:
 + *
 + * o allocate/free total_pages in a per-client page pool.
 + *
 + * o allocate/free memory objects with chunks (1..n) of
 + *   pages_per_chunk pages hanging off.
 + *
 + * This file is released under the GPL.
 + */
 +
 +#define       DM_MEM_CACHE_VERSION    "0.2"
 +
 +#include "dm.h"
 +#include "dm-memcache.h"
 +#include <linux/dm-io.h>
++#include <linux/slab.h>
 +
 +struct dm_mem_cache_client {
 +      spinlock_t lock;
 +      mempool_t *objs_pool;
 +      struct page_list *free_list;
 +      unsigned objects;
 +      unsigned chunks;
 +      unsigned pages_per_chunk;
 +      unsigned free_pages;
 +      unsigned total_pages;
 +};
 +
 +/*
 + * Free pages and page_list elements of client.
 + */
 +static void free_cache_pages(struct page_list *list)
 +{
 +      while (list) {
 +              struct page_list *pl = list;
 +
 +              list = pl->next;
 +              BUG_ON(!pl->page);
 +              __free_page(pl->page);
 +              kfree(pl);
 +      }
 +}
 +
 +/*
 + * Alloc number of pages and page_list elements as required by client.
 + */
 +static struct page_list *alloc_cache_pages(unsigned pages)
 +{
 +      struct page_list *pl, *ret = NULL;
 +      struct page *page;
 +
 +      while (pages--) {
 +              page = alloc_page(GFP_NOIO);
 +              if (!page)
 +                      goto err;
 +
 +              pl = kmalloc(sizeof(*pl), GFP_NOIO);
 +              if (!pl) {
 +                      __free_page(page);
 +                      goto err;
 +              }
 +
 +              pl->page = page;
 +              pl->next = ret;
 +              ret = pl;
 +      }
 +
 +      return ret;
 +
 +err:
 +      free_cache_pages(ret);
 +      return NULL;
 +}
 +
 +/*
 + * Allocate page_list elements from the pool to chunks of the memory object.
 + */
 +static void alloc_chunks(struct dm_mem_cache_client *cl,
 +                       struct dm_mem_cache_object *obj)
 +{
 +      unsigned chunks = cl->chunks;
 +      unsigned long flags;
 +
 +	/* local_irq_save() already disables local interrupts. */
 +	local_irq_save(flags);
 +      while (chunks--) {
 +              unsigned p = cl->pages_per_chunk;
 +
 +              obj[chunks].pl = NULL;
 +
 +              while (p--) {
 +                      struct page_list *pl;
 +
 +                      /* Take next element from free list */
 +                      spin_lock(&cl->lock);
 +                      pl = cl->free_list;
 +                      BUG_ON(!pl);
 +                      cl->free_list = pl->next;
 +                      spin_unlock(&cl->lock);
 +
 +                      pl->next = obj[chunks].pl;
 +                      obj[chunks].pl = pl;
 +              }
 +      }
 +
 +      local_irq_restore(flags);
 +}
 +
 +/*
 + * Free page_list elements putting them back onto free list
 + */
 +static void free_chunks(struct dm_mem_cache_client *cl,
 +                      struct dm_mem_cache_object *obj)
 +{
 +      unsigned chunks = cl->chunks;
 +      unsigned long flags;
 +      struct page_list *next, *pl;
 +
 +	/* local_irq_save() already disables local interrupts. */
 +	local_irq_save(flags);
 +      while (chunks--) {
 +              for (pl = obj[chunks].pl; pl; pl = next) {
 +                      next = pl->next;
 +
 +                      spin_lock(&cl->lock);
 +                      pl->next = cl->free_list;
 +                      cl->free_list = pl;
 +                      cl->free_pages++;
 +                      spin_unlock(&cl->lock);
 +              }
 +      }
 +
 +      local_irq_restore(flags);
 +}
 +
 +/*
 + * Create/destroy dm memory cache client resources.
 + */
 +struct dm_mem_cache_client *
 +dm_mem_cache_client_create(unsigned objects, unsigned chunks,
 +                         unsigned pages_per_chunk)
 +{
 +      unsigned total_pages = objects * chunks * pages_per_chunk;
 +      struct dm_mem_cache_client *client;
 +
 +      BUG_ON(!total_pages);
 +      client = kzalloc(sizeof(*client), GFP_KERNEL);
 +      if (!client)
 +              return ERR_PTR(-ENOMEM);
 +
 +      client->objs_pool = mempool_create_kmalloc_pool(objects,
 +                              chunks * sizeof(struct dm_mem_cache_object));
 +      if (!client->objs_pool)
 +              goto err;
 +
 +      client->free_list = alloc_cache_pages(total_pages);
 +      if (!client->free_list)
 +              goto err1;
 +
 +      spin_lock_init(&client->lock);
 +      client->objects = objects;
 +      client->chunks = chunks;
 +      client->pages_per_chunk = pages_per_chunk;
 +      client->free_pages = client->total_pages = total_pages;
 +      return client;
 +
 +err1:
 +      mempool_destroy(client->objs_pool);
 +err:
 +      kfree(client);
 +      return ERR_PTR(-ENOMEM);
 +}
 +EXPORT_SYMBOL(dm_mem_cache_client_create);
 +
 +void dm_mem_cache_client_destroy(struct dm_mem_cache_client *cl)
 +{
 +      BUG_ON(cl->free_pages != cl->total_pages);
 +      free_cache_pages(cl->free_list);
 +      mempool_destroy(cl->objs_pool);
 +      kfree(cl);
 +}
 +EXPORT_SYMBOL(dm_mem_cache_client_destroy);
 +
 +/*
 + * Grow a client's cache by a number of pages.
 + *
 + * Don't call from interrupt context!
 + */
 +int dm_mem_cache_grow(struct dm_mem_cache_client *cl, unsigned objects)
 +{
 +      unsigned pages = objects * cl->chunks * cl->pages_per_chunk;
 +      struct page_list *pl, *last;
 +
 +      BUG_ON(!pages);
 +      pl = alloc_cache_pages(pages);
 +      if (!pl)
 +              return -ENOMEM;
 +
 +      last = pl;
 +      while (last->next)
 +              last = last->next;
 +
 +      spin_lock_irq(&cl->lock);
 +      last->next = cl->free_list;
 +      cl->free_list = pl;
 +      cl->free_pages += pages;
 +      cl->total_pages += pages;
 +	cl->objects += objects;
 +      spin_unlock_irq(&cl->lock);
 +
 +      mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
 +      return 0;
 +}
 +EXPORT_SYMBOL(dm_mem_cache_grow);
 +
 +/* Shrink a client's cache by a number of pages. */
 +int dm_mem_cache_shrink(struct dm_mem_cache_client *cl, unsigned objects)
 +{
 +      int r;
 +      unsigned pages = objects * cl->chunks * cl->pages_per_chunk, p = pages;
 +      unsigned long flags;
 +      struct page_list *last = NULL, *pl, *pos;
 +
 +      BUG_ON(!pages);
 +
 +      spin_lock_irqsave(&cl->lock, flags);
 +      pl = pos = cl->free_list;
 +	/* Walk off 'pages' entries; stop early if the list is too short. */
 +	while (p-- && pos && pos->next) {
 +              last = pos;
 +              pos = pos->next;
 +      }
 +
 +	if (++p)
 +		/* Loop ended early: fewer than 'pages' entries on free list. */
 +		r = -ENOMEM;
 +      else {
 +              r = 0;
 +              cl->free_list = pos;
 +              cl->free_pages -= pages;
 +              cl->total_pages -= pages;
 +		cl->objects -= objects;
 +              last->next = NULL;
 +      }
 +      spin_unlock_irqrestore(&cl->lock, flags);
 +
 +      if (!r) {
 +              free_cache_pages(pl);
 +              mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
 +      }
 +
 +      return r;
 +}
 +EXPORT_SYMBOL(dm_mem_cache_shrink);
 +
 +/*
 + * Allocate/free a memory object
 + *
 + * Can be called from interrupt context
 + */
 +struct dm_mem_cache_object *dm_mem_cache_alloc(struct dm_mem_cache_client *cl)
 +{
 +      int r = 0;
 +      unsigned pages = cl->chunks * cl->pages_per_chunk;
 +      unsigned long flags;
 +      struct dm_mem_cache_object *obj;
 +
 +      obj = mempool_alloc(cl->objs_pool, GFP_NOIO);
 +      if (!obj)
 +              return ERR_PTR(-ENOMEM);
 +
 +      spin_lock_irqsave(&cl->lock, flags);
 +      if (pages > cl->free_pages)
 +              r = -ENOMEM;
 +      else
 +              cl->free_pages -= pages;
 +      spin_unlock_irqrestore(&cl->lock, flags);
 +
 +      if (r) {
 +              mempool_free(obj, cl->objs_pool);
 +              return ERR_PTR(r);
 +      }
 +
 +      alloc_chunks(cl, obj);
 +      return obj;
 +}
 +EXPORT_SYMBOL(dm_mem_cache_alloc);
 +
 +void dm_mem_cache_free(struct dm_mem_cache_client *cl,
 +                     struct dm_mem_cache_object *obj)
 +{
 +      free_chunks(cl, obj);
 +      mempool_free(obj, cl->objs_pool);
 +}
 +EXPORT_SYMBOL(dm_mem_cache_free);
 +
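 +#if 0	/* Illustrative only -- a minimal usage sketch, not part of the driver. */
 +/*
 + * Typical client usage of the API above; the numbers are arbitrary and
 + * error handling is trimmed.  IS_ERR()/PTR_ERR() come via linux/err.h.
 + */
 +static int example_memcache_usage(void)
 +{
 +	struct dm_mem_cache_client *cl;
 +	struct dm_mem_cache_object *obj;
 +
 +	/* Pool of 64 objects, each with 3 chunks of 2 pages. */
 +	cl = dm_mem_cache_client_create(64, 3, 2);
 +	if (IS_ERR(cl))
 +		return PTR_ERR(cl);
 +
 +	obj = dm_mem_cache_alloc(cl);		/* safe in interrupt context */
 +	if (!IS_ERR(obj))
 +		dm_mem_cache_free(cl, obj);
 +
 +	dm_mem_cache_client_destroy(cl);	/* all pages must be back */
 +	return 0;
 +}
 +#endif
 +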
 +MODULE_DESCRIPTION(DM_NAME " dm memory cache");
 +MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
 +MODULE_LICENSE("GPL");
index eb5ae0a,0000000..fdf7ce4
mode 100644,000000..100644
--- /dev/null
@@@ -1,4522 -1,0 +1,4523 @@@
 +/*
 + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
 + *
 + * Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.com>
 + *
 + * This file is released under the GPL.
 + *
 + *
 + * Linux 2.6 Device Mapper RAID4 and RAID5 target.
 + *
 + * Supports:
 + *    o RAID4 with dedicated and selectable parity device
 + *    o RAID5 with rotating parity (left+right, symmetric+asymmetric)
 + *    o run time optimization of xor algorithm used to calculate parity
 + *
 + *
 + * Thanks to MD for:
 + *    o the raid address calculation algorithm
 + *    o the base of the biovec <-> page list copier.
 + *
 + *
 + * Uses region hash to keep track of how many writes are in flight to
 + * regions in order to use dirty log to keep state of regions to recover:
 + *
 + *    o clean regions (those which are synchronized
 + *    and don't have write io in flight)
 + *    o dirty regions (those with write io in flight)
 + *
 + *
 + * On startup, any dirty regions are migrated to the 'nosync' state
 + * and are subject to recovery by the daemon.
 + *
 + * See raid_ctr() for table definition.
 + *
 + *
 + * FIXME:
 + * o add virtual interface for locking
 + * o remove instrumentation (REMOVEME:)
 + *
 + */
 +
 +static const char *version = "v0.2431";
 +
 +#include "dm.h"
 +#include "dm-memcache.h"
 +#include "dm-message.h"
 +#include "dm-raid45.h"
 +
 +#include <linux/kernel.h>
 +#include <linux/vmalloc.h>
++#include <linux/slab.h>
 +
 +#include <linux/dm-io.h>
 +#include <linux/dm-dirty-log.h>
 +#include <linux/dm-region-hash.h>
 +
 +/* # of parallel recovered regions */
 +/* FIXME: cope with multiple recovery stripes in raid_set struct. */
 +#define MAX_RECOVER   1 /* needs to be 1! */
 +
 +/*
 + * Configurable parameters
 + */
 +#define       INLINE
 +
 +/* Default # of stripes if not set in constructor. */
 +#define       STRIPES                 64
 +
 +/* Minimum/maximum # of selectable stripes. */
 +#define       STRIPES_MIN             8
 +#define       STRIPES_MAX             16384
 +
 +/* Default chunk size in sectors if not set in constructor. */
 +#define       CHUNK_SIZE              64
 +
 +/* Default io size in sectors if not set in constructor. */
 +#define       IO_SIZE_MIN             SECTORS_PER_PAGE
 +#define       IO_SIZE                 IO_SIZE_MIN
 +
 +/* Maximum settable chunk size in sectors. */
 +#define       CHUNK_SIZE_MAX          16384
 +
 +/* Recover io size default in sectors. */
 +#define       RECOVER_IO_SIZE_MIN     64
 +#define       RECOVER_IO_SIZE         256
 +
 +/* Default percentage recover io bandwidth. */
 +#define       BANDWIDTH               10
 +#define       BANDWIDTH_MIN           1
 +#define       BANDWIDTH_MAX           100
 +/*
 + * END Configurable parameters
 + */
 +
 +#define       TARGET  "dm-raid45"
 +#define       DAEMON  "kraid45d"
 +#define       DM_MSG_PREFIX   TARGET
 +
 +#define       SECTORS_PER_PAGE        (PAGE_SIZE >> SECTOR_SHIFT)
 +
 +/* Amount/size for __xor(). */
 +#define       SECTORS_PER_XOR SECTORS_PER_PAGE
 +#define       XOR_SIZE        PAGE_SIZE
 +
 +/* Derive raid_set from stripe_cache pointer. */
 +#define       RS(x)   container_of(x, struct raid_set, sc)
 +
 +/* Check value in range (macro args parenthesized for safety). */
 +#define	range_ok(i, min, max)	((i) >= (min) && (i) <= (max))
 +
 +/* Page reference. */
 +#define PAGE(stripe, p)       ((stripe)->obj[p].pl->page)
 +
 +/* Bio list reference. */
 +#define       BL(stripe, p, rw)       (stripe->ss[p].bl + rw)
 +
 +/* Page list reference. */
 +#define       PL(stripe, p)           (stripe->obj[p].pl)
 +
 +/* Check argument is power of 2. */
 +#define POWER_OF_2(a) (!((a) & ((a) - 1)))
 +
 +/* Factor out to dm-bio-list.h */
 +static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
 +{
 +      bio->bi_next = bl->head;
 +      bl->head = bio;
 +
 +      if (!bl->tail)
 +              bl->tail = bio;
 +}
 +
 +/* Factor out to dm.h */
 +#define TI_ERR_RET(str, ret) \
 +	do { ti->error = DM_MSG_PREFIX ": " str; return ret; } while (0)
 +#define TI_ERR(str)     TI_ERR_RET(str, -EINVAL)
 +
 +/*-----------------------------------------------------------------
 + * Stripe cache
 + *
 + * Cache for all reads and writes to raid sets (operational or degraded)
 + *
 + * We need to run all data to and from a RAID set through this cache,
 + * because parity chunks need to get calculated from data chunks
 + * or, in the degraded/resynchronization case, missing chunks need
 + * to be reconstructed using the other chunks of the stripe.
 + *---------------------------------------------------------------*/
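 +/*
 + * Illustrative sketch (not part of the driver): the xor arithmetic the
 + * paragraph above relies on.  With data chunks d0..d2 and parity
 + * p = d0 ^ d1 ^ d2, any single missing chunk is recovered by xor-ing
 + * the survivors:
 + */
 +static inline unsigned long example_reconstruct_d1(unsigned long p,
 +						   unsigned long d0,
 +						   unsigned long d2)
 +{
 +	return p ^ d0 ^ d2;	/* == d1, because x ^ x == 0 */
 +}
 +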
 +/* Kmem cache # counter, used to generate unique slab cache names. */
 +static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1);
 +
 +/* A stripe set (holds bios hanging off). */
 +struct stripe_set {
 +      struct stripe *stripe;  /* Backpointer to stripe for endio(). */
 +      struct bio_list bl[3]; /* Reads, writes, and writes merged. */
 +#define       WRITE_MERGED    2
 +};
 +
 +#if READ != 0 || WRITE != 1
 +#error dm-raid45: READ/WRITE != 0/1 used as index!!!
 +#endif
 +
 +/*
 + * Stripe linked list indexes. Keep order, because the stripe
 + * and the stripe cache rely on the first 3!
 + */
 +enum list_types {
 +      LIST_IO = 0,    /* Stripes with io pending. */
 +      LIST_ENDIO,     /* Stripes to endio. */
 +      LIST_LRU,       /* Least recently used stripes. */
 +      LIST_HASH,      /* Hashed stripes. */
 +      LIST_RECOVER = LIST_HASH,       /* For recovery type stripes only. */
 +      NR_LISTS,       /* To size array in struct stripe. */
 +};
 +
 +enum lock_types {
 +      LOCK_ENDIO = 0, /* Protect endio list. */
 +      LOCK_LRU,       /* Protect lru list. */
 +      NR_LOCKS,       /* To size array in struct stripe_cache. */
 +};
 +
 +/* A stripe: the io object to handle all reads and writes to a RAID set. */
 +struct stripe {
 +      struct stripe_cache *sc;        /* Backpointer to stripe cache. */
 +
 +      sector_t key;           /* Hash key. */
 +      region_t region;        /* Region stripe is mapped to. */
 +
 +      /* Reference count. */
 +      atomic_t cnt;
 +
 +      struct {
 +              unsigned long flags;    /* flags (see below). */
 +
 +              /*
 +               * Pending ios in flight:
 +               *
 +               * used as a 'lock' to control move of stripe to endio list
 +               */
 +              atomic_t pending;       /* Pending ios in flight. */
 +
 +              /* Sectors to read and write for multi page stripe sets. */
 +              unsigned size;
 +      } io;
 +
 +      /* Lock on stripe (for clustering). */
 +      void *lock;
 +
 +      /*
 +       * 4 linked lists:
 +       *   o io list to flush io
 +       *   o endio list
 +       *   o LRU list to put stripes w/o reference count on
 +       *   o stripe cache hash
 +       */
 +      struct list_head lists[NR_LISTS];
 +
 +      struct {
 +              unsigned short parity;  /* Parity chunk index. */
 +              short recover;          /* Recovery chunk index. */
 +      } idx;
 +
 +      /* This sets memory cache object (dm-mem-cache). */
 +      struct dm_mem_cache_object *obj;
 +
 +      /* Array of stripe sets (dynamically allocated). */
 +      struct stripe_set ss[0];
 +};
 +
 +/* States stripes can be in (flags field). */
 +enum stripe_states {
 +      STRIPE_ACTIVE,          /* Active io on stripe. */
 +      STRIPE_ERROR,           /* io error on stripe. */
 +      STRIPE_MERGED,          /* Writes got merged. */
 +      STRIPE_READ,            /* Read. */
 +      STRIPE_RBW,             /* Read-before-write. */
 +      STRIPE_RECONSTRUCT,     /* reconstruct of a missing chunk required. */
 +      STRIPE_RECOVER,         /* Stripe used for RAID set recovery. */
 +};
 +
 +/* ... and macros to access them. */
 +#define       BITOPS(name, what, var, flag) \
 +static inline int TestClear ## name ## what(struct var *v) \
 +{ return test_and_clear_bit(flag, &v->io.flags); } \
 +static inline int TestSet ## name ## what(struct var *v) \
 +{ return test_and_set_bit(flag, &v->io.flags); } \
 +static inline void Clear ## name ## what(struct var *v) \
 +{ clear_bit(flag, &v->io.flags); } \
 +static inline void Set ## name ## what(struct var *v) \
 +{ set_bit(flag, &v->io.flags); } \
 +static inline int name ## what(struct var *v) \
 +{ return test_bit(flag, &v->io.flags); }
 +
 +
 +BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE)
 +BITOPS(Stripe, Merged, stripe, STRIPE_MERGED)
 +BITOPS(Stripe, Error, stripe, STRIPE_ERROR)
 +BITOPS(Stripe, Read, stripe, STRIPE_READ)
 +BITOPS(Stripe, RBW, stripe, STRIPE_RBW)
 +BITOPS(Stripe, Reconstruct, stripe, STRIPE_RECONSTRUCT)
 +BITOPS(Stripe, Recover, stripe, STRIPE_RECOVER)
 +
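 +/*
 + * For reference (illustrative, mechanical expansion of the macro above):
 + * BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE) generates:
 + *
 + *	static inline int TestClearStripeActive(struct stripe *v)
 + *	{ return test_and_clear_bit(STRIPE_ACTIVE, &v->io.flags); }
 + *	static inline int TestSetStripeActive(struct stripe *v)
 + *	{ return test_and_set_bit(STRIPE_ACTIVE, &v->io.flags); }
 + *	static inline void ClearStripeActive(struct stripe *v)
 + *	{ clear_bit(STRIPE_ACTIVE, &v->io.flags); }
 + *	static inline void SetStripeActive(struct stripe *v)
 + *	{ set_bit(STRIPE_ACTIVE, &v->io.flags); }
 + *	static inline int StripeActive(struct stripe *v)
 + *	{ return test_bit(STRIPE_ACTIVE, &v->io.flags); }
 + */
 +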
 +/* A stripe hash. */
 +struct stripe_hash {
 +      struct list_head *hash;
 +      unsigned buckets;
 +      unsigned mask;
 +      unsigned prime;
 +      unsigned shift;
 +};
 +
 +/* A stripe cache. */
 +struct stripe_cache {
 +      /* Stripe hash. */
 +      struct stripe_hash hash;
 +
 +      /* Stripes with io to flush, stripes to endio and LRU lists. */
 +      struct list_head lists[3];
 +
 +      /* Locks to protect endio and lru lists. */
 +      spinlock_t locks[NR_LOCKS];
 +
 +      /* Slab cache to allocate stripes from. */
 +      struct {
 +              struct kmem_cache *cache;       /* Cache itself. */
 +              char name[32];  /* Unique name. */
 +      } kc;
 +
 +      struct dm_io_client *dm_io_client; /* dm-io client resource context. */
 +
 +      /* dm-mem-cache client resource context. */
 +      struct dm_mem_cache_client *mem_cache_client;
 +
 +      int stripes_parm;           /* # stripes parameter from constructor. */
 +      atomic_t stripes;           /* actual # of stripes in cache. */
 +      atomic_t stripes_to_shrink; /* # of stripes to shrink cache by. */
 +      atomic_t stripes_last;      /* last # of stripes in cache. */
 +      atomic_t active_stripes;    /* actual # of active stripes in cache. */
 +
 +      /* REMOVEME: */
 +      atomic_t max_active_stripes; /* actual # of active stripes in cache. */
 +};
 +
 +/* Flag specs for raid_dev. */
 +enum raid_dev_flags { DEVICE_FAILED, IO_QUEUED };
 +
 +/* The raid device in a set. */
 +struct raid_dev {
 +      struct dm_dev *dev;
 +      unsigned long flags;    /* raid_dev_flags. */
 +      sector_t start;         /* offset to map to. */
 +};
 +
 +/* Flags spec for raid_set. */
 +enum raid_set_flags {
 +      RS_CHECK_OVERWRITE,     /* Check for chunk overwrites. */
 +      RS_DEAD,                /* RAID set inoperational. */
 +      RS_DEVEL_STATS,         /* REMOVEME: display status information. */
 +      RS_IO_ERROR,            /* io error on set. */
 +      RS_RECOVER,             /* Do recovery. */
 +      RS_RECOVERY_BANDWIDTH,  /* Allow recovery bandwidth (delayed bios). */
 +      RS_REGION_GET,          /* get a region to recover. */
 +      RS_SC_BUSY,             /* stripe cache busy -> send an event. */
 +	RS_SUSPENDED,		/* RAID set suspended. */
 +};
 +
 +/* REMOVEME: devel stats counters. */
 +enum stats_types {
 +      S_BIOS_READ,
 +      S_BIOS_ADDED_READ,
 +      S_BIOS_ENDIO_READ,
 +      S_BIOS_WRITE,
 +      S_BIOS_ADDED_WRITE,
 +      S_BIOS_ENDIO_WRITE,
 +      S_CAN_MERGE,
 +      S_CANT_MERGE,
 +      S_CONGESTED,
 +      S_DM_IO_READ,
 +      S_DM_IO_WRITE,
 +      S_ACTIVE_READS,
 +      S_BANDWIDTH,
 +      S_BARRIER,
 +      S_BIO_COPY_PL_NEXT,
 +      S_DEGRADED,
 +      S_DELAYED_BIOS,
 +      S_EVICT,
 +      S_FLUSHS,
 +      S_HITS_1ST,
 +      S_IOS_POST,
 +      S_INSCACHE,
 +      S_MAX_LOOKUP,
 +      S_MERGE_PAGE_LOCKED,
 +      S_NO_BANDWIDTH,
 +      S_NOT_CONGESTED,
 +      S_NO_RW,
 +      S_NOSYNC,
 +      S_PROHIBITPAGEIO,
 +      S_RECONSTRUCT_EI,
 +      S_RECONSTRUCT_DEV,
 +      S_REDO,
 +      S_REQUEUE,
 +      S_STRIPE_ERROR,
 +      S_SUM_DELAYED_BIOS,
 +      S_XORS,
 +      S_NR_STATS,     /* # of stats counters. */
 +};
 +
 +/* Status type -> string mappings. */
 +struct stats_map {
 +      const enum stats_types type;
 +      const char *str;
 +};
 +
 +static struct stats_map stats_map[] = {
 +      { S_BIOS_READ, "r=" },
 +      { S_BIOS_ADDED_READ, "/" },
 +      { S_BIOS_ENDIO_READ, "/" },
 +      { S_BIOS_WRITE, " w=" },
 +      { S_BIOS_ADDED_WRITE, "/" },
 +      { S_BIOS_ENDIO_WRITE, "/" },
 +      { S_DM_IO_READ, " rc=" },
 +      { S_DM_IO_WRITE, " wc=" },
 +      { S_ACTIVE_READS, " active_reads=" },
 +      { S_BANDWIDTH, " bandwidth=" },
 +      { S_NO_BANDWIDTH, " no_bandwidth=" },
 +      { S_BARRIER, " barrier=" },
 +      { S_BIO_COPY_PL_NEXT, " bio_copy_pl_next=" },
 +      { S_CAN_MERGE, " can_merge=" },
 +      { S_MERGE_PAGE_LOCKED, "/page_locked=" },
 +      { S_CANT_MERGE, "/cant_merge=" },
 +      { S_CONGESTED, " congested=" },
 +      { S_NOT_CONGESTED, "/not_congested=" },
 +      { S_DEGRADED, " degraded=" },
 +      { S_DELAYED_BIOS, " delayed_bios=" },
 +      { S_SUM_DELAYED_BIOS, "/sum_delayed_bios=" },
 +      { S_EVICT, " evict=" },
 +      { S_FLUSHS, " flushs=" },
 +      { S_HITS_1ST, " hits_1st=" },
 +      { S_IOS_POST, " ios_post=" },
 +      { S_INSCACHE, " inscache=" },
 +      { S_MAX_LOOKUP, " max_lookup=" },
 +      { S_NO_RW, " no_rw=" },
 +      { S_NOSYNC, " nosync=" },
 +      { S_PROHIBITPAGEIO, " ProhibitPageIO=" },
 +      { S_RECONSTRUCT_EI, " reconstruct_ei=" },
 +      { S_RECONSTRUCT_DEV, " reconstruct_dev=" },
 +      { S_REDO, " redo=" },
 +      { S_REQUEUE, " requeue=" },
 +      { S_STRIPE_ERROR, " stripe_error=" },
 +      { S_XORS, " xors=" },
 +};
 +
 +/*
 + * A RAID set.
 + */
 +typedef void (*xor_function_t)(unsigned count, unsigned long **data);
 +struct raid_set {
 +      struct dm_target *ti;   /* Target pointer. */
 +
 +      struct {
 +              unsigned long flags;    /* State flags. */
 +              spinlock_t in_lock;     /* Protects central input list below. */
 +              struct bio_list in;     /* Pending ios (central input list). */
 +              struct bio_list work;   /* ios work set. */
 +              wait_queue_head_t suspendq;     /* suspend synchronization. */
 +              atomic_t in_process;    /* counter of queued bios (suspendq). */
 +              atomic_t in_process_max;/* counter of queued bios max. */
 +
 +              /* io work. */
 +              struct workqueue_struct *wq;
 +              struct delayed_work dws;
 +      } io;
 +
 +      /* External locking. */
 +      struct dm_raid45_locking_type *locking;
 +
 +      struct stripe_cache sc; /* Stripe cache for this set. */
 +
 +      /* Xor optimization. */
 +      struct {
 +              struct xor_func *f;
 +              unsigned chunks;
 +              unsigned speed;
 +      } xor;
 +
 +      /* Recovery parameters. */
 +      struct recover {
 +              struct dm_dirty_log *dl;        /* Dirty log. */
 +              struct dm_region_hash *rh;      /* Region hash. */
 +
 +              /* dm-mem-cache client resource context for recovery stripes. */
 +              struct dm_mem_cache_client *mem_cache_client;
 +
 +              struct list_head stripes;       /* List of recovery stripes. */
 +
 +              region_t nr_regions;
 +              region_t nr_regions_to_recover;
 +              region_t nr_regions_recovered;
 +              unsigned long start_jiffies;
 +              unsigned long end_jiffies;
 +
 +              unsigned bandwidth;          /* Recovery bandwidth [%]. */
 +              unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
 +              unsigned bandwidth_parm; /*  " constructor parm. */
 +              unsigned io_size;        /* io size <= chunk size. */
 +              unsigned io_size_parm;   /* io size ctr parameter. */
 +
 +              /* recovery io throttling. */
 +              atomic_t io_count[2];   /* counter recover/regular io. */
 +              unsigned long last_jiffies;
 +
 +              struct dm_region *reg;  /* Actual region to recover. */
 +              sector_t pos;   /* Position within region to recover. */
 +              sector_t end;   /* End of region to recover. */
 +      } recover;
 +
 +      /* RAID set parameters. */
 +      struct {
 +              struct raid_type *raid_type;    /* RAID type (eg, RAID4). */
 +              unsigned raid_parms;    /* # variable raid parameters. */
 +
 +              unsigned chunk_size;    /* Sectors per chunk. */
 +              unsigned chunk_size_parm;
 +              unsigned chunk_mask;    /* Mask for amount. */
 +              unsigned chunk_shift;   /* rsector chunk size shift. */
 +
 +              unsigned io_size;       /* Sectors per io. */
 +              unsigned io_size_parm;
 +              unsigned io_mask;       /* Mask for amount. */
 +              unsigned io_shift_mask; /* Mask for raid_address(). */
 +              unsigned io_shift;      /* rsector io size shift. */
 +              unsigned pages_per_io;  /* Pages per io. */
 +
 +              sector_t sectors_per_dev;       /* Sectors per device. */
 +
 +		atomic_t failed_devs;		/* Number of failed devices. */
 +
 +              /* Index of device to initialize. */
 +              int dev_to_init;
 +              int dev_to_init_parm;
 +
 +              /* Raid devices dynamically allocated. */
 +              unsigned raid_devs;     /* # of RAID devices below. */
 +              unsigned data_devs;     /* # of RAID data devices. */
 +
 +              int ei;         /* index of failed RAID device. */
 +
 +              /* index of dedicated parity device (i.e. RAID4). */
 +              int pi;
 +              int pi_parm;    /* constructor parm for status output. */
 +      } set;
 +
 +      /* REMOVEME: devel stats counters. */
 +      atomic_t stats[S_NR_STATS];
 +
 +      /* Dynamically allocated temporary pointers for xor(). */
 +      unsigned long **data;
 +
 +      /* Dynamically allocated RAID devices. Alignment? */
 +      struct raid_dev dev[0];
 +};
 +
 +
 +BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
 +BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
 +BITOPS(RS, Dead, raid_set, RS_DEAD)
 +BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
 +BITOPS(RS, IoError, raid_set, RS_IO_ERROR)
 +BITOPS(RS, Recover, raid_set, RS_RECOVER)
 +BITOPS(RS, RegionGet, raid_set, RS_REGION_GET)
 +BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
 +BITOPS(RS, Suspended, raid_set, RS_SUSPENDED)
 +#undef BITOPS
 +
 +#define       PageIO(page)            PageChecked(page)
 +#define       AllowPageIO(page)       SetPageChecked(page)
 +#define       ProhibitPageIO(page)    ClearPageChecked(page)
 +
 +/*-----------------------------------------------------------------
 + * Raid-4/5 set structures.
 + *---------------------------------------------------------------*/
 +/* RAID level definitions. */
 +enum raid_level {
 +      raid4,
 +      raid5,
 +};
 +
 +/* Symmetric/Asymmetric, Left/Right parity rotating algorithms. */
 +enum raid_algorithm {
 +      none,
 +      left_asym,
 +      right_asym,
 +      left_sym,
 +      right_sym,
 +};
 +
 +struct raid_type {
 +      const char *name;               /* RAID algorithm. */
 +      const char *descr;              /* Descriptor text for logging. */
 +      const unsigned parity_devs;     /* # of parity devices. */
 +      const unsigned minimal_devs;    /* minimal # of devices in set. */
 +      const enum raid_level level;            /* RAID level. */
 +      const enum raid_algorithm algorithm;    /* RAID algorithm. */
 +};
 +
 +/* Supported raid types and properties. */
 +static struct raid_type raid_types[] = {
 +      {"raid4", "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
 +      {"raid5_la", "RAID5 (left asymmetric)", 1, 3, raid5, left_asym},
 +      {"raid5_ra", "RAID5 (right asymmetric)", 1, 3, raid5, right_asym},
 +      {"raid5_ls", "RAID5 (left symmetric)", 1, 3, raid5, left_sym},
 +      {"raid5_rs", "RAID5 (right symmetric)", 1, 3, raid5, right_sym},
 +};
 +
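 +#if 0	/* Illustrative only -- hypothetical helper, not part of the driver. */
 +/* Map a constructor-table type-name argument to a raid_types[] entry above. */
 +static struct raid_type *example_get_raid_type(const char *name)
 +{
 +	unsigned i = ARRAY_SIZE(raid_types);
 +
 +	while (i--) {
 +		if (!strcmp(raid_types[i].name, name))
 +			return raid_types + i;
 +	}
 +
 +	return NULL;
 +}
 +#endif
 +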
 +/* Address as calculated by raid_address(). */
 +struct address {
 +      sector_t key;           /* Hash key (start address of stripe). */
 +      unsigned di, pi;        /* Data and parity disks index. */
 +};
 +
 +/* REMOVEME: reset statistics counters. */
 +static void stats_reset(struct raid_set *rs)
 +{
 +      unsigned s = S_NR_STATS;
 +
 +      while (s--)
 +              atomic_set(rs->stats + s, 0);
 +}
 +
 +/*----------------------------------------------------------------
 + * RAID set management routines.
 + *--------------------------------------------------------------*/
 +/*
 + * Begin small helper functions.
 + */
 +/* Queue (optionally delayed) io work. */
 +static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
 +{
 +      struct delayed_work *dws = &rs->io.dws;
 +
 +      cancel_delayed_work(dws);
 +      queue_delayed_work(rs->io.wq, dws, delay);
 +}
 +
 +/* Queue io work immediately (called from region hash too). */
 +static INLINE void wake_do_raid(void *context)
 +{
 +      wake_do_raid_delayed(context, 0);
 +}
 +
 +/* Wait until all io has been processed. */
 +static INLINE void wait_ios(struct raid_set *rs)
 +{
 +      wait_event(rs->io.suspendq, !atomic_read(&rs->io.in_process));
 +}
 +
 +/* Declare io queued to device. */
 +static INLINE void io_dev_queued(struct raid_dev *dev)
 +{
 +      set_bit(IO_QUEUED, &dev->flags);
 +}
 +
 +/* Io queued to device? Test and clear. */
 +static inline int io_dev_clear(struct raid_dev *dev)
 +{
 +      return test_and_clear_bit(IO_QUEUED, &dev->flags);
 +}
 +
 +/* Get an io reference. */
 +static INLINE void io_get(struct raid_set *rs)
 +{
 +      int p = atomic_inc_return(&rs->io.in_process);
 +
 +      if (p > atomic_read(&rs->io.in_process_max))
 +              atomic_set(&rs->io.in_process_max, p); /* REMOVEME: max. */
 +}
 +
 +/* Put the io reference and conditionally wake io waiters. */
 +static INLINE void io_put(struct raid_set *rs)
 +{
 +      /* Intel: rebuild data corrupter? */
 +      if (!atomic_read(&rs->io.in_process)) {
 +              DMERR("%s would go negative!!!", __func__);
 +              return;
 +      }
 +
 +      if (atomic_dec_and_test(&rs->io.in_process))
 +              wake_up(&rs->io.suspendq);
 +}
 +
 +/* Calculate device sector offset. */
 +static INLINE sector_t _sector(struct raid_set *rs, struct bio *bio)
 +{
 +      sector_t sector = bio->bi_sector;
 +
 +      sector_div(sector, rs->set.data_devs);
 +      return sector;
 +}
 +
 +/* Test device operational. */
 +static INLINE int dev_operational(struct raid_set *rs, unsigned p)
 +{
 +      return !test_bit(DEVICE_FAILED, &rs->dev[p].flags);
 +}
 +
 +/* Return # of active stripes in stripe cache. */
 +static INLINE int sc_active(struct stripe_cache *sc)
 +{
 +      return atomic_read(&sc->active_stripes);
 +}
 +
 +/* Test io pending on stripe. */
 +static INLINE int stripe_io(struct stripe *stripe)
 +{
 +      return atomic_read(&stripe->io.pending);
 +}
 +
 +static INLINE void stripe_io_inc(struct stripe *stripe)
 +{
 +      atomic_inc(&stripe->io.pending);
 +}
 +
 +static INLINE void stripe_io_dec(struct stripe *stripe)
 +{
 +      atomic_dec(&stripe->io.pending);
 +}
 +
 +/* Wrapper needed by for_each_io_dev(). */
 +static void _stripe_io_inc(struct stripe *stripe, unsigned p)
 +{
 +      stripe_io_inc(stripe);
 +}
 +
 +/* Error a stripe. */
 +static INLINE void stripe_error(struct stripe *stripe, struct page *page)
 +{
 +      SetStripeError(stripe);
 +      SetPageError(page);
 +      atomic_inc(RS(stripe->sc)->stats + S_STRIPE_ERROR);
 +}
 +
 +/* Page IOed ok. */
 +enum dirty_type { CLEAN, DIRTY };
 +static INLINE void page_set(struct page *page, enum dirty_type type)
 +{
 +      switch (type) {
 +      case DIRTY:
 +              SetPageDirty(page);
 +              AllowPageIO(page);
 +              break;
 +
 +      case CLEAN:
 +              ClearPageDirty(page);
 +              break;
 +
 +      default:
 +              BUG();
 +      }
 +
 +      SetPageUptodate(page);
 +      ClearPageError(page);
 +}
 +
 +/* Return region state for a sector. */
 +static INLINE int
 +region_state(struct raid_set *rs, sector_t sector, unsigned long state)
 +{
 +      struct dm_region_hash *rh = rs->recover.rh;
 +
 +      return RSRecover(rs) ?
 +             (dm_rh_get_state(rh, dm_rh_sector_to_region(rh, sector), 1) &
 +              state) : 0;
 +}
 +
 +/* Is the raid set degraded (a member device has errored)? */
 +static inline int raid_set_degraded(struct raid_set *rs)
 +{
 +      return RSIoError(rs);
 +}
 +
 +/* Check # of devices which may fail in a raid set. */
 +static INLINE int raid_set_operational(struct raid_set *rs)
 +{
 +      /* Too many failed devices -> BAD. */
 +      return atomic_read(&rs->set.failed_devs) <=
 +             rs->set.raid_type->parity_devs;
 +}
 +
 +/*
 + * Return true in case a page_list should be read/written
 + *
 + * Conditions to read/write:
 + *    o 1st page in list not uptodate
 + *    o 1st page in list dirty
 + *    o if we optimized io away, we flag it using the pages checked bit.
 + */
 +static INLINE unsigned page_io(struct page *page)
 +{
 +      /* Optimization: page was flagged to need io during first run. */
 +      if (PagePrivate(page)) {
 +              ClearPagePrivate(page);
 +              return 1;
 +      }
 +
 +      /* Avoid io if prohibited or a locked page. */
 +      if (!PageIO(page) || PageLocked(page))
 +              return 0;
 +
 +      if (!PageUptodate(page) || PageDirty(page)) {
 +              /* Flag page needs io for second run optimization. */
 +              SetPagePrivate(page);
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Call a function on each page list needing io. */
 +static INLINE unsigned
 +for_each_io_dev(struct raid_set *rs, struct stripe *stripe,
 +              void (*f_io)(struct stripe *stripe, unsigned p))
 +{
 +      unsigned p = rs->set.raid_devs, r = 0;
 +
 +      while (p--) {
 +              if (page_io(PAGE(stripe, p))) {
 +                      f_io(stripe, p);
 +                      r++;
 +              }
 +      }
 +
 +      return r;
 +}
 +
 +/* Reconstruct a particular device? */
 +static INLINE int dev_to_init(struct raid_set *rs)
 +{
 +      return rs->set.dev_to_init > -1;
 +}
 +
 +/*
 + * Index of device to calculate parity on.
 + * Either the parity device index *or* the selected device to init
 + * after a spare replacement.
 + */
 +static INLINE unsigned dev_for_parity(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      return dev_to_init(rs) ? rs->set.dev_to_init : stripe->idx.parity;
 +}
 +
 +/* Return the index of the device to be recovered. */
 +static int idx_get(struct raid_set *rs)
 +{
 +	/* Avoid reading in pages that will be reconstructed anyway. */
 +      if (dev_to_init(rs))
 +              return rs->set.dev_to_init;
 +      else if (rs->set.raid_type->level == raid4)
 +              return rs->set.pi;
 +
 +      return -1;
 +}
 +
 +/* RAID set congested function. */
 +static int raid_set_congested(void *congested_data, int bdi_bits)
 +{
 +      struct raid_set *rs = congested_data;
 +      int r = 0; /* Assume uncongested. */
 +      unsigned p = rs->set.raid_devs;
 +
 +      /* If any of our component devices are overloaded. */
 +      while (p--) {
 +              struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
 +
 +              r |= bdi_congested(&q->backing_dev_info, bdi_bits);
 +      }
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
 +      return r;
 +}
 +
 +/* Display RAID set dead message once. */
 +static void raid_set_dead(struct raid_set *rs)
 +{
 +      if (!TestSetRSDead(rs)) {
 +              unsigned p;
 +              char buf[BDEVNAME_SIZE];
 +
 +              DMERR("FATAL: too many devices failed -> RAID set dead");
 +
 +              for (p = 0; p < rs->set.raid_devs; p++) {
 +                      if (!dev_operational(rs, p))
 +                              DMERR("device /dev/%s failed",
 +                                    bdevname(rs->dev[p].dev->bdev, buf));
 +              }
 +      }
 +}
 +
 +/* RAID set degrade check. */
 +static INLINE int
 +raid_set_check_and_degrade(struct raid_set *rs,
 +                         struct stripe *stripe, unsigned p)
 +{
 +      if (test_and_set_bit(DEVICE_FAILED, &rs->dev[p].flags))
 +              return -EPERM;
 +
 +	/* Throw an event in case of member device errors. */
 +      dm_table_event(rs->ti->table);
 +      atomic_inc(&rs->set.failed_devs);
 +
 +      /* Only log the first member error. */
 +      if (!TestSetRSIoError(rs)) {
 +              char buf[BDEVNAME_SIZE];
 +
 +              /* Store index for recovery. */
 +              mb();
 +              rs->set.ei = p;
 +              mb();
 +
 +              DMERR("CRITICAL: %sio error on device /dev/%s "
 +                    "in region=%llu; DEGRADING RAID set",
 +                    stripe ? "" : "FAKED ",
 +                    bdevname(rs->dev[p].dev->bdev, buf),
 +                    (unsigned long long) (stripe ? stripe->key : 0));
 +              DMERR("further device error messages suppressed");
 +      }
 +
 +      return 0;
 +}
 +
 +static void
 +raid_set_check_degrade(struct raid_set *rs, struct stripe *stripe)
 +{
 +      unsigned p = rs->set.raid_devs;
 +
 +      while (p--) {
 +              struct page *page = PAGE(stripe, p);
 +
 +              if (PageError(page)) {
 +                      ClearPageError(page);
 +                      raid_set_check_and_degrade(rs, stripe, p);
 +              }
 +      }
 +}
 +
 +/* RAID set upgrade check. */
 +static int raid_set_check_and_upgrade(struct raid_set *rs, unsigned p)
 +{
 +      if (!test_and_clear_bit(DEVICE_FAILED, &rs->dev[p].flags))
 +              return -EPERM;
 +
 +      if (atomic_dec_and_test(&rs->set.failed_devs)) {
 +              ClearRSIoError(rs);
 +              rs->set.ei = -1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Lookup a RAID device by name or by major:minor number. */
 +union dev_lookup {
 +      const char *dev_name;
 +      struct raid_dev *dev;
 +};
 +enum lookup_type { byname, bymajmin, bynumber };
 +static int raid_dev_lookup(struct raid_set *rs, enum lookup_type by,
 +                         union dev_lookup *dl)
 +{
 +      unsigned p;
 +
 +      /*
 +	 * Must be an incremental loop, because the device array
 +	 * can still have empty slots when called from raid_ctr().
 +       */
 +      for (p = 0; p < rs->set.raid_devs; p++) {
 +              char buf[BDEVNAME_SIZE];
 +              struct raid_dev *dev = rs->dev + p;
 +
 +              if (!dev->dev)
 +                      break;
 +
 +              /* Format dev string appropriately if necessary. */
 +              if (by == byname)
 +                      bdevname(dev->dev->bdev, buf);
 +              else if (by == bymajmin)
 +                      format_dev_t(buf, dev->dev->bdev->bd_dev);
 +
 +              /* Do the actual check. */
 +              if (by == bynumber) {
 +                      if (dl->dev->dev->bdev->bd_dev ==
 +                          dev->dev->bdev->bd_dev)
 +                              return p;
 +              } else if (!strcmp(dl->dev_name, buf))
 +                      return p;
 +      }
 +
 +      return -ENODEV;
 +}
 +
 +/* End io wrapper. */
 +static INLINE void
 +_bio_endio(struct raid_set *rs, struct bio *bio, int error)
 +{
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (bio_data_dir(bio) == WRITE ?
 +                 S_BIOS_ENDIO_WRITE : S_BIOS_ENDIO_READ));
 +      bio_endio(bio, error);
 +      io_put(rs);             /* Wake any suspend waiters. */
 +}
 +
 +/*
 + * End small helper functions.
 + */
 +
 +
 +/*
 + * Stripe hash functions
 + */
 +/* Initialize/destroy stripe hash. */
 +static int hash_init(struct stripe_hash *hash, unsigned stripes)
 +{
 +      unsigned buckets = 2, max_buckets = stripes / 4;
 +      unsigned hash_primes[] = {
 +              /* Table of primes for hash_fn/table size optimization. */
 +		3, 7, 13, 31, 53, 97, 193, 389, 769,
 +              1543, 3079, 6151, 12289, 24593,
 +      };
 +
 +	/* Calculate number of buckets: smallest power of 2 >= stripes / 4 (minimum 2). */
 +      while (buckets < max_buckets)
 +              buckets <<= 1;
 +
 +      /* Allocate stripe hash. */
 +      hash->hash = vmalloc(buckets * sizeof(*hash->hash));
 +      if (!hash->hash)
 +              return -ENOMEM;
 +
 +      hash->buckets = buckets;
 +      hash->mask = buckets - 1;
 +      hash->shift = ffs(buckets);
 +      if (hash->shift > ARRAY_SIZE(hash_primes) + 1)
 +              hash->shift = ARRAY_SIZE(hash_primes) + 1;
 +
 +      BUG_ON(hash->shift - 2 > ARRAY_SIZE(hash_primes) + 1);
 +      hash->prime = hash_primes[hash->shift - 2];
 +
 +      /* Initialize buckets. */
 +      while (buckets--)
 +              INIT_LIST_HEAD(hash->hash + buckets);
 +
 +      return 0;
 +}
 +
 +static INLINE void hash_exit(struct stripe_hash *hash)
 +{
 +      if (hash->hash) {
 +              vfree(hash->hash);
 +              hash->hash = NULL;
 +      }
 +}
 +
 +/* List add (head/tail/locked/unlocked) inlines. */
 +enum list_lock_type { LIST_LOCKED, LIST_UNLOCKED };
 +#define       LIST_DEL(name, list) \
 +static void stripe_ ## name ## _del(struct stripe *stripe, \
 +                                  enum list_lock_type lock) { \
 +      struct list_head *lh = stripe->lists + (list); \
 +      spinlock_t *l = NULL; \
 +\
 +      if (lock == LIST_LOCKED) { \
 +              l = stripe->sc->locks + LOCK_LRU; \
 +              spin_lock_irq(l); \
 +      } \
 +\
 +      if (!list_empty(lh)) \
 +              list_del_init(lh); \
 +\
 +      if (lock == LIST_LOCKED) \
 +              spin_unlock_irq(l); \
 +}
 +
 +LIST_DEL(hash, LIST_HASH)
 +LIST_DEL(lru, LIST_LRU)
 +#undef LIST_DEL
 +
 +enum list_pos_type { POS_HEAD, POS_TAIL };
 +#define       LIST_ADD(name, list) \
 +static void stripe_ ## name ## _add(struct stripe *stripe, \
 +                                  enum list_pos_type pos, \
 +                                  enum list_lock_type lock) { \
 +      struct list_head *lh = stripe->lists + (list); \
 +      struct stripe_cache *sc = stripe->sc; \
 +      spinlock_t *l = NULL; \
 +\
 +      if (lock == LIST_LOCKED) { \
 +              l = sc->locks + LOCK_LRU; \
 +              spin_lock_irq(l); \
 +      } \
 +\
 +      if (list_empty(lh)) { \
 +              if (pos == POS_HEAD) \
 +                      list_add(lh, sc->lists + (list)); \
 +              else \
 +                      list_add_tail(lh, sc->lists + (list)); \
 +      } \
 +\
 +      if (lock == LIST_LOCKED) \
 +              spin_unlock_irq(l); \
 +}
 +
 +LIST_ADD(endio, LIST_ENDIO)
 +LIST_ADD(io, LIST_IO)
 +LIST_ADD(lru, LIST_LRU)
 +#undef LIST_ADD
 +
 +#define POP(list) \
 +      do { \
 +              if (list_empty(sc->lists + list)) \
 +                      stripe = NULL; \
 +              else { \
 +                      stripe = list_first_entry(&sc->lists[list], \
 +                                                struct stripe, \
 +                                                lists[list]); \
 +                      list_del_init(&stripe->lists[list]); \
 +              } \
 +      } while (0);
 +
 +/* Pop an available stripe off the lru list. */
 +static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
 +{
 +      struct stripe *stripe;
 +      spinlock_t *lock = sc->locks + LOCK_LRU;
 +
 +      spin_lock_irq(lock);
 +      POP(LIST_LRU);
 +      spin_unlock_irq(lock);
 +
 +      if (stripe)
 +              /* Remove from hash before reuse. */
 +              stripe_hash_del(stripe, LIST_UNLOCKED);
 +
 +      return stripe;
 +}
 +
 +static inline unsigned hash_fn(struct stripe_hash *hash, sector_t key)
 +{
 +      return (unsigned) (((key * hash->prime) >> hash->shift) & hash->mask);
 +}
 +
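 +/*
 + * Worked example of the sizing above (illustrative, not part of the
 + * driver): with stripes = 64, max_buckets = 16, so buckets = 16,
 + * mask = 15 and shift = ffs(16) = 5; the multiplier is then
 + * hash_primes[5 - 2] = 31 and hash_fn() computes
 + * ((key * 31) >> 5) & 15.
 + */
 +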
 +static inline struct list_head *
 +hash_bucket(struct stripe_hash *hash, sector_t key)
 +{
 +      return hash->hash + hash_fn(hash, key);
 +}
 +
 +/* Insert an entry into a hash. */
 +static inline void hash_insert(struct stripe_hash *hash, struct stripe *stripe)
 +{
 +      list_add(stripe->lists + LIST_HASH, hash_bucket(hash, stripe->key));
 +}
 +
 +/* Insert an entry into the stripe hash. */
 +static inline void
 +sc_insert(struct stripe_cache *sc, struct stripe *stripe)
 +{
 +      hash_insert(&sc->hash, stripe);
 +}
 +
 +/* Lookup an entry in the stripe hash. */
 +static inline struct stripe *
 +stripe_lookup(struct stripe_cache *sc, sector_t key)
 +{
 +      unsigned c = 0;
 +      struct stripe *stripe;
 +      struct list_head *bucket = hash_bucket(&sc->hash, key);
 +
 +      list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
 +		/* REMOVEME: statistics. */
 +              if (++c > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
 +                      atomic_set(RS(sc)->stats + S_MAX_LOOKUP, c);
 +
 +              if (stripe->key == key)
 +                      return stripe;
 +      }
 +
 +      return NULL;
 +}
 +
 +/* Resize the stripe cache hash on size changes. */
 +static int hash_resize(struct stripe_cache *sc)
 +{
 +      /* Resize threshold reached? */
 +      if (atomic_read(&sc->stripes) > 2 * atomic_read(&sc->stripes_last)
 +          || atomic_read(&sc->stripes) < atomic_read(&sc->stripes_last) / 4) {
 +              int r;
 +              struct stripe_hash hash, hash_tmp;
 +              spinlock_t *lock;
 +
 +              r = hash_init(&hash, atomic_read(&sc->stripes));
 +              if (r)
 +                      return r;
 +
 +              lock = sc->locks + LOCK_LRU;
 +              spin_lock_irq(lock);
 +              if (sc->hash.hash) {
 +                      unsigned b = sc->hash.buckets;
 +                      struct list_head *pos, *tmp;
 +
 +                      /* Walk old buckets and insert into new. */
 +                      while (b--) {
 +                              list_for_each_safe(pos, tmp, sc->hash.hash + b)
 +                                  hash_insert(&hash,
 +                                              list_entry(pos, struct stripe,
 +                                                         lists[LIST_HASH]));
 +                      }
 +
 +              }
 +
 +              memcpy(&hash_tmp, &sc->hash, sizeof(hash_tmp));
 +              memcpy(&sc->hash, &hash, sizeof(sc->hash));
 +              atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
 +              spin_unlock_irq(lock);
 +
 +              hash_exit(&hash_tmp);
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Stripe cache locking functions
 + */
 +/* Dummy lock function for local RAID4+5. */
 +static void *no_lock(sector_t key, enum dm_lock_type type)
 +{
 +      return &no_lock;
 +}
 +
 +/* Dummy unlock function for local RAID4+5. */
 +static void no_unlock(void *lock_handle)
 +{
 +}
 +
 +/* No locking (for local RAID 4+5). */
 +static struct dm_raid45_locking_type locking_none = {
 +      .lock = no_lock,
 +      .unlock = no_unlock,
 +};
 +
 +/* Clustered RAID 4+5. */
 +/* FIXME: code this. */
 +static struct dm_raid45_locking_type locking_cluster = {
 +      .lock = no_lock,
 +      .unlock = no_unlock,
 +};
 +
 +/* Lock a stripe (for clustering). */
 +static int
 +stripe_lock(struct raid_set *rs, struct stripe *stripe, int rw, sector_t key)
 +{
 +      stripe->lock = rs->locking->lock(key, rw == READ ? DM_RAID45_SHARED :
 +                                                         DM_RAID45_EX);
 +      return stripe->lock ? 0 : -EPERM;
 +}
 +
 +/* Unlock a stripe (for clustering). */
 +static void stripe_unlock(struct raid_set *rs, struct stripe *stripe)
 +{
 +      rs->locking->unlock(stripe->lock);
 +      stripe->lock = NULL;
 +}
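 +
 +/*
 + * Minimal sketch of what a real cluster locking backend could look
 + * like (hypothetical; cluster_lock()/cluster_unlock() and any DLM
 + * plumbing behind them are assumptions, not part of this target):
 + *
 + *    static void *cluster_lock(sector_t key, enum dm_lock_type type)
 + *    {
 + *            // Map key to a cluster-wide lock resource and acquire it
 + *            // shared (DM_RAID45_SHARED) or exclusive (DM_RAID45_EX);
 + *            // return an opaque handle or NULL on failure.
 + *    }
 + *
 + *    static void cluster_unlock(void *lock_handle)
 + *    {
 + *            // Release the lock resource behind the handle.
 + *    }
 + *
 + *    static struct dm_raid45_locking_type locking_dlm = {
 + *            .lock = cluster_lock,
 + *            .unlock = cluster_unlock,
 + *    };
 + */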
 +
 +/*
 + * Stripe cache functions.
 + */
 +/*
 + * Invalidate all page list pages of a stripe.
 + *
 + * I only keep state for the whole list in the first page.
 + */
 +static INLINE void
 +stripe_pages_invalidate(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--) {
 +              struct page *page = PAGE(stripe, p);
 +
 +              ProhibitPageIO(page);
 +              ClearPageChecked(page);
 +              ClearPageDirty(page);
 +              ClearPageError(page);
 +              __clear_page_locked(page);
 +              ClearPagePrivate(page);
 +              ClearPageUptodate(page);
 +      }
 +}
 +
 +/* Prepare stripe for (re)use. */
 +static INLINE void stripe_invalidate(struct stripe *stripe)
 +{
 +      stripe->io.flags = 0;
 +      stripe_pages_invalidate(stripe);
 +}
 +
 +/* Allow io on all chunks of a stripe. */
 +static INLINE void stripe_allow_io(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--)
 +              AllowPageIO(PAGE(stripe, p));
 +}
 +
 +/* Initialize a stripe. */
 +static void
 +stripe_init(struct stripe_cache *sc, struct stripe *stripe)
 +{
 +      unsigned p = RS(sc)->set.raid_devs;
 +      unsigned i;
 +
 +      /* Work all io chunks. */
 +      while (p--) {
 +              struct stripe_set *ss = stripe->ss + p;
 +
 +              stripe->obj[p].private = ss;
 +              ss->stripe = stripe;
 +
 +              i = ARRAY_SIZE(ss->bl);
 +              while (i--)
 +                      bio_list_init(ss->bl + i);
 +      }
 +
 +      stripe->sc = sc;
 +
 +      i = ARRAY_SIZE(stripe->lists);
 +      while (i--)
 +              INIT_LIST_HEAD(stripe->lists + i);
 +
 +      atomic_set(&stripe->cnt, 0);
 +      atomic_set(&stripe->io.pending, 0);
 +
 +      stripe_invalidate(stripe);
 +}
 +
 +/* Number of pages per chunk. */
 +static inline unsigned chunk_pages(unsigned io_size)
 +{
 +      return dm_div_up(io_size, SECTORS_PER_PAGE);
 +}
 +
 +/* Number of pages per stripe. */
 +static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
 +{
 +      return chunk_pages(io_size) * rs->set.raid_devs;
 +}
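 +
 +/*
 + * Worked example (illustrative only): with 4KiB pages and 512 byte
 + * sectors, SECTORS_PER_PAGE is 8; an io_size of 64 sectors thus needs
 + * chunk_pages(64) = 8 pages per chunk and, on a 4 device set,
 + * stripe_pages() = 8 * 4 = 32 pages per stripe.
 + */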
 +
 +/* Initialize part of page_list (recovery). */
 +static INLINE void stripe_zero_pl_part(struct stripe *stripe, unsigned p,
 +                                     unsigned start, unsigned count)
 +{
 +      unsigned pages = chunk_pages(count);
 +      /* Get offset into the page_list. */
 +      struct page_list *pl = pl_elem(PL(stripe, p), start / SECTORS_PER_PAGE);
 +
 +      BUG_ON(!pl);
 +      while (pl && pages--) {
 +              BUG_ON(!pl->page);
 +              memset(page_address(pl->page), 0, PAGE_SIZE);
 +              pl = pl->next;
 +      }
 +}
 +
 +/* Initialize parity chunk of stripe. */
 +static INLINE void stripe_zero_chunk(struct stripe *stripe, unsigned p)
 +{
 +      stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
 +}
 +
 +/* Return dynamic stripe structure size. */
 +static INLINE size_t stripe_size(struct raid_set *rs)
 +{
 +      return sizeof(struct stripe) +
 +                    rs->set.raid_devs * sizeof(struct stripe_set);
 +}
 +
 +/* Allocate a stripe and its memory object. */
 +/* XXX adjust to cope with stripe cache and recovery stripe caches. */
 +enum grow { SC_GROW, SC_KEEP };
 +static struct stripe *stripe_alloc(struct stripe_cache *sc,
 +                                 struct dm_mem_cache_client *mc,
 +                                 enum grow grow)
 +{
 +      int r;
 +      struct stripe *stripe;
 +
 +      stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
 +      if (stripe) {
 +              /* Grow the dm-mem-cache by one object. */
 +              if (grow == SC_GROW) {
 +                      r = dm_mem_cache_grow(mc, 1);
 +                      if (r)
 +                              goto err_free;
 +              }
 +
 +              stripe->obj = dm_mem_cache_alloc(mc);
 +              if (!stripe->obj)
 +                      goto err_shrink;
 +
 +              stripe_init(sc, stripe);
 +      }
 +
 +      return stripe;
 +
 +err_shrink:
 +      if (grow == SC_GROW)
 +              dm_mem_cache_shrink(mc, 1);
 +err_free:
 +      kmem_cache_free(sc->kc.cache, stripe);
 +      return NULL;
 +}
 +
 +/*
 + * Free a stripe's memory object, shrink the
 + * memory cache and free the stripe itself.
 + */
 +static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
 +{
 +      dm_mem_cache_free(mc, stripe->obj);
 +      dm_mem_cache_shrink(mc, 1);
 +      kmem_cache_free(stripe->sc->kc.cache, stripe);
 +}
 +
 +/* Free the recovery stripe. */
 +static void stripe_recover_free(struct raid_set *rs)
 +{
 +      struct recover *rec = &rs->recover;
 +      struct list_head *stripes = &rec->stripes;
 +
 +      while (!list_empty(stripes)) {
 +              struct stripe *stripe = list_first_entry(stripes, struct stripe,
 +                                                       lists[LIST_RECOVER]);
 +              list_del(stripe->lists + LIST_RECOVER);
 +              stripe_free(stripe, rec->mem_cache_client);
 +      }
 +}
 +
 +/* Push a stripe safely onto the endio list to be handled by do_endios(). */
 +static INLINE void stripe_endio_push(struct stripe *stripe)
 +{
 +      int wake;
 +      unsigned long flags;
 +      struct stripe_cache *sc = stripe->sc;
 +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
 +
 +      spin_lock_irqsave(lock, flags);
 +      wake = list_empty(sc->lists + LIST_ENDIO);
 +      stripe_endio_add(stripe, POS_HEAD, LIST_UNLOCKED);
 +      spin_unlock_irqrestore(lock, flags);
 +
 +      if (wake)
 +              wake_do_raid(RS(sc));
 +}
 +
 +/* Protected check for stripe cache endio list empty. */
 +static INLINE int stripe_endio_empty(struct stripe_cache *sc)
 +{
 +      int r;
 +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
 +
 +      spin_lock_irq(lock);
 +      r = list_empty(sc->lists + LIST_ENDIO);
 +      spin_unlock_irq(lock);
 +
 +      return r;
 +}
 +
 +/* Safely pop a stripe off the endio list. */
 +static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
 +{
 +      struct stripe *stripe;
 +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
 +
 +      /* This runs in parallel with endio(). */
 +      spin_lock_irq(lock);
 +      POP(LIST_ENDIO)
 +      spin_unlock_irq(lock);
 +      return stripe;
 +}
 +
 +#undef POP
 +
 +/* Evict stripe from cache. */
 +static void stripe_evict(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      stripe_hash_del(stripe, LIST_UNLOCKED); /* Take off hash. */
 +
 +      if (list_empty(stripe->lists + LIST_LRU)) {
 +              stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
 +              atomic_inc(rs->stats + S_EVICT); /* REMOVEME: statistics. */
 +      }
 +}
 +
 +/* Grow stripe cache. */
 +static int
 +sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
 +{
 +      int r = 0;
 +      struct raid_set *rs = RS(sc);
 +
 +      /* Try to allocate this many (additional) stripes. */
 +      while (stripes--) {
 +              struct stripe *stripe =
 +                      stripe_alloc(sc, sc->mem_cache_client, grow);
 +
 +              if (likely(stripe)) {
 +                      stripe->io.size = rs->set.io_size;
 +                      stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
 +                      atomic_inc(&sc->stripes);
 +              } else {
 +                      r = -ENOMEM;
 +                      break;
 +              }
 +      }
 +
 +      ClearRSScBusy(rs);
 +      return r ? r : hash_resize(sc);
 +}
 +
 +/* Shrink stripe cache. */
 +static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
 +{
 +      int r = 0;
 +
 +      /* Try to get unused stripe from LRU list. */
 +      while (stripes--) {
 +              struct stripe *stripe;
 +
 +              stripe = stripe_lru_pop(sc);
 +              if (stripe) {
 +                      /* An LRU stripe may never have ios pending! */
 +                      BUG_ON(stripe_io(stripe));
 +                      stripe_free(stripe, sc->mem_cache_client);
 +                      atomic_dec(&sc->stripes);
 +              } else {
 +                      r = -ENOENT;
 +                      break;
 +              }
 +      }
 +
 +      /* Check if stats are still sane. */
 +      if (atomic_read(&sc->max_active_stripes) >
 +          atomic_read(&sc->stripes))
 +              atomic_set(&sc->max_active_stripes, 0);
 +
 +      if (r)
 +              return r;
 +
 +      ClearRSScBusy(RS(sc));
 +      return hash_resize(sc);
 +}
 +
 +/* Create stripe cache. */
 +static int sc_init(struct raid_set *rs, unsigned stripes)
 +{
 +      unsigned i, nr;
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +      struct recover *rec = &rs->recover;
 +
 +      /* Initialize lists and locks. */
 +      i = ARRAY_SIZE(sc->lists);
 +      while (i--)
 +              INIT_LIST_HEAD(sc->lists + i);
 +
 +      i = NR_LOCKS;
 +      while (i--)
 +              spin_lock_init(sc->locks + i);
 +
 +      /* Initialize atomic variables. */
 +      atomic_set(&sc->stripes, 0);
 +      atomic_set(&sc->stripes_last, 0);
 +      atomic_set(&sc->stripes_to_shrink, 0);
 +      atomic_set(&sc->active_stripes, 0);
 +      atomic_set(&sc->max_active_stripes, 0); /* REMOVEME: statistics. */
 +
 +      /*
 +       * We need a runtime unique # to suffix the kmem cache name
 +       * because we'll have one for each active RAID set.
 +       */
 +      nr = atomic_inc_return(&_stripe_sc_nr);
 +      sprintf(sc->kc.name, "%s_%d", TARGET, nr);
 +      sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
 +                                       0, 0, NULL);
 +      if (!sc->kc.cache)
 +              return -ENOMEM;
 +
 +      /* Create memory cache client context for RAID stripe cache. */
 +      sc->mem_cache_client =
 +              dm_mem_cache_client_create(stripes, rs->set.raid_devs,
 +                                         chunk_pages(rs->set.io_size));
 +      if (IS_ERR(sc->mem_cache_client))
 +              return PTR_ERR(sc->mem_cache_client);
 +
 +      /* Create memory cache client context for RAID recovery stripe(s). */
 +      rec->mem_cache_client =
 +              dm_mem_cache_client_create(MAX_RECOVER, rs->set.raid_devs,
 +                                         chunk_pages(rec->io_size));
 +      if (IS_ERR(rec->mem_cache_client))
 +              return PTR_ERR(rec->mem_cache_client);
 +
 +      /* Allocate stripe for set recovery. */
 +      /* XXX: cope with MAX_RECOVER. */
 +      INIT_LIST_HEAD(&rec->stripes);
 +      for (i = 0; i < MAX_RECOVER; i++) {
 +              stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
 +              if (!stripe)
 +                      return -ENOMEM;
 +
 +              SetStripeRecover(stripe);
 +              stripe->io.size = rec->io_size;
 +              list_add(stripe->lists + LIST_RECOVER, &rec->stripes);
 +      }
 +
 +      /*
 +       * Allocate the stripe objects from the
 +       * cache and add them to the LRU list.
 +       */
 +      return sc_grow(sc, stripes, SC_KEEP);
 +}
 +
 +/* Destroy the stripe cache. */
 +static void sc_exit(struct stripe_cache *sc)
 +{
 +      if (sc->kc.cache) {
 +              BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes)));
 +              kmem_cache_destroy(sc->kc.cache);
 +      }
 +
 +      if (sc->mem_cache_client)
 +              dm_mem_cache_client_destroy(sc->mem_cache_client);
 +
 +      ClearRSRecover(RS(sc));
 +      stripe_recover_free(RS(sc));
 +      if (RS(sc)->recover.mem_cache_client)
 +              dm_mem_cache_client_destroy(RS(sc)->recover.mem_cache_client);
 +
 +      hash_exit(&sc->hash);
 +}
 +
 +/*
 + * Calculate RAID address
 + *
 + * Delivers a tuple with the index of the data disk holding the chunk
 + * in the set, the parity disk's index and the start of the stripe
 + * within the address space of the set (used as the stripe cache hash key).
 + */
 +/* thx MD. */
 +static struct address *
 +raid_address(struct raid_set *rs, sector_t sector, struct address *addr)
 +{
 +      unsigned data_devs = rs->set.data_devs, di, pi,
 +               raid_devs = rs->set.raid_devs;
 +      sector_t stripe, tmp;
 +
 +      /*
 +       * chunk_number = sector / chunk_size
 +       * stripe = chunk_number / data_devs
 +       * di = stripe % data_devs;
 +       */
 +      stripe = sector >> rs->set.chunk_shift;
 +      di = sector_div(stripe, data_devs);
 +
 +      switch (rs->set.raid_type->level) {
 +      case raid5:
 +              tmp = stripe;
 +              pi = sector_div(tmp, raid_devs);
 +
 +              switch (rs->set.raid_type->algorithm) {
 +              case left_asym:         /* Left asymmetric. */
 +                      pi = data_devs - pi;
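 +                      /* Fall through to adjust di. */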
 +              case right_asym:        /* Right asymmetric. */
 +                      if (di >= pi)
 +                              di++;
 +                      break;
 +
 +              case left_sym:          /* Left symmetric. */
 +                      pi = data_devs - pi;
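 +                      /* Fall through to rotate di. */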
 +              case right_sym:         /* Right symmetric. */
 +                      di = (pi + di + 1) % raid_devs;
 +                      break;
 +
 +              default:
 +                      DMERR("Unknown RAID algorithm %d",
 +                            rs->set.raid_type->algorithm);
 +                      goto out;
 +              }
 +
 +              break;
 +
 +      case raid4:
 +              pi = rs->set.pi;
 +              if (di >= pi)
 +                      di++;
 +              break;
 +
 +      default:
 +              DMERR("Unknown RAID level %d", rs->set.raid_type->level);
 +              goto out;
 +      }
 +
 +      /*
 +       * Hash key = start offset on any single device of the RAID set;
 +       * adjusted in case io size differs from chunk size.
 +       */
 +      addr->key = (stripe << rs->set.chunk_shift) +
 +                  (sector & rs->set.io_shift_mask);
 +      addr->di = di;
 +      addr->pi = pi;
 +
 +out:
 +      return addr;
 +}
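 +
 +/*
 + * Worked example (illustrative only): RAID5 left symmetric, 4 devices
 + * (3 data + parity), chunk size 8 sectors (chunk_shift = 3). For
 + * sector 72: stripe = 72 >> 3 = 9, and 9 = 3 * 3 + 0 -> di = 0,
 + * stripe = 3. Parity rotation: 3 % 4 = 3, left symmetric maps
 + * pi = 3 - 3 = 0 and di = (0 + 0 + 1) % 4 = 1, i.e. the chunk lives
 + * on device 1 with parity on device 0, and the hash key becomes
 + * (3 << 3) + (72 & io_shift_mask).
 + */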
 +
 +/*
 + * Copy data across between stripe pages and bio vectors.
 + *
 + * Pay attention to data alignment in stripe and bio pages.
 + */
 +static void
 +bio_copy_page_list(int rw, struct stripe *stripe,
 +                 struct page_list *pl, struct bio *bio)
 +{
 +      unsigned i, page_offset;
 +      void *page_addr;
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct bio_vec *bv;
 +
 +      /* Get start page in page list for this sector. */
 +      i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
 +      pl = pl_elem(pl, i);
 +
 +      page_addr = page_address(pl->page);
 +      page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
 +
 +      /* Walk all segments and copy data across between bio_vecs and pages. */
 +      bio_for_each_segment(bv, bio, i) {
 +              int len = bv->bv_len, size;
 +              unsigned bio_offset = 0;
 +              void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
 +redo:
 +              size = (page_offset + len > PAGE_SIZE) ?
 +                     PAGE_SIZE - page_offset : len;
 +
 +              if (rw == READ)
 +                      memcpy(bio_addr + bio_offset,
 +                             page_addr + page_offset, size);
 +              else
 +                      memcpy(page_addr + page_offset,
 +                             bio_addr + bio_offset, size);
 +
 +              page_offset += size;
 +              if (page_offset == PAGE_SIZE) {
 +                      /*
 +                       * We reached the end of the chunk page ->
 +                       * need to refer to the next one to copy more data.
 +                       */
 +                      len -= size;
 +                      if (len) {
 +                              /* Get next page. */
 +                              pl = pl->next;
 +                              BUG_ON(!pl);
 +                              page_addr = page_address(pl->page);
 +                              page_offset = 0;
 +                              bio_offset += size;
 +                              /* REMOVEME: statistics. */
 +                              atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
 +                              goto redo;
 +                      }
 +              }
 +
 +              __bio_kunmap_atomic(bio_addr, KM_USER0);
 +      }
 +}
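 +
 +/*
 + * Alignment example (illustrative, assuming a 64 sector io size, so
 + * io_mask = 63): a bio at bi_sector = 19 starts in page
 + * (19 & 63) / 8 = 2 of the chunk's page list at byte offset
 + * to_bytes(19 & 7) = 1536; a bio_vec crossing the 4KiB page boundary
 + * is copied in two pieces via the redo loop above.
 + */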
 +
 +/*
 + * Xor optimization macros.
 + */
 +/* Xor data pointer declaration and initialization macros. */
 +#define DECLARE_2     unsigned long *d0 = data[0], *d1 = data[1]
 +#define DECLARE_3     DECLARE_2, *d2 = data[2]
 +#define DECLARE_4     DECLARE_3, *d3 = data[3]
 +#define DECLARE_5     DECLARE_4, *d4 = data[4]
 +#define DECLARE_6     DECLARE_5, *d5 = data[5]
 +#define DECLARE_7     DECLARE_6, *d6 = data[6]
 +#define DECLARE_8     DECLARE_7, *d7 = data[7]
 +
 +/* Xor unroll macros. */
 +#define D2(n) d0[n] = d0[n] ^ d1[n]
 +#define D3(n) D2(n) ^ d2[n]
 +#define D4(n) D3(n) ^ d3[n]
 +#define D5(n) D4(n) ^ d4[n]
 +#define D6(n) D5(n) ^ d5[n]
 +#define D7(n) D6(n) ^ d6[n]
 +#define D8(n) D7(n) ^ d7[n]
 +
 +#define       X_2(macro, offset)      macro(offset); macro(offset + 1);
 +#define       X_4(macro, offset)      X_2(macro, offset); X_2(macro, offset + 2);
 +#define       X_8(macro, offset)      X_4(macro, offset); X_4(macro, offset + 4);
 +#define       X_16(macro, offset)     X_8(macro, offset); X_8(macro, offset + 8);
 +#define       X_32(macro, offset)     X_16(macro, offset); X_16(macro, offset + 16);
 +#define       X_64(macro, offset)     X_32(macro, offset); X_32(macro, offset + 32);
 +
 +/* Define a _xor_#chunks_#xors_per_run() function. */
 +#define       _XOR(chunks, xors_per_run) \
 +static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
 +{ \
 +      unsigned end = XOR_SIZE / sizeof(data[0]), i; \
 +      DECLARE_ ## chunks; \
 +\
 +      for (i = 0; i < end; i += xors_per_run) { \
 +              X_ ## xors_per_run(D ## chunks, i); \
 +      } \
 +}
 +
 +/* Define xor functions for 2 - 8 chunks. */
 +#define       MAKE_XOR_PER_RUN(xors_per_run) \
 +      _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
 +      _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
 +      _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
 +      _XOR(8, xors_per_run);
 +
 +MAKE_XOR_PER_RUN(8)   /* Define _xor_*_8() functions. */
 +MAKE_XOR_PER_RUN(16)  /* Define _xor_*_16() functions. */
 +MAKE_XOR_PER_RUN(32)  /* Define _xor_*_32() functions. */
 +MAKE_XOR_PER_RUN(64)  /* Define _xor_*_64() functions. */
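 +
 +/*
 + * For illustration, _XOR(2, 8) above expands to roughly:
 + *
 + *    static void _xor2_8(unsigned long **data)
 + *    {
 + *            unsigned end = XOR_SIZE / sizeof(data[0]), i;
 + *            unsigned long *d0 = data[0], *d1 = data[1];
 + *
 + *            for (i = 0; i < end; i += 8) {
 + *                    d0[i] = d0[i] ^ d1[i];
 + *                    d0[i + 1] = d0[i + 1] ^ d1[i + 1];
 + *                    ...six more unrolled xors...
 + *            }
 + *    }
 + */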
 +
 +#define MAKE_XOR(xors_per_run) \
 +struct { \
 +      void (*f)(unsigned long **); \
 +} static xor_funcs ## xors_per_run[] = { \
 +      { NULL }, \
 +      { NULL }, \
 +      { _xor2_ ## xors_per_run }, \
 +      { _xor3_ ## xors_per_run }, \
 +      { _xor4_ ## xors_per_run }, \
 +      { _xor5_ ## xors_per_run }, \
 +      { _xor6_ ## xors_per_run }, \
 +      { _xor7_ ## xors_per_run }, \
 +      { _xor8_ ## xors_per_run }, \
 +}; \
 +\
 +static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
 +{ \
 +      /* Call respective function for amount of chunks. */ \
 +      xor_funcs ## xors_per_run[n].f(data); \
 +}
 +
 +/* Define xor_8() - xor_64 functions. */
 +MAKE_XOR(8)
 +MAKE_XOR(16)
 +MAKE_XOR(32)
 +MAKE_XOR(64)
 +
 +/* Maximum number of chunks, which can be xor'ed in one go. */
 +#define       XOR_CHUNKS_MAX  (ARRAY_SIZE(xor_funcs8) - 1)
 +
 +struct xor_func {
 +      xor_function_t f;
 +      const char *name;
 +} static xor_funcs[] = {
 +      {xor_8,   "xor_8"},
 +      {xor_16,  "xor_16"},
 +      {xor_32,  "xor_32"},
 +      {xor_64,  "xor_64"},
 +};
 +
 +/*
 + * Calculate xor parity.
 + *
 + * This indexes into the page list of the stripe.
 + *
 + * All chunks will be xored into the parity chunk
 + * in maximum groups of xor.chunks.
 + *
 + * FIXME: try mapping the pages on discontiguous memory.
 + */
 +static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned max_chunks = rs->xor.chunks, n, p;
 +      unsigned o = sector / SECTORS_PER_PAGE; /* Offset into the page_list. */
 +      unsigned long **d = rs->data;
 +      xor_function_t xor_f = rs->xor.f->f;
 +
 +      /* Address of parity page to xor into. */
 +      d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
 +
 +      /* Preset pointers to data pages. */
 +      for (n = 1, p = rs->set.raid_devs; p--; ) {
 +              if (p != pi && PageIO(PAGE(stripe, p)))
 +                      d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
 +
 +              /* If max chunks -> xor. */
 +              if (n == max_chunks) {
 +                      xor_f(n, d);
 +                      n = 1;
 +              }
 +      }
 +
 +      /* If chunks -> xor. */
 +      if (n > 1)
 +              xor_f(n, d);
 +
 +      /* Set parity page uptodate and clean. */
 +      page_set(PAGE(stripe, pi), CLEAN);
 +}
 +
 +/* Common xor loop through all stripe page lists. */
 +static void common_xor(struct stripe *stripe, sector_t count,
 +                     unsigned off, unsigned p)
 +{
 +      unsigned sector;
 +
 +      for (sector = off; sector < count; sector += SECTORS_PER_XOR)
 +              xor(stripe, p, sector);
 +
 +      atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
 +}
 +
 +/*
 + * Calculate parity sectors on intact stripes.
 + *
 + * Need to calculate raid address for recover stripe, because its
 + * chunk size differs and is typically larger than the io chunk size.
 + */
 +static void parity_xor(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned chunk_size = rs->set.chunk_size,
 +               io_size = stripe->io.size,
 +               xor_size = chunk_size > io_size ? io_size : chunk_size;
 +      sector_t off;
 +
 +      /* This can be the recover stripe with a larger io size. */
 +      for (off = 0; off < io_size; off += xor_size) {
 +              unsigned pi;
 +
 +              /*
 +               * The recovery stripe is likely bigger than regular io
 +               * ones and has no precalculated parity disk index ->
 +               * need to calculate the RAID address.
 +               */
 +              if (unlikely(StripeRecover(stripe))) {
 +                      struct address addr;
 +
 +                      raid_address(rs,
 +                                   (stripe->key + off) * rs->set.data_devs,
 +                                   &addr);
 +                      pi = addr.pi;
 +                      stripe_zero_pl_part(stripe, pi, off,
 +                                          rs->set.chunk_size);
 +              } else
 +                      pi = stripe->idx.parity;
 +
 +              common_xor(stripe, xor_size, off, pi);
 +              page_set(PAGE(stripe, pi), DIRTY);
 +      }
 +}
 +
 +/* Reconstruct missing chunk. */
 +static void reconstruct_xor(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int p = stripe->idx.recover;
 +
 +      BUG_ON(p < 0);
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (raid_set_degraded(rs) ?
 +                  S_RECONSTRUCT_EI : S_RECONSTRUCT_DEV));
 +
 +      /* Zero chunk to be reconstructed. */
 +      stripe_zero_chunk(stripe, p);
 +      common_xor(stripe, stripe->io.size, 0, p);
 +}
 +
 +/*
 + * Try getting a stripe either from the hash or from the LRU list.
 + */
 +static inline void _stripe_get(struct stripe *stripe)
 +{
 +      atomic_inc(&stripe->cnt);
 +}
 +
 +static struct stripe *stripe_get(struct raid_set *rs, struct address *addr)
 +{
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +
 +      stripe = stripe_lookup(sc, addr->key);
 +      if (stripe) {
 +              _stripe_get(stripe);
 +              /* Remove from the lru list if on. */
 +              stripe_lru_del(stripe, LIST_LOCKED);
 +              atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
 +      } else {
 +              /* Second try to get an LRU stripe. */
 +              stripe = stripe_lru_pop(sc);
 +              if (stripe) {
 +                      _stripe_get(stripe);
 +                      /* Invalidate before reinserting with changed key. */
 +                      stripe_invalidate(stripe);
 +                      stripe->key = addr->key;
 +                      stripe->region = dm_rh_sector_to_region(rs->recover.rh,
 +                                                              addr->key);
 +                      stripe->idx.parity = addr->pi;
 +                      sc_insert(sc, stripe);
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_INSCACHE);
 +              }
 +      }
 +
 +      return stripe;
 +}
 +
 +/*
 + * Decrement reference count on a stripe.
 + *
 + * Move it to list of LRU stripes if zero.
 + */
 +static void stripe_put(struct stripe *stripe)
 +{
 +      if (atomic_dec_and_test(&stripe->cnt)) {
 +              if (TestClearStripeActive(stripe))
 +                      atomic_dec(&stripe->sc->active_stripes);
 +
 +              /* Put stripe onto the LRU list. */
 +              stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
 +      }
 +
 +      BUG_ON(atomic_read(&stripe->cnt) < 0);
 +}
 +
 +/*
 + * Process end io
 + *
 + * I need to do it here because I can't do it in interrupt context.
 + *
 + * Read and write functions are split in order to avoid
 + * conditionals in the main loop for performance reasons.
 + */
 +
 +/* Helper read bios on a page list. */
 +static void _bio_copy_page_list(struct stripe *stripe, struct page_list *pl,
 +                              struct bio *bio)
 +{
 +      bio_copy_page_list(READ, stripe, pl, bio);
 +}
 +
 +/* Helper write bios on a page list. */
 +static void _rh_dec(struct stripe *stripe, struct page_list *pl,
 +                  struct bio *bio)
 +{
 +      dm_rh_dec(RS(stripe->sc)->recover.rh, stripe->region);
 +}
 +
 +/* End io all bios on a page list. */
 +static inline int
 +page_list_endio(int rw, struct stripe *stripe, unsigned p, unsigned *count)
 +{
 +      int r = 0;
 +      struct bio_list *bl = BL(stripe, p, rw);
 +
 +      if (!bio_list_empty(bl)) {
 +              struct page_list *pl = PL(stripe, p);
 +              struct page *page = pl->page;
 +
 +              if (PageLocked(page))
 +                      r = -EBUSY;
 +              /*
 +               * FIXME: PageUptodate() not cleared
 +               *        properly for missing chunks ?
 +               */
 +              else if (PageUptodate(page)) {
 +                      struct bio *bio;
 +                      struct raid_set *rs = RS(stripe->sc);
 +                      void (*h_f)(struct stripe *, struct page_list *,
 +                                  struct bio *) =
 +                              (rw == READ) ? _bio_copy_page_list : _rh_dec;
 +
 +                      while ((bio = bio_list_pop(bl))) {
 +                              h_f(stripe, pl, bio);
 +                              _bio_endio(rs, bio, 0);
 +                              stripe_put(stripe);
 +                              if (count)
 +                                      (*count)++;
 +                      }
 +              } else
 +                      r = -EAGAIN;
 +      }
 +
 +      return r;
 +}
 +
 +/*
 + * End io all reads/writes on a stripe copying
 + * read data across from stripe to bios.
 + */
 +static int stripe_endio(int rw, struct stripe *stripe, unsigned *count)
 +{
 +      int r = 0;
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--) {
 +              int rr = page_list_endio(rw, stripe, p, count);
 +
 +              if (rr && r != -EIO)
 +                      r = rr;
 +      }
 +
 +      return r;
 +}
 +
 +/* Fail all ios on a bio list and return # of bios. */
 +static unsigned
 +bio_list_fail(struct raid_set *rs, struct stripe *stripe, struct bio_list *bl)
 +{
 +      unsigned r;
 +      struct bio *bio;
 +
 +      raid_set_dead(rs);
 +
 +      /* Update region counters. */
 +      if (stripe) {
 +              struct dm_region_hash *rh = rs->recover.rh;
 +
 +              bio_list_for_each(bio, bl) {
 +                      if (bio_data_dir(bio) == WRITE)
 +                              dm_rh_dec(rh, stripe->region);
 +              }
 +      }
 +
 +      /* Error end io all bios. */
 +      for (r = 0; (bio = bio_list_pop(bl)); r++)
 +              _bio_endio(rs, bio, -EIO);
 +
 +      return r;
 +}
 +
 +/* Fail all ios of a bio list of a stripe and drop io pending count. */
 +static void
 +stripe_bio_list_fail(struct raid_set *rs, struct stripe *stripe,
 +                   struct bio_list *bl)
 +{
 +      unsigned put = bio_list_fail(rs, stripe, bl);
 +
 +      while (put--)
 +              stripe_put(stripe);
 +}
 +
 +/* Fail all ios hanging off all bio lists of a stripe. */
 +static void stripe_fail_io(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned p = rs->set.raid_devs;
 +
 +      stripe_evict(stripe);
 +
 +      while (p--) {
 +              struct stripe_set *ss = stripe->ss + p;
 +              int i = ARRAY_SIZE(ss->bl);
 +
 +              while (i--)
 +                      stripe_bio_list_fail(rs, stripe, ss->bl + i);
 +      }
 +}
 +
 +/*
 + * Handle all stripes by handing them to the daemon, because we can't
 + * map their pages to copy the data in interrupt context.
 + *
 + * We don't want to handle them here either, while interrupts are disabled.
 + */
 +
 +/* Read/write endio function for dm-io (interrupt context). */
 +static void endio(unsigned long error, void *context)
 +{
 +      struct dm_mem_cache_object *obj = context;
 +      struct stripe_set *ss = obj->private;
 +      struct stripe *stripe = ss->stripe;
 +      struct page *page = obj->pl->page;
 +
 +      if (unlikely(error))
 +              stripe_error(stripe, page);
 +      else
 +              page_set(page, CLEAN);
 +
 +      __clear_page_locked(page);
 +      stripe_io_dec(stripe);
 +
 +      /* Add stripe to endio list and wake daemon. */
 +      stripe_endio_push(stripe);
 +}
 +
 +/*
 + * Recovery io throttling
 + */
 +/* Conditionally reset io counters. */
 +enum count_type { IO_WORK = 0, IO_RECOVER };
 +static int recover_io_reset(struct raid_set *rs)
 +{
 +      unsigned long j = jiffies;
 +
 +      /* Pay attention to jiffies overflows. */
 +      if (j > rs->recover.last_jiffies + HZ
 +          || j < rs->recover.last_jiffies) {
 +              rs->recover.last_jiffies = j;
 +              atomic_set(rs->recover.io_count + IO_WORK, 0);
 +              atomic_set(rs->recover.io_count + IO_RECOVER, 0);
 +              return 1;
 +      }
 +
 +      return 0;
 +}
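 +
 +/*
 + * Note: the open-coded check above also resets on jiffies wraparound;
 + * the idiomatic, wrap-safe spelling of the first condition would be
 + * time_after(j, rs->recover.last_jiffies + HZ).
 + */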
 +
 +/* Count ios. */
 +static INLINE void
 +recover_io_count(struct raid_set *rs, struct stripe *stripe)
 +{
 +      if (RSRecover(rs)) {
 +              recover_io_reset(rs);
 +              atomic_inc(rs->recover.io_count +
 +                         (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
 +      }
 +}
 +
 +/* Read/Write a page_list asynchronously. */
 +static void page_list_rw(struct stripe *stripe, unsigned p)
 +{
 +      struct stripe_cache *sc = stripe->sc;
 +      struct raid_set *rs = RS(sc);
 +      struct dm_mem_cache_object *obj = stripe->obj + p;
 +      struct page_list *pl = obj->pl;
 +      struct page *page = pl->page;
 +      struct raid_dev *dev = rs->dev + p;
 +      struct dm_io_region io = {
 +              .bdev = dev->dev->bdev,
 +              .sector = stripe->key,
 +              .count = stripe->io.size,
 +      };
 +      struct dm_io_request control = {
 +              .bi_rw = PageDirty(page) ? WRITE : READ,
 +              .mem.type = DM_IO_PAGE_LIST,
 +              .mem.ptr.pl = pl,
 +              .mem.offset = 0,
 +              .notify.fn = endio,
 +              .notify.context = obj,
 +              .client = sc->dm_io_client,
 +      };
 +
 +      BUG_ON(PageLocked(page));
 +
 +      /*
 +       * Don't rw past the end of the device, which can happen because
 +       * sectors_per_dev typically isn't divisible by io_size.
 +       */
 +      if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
 +              io.count = rs->set.sectors_per_dev - io.sector;
 +
 +      io.sector += dev->start;        /* Add <offset>. */
 +      recover_io_count(rs, stripe);   /* Recovery io accounting. */
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats +
 +                  (PageDirty(page) ? S_DM_IO_WRITE : S_DM_IO_READ));
 +
 +      ClearPageError(page);
 +      __set_page_locked(page);
 +      io_dev_queued(dev);
 +      BUG_ON(dm_io(&control, 1, &io, NULL));
 +}
 +
 +/*
 + * Write dirty / read not uptodate page lists of a stripe.
 + */
 +static unsigned stripe_page_lists_rw(struct raid_set *rs, struct stripe *stripe)
 +{
 +      unsigned r;
 +
 +      /*
 +       * Increment the pending count on the stripe
 +       * first, so that we don't race in endio().
 +       *
 +       * An inc (IO) is needed for any page:
 +       *
 +       * o not uptodate
 +       * o dirtied by writes merged
 +       * o dirtied by parity calculations
 +       */
 +      r = for_each_io_dev(rs, stripe, _stripe_io_inc);
 +      if (r) {
 +              /* io needed: chunks are not uptodate/dirty. */
 +              int max;        /* REMOVEME: */
 +              struct stripe_cache *sc = &rs->sc;
 +
 +              if (!TestSetStripeActive(stripe))
 +                      atomic_inc(&sc->active_stripes);
 +
 +              /* Take off the lru list in case it got added there. */
 +              stripe_lru_del(stripe, LIST_LOCKED);
 +
 +              /* Submit actual io. */
 +              for_each_io_dev(rs, stripe, page_list_rw);
 +
 +              /* REMOVEME: statistics */
 +              max = sc_active(sc);
 +              if (atomic_read(&sc->max_active_stripes) < max)
 +                      atomic_set(&sc->max_active_stripes, max);
 +
 +              atomic_inc(rs->stats + S_FLUSHS);
 +              /* END REMOVEME: statistics */
 +      }
 +
 +      return r;
 +}
 +
 +/* Work in all pending writes. */
 +static INLINE void _writes_merge(struct stripe *stripe, unsigned p)
 +{
 +      struct bio_list *write = BL(stripe, p, WRITE);
 +
 +      if (!bio_list_empty(write)) {
 +              struct page_list *pl = stripe->obj[p].pl;
 +              struct bio *bio;
 +              struct bio_list *write_merged = BL(stripe, p, WRITE_MERGED);
 +
 +              /*
 +               * We can play with the lists without holding a lock,
 +               * because it is just us accessing them anyway.
 +               */
 +              bio_list_for_each(bio, write)
 +                      bio_copy_page_list(WRITE, stripe, pl, bio);
 +
 +              bio_list_merge(write_merged, write);
 +              bio_list_init(write);
 +              page_set(pl->page, DIRTY);
 +      }
 +}
 +
 +/* Merge in all writes hence dirtying respective pages. */
 +static INLINE void writes_merge(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--)
 +              _writes_merge(stripe, p);
 +}
 +
 +/* Check if a chunk gets completely overwritten. */
 +static INLINE int stripe_check_overwrite(struct stripe *stripe, unsigned p)
 +{
 +      unsigned sectors = 0;
 +      struct bio *bio;
 +      struct bio_list *bl = BL(stripe, p, WRITE);
 +
 +      bio_list_for_each(bio, bl)
 +              sectors += bio_sectors(bio);
 +
 +      return sectors == RS(stripe->sc)->set.io_size;
 +}
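 +
 +/*
 + * Example (illustrative only): with io_size = 64 sectors, two queued
 + * non-overlapping 32 sector writes sum up to exactly 64 sectors, so
 + * the chunk is completely overwritten and need not be read in first.
 + */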
 +
 +/*
 + * Prepare stripe to avoid io on a broken/reconstructed
 + * drive in order to reconstruct data on endio.
 + */
 +enum prepare_type { IO_ALLOW, IO_PROHIBIT };
 +static void stripe_prepare(struct stripe *stripe, unsigned p,
 +                         enum prepare_type type)
 +{
 +      struct page *page = PAGE(stripe, p);
 +
 +      switch (type) {
 +      case IO_PROHIBIT:
 +              /*
 +               * In case we prohibit, we have to make sure that io
 +               * on all chunks other than the one which failed or
 +               * is being reconstructed is allowed and that the
 +               * latter doesn't have state uptodate.
 +               */
 +              stripe_allow_io(stripe);
 +              ClearPageUptodate(page);
 +              ProhibitPageIO(page);
 +
 +              /* REMOVEME: statistics. */
 +              atomic_inc(RS(stripe->sc)->stats + S_PROHIBITPAGEIO);
 +              stripe->idx.recover = p;
 +              SetStripeReconstruct(stripe);
 +              break;
 +
 +      case IO_ALLOW:
 +              AllowPageIO(page);
 +              stripe->idx.recover = -1;
 +              ClearStripeReconstruct(stripe);
 +              break;
 +
 +      default:
 +              BUG();
 +      }
 +}
 +
 +/*
 + * Degraded/reconstruction mode.
 + *
 + * Check stripe state to figure out which chunks don't need IO.
 + */
 +static INLINE void stripe_check_reconstruct(struct stripe *stripe,
 +                                          int prohibited)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      /*
 +       * Degraded mode (device(s) failed) ->
 +       * avoid io on the failed device.
 +       */
 +      if (unlikely(raid_set_degraded(rs))) {
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_DEGRADED);
 +              stripe_prepare(stripe, rs->set.ei, IO_PROHIBIT);
 +              return;
 +      } else {
 +              /*
 +               * Reconstruction mode (ie. a particular device or
 +               * some (rotating) parity chunk is being resynchronized) ->
 +               *   o make sure all needed pages are read in
 +               *   o writes are allowed to go through
 +               */
 +              int r = region_state(rs, stripe->key, DM_RH_NOSYNC);
 +
 +              if (r) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_NOSYNC);
 +                      stripe_prepare(stripe, dev_for_parity(stripe),
 +                                     IO_PROHIBIT);
 +                      return;
 +              }
 +      }
 +
 +      /*
 +       * All disks good. Avoid reading parity chunk and reconstruct it
 +       * unless we have prohibited io to chunk(s).
 +       */
 +      if (!prohibited) {
 +              if (StripeMerged(stripe))
 +                      stripe_prepare(stripe, stripe->idx.parity, IO_ALLOW);
 +              else {
 +                      stripe_prepare(stripe, stripe->idx.parity, IO_PROHIBIT);
 +
 +                      /*
 +                       * Overrule stripe_prepare to reconstruct the
 +                       * parity chunk, because it'll be created new anyway.
 +                       */
 +                      ClearStripeReconstruct(stripe);
 +              }
 +      }
 +}
 +
 +/* Check if stripe is ready to merge writes. */
 +static INLINE int stripe_check_merge(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int prohibited = 0;
 +      unsigned chunks = 0, p = rs->set.raid_devs;
 +
 +      /* Walk all chunks. */
 +      while (p--) {
 +              struct page *page = PAGE(stripe, p);
 +
 +              /* Can't merge active chunks. */
 +              if (PageLocked(page)) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_MERGE_PAGE_LOCKED);
 +                      break;
 +              }
 +
 +              /* Can merge uptodate chunks and have to count parity chunk. */
 +              if (PageUptodate(page) || p == stripe->idx.parity) {
 +                      chunks++;
 +                      continue;
 +              }
 +
 +              /* Read before write ordering. */
 +              if (RSCheckOverwrite(rs) &&
 +                  bio_list_empty(BL(stripe, p, READ))) {
 +                      int r = stripe_check_overwrite(stripe, p);
 +
 +                      if (r) {
 +                              chunks++;
 +                              /* REMOVEME: statistics. */
 +                              atomic_inc(RS(stripe->sc)->stats +
 +                                         S_PROHIBITPAGEIO);
 +                              ProhibitPageIO(page);
 +                              prohibited = 1;
 +                      }
 +              }
 +      }
 +
 +      if (chunks == rs->set.raid_devs) {
 +              /* All chunks are uptodate, get completely overwritten or a mixture. */
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_CAN_MERGE);
 +              return 0;
 +      } else
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_CANT_MERGE);
 +
 +      return prohibited ? 1 : -EPERM;
 +}
 +
 +/* Check which chunk reads can be avoided (no reads queued). */
 +static INLINE int stripe_check_read(struct stripe *stripe)
 +{
 +      int r = 0;
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      /* Walk all chunks. */
 +      while (p--) {
 +              struct page *page = PAGE(stripe, p);
 +
 +              if (!PageLocked(page) &&
 +                  bio_list_empty(BL(stripe, p, READ))) {
 +                      ProhibitPageIO(page);
 +                      r = 1;
 +              }
 +      }
 +
 +      return r;
 +}
 +
 +/*
 + * Read/write a stripe.
 + *
 + * All stripe read/write activity goes through this function.
 + *
 + * States to cover:
 + *   o stripe to read and/or write
 + *   o stripe with error to reconstruct
 + */
 +static int stripe_rw(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int prohibited = 0, r;
 +
 +      /*
 +       * Check the state of the RAID set and if degraded (or
 +       * resynchronizing for reads), read in all other chunks but
 +       * the one on the dead/resynchronizing device in order to be
 +       * able to reconstruct the missing one.
 +       *
 +       * Merge all writes hanging off uptodate pages of the stripe.
 +       */
 +
 +      /* Initially allow io on all chunks and prohibit below, if necessary. */
 +      stripe_allow_io(stripe);
 +
 +      if (StripeRBW(stripe)) {
 +              r = stripe_check_merge(stripe);
 +              if (!r) {
 +                      /*
 +                       * If I could rely on valid parity (which would only
 +                       * be sure in case of a full synchronization),
 +                       * I could xor a fraction of chunks out of
 +                       * parity and back in.
 +                       *
 +                       * For the time being, I got to redo parity...
 +                       */
 +                      /* parity_xor(stripe); */       /* Xor chunks out. */
 +                      stripe_zero_chunk(stripe, stripe->idx.parity);
 +                      writes_merge(stripe);           /* Merge writes in. */
 +                      parity_xor(stripe);             /* Update parity. */
 +                      ClearStripeRBW(stripe);         /* Disable RBW. */
 +                      SetStripeMerged(stripe);        /* Writes merged. */
 +              }
 +
 +              if (r > 0)
 +                      prohibited = 1;
 +      } else if (!raid_set_degraded(rs))
 +              /* Only allow for read avoidance if not degraded. */
 +              prohibited = stripe_check_read(stripe);
 +
 +      /*
 +       * Check if io needs to be allowed/prohibited on certain chunks
 +       * because of a degraded set or reconstruction on a region.
 +       */
 +      stripe_check_reconstruct(stripe, prohibited);
 +
 +      /* Now submit any reads/writes. */
 +      r = stripe_page_lists_rw(rs, stripe);
 +      if (!r) {
 +              /*
 +               * No io submitted because of chunk io prohibited or
 +               * locked pages -> push to end io list for processing.
 +               */
 +              atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
 +              stripe_endio_push(stripe);
 +              wake_do_raid(rs);       /* Wake myself. */
 +      }
 +
 +      return 0;
 +}
 +
 +/* Flush stripe either via flush list or immediately. */
 +enum flush_type { FLUSH_DELAY, FLUSH_NOW };
 +static int stripe_flush(struct stripe *stripe, enum flush_type type)
 +{
 +      int r = 0;
 +
 +      stripe_lru_del(stripe, LIST_LOCKED);
 +
 +      /* Immediately flush. */
 +      if (type == FLUSH_NOW) {
 +              if (likely(raid_set_operational(RS(stripe->sc))))
 +                      r = stripe_rw(stripe); /* Read/write stripe. */
 +              else
 +                      /* Optimization: Fail early on failed sets. */
 +                      stripe_fail_io(stripe);
 +      /* Delay flush by putting it on io list for later processing. */
 +      } else if (type == FLUSH_DELAY)
 +              stripe_io_add(stripe, POS_TAIL, LIST_UNLOCKED);
 +      else
 +              BUG();
 +
 +      return r;
 +}
 +
 +/*
 + * Queue reads and writes to a stripe by hanging
 + * their bios off the stripe sets' read/write lists.
 + *
 + * Endio reads on uptodate chunks.
 + */
 +static INLINE int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
 +                                 struct bio_list *reject)
 +{
 +      int r = 0;
 +      struct address addr;
 +      struct stripe *stripe =
 +              stripe_get(rs, raid_address(rs, bio->bi_sector, &addr));
 +
 +      if (stripe) {
 +              int rr, rw = bio_data_dir(bio);
 +
 +              rr = stripe_lock(rs, stripe, rw, addr.key); /* Lock stripe */
 +              if (rr) {
 +                      stripe_put(stripe);
 +                      goto out;
 +              }
 +
 +              /* Distinguish read and write cases. */
 +              bio_list_add(BL(stripe, addr.di, rw), bio);
 +
 +              /* REMOVEME: statistics */
 +              atomic_inc(rs->stats + (rw == WRITE ?
 +                         S_BIOS_ADDED_WRITE : S_BIOS_ADDED_READ));
 +
 +              if (rw == READ)
 +                      SetStripeRead(stripe);
 +              else {
 +                      SetStripeRBW(stripe);
 +
 +                      /* Increment pending write count on region. */
 +                      dm_rh_inc(rs->recover.rh, stripe->region);
 +                      r = 1;  /* Region hash needs a flush. */
 +              }
 +
 +              /*
 +               * Optimize stripe flushing:
 +               *
 +               * o directly start io for read stripes.
 +               *
 +               * o put stripe onto stripe caches io_list for RBW,
 +               *   so that do_flush() can belabour it after we put
 +               *   more bios to the stripe for overwrite optimization.
 +               */
 +              stripe_flush(stripe,
 +                           StripeRead(stripe) ? FLUSH_NOW : FLUSH_DELAY);
 +
 +      /* Got no stripe from cache -> reject bio. */
 +      } else {
 +out:
 +              bio_list_add(reject, bio);
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_IOS_POST);
 +      }
 +
 +      return r;
 +}
 +
 +/*
 + * Recovery functions
 + */
 +/* Read a stripe off a raid set for recovery. */
 +static int recover_read(struct raid_set *rs, struct stripe *stripe, int idx)
 +{
 +      /* Invalidate all pages so that they get read in. */
 +      stripe_pages_invalidate(stripe);
 +
 +      /* Allow io on all recovery chunks. */
 +      stripe_allow_io(stripe);
 +
 +      if (idx > -1)
 +              ProhibitPageIO(PAGE(stripe, idx));
 +
 +      stripe->key = rs->recover.pos;
 +      return stripe_page_lists_rw(rs, stripe);
 +}
 +
 +/* Write a stripe to a raid set for recovery. */
 +static int recover_write(struct raid_set *rs, struct stripe *stripe, int idx)
 +{
 +      /*
 +       * If this is a reconstruct of a particular device, then
 +       * reconstruct the respective page(s), else create parity page(s).
 +       */
 +      if (idx > -1) {
 +              struct page *page = PAGE(stripe, idx);
 +
 +              AllowPageIO(page);
 +              stripe_zero_chunk(stripe, idx);
 +              common_xor(stripe, stripe->io.size, 0, idx);
 +              page_set(page, DIRTY);
 +      } else
 +              parity_xor(stripe);
 +
 +      return stripe_page_lists_rw(rs, stripe);
 +}
 +
 +/* Recovery bandwidth available? */
 +static int recover_bandwidth(struct raid_set *rs)
 +{
 +      int r, work;
 +
 +      /* On reset -> allow recovery. */
 +      r = recover_io_reset(rs);
 +      if (r || RSBandwidth(rs))
 +              goto out;
 +
 +      work = atomic_read(rs->recover.io_count + IO_WORK);
 +      if (work) {
 +              /* Pay attention to larger recover stripe size. */
 +              int recover =
 +                  atomic_read(rs->recover.io_count + IO_RECOVER) *
 +                              rs->recover.io_size /
 +                              rs->set.io_size;
 +
 +              /*
 +               * Don't use more than given bandwidth of
 +               * the work io for recovery.
 +               */
 +              if (recover > work / rs->recover.bandwidth_work) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_NO_BANDWIDTH);
 +                      return 0;
 +              }
 +      }
 +
 +out:
 +      atomic_inc(rs->stats + S_BANDWIDTH);    /* REMOVEME: statistics. */
 +      return 1;
 +}
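 +
 +/*
 + * Example (illustrative only): with bandwidth_work = 10 and 200 work
 + * ios counted in the current second, recovery is throttled as soon as
 + * its (io size normalized) count exceeds 200 / 10 = 20 ios.
 + */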
 +
 +/* Try to get a region to recover. */
 +static int recover_get_region(struct raid_set *rs)
 +{
 +      struct recover *rec = &rs->recover;
 +      struct dm_region_hash *rh = rec->rh;
 +
 +      /* Start quiescing some regions. */
 +              int r = recover_bandwidth(rs); /* Enough bandwidth? */
 +              int r = recover_bandwidth(rs); /* Enough bandwidth ?. */
 +
 +              if (r) {
 +                      r = dm_rh_recovery_prepare(rh);
 +                      if (r < 0) {
 +                              DMINFO("No %sregions to recover",
 +                                     rec->nr_regions_to_recover ?
 +                                     "more " : "");
 +                              return -ENOENT;
 +                      }
 +              } else
 +                      return -EAGAIN;
 +
 +              SetRSRegionGet(rs);
 +      }
 +
 +      if (!rec->reg) {
 +              rec->reg = dm_rh_recovery_start(rh);
 +              if (rec->reg) {
 +                      /*
 +                       * A reference for the region which I'll
 +                       * keep till I've completely synced it.
 +                       */
 +                      io_get(rs);
 +                      rec->pos = dm_rh_region_to_sector(rh,
 +                              dm_rh_get_region_key(rec->reg));
 +                      rec->end = rec->pos + dm_rh_get_region_size(rh);
 +                      return 1;
 +              } else
 +                      return -EAGAIN;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Read/write a recovery stripe. */
 +static INLINE int recover_stripe_rw(struct raid_set *rs, struct stripe *stripe)
 +{
 +      /* Read/write flip-flop. */
 +      if (TestClearStripeRBW(stripe)) {
 +              SetStripeRead(stripe);
 +              return recover_read(rs, stripe, idx_get(rs));
 +      } else if (TestClearStripeRead(stripe))
 +              return recover_write(rs, stripe, idx_get(rs));
 +
 +      return 0;
 +}
 +
 +/* Reset recovery variables. */
 +static void recovery_region_reset(struct raid_set *rs)
 +{
 +      rs->recover.reg = NULL;
 +      ClearRSRegionGet(rs);
 +}
 +
 +/* Update region hash state. */
 +static void recover_rh_update(struct raid_set *rs, int error)
 +{
 +      struct recover *rec = &rs->recover;
 +      struct dm_region *reg = rec->reg;
 +
 +      if (reg) {
 +              dm_rh_recovery_end(reg, error);
 +              if (!error)
 +                      rec->nr_regions_recovered++;
 +
 +              recovery_region_reset(rs);
 +      }
 +
 +      /* Use rec->rh: reg may be NULL here and must not be dereferenced. */
 +      dm_rh_update_states(rec->rh, 1);
 +      dm_rh_flush(rec->rh);
 +      io_put(rs);     /* Release the io reference for the region. */
 +}
 +
 +/* Called by main io daemon to recover regions. */
 +/* FIXME: cope with MAX_RECOVER > 1. */
 +static INLINE void _do_recovery(struct raid_set *rs, struct stripe *stripe)
 +{
 +      int r;
 +      struct recover *rec = &rs->recover;
 +
 +      /* If the recovery stripe is active -> return. */
 +      if (StripeActive(stripe))
 +              return;
 +
 +      /* io error is fatal for recovery -> stop it. */
 +      if (unlikely(StripeError(stripe)))
 +              goto err;
 +
 +      /* Get a region to recover. */
 +      r = recover_get_region(rs);
 +      switch (r) {
 +      case 1: /* Got a new region. */
 +              /* Flag read before write. */
 +              ClearStripeRead(stripe);
 +              SetStripeRBW(stripe);
 +              break;
 +
 +      case 0:
 +              /* Got a region in the works. */
 +              r = recover_bandwidth(rs);
 +              if (r) /* Got enough bandwidth. */
 +                      break;
 +
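 +              /* Fall through: not enough bandwidth -> try again later. */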
 +      case -EAGAIN:
 +              /* No bandwidth/quiesced region yet, try later. */
 +              wake_do_raid_delayed(rs, HZ / 10);
 +              return;
 +
 +      case -ENOENT:   /* No more regions. */
 +              dm_table_event(rs->ti->table);
 +              goto free;
 +      }
 +
 +      /* Read/write a recover stripe. */
 +      r = recover_stripe_rw(rs, stripe);
 +      if (r) {
 +              /* IO initiated, get another reference for the IO. */
 +              io_get(rs);
 +              return;
 +      }
 +
 +      /* Update recovery position within region. */
 +      rec->pos += stripe->io.size;
 +
 +      /* If we're at end of region, update region hash. */
 +      if (rec->pos >= rec->end ||
 +          rec->pos >= rs->set.sectors_per_dev)
 +              recover_rh_update(rs, 0);
 +      else
 +              SetStripeRBW(stripe);
 +
 +      /* Schedule myself for another round... */
 +      wake_do_raid(rs);
 +      return;
 +
 +err:
 +      raid_set_check_degrade(rs, stripe);
 +
 +      {
 +              char buf[BDEVNAME_SIZE];
 +
 +              DMERR("stopping recovery due to "
 +                    "ERROR on /dev/%s, stripe at offset %llu",
 +                    bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
 +                    (unsigned long long) stripe->key);
 +
 +      }
 +
 +      /* Make sure that all quiesced regions get released. */
 +      do {
 +              if (rec->reg)
 +                      dm_rh_recovery_end(rec->reg, -EIO);
 +
 +              rec->reg = dm_rh_recovery_start(rec->rh);
 +      } while (rec->reg);
 +
 +      recover_rh_update(rs, -EIO);
 +free:
 +      rs->set.dev_to_init = -1;
 +
 +      /* Check for jiffies overrun. */
 +      rs->recover.end_jiffies = jiffies;
 +      if (rs->recover.end_jiffies < rs->recover.start_jiffies)
 +              rs->recover.end_jiffies = ~0;
 +
 +      ClearRSRecover(rs);
 +}
 +
 +static INLINE void do_recovery(struct raid_set *rs)
 +{
 +      struct stripe *stripe;
 +
 +      list_for_each_entry(stripe, &rs->recover.stripes, lists[LIST_RECOVER])
 +              _do_recovery(rs, stripe);
 +
 +      if (!RSRecover(rs))
 +              stripe_recover_free(rs);
 +}
 +
 +/*
 + * END recovery functions
 + */
 +
 +/* End io process all stripes handed in by endio() callback. */
 +static void do_endios(struct raid_set *rs)
 +{
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +
 +      while ((stripe = stripe_endio_pop(sc))) {
 +              unsigned count;
 +
 +              /* Recovery stripe special case. */
 +              if (unlikely(StripeRecover(stripe))) {
 +                      if (stripe_io(stripe))
 +                              continue;
 +
 +                      io_put(rs); /* Release region io reference. */
 +                      ClearStripeActive(stripe);
 +
 +                      /* REMOVEME: statistics. */
 +                      atomic_dec(&sc->active_stripes);
 +                      continue;
 +              }
 +
 +              /* Early end io all reads on any uptodate chunks. */
 +              stripe_endio(READ, stripe, (count = 0, &count));
 +              if (stripe_io(stripe)) {
 +                      if (count) /* REMOVEME: statistics. */
 +                              atomic_inc(rs->stats + S_ACTIVE_READS);
 +
 +                      continue;
 +              }
 +
 +              /* Set stripe inactive after all io got processed. */
 +              if (TestClearStripeActive(stripe))
 +                      atomic_dec(&sc->active_stripes);
 +
 +              /* Unlock stripe (for clustering). */
 +              stripe_unlock(rs, stripe);
 +
 +              /*
 +               * If an io error on a stripe occurred and the RAID set
 +               * is still operational, requeue the stripe for io.
 +               */
 +              if (TestClearStripeError(stripe)) {
 +                      raid_set_check_degrade(rs, stripe);
 +                      ClearStripeReconstruct(stripe);
 +
 +                      if (!StripeMerged(stripe) &&
 +                          raid_set_operational(rs)) {
 +                              stripe_pages_invalidate(stripe);
 +                              stripe_flush(stripe, FLUSH_DELAY);
 +                              /* REMOVEME: statistics. */
 +                              atomic_inc(rs->stats + S_REQUEUE);
 +                              continue;
 +                      }
 +              }
 +
 +              /* Check if the RAID set is inoperational to error ios. */
 +              if (!raid_set_operational(rs)) {
 +                      ClearStripeReconstruct(stripe);
 +                      stripe_fail_io(stripe);
 +                      BUG_ON(atomic_read(&stripe->cnt));
 +                      continue;
 +              }
 +
 +              /* Got to reconstruct a missing chunk. */
 +              if (TestClearStripeReconstruct(stripe))
 +                      reconstruct_xor(stripe);
 +
 +              /*
 +               * Now that we've got a complete stripe, we can
 +               * process the rest of the end ios on reads.
 +               */
 +              BUG_ON(stripe_endio(READ, stripe, NULL));
 +              ClearStripeRead(stripe);
 +
 +              /*
 +               * Read-before-write stripes need to be flushed again in
 +               * order to work the write data into the pages *after*
 +               * they were read in.
 +               */
 +              if (TestClearStripeMerged(stripe))
 +                      /* End io all bios which got merged already. */
 +                      BUG_ON(stripe_endio(WRITE_MERGED, stripe, NULL));
 +
 +              /* Got to put on flush list because of new writes. */
 +              if (StripeRBW(stripe))
 +                      stripe_flush(stripe, FLUSH_DELAY);
 +      }
 +}
 +
 +/*
 + * Stripe cache shrinking.
 + */
 +static INLINE void do_sc_shrink(struct raid_set *rs)
 +{
 +      unsigned shrink = atomic_read(&rs->sc.stripes_to_shrink);
 +
 +      if (shrink) {
 +              unsigned cur = atomic_read(&rs->sc.stripes);
 +
 +              sc_shrink(&rs->sc, shrink);
 +              shrink -= cur - atomic_read(&rs->sc.stripes);
 +              atomic_set(&rs->sc.stripes_to_shrink, shrink);
 +
 +              /*
 +               * Wake myself up in case we failed to shrink the
 +               * requested amount in order to try again later.
 +               */
 +              if (shrink)
 +                      wake_do_raid(rs);
 +      }
 +}
 +
 +
 +/*
 + * Process all ios
 + *
 + * We do different things with the io depending on the
 + * state of the region that it's in:
 + *
 + * o reads: hang off stripe cache or postpone if full
 + *
 + * o writes:
 + *
 + *  CLEAN/DIRTY/NOSYNC:       increment pending and hang io off stripe's stripe set.
 + *                    In case stripe cache is full or busy, postpone the io.
 + *
 + *  RECOVERING:               delay the io until recovery of the region completes.
 + *
 + */
 +static INLINE void do_ios(struct raid_set *rs, struct bio_list *ios)
 +{
 +      int r;
 +      unsigned flush = 0;
 +      struct dm_region_hash *rh = rs->recover.rh;
 +      struct bio *bio;
 +      struct bio_list delay, reject;
 +
 +      bio_list_init(&delay);
 +      bio_list_init(&reject);
 +
 +      /*
 +       * Classify each io:
 +       *    o delay to recovering regions
 +       *    o queue to all other regions
 +       */
 +      while ((bio = bio_list_pop(ios))) {
 +              /*
 +               * In case we get a barrier bio, push it back onto
 +               * the input queue unless all work queues are empty
 +               * and the stripe cache is inactive.
 +               */
 +              if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_BARRIER);
 +                      if (!list_empty(rs->sc.lists + LIST_IO) ||
 +                          !bio_list_empty(&delay) ||
 +                          !bio_list_empty(&reject) ||
 +                          sc_active(&rs->sc)) {
 +                              bio_list_push(ios, bio);
 +                              break;
 +                      }
 +              }
 +
 +              r = region_state(rs, _sector(rs, bio), DM_RH_RECOVERING);
 +              if (unlikely(r)) {
 +                      /* Got to wait for recovering regions. */
 +                      bio_list_add(&delay, bio);
 +                      SetRSBandwidth(rs);
 +              } else {
 +                      /*
 +                       * Process ios to non-recovering regions by queueing
 +                       * them to stripes (does rh_inc()) for writes).
 +                       */
 +                      flush += stripe_queue_bio(rs, bio, &reject);
 +              }
 +      }
 +
 +      if (flush) {
 +              r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
 +              if (r)
 +                      DMERR("dirty log flush");
 +      }
 +
 +      /* Delay ios to regions which are recovering. */
 +      while ((bio = bio_list_pop(&delay))) {
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_DELAYED_BIOS);
 +              atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
 +              dm_rh_delay(rh, bio);
 +      }
 +
 +      /* Merge any rejected bios back to the head of the input list. */
 +      bio_list_merge_head(ios, &reject);
 +}
 +
 +/* Flush any stripes on the io list. */
 +static INLINE void do_flush(struct raid_set *rs)
 +{
 +      struct list_head *list = rs->sc.lists + LIST_IO, *pos, *tmp;
 +
 +      list_for_each_safe(pos, tmp, list) {
 +              int r = stripe_flush(list_entry(pos, struct stripe,
 +                                              lists[LIST_IO]), FLUSH_NOW);
 +
 +              /* Remove from the list only if the stripe got processed. */
 +              if (!r)
 +                      list_del_init(pos);
 +      }
 +}
 +
 +/* Send an event in case we're getting too busy. */
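 +/* "Busy" means more than 4/5 of the cache's stripes are active. */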
 +static INLINE void do_busy_event(struct raid_set *rs)
 +{
 +      if (sc_active(&rs->sc) > atomic_read(&rs->sc.stripes) * 4 / 5) {
 +              if (!TestSetRSScBusy(rs))
 +                      dm_table_event(rs->ti->table);
 +      } else
 +              ClearRSScBusy(rs);
 +}
 +
 +/* Unplug: let the io roll on the set's devices. */
 +static INLINE void do_unplug(struct raid_set *rs)
 +{
 +      struct raid_dev *dev = rs->dev + rs->set.raid_devs;
 +
 +      while (dev-- > rs->dev) {
 +              /* Only call any device unplug function, if io got queued. */
 +              if (io_dev_clear(dev))
 +                      blk_unplug(bdev_get_queue(dev->dev->bdev));
 +      }
 +}
 +
 +/*-----------------------------------------------------------------
 + * RAID daemon
 + *---------------------------------------------------------------*/
 +/*
 + * o belabour all end ios
 + * o optionally shrink the stripe cache
 + * o update the region hash states
 + * o optionally do recovery
 + * o grab the input queue
 + * o work on all requeued or new ios and perform stripe cache flushes
 + *   unless the RAID set is inoperational (when we error ios)
 + * o check if the stripe cache gets too busy and throw an event if so
 + * o unplug any component raid devices with queued bios
 + */
 +static void do_raid(struct work_struct *ws)
 +{
 +      struct raid_set *rs = container_of(ws, struct raid_set, io.dws.work);
 +      struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
 +      spinlock_t *lock = &rs->io.in_lock;
 +
 +      /*
 +       * We always need to end io, so that ios
 +       * can get errored in case the set failed
 +       * and the region counters get decremented
 +       * before we update the region hash states.
 +       */
 +redo:
 +      do_endios(rs);
 +
 +      /*
 +       * Now that we've end io'd, which may have put stripes on
 +       * the LRU list, we shrink the stripe cache if requested.
 +       */
 +      do_sc_shrink(rs);
 +
 +      /* Update region hash states before we go any further. */
 +      dm_rh_update_states(rs->recover.rh, 1);
 +
 +      /* Try to recover regions. */
 +      if (RSRecover(rs))
 +              do_recovery(rs);
 +
 +      /* More endios -> process. */
 +      if (!stripe_endio_empty(&rs->sc)) {
 +              atomic_inc(rs->stats + S_REDO);
 +              goto redo;
 +      }
 +
 +      /* Quickly grab all new ios queued and add them to the work list. */
 +      spin_lock_irq(lock);
 +      bio_list_merge(ios, ios_in);
 +      bio_list_init(ios_in);
 +      spin_unlock_irq(lock);
 +
 +      /* Let's assume we're operational most of the time ;-). */
 +      if (likely(raid_set_operational(rs))) {
 +              /* If we got ios, work them into the cache. */
 +              if (!bio_list_empty(ios)) {
 +                      do_ios(rs, ios);
 +                      do_unplug(rs);  /* Unplug the set's device queues. */
 +              }
 +
 +              do_flush(rs);           /* Flush any stripes on io list. */
 +              do_unplug(rs);          /* Unplug the set's device queues. */
 +              do_busy_event(rs);      /* Check if we got too busy. */
 +
 +              /* More endios -> process. */
 +              if (!stripe_endio_empty(&rs->sc)) {
 +                      atomic_inc(rs->stats + S_REDO);
 +                      goto redo;
 +              }
 +      } else
 +              /* No way to reconstruct data with too many devices failed. */
 +              bio_list_fail(rs, NULL, ios);
 +}
 +
 +/*
 + * Callback for region hash to dispatch
 + * delayed bios queued to recovered regions
 + * (Gets called via rh_update_states()).
 + */
 +static void dispatch_delayed_bios(void *context, struct bio_list *bl)
 +{
 +      struct raid_set *rs = context;
 +      struct bio *bio;
 +
 +      /* REMOVEME: decrement pending delayed bios counter. */
 +      bio_list_for_each(bio, bl)
 +              atomic_dec(rs->stats + S_DELAYED_BIOS);
 +
 +      /* Merge region hash private list to work list. */
 +      bio_list_merge_head(&rs->io.work, bl);
 +      bio_list_init(bl);
 +      ClearRSBandwidth(rs);
 +}
 +
 +/*************************************************************
 + * Constructor helpers
 + *************************************************************/
 +/* Calculate MB/sec. */
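 +/*
 + * Example with illustrative numbers: speed = 100 xor runs per tick,
 + * 2 data devices, a recovery io size of 64 sectors and HZ = 250 give
 + * 100 * 2 * 64 * 250 = 3,200,000 sectors/s, i.e. roughly 1562 MB/s.
 + */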
 +static INLINE unsigned mbpers(struct raid_set *rs, unsigned speed)
 +{
 +      return to_bytes(speed * rs->set.data_devs *
 +                      rs->recover.io_size * HZ >> 10) >> 10;
 +}
 +
 +/*
 + * Discover fastest xor algorithm and # of chunks combination.
 + */
 +/* Calculate speed for algorithm and # of chunks. */
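 +/* Returns the number of xor runs completed within one timer tick (jiffy). */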
 +static INLINE unsigned xor_speed(struct stripe *stripe)
 +{
 +      unsigned r = 0;
 +      unsigned long j;
 +
 +      /* Wait for next tick. */
 +      for (j = jiffies; j == jiffies;)
 +              ;
 +
 +      /* Do xors for a full tick. */
 +      for (j = jiffies; j == jiffies;) {
 +              mb();
 +              common_xor(stripe, stripe->io.size, 0, 0);
 +              mb();
 +              r++;
 +              mb();
 +      }
 +
 +      return r;
 +}
 +
 +/* Optimize xor algorithm for this RAID set. */
 +static unsigned xor_optimize(struct raid_set *rs)
 +{
 +      unsigned chunks_max = 2, speed_max = 0;
 +      struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
 +      struct stripe *stripe;
 +
 +      BUG_ON(list_empty(&rs->recover.stripes));
 +      stripe = list_first_entry(&rs->recover.stripes, struct stripe,
 +                          lists[LIST_RECOVER]);
 +
 +      /*
 +       * Got to allow io on all chunks, so that
 +       * xor() will actually work on them.
 +       */
 +      stripe_allow_io(stripe);
 +
 +      /* Try all xor functions. */
 +      while (f-- > xor_funcs) {
 +              unsigned speed;
 +
 +              /* Set actual xor function for common_xor(). */
 +              rs->xor.f = f;
 +              rs->xor.chunks = XOR_CHUNKS_MAX + 1;
 +
 +              while (rs->xor.chunks-- > 2) {
 +                      speed = xor_speed(stripe);
 +                      if (speed > speed_max) {
 +                              speed_max = speed;
 +                              chunks_max = rs->xor.chunks;
 +                              f_max = f;
 +                      }
 +              }
 +      }
 +
 +      /* Memorize optimum parameters. */
 +      rs->xor.f = f_max;
 +      rs->xor.chunks = chunks_max;
 +      return speed_max;
 +}
 +
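 +/* Return true if fixed + obj * num would overflow ULONG_MAX. */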
 +static inline int array_too_big(unsigned long fixed, unsigned long obj,
 +                                unsigned long num)
 +{
 +      return (num > (ULONG_MAX - fixed) / obj);
 +}
 +
 +static void wakeup_all_recovery_waiters(void *context)
 +{
 +}
 +
 +/*
 + * Allocate a RAID context (a RAID set)
 + */
 +static int
 +context_alloc(struct raid_set **raid_set, struct raid_type *raid_type,
 +            unsigned stripes, unsigned chunk_size, unsigned io_size,
 +            unsigned recover_io_size, unsigned raid_devs,
 +            sector_t sectors_per_dev,
 +            struct dm_target *ti, unsigned dl_parms, char **argv)
 +{
 +      int r;
 +      unsigned p;
 +      size_t len;
 +      sector_t region_size, ti_len;
 +      struct raid_set *rs = NULL;
 +      struct dm_dirty_log *dl;
 +      struct recover *rec;
 +
 +      /*
 +       * Create the dirty log
 +       *
 +       * We need to change length for the dirty log constructor,
 +       * because we want an amount of regions for all stripes derived
 +       * from the single device size, so that we can keep region
 + * size = 2^^n independent of the number of devices
 +       */
 +      ti_len = ti->len;
 +      ti->len = sectors_per_dev;
 +      dl = dm_dirty_log_create(argv[0], ti, NULL, dl_parms, argv + 2);
 +      ti->len = ti_len;
 +      if (!dl)
 +              goto bad_dirty_log;
 +
 +      /* Chunk size *must* be smaller than region size. */
 +      region_size = dl->type->get_region_size(dl);
 +      if (chunk_size > region_size)
 +              goto bad_chunk_size;
 +
 +      /* Recover io size *must* be smaller than region size as well. */
 +      if (recover_io_size > region_size)
 +              goto bad_recover_io_size;
 +
 +      /* Size and allocate the RAID set structure. */
 +      len = sizeof(*rs->data) + sizeof(*rs->dev);
 +      if (array_too_big(sizeof(*rs), len, raid_devs))
 +              goto bad_array;
 +
 +      len = sizeof(*rs) + raid_devs * len;
 +      rs = kzalloc(len, GFP_KERNEL);
 +      if (!rs)
 +              goto bad_alloc;
 +
 +      rec = &rs->recover;
 +      atomic_set(&rs->io.in_process, 0);
 +      atomic_set(&rs->io.in_process_max, 0);
 +      rec->io_size = recover_io_size;
 +
 +      /* Pointer to data array. */
 +      rs->data = (unsigned long **)
 +                 ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
 +      rec->dl = dl;
 +      rs->set.raid_devs = p = raid_devs;
 +      rs->set.data_devs = raid_devs - raid_type->parity_devs;
 +      rs->set.raid_type = raid_type;
 +
 +      /*
 +       * Set chunk and io size and respective shifts
 +       * (used to avoid divisions)
 +       */
 +      rs->set.chunk_size = chunk_size;
 +      rs->set.chunk_mask = chunk_size - 1;
 +      rs->set.chunk_shift = ffs(chunk_size) - 1;
 +
 +      rs->set.io_size = io_size;
 +      rs->set.io_mask = io_size - 1;
 +      rs->set.io_shift = ffs(io_size) - 1;
 +      rs->set.io_shift_mask = rs->set.chunk_mask & ~rs->set.io_mask;
 +
 +      rs->set.pages_per_io = chunk_pages(io_size);
 +      rs->set.sectors_per_dev = sectors_per_dev;
 +
 +      rs->set.ei = -1;        /* Indicate no failed device. */
 +      atomic_set(&rs->set.failed_devs, 0);
 +
 +      rs->ti = ti;
 +
 +      atomic_set(rec->io_count + IO_WORK, 0);
 +      atomic_set(rec->io_count + IO_RECOVER, 0);
 +
 +      /* Initialize io lock and queues. */
 +      spin_lock_init(&rs->io.in_lock);
 +      bio_list_init(&rs->io.in);
 +      bio_list_init(&rs->io.work);
 +
 +      init_waitqueue_head(&rs->io.suspendq);  /* Suspend waiters (dm-io). */
 +
 +      rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
 +
 +      rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios, wake_do_raid,
 +                                      wakeup_all_recovery_waiters,
 +                                      rs->ti->begin, MAX_RECOVER, dl,
 +                                      region_size, rs->recover.nr_regions);
 +      if (IS_ERR(rec->rh))
 +              goto bad_rh;
 +
 +      /* Initialize stripe cache. */
 +      r = sc_init(rs, stripes);
 +      if (r)
 +              goto bad_sc;
 +
 +      /* Create dm-io client context. */
 +      rs->sc.dm_io_client = dm_io_client_create(rs->set.raid_devs *
 +                                                rs->set.pages_per_io);
 +      if (IS_ERR(rs->sc.dm_io_client))
 +              goto bad_dm_io_client;
 +
 +      /* REMOVEME: statistics. */
 +      stats_reset(rs);
 +      ClearRSDevelStats(rs);  /* Disable development statistics. */
 +
 +      *raid_set = rs;
 +      return 0;
 +
 +bad_dirty_log:
 +      TI_ERR_RET("Error creating dirty log", -ENOMEM);
 +
 +bad_chunk_size:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR("Chunk size larger than region size");
 +
 +bad_recover_io_size:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR("Recover stripe io size larger than region size");
 +
 +bad_array:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR("Array too big");
 +
 +bad_alloc:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR_RET("Cannot allocate raid context", -ENOMEM);
 +
 +bad_rh:
 +      dm_dirty_log_destroy(dl);
 +      ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
 +      goto free_rs;
 +
 +bad_sc:
 +      ti->error = DM_MSG_PREFIX "Error creating stripe cache";
 +      goto free;
 +
 +bad_dm_io_client:
 +      ti->error = DM_MSG_PREFIX "Error allocating dm-io resources";
 +free:
 +      sc_exit(&rs->sc);
 +      dm_region_hash_destroy(rec->rh); /* Destroys dirty log as well. */
 +free_rs:
 +      kfree(rs);
 +      return -ENOMEM;
 +}
 +
 +/* Free a RAID context (a RAID set). */
 +static void
 +context_free(struct raid_set *rs, struct dm_target *ti, unsigned r)
 +{
 +      while (r--)
 +              dm_put_device(ti, rs->dev[r].dev);
 +
 +      dm_io_client_destroy(rs->sc.dm_io_client);
 +      sc_exit(&rs->sc);
 +      dm_region_hash_destroy(rs->recover.rh);
 +      dm_dirty_log_destroy(rs->recover.dl);
 +      kfree(rs);
 +}
 +
 +/* Create work queue and initialize work. */
 +static int rs_workqueue_init(struct raid_set *rs)
 +{
 +      struct dm_target *ti = rs->ti;
 +
 +      rs->io.wq = create_singlethread_workqueue(DAEMON);
 +      if (!rs->io.wq)
 +              TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
 +
 +      INIT_DELAYED_WORK(&rs->io.dws, do_raid);
 +      return 0;
 +}
 +
 +/* Return pointer to raid_type structure for raid name. */
 +static struct raid_type *get_raid_type(char *name)
 +{
 +      struct raid_type *r = ARRAY_END(raid_types);
 +
 +      while (r-- > raid_types) {
 +              if (!strnicmp(STR_LEN(r->name, name)))
 +                      return r;
 +      }
 +
 +      return NULL;
 +}
 +
 +/* FIXME: factor out to dm core. */
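 +/* Return true if a is an exact multiple of b; store a / b in *n. */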
 +static int multiple(sector_t a, sector_t b, sector_t *n)
 +{
 +      sector_t r = a;
 +
 +      sector_div(r, b);
 +      *n = r;
 +      return a == r * b;
 +}
 +
 +/* Log RAID set information to kernel log. */
 +static void raid_set_log(struct raid_set *rs, unsigned speed)
 +{
 +      unsigned p;
 +      char buf[BDEVNAME_SIZE];
 +
 +      for (p = 0; p < rs->set.raid_devs; p++)
 +              DMINFO("/dev/%s is raid disk %u",
 +                     bdevname(rs->dev[p].dev->bdev, buf), p);
 +
 +      DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes",
 +             rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
 +             atomic_read(&rs->sc.stripes));
 +      DMINFO("algorithm \"%s\", %u chunks with %uMB/s", rs->xor.f->name,
 +             rs->xor.chunks, mbpers(rs, speed));
 +      DMINFO("%s set with net %u/%u devices", rs->set.raid_type->descr,
 +             rs->set.data_devs, rs->set.raid_devs);
 +}
 +
 +/* Get all devices and offsets. */
 +static int
 +dev_parms(struct dm_target *ti, struct raid_set *rs,
 +        char **argv, int *p)
 +{
 +      for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
 +              int r;
 +              unsigned long long tmp;
 +              struct raid_dev *dev = rs->dev + *p;
 +              union dev_lookup dl = {.dev = dev };
 +
 +              /* Get offset and device. */
 +              r = sscanf(argv[1], "%llu", &tmp);
 +              if (r != 1)
 +                      TI_ERR("Invalid RAID device offset parameter");
 +
 +              dev->start = tmp;
 +              r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 +                                &dev->dev);
 +              if (r)
 +                      TI_ERR_RET("RAID device lookup failure", r);
 +
 +              r = raid_dev_lookup(rs, bynumber, &dl);
 +              if (r != -ENODEV && r < *p) {
 +                      (*p)++; /* Ensure dm_put_device() on actual device. */
 +                      TI_ERR_RET("Duplicate RAID device", -ENXIO);
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +/* Set recovery bandwidth. */
 +static INLINE void
 +recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
 +{
 +      rs->recover.bandwidth = bandwidth;
 +      rs->recover.bandwidth_work = 100 / bandwidth;
 +}
 +
 +/* Handle variable number of RAID parameters. */
 +static int
 +raid_variable_parms(struct dm_target *ti, char **argv,
 +                  unsigned i, int *raid_parms,
 +                  int *chunk_size, int *chunk_size_parm,
 +                  int *stripes, int *stripes_parm,
 +                  int *io_size, int *io_size_parm,
 +                  int *recover_io_size, int *recover_io_size_parm,
 +                  int *bandwidth, int *bandwidth_parm)
 +{
 +      /* Fetch # of variable raid parameters. */
 +      if (sscanf(argv[i++], "%d", raid_parms) != 1 ||
 +          !range_ok(*raid_parms, 0, 5))
 +              TI_ERR("Bad variable raid parameters number");
 +
 +      if (*raid_parms) {
 +              /*
 +               * If we've got variable RAID parameters,
 +               * chunk size is the first one
 +               */
 +              if (sscanf(argv[i++], "%d", chunk_size) != 1 ||
 +                  (*chunk_size != -1 &&
 +                   (!POWER_OF_2(*chunk_size) ||
 +                    !range_ok(*chunk_size, IO_SIZE_MIN, CHUNK_SIZE_MAX))))
 +                      TI_ERR("Invalid chunk size; must be 2^^n and <= 16384");
 +
 +              *chunk_size_parm = *chunk_size;
 +              if (*chunk_size == -1)
 +                      *chunk_size = CHUNK_SIZE;
 +
 +              /*
 +               * In case we've got 2 or more variable raid
 +               * parameters, the number of stripes is the second one
 +               */
 +              if (*raid_parms > 1) {
 +                      if (sscanf(argv[i++], "%d", stripes) != 1 ||
 +                          (*stripes != -1 &&
 +                           !range_ok(*stripes, STRIPES_MIN,
 +                                     STRIPES_MAX)))
 +                              TI_ERR("Invalid number of stripes: must "
 +                                     "be >= 8 and <= 8192");
 +              }
 +
 +              *stripes_parm = *stripes;
 +              if (*stripes == -1)
 +                      *stripes = STRIPES;
 +
 +              /*
 +               * In case we've got 3 or more variable raid
 +               * parameters, the io size is the third one.
 +               */
 +              if (*raid_parms > 2) {
 +                      if (sscanf(argv[i++], "%d", io_size) != 1 ||
 +                          (*io_size != -1 &&
 +                           (!POWER_OF_2(*io_size) ||
 +                            !range_ok(*io_size, IO_SIZE_MIN,
 +                                      min(BIO_MAX_SECTORS / 2,
 +                                      *chunk_size)))))
 +                              TI_ERR("Invalid io size; must "
 +                                     "be 2^^n and less equal "
 +                                     "min(BIO_MAX_SECTORS/2, chunk size)");
 +              } else
 +                      *io_size = *chunk_size;
 +
 +              *io_size_parm = *io_size;
 +              if (*io_size == -1)
 +                      *io_size = *chunk_size;
 +
 +              /*
 +               * In case we've got 4 variable raid parameters,
 +               * the recovery stripe io_size is the fourth one
 +               */
 +              if (*raid_parms > 3) {
 +                      if (sscanf(argv[i++], "%d", recover_io_size) != 1 ||
 +                          (*recover_io_size != -1 &&
 +                           (!POWER_OF_2(*recover_io_size) ||
 +                           !range_ok(*recover_io_size, RECOVER_IO_SIZE_MIN,
 +                                     BIO_MAX_SECTORS / 2))))
 +                              TI_ERR("Invalid recovery io size; must be "
 +                                     "2^^n and less equal BIO_MAX_SECTORS/2");
 +              }
 +
 +              *recover_io_size_parm = *recover_io_size;
 +              if (*recover_io_size == -1)
 +                      *recover_io_size = RECOVER_IO_SIZE;
 +
 +              /*
 +               * In case we've got 5 variable raid parameters,
 +               * the recovery io bandwidth is the fifth one
 +               */
 +              if (*raid_parms > 4) {
 +                      if (sscanf(argv[i++], "%d", bandwidth) != 1 ||
 +                          (*bandwidth != -1 &&
 +                           !range_ok(*bandwidth, BANDWIDTH_MIN,
 +                                     BANDWIDTH_MAX)))
 +                              TI_ERR("Invalid recovery bandwidth "
 +                                     "percentage; must be > 0 and <= 100");
 +              }
 +
 +              *bandwidth_parm = *bandwidth;
 +              if (*bandwidth == -1)
 +                      *bandwidth = BANDWIDTH;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Parse optional locking parameters. */
 +static int
 +raid_locking_parms(struct dm_target *ti, char **argv,
 +                 unsigned i, int *locking_parms,
 +                 struct dm_raid45_locking_type **locking_type)
 +{
 +      *locking_parms = 0;
 +      *locking_type = &locking_none;
 +
 +      if (!strnicmp(argv[i], "none", strlen(argv[i])))
 +              *locking_parms = 1;
 +      else if (!strnicmp(argv[i + 1], "locking", strlen(argv[i + 1]))) {
 +              *locking_type = &locking_none;
 +              *locking_parms = 2;
 +      } else if (!strnicmp(argv[i + 1], "cluster", strlen(argv[i + 1]))) {
 +              *locking_type = &locking_cluster;
 +              /* FIXME: namespace. */
 +              *locking_parms = 3;
 +      }
 +
 +      return *locking_parms == 1 ? -EINVAL : 0;
 +}
 +
 +/* Set backing device information properties of RAID set. */
 +static void rs_set_bdi(struct raid_set *rs, unsigned stripes, unsigned chunks)
 +{
 +      unsigned p, ra_pages;
 +      struct mapped_device *md = dm_table_get_md(rs->ti->table);
 +      struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
 +
 +      /* Set read-ahead for the RAID set and the component devices. */
 +      bdi->ra_pages = stripes * stripe_pages(rs, rs->set.io_size);
 +      ra_pages = chunks * chunk_pages(rs->set.io_size);
 +      for (p = rs->set.raid_devs; p--; ) {
 +              struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
 +
 +              q->backing_dev_info.ra_pages = ra_pages;
 +      }
 +
 +      /* Set congested function and data. */
 +      bdi->congested_fn = raid_set_congested;
 +      bdi->congested_data = rs;
 +
 +      dm_put(md);
 +}
 +
 +/* Get backing device information properties of RAID set. */
 +static void rs_get_ra(struct raid_set *rs, unsigned *stripes, unsigned *chunks)
 +{
 +      struct mapped_device *md = dm_table_get_md(rs->ti->table);
 +
 +      *stripes = dm_disk(md)->queue->backing_dev_info.ra_pages
 +                 / stripe_pages(rs, rs->set.io_size);
 +      *chunks = bdev_get_queue(rs->dev->dev->bdev)->backing_dev_info.ra_pages
 +                / chunk_pages(rs->set.io_size);
 +
 +      dm_put(md);
 +}
 +
 +/*
 + * Construct a RAID4/5 mapping:
 + *
 + * log_type #log_params <log_params> \
 + * raid_type [#parity_dev] #raid_variable_params <raid_params> \
 + * [locking "none"/"cluster"]
 + * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
 + *
 + * log_type = "core"/"disk",
 + * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
 + * log_params = [dirty_log_path] region_size [[no]sync])
 + *
 + * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
 + *
 + * #parity_dev = N if raid_type = "raid4"
 + * o N = -1: pick default = last device
 + * o N >= 0 and < #raid_devs: parity device index
 + *
 + * #raid_variable_params = 0-5; raid_params (-1 = default):
 + *   [chunk_size [#stripes [io_size [recover_io_size [%recovery_bandwidth]]]]]
 + *   o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
 + *     and <= CHUNK_SIZE_MAX)
 + *   o #stripes is number of stripes allocated to stripe cache
 + *     (must be > 1 and < STRIPES_MAX)
 + *   o io_size (io unit size per device in sectors; must be 2^^n and > 8)
 + *   o recover_io_size (io unit size per device for recovery in sectors;
 + *     must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
 + *   o %recovery_bandwidth is the maximum share of io bandwidth spent
 + *     on recovery during application io (1-100%)
 + * If raid_variable_params = 0, defaults will be used.
 + * Any raid_variable_param can be set to -1 to apply a default
 + *
 + * #raid_devs = N (N >= 3)
 + *
 + * #dev_to_initialize = N
 + * -1: initialize parity on all devices
 + * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
 + * of a failed device's content after replacement
 + *
 + * <dev_path> = device_path (eg, /dev/sdd1)
 + * <offset>   = begin at offset on <dev_path>
 + *
 + */
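 +/*
 + * Example mapping (hypothetical devices and target length, for
 + * illustration only): a 3-disk raid5_la set with a core dirty log
 + * (8192 sectors region size) and no variable RAID parameters
 + * (i.e. all defaults) could be constructed as:
 + *
 + *   dmsetup create r5 --table \
 + *    "0 2097152 raid45 core 2 8192 nosync raid5_la 0 3 -1 \
 + *     /dev/sda1 0 /dev/sdb1 0 /dev/sdc1 0"
 + */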
 +#define       MIN_PARMS       13
 +static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 +{
 +      int bandwidth = BANDWIDTH, bandwidth_parm = -1,
 +          chunk_size = CHUNK_SIZE, chunk_size_parm = -1,
 +          dev_to_init, dl_parms, locking_parms, parity_parm, pi = -1,
 +          i, io_size = IO_SIZE, io_size_parm = -1,
 +          r, raid_devs, raid_parms,
 +          recover_io_size = RECOVER_IO_SIZE, recover_io_size_parm = -1,
 +          stripes = STRIPES, stripes_parm = -1;
 +      unsigned speed;
 +      sector_t tmp, sectors_per_dev;
 +      struct dm_raid45_locking_type *locking;
 +      struct raid_set *rs;
 +      struct raid_type *raid_type;
 +
 +      /* Ensure minimum number of parameters. */
 +      if (argc < MIN_PARMS)
 +              TI_ERR("Not enough parameters");
 +
 +      /* Fetch # of dirty log parameters. */
 +      if (sscanf(argv[1], "%d", &dl_parms) != 1
 +          || !range_ok(dl_parms, 1, 4711))
 +              TI_ERR("Bad dirty log parameters number");
 +
 +      /* Check raid_type. */
 +      raid_type = get_raid_type(argv[dl_parms + 2]);
 +      if (!raid_type)
 +              TI_ERR("Bad raid type");
 +
 +      /* In case of RAID4, parity drive is selectable. */
 +      parity_parm = !!(raid_type->level == raid4);
 +
 +      /* Handle variable number of RAID parameters. */
 +      r = raid_variable_parms(ti, argv, dl_parms + parity_parm + 3,
 +                              &raid_parms,
 +                              &chunk_size, &chunk_size_parm,
 +                              &stripes, &stripes_parm,
 +                              &io_size, &io_size_parm,
 +                              &recover_io_size, &recover_io_size_parm,
 +                              &bandwidth, &bandwidth_parm);
 +      if (r)
 +              return r;
 +
 +      r = raid_locking_parms(ti, argv,
 +                             dl_parms + parity_parm + raid_parms + 4,
 +                             &locking_parms, &locking);
 +      if (r)
 +              return r;
 +
 +      /* # of raid devices. */
 +      i = dl_parms + parity_parm + raid_parms + locking_parms + 4;
 +      if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
 +          raid_devs < raid_type->minimal_devs)
 +              TI_ERR("Invalid number of raid devices");
 +
 +      /* In case of RAID4, check parity drive index is in limits. */
 +      if (raid_type->level == raid4) {
 +              /* Fetch index of parity device. */
 +              if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
 +                  !range_ok(pi, 0, raid_devs - 1))
 +                      TI_ERR("Invalid RAID4 parity device index");
 +      }
 +
 +      /*
 +       * Index of device to initialize starts at 0
 +       *
 +       * o -1 -> don't initialize a particular device,
 +       * o 0..raid_devs-1 -> initialize respective device
 +       *   (used for reconstruction of a replaced device)
 +       */
 +      if (sscanf(argv[dl_parms + parity_parm + raid_parms +
 +                      locking_parms + 5], "%d", &dev_to_init) != 1 ||
 +          !range_ok(dev_to_init, -1, raid_devs - 1))
 +              TI_ERR("Invalid number for raid device to initialize");
 +
 +      /* Check # of raid device arguments. */
 +      if (argc - dl_parms - parity_parm - raid_parms - 6 !=
 +          2 * raid_devs)
 +              TI_ERR("Wrong number of raid device/offset arguments");
 +
 +      /*
 +       * Check that the table length is divisible
 +       * by (raid_devs - parity_devs)
 +       */
 +      if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
 +                    &sectors_per_dev))
 +              TI_ERR("Target length not divisible by number of data devices");
 +
 +      /*
 +       * Check that the device size is
 +       * divisible by chunk size
 +       */
 +      if (!multiple(sectors_per_dev, chunk_size, &tmp))
 +              TI_ERR("Device length not divisible by chunk_size");
 +
 +      /****************************************************************
 +       * Now that we checked the constructor arguments ->
 +       * let's allocate the RAID set
 +       ****************************************************************/
 +      r = context_alloc(&rs, raid_type, stripes, chunk_size, io_size,
 +                        recover_io_size, raid_devs, sectors_per_dev,
 +                        ti, dl_parms, argv);
 +      if (r)
 +              return r;
 +
 +      /*
 +       * Set these here in order to avoid passing
 +       * too many arguments to context_alloc()
 +       */
 +      rs->set.dev_to_init_parm = dev_to_init;
 +      rs->set.dev_to_init = dev_to_init;
 +      rs->set.pi_parm = pi;
 +      rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
 +      rs->set.raid_parms = raid_parms;
 +      rs->set.chunk_size_parm = chunk_size_parm;
 +      rs->set.io_size_parm = io_size_parm;
 +      rs->sc.stripes_parm = stripes_parm;
 +      rs->recover.io_size_parm = recover_io_size_parm;
 +      rs->recover.bandwidth_parm = bandwidth_parm;
 +      recover_set_bandwidth(rs, bandwidth);
 +
 +      /* Use locking type to lock stripe access. */
 +      rs->locking = locking;
 +
 +      /* Get the device/offset tuples. */
 +      argv += dl_parms + 6 + parity_parm + raid_parms;
 +      r = dev_parms(ti, rs, argv, &i);
 +      if (r)
 +              goto err;
 +
 +      /* Initialize recovery. */
 +      rs->recover.start_jiffies = jiffies;
 +      rs->recover.end_jiffies = 0;
 +      recovery_region_reset(rs);
 +
 +      /* Allow for recovery of any nosync regions. */
 +      SetRSRecover(rs);
 +
 +      /* Set backing device information (eg. read ahead). */
 +      rs_set_bdi(rs, chunk_size * 2, io_size * 4);
 +      SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
 +
 +      speed = xor_optimize(rs); /* Select best xor algorithm. */
 +
 +      /* Initialize work queue to handle this RAID set's io. */
 +      r = rs_workqueue_init(rs);
 +      if (r)
 +              goto err;
 +
 +      raid_set_log(rs, speed); /* Log information about RAID set. */
 +
 +      /*
 +       * Make sure that dm core only hands maximum io size
 +       * length down and pays attention to io boundaries.
 +       */
 +      ti->split_io = rs->set.io_size;
 +      ti->private = rs;
 +      return 0;
 +
 +err:
 +      context_free(rs, ti, i);
 +      return r;
 +}
 +
 +/*
 + * Destruct a raid mapping
 + */
 +static void raid_dtr(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +
 +      /* Indicate recovery end so that ios in flight drain. */
 +      ClearRSRecover(rs);
 +
 +      wake_do_raid(rs);       /* Wake daemon. */
 +      wait_ios(rs);           /* Wait for any io still being processed. */
 +      destroy_workqueue(rs->io.wq);
 +      context_free(rs, ti, rs->set.raid_devs);
 +}
 +
 +/* Queues ios to RAID sets. */
 +static inline void queue_bio(struct raid_set *rs, struct bio *bio)
 +{
 +      int wake;
 +      struct bio_list *in = &rs->io.in;
 +      spinlock_t *in_lock = &rs->io.in_lock;
 +
 +      spin_lock_irq(in_lock);
 +      wake = bio_list_empty(in);
 +      bio_list_add(in, bio);
 +      spin_unlock_irq(in_lock);
 +
 +      /* Wake daemon if input list was empty. */
 +      if (wake)
 +              wake_do_raid(rs);
 +}
 +
 +/* Raid mapping function. */
 +static int raid_map(struct dm_target *ti, struct bio *bio,
 +                  union map_info *map_context)
 +{
 +      /* I don't want to waste stripe cache capacity. */
 +      if (bio_rw(bio) == READA)
 +              return -EIO;
 +      else {
 +              struct raid_set *rs = ti->private;
 +
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats +
 +                         (bio_data_dir(bio) == WRITE ?
 +                          S_BIOS_WRITE : S_BIOS_READ));
 +
 +              /*
 +               * Get io reference to be waiting for to drop
 +               * to zero on device suspension/destruction.
 +               */
 +              io_get(rs);
 +              bio->bi_sector -= ti->begin;    /* Remap sector. */
 +              queue_bio(rs, bio);             /* Queue to the daemon. */
 +              return DM_MAPIO_SUBMITTED;      /* Handle later. */
 +      }
 +}
 +
 +/* Device suspend. */
 +static void raid_postsuspend(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +      struct dm_dirty_log *dl = rs->recover.dl;
 +
 +      SetRSSuspended(rs);
 +
 +      if (RSRecover(rs))
 +              dm_rh_stop_recovery(rs->recover.rh); /* Wakes do_raid(). */
 +      else
 +              wake_do_raid(rs);
 +
 +      wait_ios(rs);   /* Wait for completion of all ios being processed. */
 +      if (dl->type->postsuspend && dl->type->postsuspend(dl))
 +              /* Suspend dirty log. */
 +              /* FIXME: need better error handling. */
 +              DMWARN("log suspend failed");
 +}
 +
 +/* Device resume. */
 +static void raid_resume(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +      struct recover *rec = &rs->recover;
 +      struct dm_dirty_log *dl = rec->dl;
 +
 +      if (dl->type->resume && dl->type->resume(dl))
 +              /* Resume dirty log. */
 +              /* FIXME: need better error handling. */
 +              DMWARN("log resume failed");
 +
 +      rec->nr_regions_to_recover =
 +          rec->nr_regions - dl->type->get_sync_count(dl);
 +
 +      ClearRSSuspended(rs);
 +
 +      /* Reset any unfinished recovery. */
 +      if (RSRecover(rs)) {
 +              recovery_region_reset(rs);
 +              dm_rh_start_recovery(rec->rh);/* Calls wake_do_raid(). */
 +      } else
 +              wake_do_raid(rs);
 +}
 +
 +static INLINE unsigned sc_size(struct raid_set *rs)
 +{
 +      return to_sector(atomic_read(&rs->sc.stripes) *
 +                       (sizeof(struct stripe) +
 +                        (sizeof(struct stripe_set) +
 +                         (sizeof(struct page_list) +
 +                          to_bytes(rs->set.io_size) *
 +                          rs->set.raid_devs)) +
 +                        (rs->recover.end_jiffies ?
 +                         0 : to_bytes(rs->set.raid_devs *
 +                                      rs->recover.io_size))));
 +}
 +
 +/* REMOVEME: status output for development. */
 +static void
 +raid_devel_stats(struct dm_target *ti, char *result,
 +               unsigned *size, unsigned maxlen)
 +{
 +      unsigned chunks, stripes, sz = *size;
 +      unsigned long j;
 +      char buf[BDEVNAME_SIZE], *p;
 +      struct stats_map *sm, *sm_end = ARRAY_END(stats_map);
 +      struct raid_set *rs = ti->private;
 +      struct recover *rec = &rs->recover;
 +      struct timespec ts;
 +
 +      DMEMIT("%s ", version);
 +      DMEMIT("io_inprocess=%d ", atomic_read(&rs->io.in_process));
 +      DMEMIT("io_inprocess_max=%d ", atomic_read(&rs->io.in_process_max));
 +
 +      for (sm = stats_map; sm < sm_end; sm++)
 +              DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
 +
 +      DMEMIT(" overwrite=%s ", RSCheckOverwrite(rs) ? "on" : "off");
 +      DMEMIT("sc=%u/%u/%u/%u/%u ", rs->set.chunk_size, rs->set.io_size,
 +             atomic_read(&rs->sc.stripes), rs->sc.hash.buckets,
 +             sc_size(rs));
 +
 +      j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
 +          rec->start_jiffies;
 +      jiffies_to_timespec(j, &ts);
 +      sprintf(buf, "%ld.%ld", ts.tv_sec, ts.tv_nsec);
 +      p = strchr(buf, '.');
 +      p[3] = 0;
 +
 +      DMEMIT("rg=%llu%s/%llu/%llu/%u %s ",
 +             (unsigned long long) rec->nr_regions_recovered,
 +             RSRegionGet(rs) ? "+" : "",
 +             (unsigned long long) rec->nr_regions_to_recover,
 +             (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
 +
 +      rs_get_ra(rs, &stripes, &chunks);
 +      DMEMIT("ra=%u/%u ", stripes, chunks);
 +
 +      *size = sz;
 +}
 +
 +static int
 +raid_status(struct dm_target *ti, status_type_t type,
 +          char *result, unsigned maxlen)
 +{
 +      unsigned i, sz = 0;
 +      char buf[BDEVNAME_SIZE];
 +      struct raid_set *rs = ti->private;
 +
 +      switch (type) {
 +      case STATUSTYPE_INFO:
 +              /* REMOVEME: statistics. */
 +              if (RSDevelStats(rs))
 +                      raid_devel_stats(ti, result, &sz, maxlen);
 +
 +              DMEMIT("%u ", rs->set.raid_devs);
 +
 +              for (i = 0; i < rs->set.raid_devs; i++)
 +                      DMEMIT("%s ",
 +                             format_dev_t(buf, rs->dev[i].dev->bdev->bd_dev));
 +
 +              DMEMIT("1 ");
 +              for (i = 0; i < rs->set.raid_devs; i++) {
 +                      DMEMIT("%c", dev_operational(rs, i) ? 'A' : 'D');
 +
 +                      if (rs->set.raid_type->level == raid4 &&
 +                          i == rs->set.pi)
 +                              DMEMIT("p");
 +
 +                      if (rs->set.dev_to_init == i)
 +                              DMEMIT("i");
 +              }
 +
 +              break;
 +
 +      case STATUSTYPE_TABLE:
 +              sz = rs->recover.dl->type->status(rs->recover.dl, type,
 +                                                result, maxlen);
 +              DMEMIT("%s %u ", rs->set.raid_type->name,
 +                     rs->set.raid_parms);
 +
 +              if (rs->set.raid_type->level == raid4)
 +                      DMEMIT("%d ", rs->set.pi_parm);
 +
 +              if (rs->set.raid_parms)
 +                      DMEMIT("%d ", rs->set.chunk_size_parm);
 +
 +              if (rs->set.raid_parms > 1)
 +                      DMEMIT("%d ", rs->sc.stripes_parm);
 +
 +              if (rs->set.raid_parms > 2)
 +                      DMEMIT("%d ", rs->set.io_size_parm);
 +
 +              if (rs->set.raid_parms > 3)
 +                      DMEMIT("%d ", rs->recover.io_size_parm);
 +
 +              if (rs->set.raid_parms > 4)
 +                      DMEMIT("%d ", rs->recover.bandwidth_parm);
 +
 +              DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
 +
 +              for (i = 0; i < rs->set.raid_devs; i++)
 +                      DMEMIT("%s %llu ",
 +                             format_dev_t(buf,
 +                                          rs->dev[i].dev->bdev->bd_dev),
 +                             (unsigned long long) rs->dev[i].start);
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Message interface
 + */
 +enum raid_msg_actions {
 +      act_bw,                 /* Recovery bandwidth switch. */
 +      act_dev,                /* Device failure switch. */
 +      act_overwrite,          /* Stripe overwrite check. */
 +      act_read_ahead,         /* Set read ahead. */
 +      act_stats,              /* Development statistics switch. */
 +      act_sc,                 /* Stripe cache switch. */
 +
 +      act_on,                 /* Set entity on. */
 +      act_off,                /* Set entity off. */
 +      act_reset,              /* Reset entity. */
 +
 +      act_set = act_on,       /* Set # absolute. */
 +      act_grow = act_off,     /* Grow # by an amount. */
 +      act_shrink = act_reset, /* Shrink # by an amount. */
 +};
 +
 +/* Turn a delta to absolute. */
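 +/* E.g. with a current value of 50: "set 60" -> 60, "grow 10" -> 60, "shrink 10" -> 40. */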
 +static int _absolute(unsigned long action, int act, int r)
 +{
 +      /* Make delta absolute. */
 +      if (test_bit(act_set, &action))
 +              ;
 +      else if (test_bit(act_grow, &action))
 +              r += act;
 +      else if (test_bit(act_shrink, &action))
 +              r = act - r;
 +      else
 +              r = -EINVAL;
 +
 +      return r;
 +}
 +
 +/* Change recovery io bandwidth. */
 +static int bandwidth_change(struct dm_msg *msg, void *context)
 +{
 +      struct raid_set *rs = context;
 +      int act = rs->recover.bandwidth;
 +      int bandwidth = DM_MSG_INT_ARG(msg);
 +
 +      if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
 +              /* Make delta bandwidth absolute. */
 +              bandwidth = _absolute(msg->action, act, bandwidth);
 +
 +              /* Check range. */
 +              if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
 +                      recover_set_bandwidth(rs, bandwidth);
 +                      return 0;
 +              }
 +      }
 +
 +      set_bit(dm_msg_ret_arg, &msg->ret);
 +      set_bit(dm_msg_ret_inval, &msg->ret);
 +      return -EINVAL;
 +}
 +
 +/* Change state of a device (running/offline). */
 +/* FIXME: this only works while recovering! */
 +static int device_state(struct dm_msg *msg, void *context)
 +{
 +      int r;
 +      const char *str = "is already ";
 +      union dev_lookup dl = { .dev_name = DM_MSG_STR_ARG(msg) };
 +      struct raid_set *rs = context;
 +
 +      r = raid_dev_lookup(rs, strchr(dl.dev_name, ':') ?
 +                          bymajmin : byname, &dl);
 +      if (r == -ENODEV) {
 +              DMERR("device %s is no member of this set", dl.dev_name);
 +              return r;
 +      }
 +
 +      if (test_bit(act_off, &msg->action)) {
 +              if (dev_operational(rs, r))
 +                      str = "";
 +      } else if (!dev_operational(rs, r))
 +              str = "";
 +
 +      DMINFO("/dev/%s %s%s", dl.dev_name, str,
 +             test_bit(act_off, &msg->action) ? "offline" : "running");
 +
 +      return test_bit(act_off, &msg->action) ?
 +             raid_set_check_and_degrade(rs, NULL, r) :
 +             raid_set_check_and_upgrade(rs, r);
 +}
 +
 +/* Set/reset development feature flags. */
 +static int devel_flags(struct dm_msg *msg, void *context)
 +{
 +      struct raid_set *rs = context;
 +
 +      if (test_bit(act_on, &msg->action))
 +              return test_and_set_bit(msg->spec->parm,
 +                                      &rs->io.flags) ? -EPERM : 0;
 +      else if (test_bit(act_off, &msg->action))
 +              return test_and_clear_bit(msg->spec->parm,
 +                                        &rs->io.flags) ? 0 : -EPERM;
 +      else if (test_bit(act_reset, &msg->action)) {
 +              if (test_bit(act_stats, &msg->action)) {
 +                      stats_reset(rs);
 +                      goto on;
 +              } else if (test_bit(act_overwrite, &msg->action)) {
 +on:
 +                      set_bit(msg->spec->parm, &rs->io.flags);
 +                      return 0;
 +              }
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +/* Set stripe and chunk read ahead pages. */
 +static int read_ahead_set(struct dm_msg *msg, void *context)
 +{
 +      int stripes = DM_MSG_INT_ARGS(msg, 0);
 +      int chunks  = DM_MSG_INT_ARGS(msg, 1);
 +
 +      if (range_ok(stripes, 1, 512) &&
 +          range_ok(chunks, 1, 512)) {
 +              rs_set_bdi(context, stripes, chunks);
 +              return 0;
 +      }
 +
 +      set_bit(dm_msg_ret_arg, &msg->ret);
 +      set_bit(dm_msg_ret_inval, &msg->ret);
 +      return -EINVAL;
 +}
 +
 +/* Resize the stripe cache. */
 +static int stripecache_resize(struct dm_msg *msg, void *context)
 +{
 +      int act, stripes;
 +      struct raid_set *rs = context;
 +
 +      /* Deny permission in case the daemon is still shrinking! */
 +      if (atomic_read(&rs->sc.stripes_to_shrink))
 +              return -EPERM;
 +
 +      stripes = DM_MSG_INT_ARG(msg);
 +      if (stripes > 0) {
 +              act = atomic_read(&rs->sc.stripes);
 +
 +              /* Make delta stripes absolute. */
 +              stripes = _absolute(msg->action, act, stripes);
 +
 +              /*
 +               * Check range and that the # of stripes changes.
 +               * We can grow from here but need to leave any
 +               * shrinking to the worker for synchronization.
 +               */
 +              if (range_ok(stripes, STRIPES_MIN, STRIPES_MAX)) {
 +                      if (stripes > act)
 +                              return sc_grow(&rs->sc, stripes - act, SC_GROW);
 +                      else if (stripes < act) {
 +                              atomic_set(&rs->sc.stripes_to_shrink,
 +                                         act - stripes);
 +                              wake_do_raid(rs);
 +                      }
 +
 +                      return 0;
 +              }
 +      }
 +
 +      set_bit(dm_msg_ret_arg, &msg->ret);
 +      set_bit(dm_msg_ret_inval, &msg->ret);
 +      return -EINVAL;
 +}
 +
 +/* Parse the RAID message action. */
 +/*
 + * 'ba[ndwidth] {se[t],g[row],sh[rink]} #'    # e.g. 'ba se 50'
 + * 'de[vice] o[ffline]/r[unning] DevName/maj:min' # e.g. 'device o /dev/sda'
 + * 'o[verwrite] {on,of[f],r[eset]}'           # e.g. 'o of'
 + * 'r[ead_ahead] set #stripes #chunks'        # e.g. 'r se 3 2'
 + * 'sta[tistics] {on,of[f],r[eset]}'          # e.g. 'stat of'
 + * 'str[ipecache] {se[t],g[row],sh[rink]} #'  # e.g. 'stripe set 1024'
 + *
 + */
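 +/*
 + * For example (hypothetical map name "r5"), the recovery bandwidth
 + * could be raised to 50% with:
 + *
 + *   dmsetup message r5 0 bandwidth set 50
 + */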
 +static int
 +raid_message(struct dm_target *ti, unsigned argc, char **argv)
 +{
 +      /* Variables to store the parsed parameters in. */
 +      static int i[2];
 +      static unsigned long *i_arg[] = {
 +              (unsigned long *) i + 0,
 +              (unsigned long *) i + 1,
 +      };
 +      static char *p;
 +      static unsigned long *p_arg[] = { (unsigned long *) &p };
 +
 +      /* Declare all message option strings. */
 +      static char *str_sgs[] = { "set", "grow", "shrink" };
 +      static char *str_dev[] = { "running", "offline" };
 +      static char *str_oor[] = { "on", "off", "reset" };
 +
 +      /* Declare all actions. */
 +      static unsigned long act_sgs[] = { act_set, act_grow, act_shrink };
 +      static unsigned long act_oor[] = { act_on, act_off, act_reset };
 +
 +      /* Bandwidth option. */
 +      static struct dm_message_option bw_opt = { 3, str_sgs, act_sgs };
 +      static struct dm_message_argument bw_args = {
 +              1, i_arg, { dm_msg_int_t }
 +      };
 +
 +      /* Device option. */
 +      static struct dm_message_option dev_opt = { 2, str_dev, act_oor };
 +      static struct dm_message_argument dev_args = {
 +              1, p_arg, { dm_msg_base_t }
 +      };
 +
 +      /* Read ahead option. */
 +      static struct dm_message_option ra_opt = { 1, str_sgs, act_sgs };
 +      static struct dm_message_argument ra_args = {
 +              2, i_arg, { dm_msg_int_t, dm_msg_int_t }
 +      };
 +
 +      static struct dm_message_argument null_args = {
 +              0, NULL, { dm_msg_int_t }
 +      };
 +
 +      /* Overwrite and statistics option. */
 +      static struct dm_message_option ovr_stats_opt = { 3, str_oor, act_oor };
 +
 +      /* Stripecache option. */
 +      static struct dm_message_option stripe_opt = { 3, str_sgs, act_sgs };
 +
 +      /* Declare messages. */
 +      static struct dm_msg_spec specs[] = {
 +              { "bandwidth", act_bw, &bw_opt, &bw_args,
 +                0, bandwidth_change },
 +              { "device", act_dev, &dev_opt, &dev_args,
 +                0, device_state },
 +              { "overwrite", act_overwrite, &ovr_stats_opt, &null_args,
 +                RS_CHECK_OVERWRITE, devel_flags },
 +              { "read_ahead", act_read_ahead, &ra_opt, &ra_args,
 +                0, read_ahead_set },
 +              { "statistics", act_stats, &ovr_stats_opt, &null_args,
 +                RS_DEVEL_STATS, devel_flags },
 +              { "stripecache", act_sc, &stripe_opt, &bw_args,
 +                0, stripecache_resize },
 +      };
 +
 +      /* The message for the parser. */
 +      struct dm_msg msg = {
 +              .num_specs = ARRAY_SIZE(specs),
 +              .specs = specs,
 +      };
 +
 +      return dm_message_parse(TARGET, &msg, ti->private, argc, argv);
 +}
 +/*
 + * END message interface
 + */
 +
 +static struct target_type raid_target = {
 +      .name = "raid45",
 +      .version = {1, 0, 0},
 +      .module = THIS_MODULE,
 +      .ctr = raid_ctr,
 +      .dtr = raid_dtr,
 +      .map = raid_map,
 +      .postsuspend = raid_postsuspend,
 +      .resume = raid_resume,
 +      .status = raid_status,
 +      .message = raid_message,
 +};
 +
 +static void init_exit(const char *bad_msg, const char *good_msg, int r)
 +{
 +      if (r)
 +              DMERR("Failed to %sregister target [%d]", bad_msg, r);
 +      else
 +              DMINFO("%s %s", good_msg, version);
 +}
 +
 +static int __init dm_raid_init(void)
 +{
 +      int r;
 +
 +      r = dm_register_target(&raid_target);
 +      init_exit("", "initialized", r);
 +      return r;
 +}
 +
 +static void __exit dm_raid_exit(void)
 +{
 +      dm_unregister_target(&raid_target);
 +      init_exit("un", "exit", 0);
 +}
 +
 +/* Module hooks. */
 +module_init(dm_raid_init);
 +module_exit(dm_raid_exit);
 +
 +MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
 +MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
 +MODULE_LICENSE("GPL");
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
  #include <linux/skbuff.h>
  #include <linux/ethtool.h>
  #include <linux/if_ether.h>
 -#include <linux/tcp.h>
 -#include <linux/udp.h>
  #include <linux/moduleparam.h>
  #include <linux/mm.h>
+ #include <linux/slab.h>
  #include <net/ip.h>
  
  #include <xen/xen.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
  #include <linux/ctype.h>
  #include <linux/fcntl.h>
  #include <linux/mm.h>
 +#include <linux/sched.h>
  #include <linux/proc_fs.h>
  #include <linux/notifier.h>
 -#include <linux/kthread.h>
  #include <linux/mutex.h>
 -#include <linux/io.h>
+ #include <linux/slab.h>
 +#include <linux/io.h>
  
  #include <asm/page.h>
  #include <asm/pgtable.h>
Simple merge
@@@ -38,7 -38,7 +38,8 @@@
  #include <linux/namei.h>
  #include <linux/miscdevice.h>
  #include <linux/magic.h>
+ #include <linux/slab.h>
 +#include <linux/precache.h>
  #include "compat.h"
  #include "ctree.h"
  #include "disk-io.h"
Simple merge
diff --cc fs/mpage.c
Simple merge
Simple merge
diff --cc fs/nfs/file.c
Simple merge
diff --cc fs/nfs/inode.c
Simple merge
index 8aba096,0000000..8e59585
mode 100644,000000..100644
--- /dev/null
@@@ -1,574 -1,0 +1,575 @@@
 +/*
 + * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
 + *
 + * This program is free software; you can redistribute it and/or modify it
 + * under the terms of the GNU General Public License as published by the
 + * Free Software Foundation; either version 2, or (at your option) any
 + * later version.
 + *
 + * This program is distributed in the hope that it will be useful, but
 + * WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + */
 +
++#include <linux/slab.h>
 +#include <linux/sched.h>
 +#include <linux/module.h>
 +#include <linux/fs.h>
 +#include <linux/fs_struct.h>
 +#include <linux/nfs4acl.h>
 +
 +MODULE_LICENSE("GPL");
 +
 +/*
 + * ACL entries that have ACE4_SPECIAL_WHO set in ace->e_flags use the
 + * pointer values of these constants in ace->u.e_who to avoid massive
 + * amounts of string comparisons.
 + */
 +
 +const char nfs4ace_owner_who[]          = "OWNER@";
 +const char nfs4ace_group_who[]          = "GROUP@";
 +const char nfs4ace_everyone_who[] = "EVERYONE@";
 +
 +EXPORT_SYMBOL(nfs4ace_owner_who);
 +EXPORT_SYMBOL(nfs4ace_group_who);
 +EXPORT_SYMBOL(nfs4ace_everyone_who);
 +
 +/**
 + * nfs4acl_alloc  -  allocate an acl
 + * @count:    number of entries
 + */
 +struct nfs4acl *
 +nfs4acl_alloc(int count)
 +{
 +      size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
 +      struct nfs4acl *acl = kmalloc(size, GFP_KERNEL);
 +
 +      if (acl) {
 +              memset(acl, 0, size);
 +              atomic_set(&acl->a_refcount, 1);
 +              acl->a_count = count;
 +      }
 +      return acl;
 +}
 +EXPORT_SYMBOL(nfs4acl_alloc);
 +
 +/**
 + * nfs4acl_clone  -  create a copy of an acl
 + */
 +struct nfs4acl *
 +nfs4acl_clone(const struct nfs4acl *acl)
 +{
 +      int count = acl->a_count;
 +      size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
 +      struct nfs4acl *dup = kmalloc(size, GFP_KERNEL);
 +
 +      if (dup) {
 +              memcpy(dup, acl, size);
 +              atomic_set(&dup->a_refcount, 1);
 +      }
 +      return dup;
 +}
 +
 +/*
 + * The POSIX permissions are supersets of the below mask flags.
 + *
 + * The ACE4_READ_ATTRIBUTES and ACE4_READ_ACL flags are always granted
 + * in POSIX. The ACE4_SYNCHRONIZE flag has no meaning under POSIX. We
 + * make sure that we do not mask them if they are set, so that users who
 + * rely on these flags won't get confused.
 + */
 +#define ACE4_POSIX_MODE_READ ( \
 +      ACE4_READ_DATA | ACE4_LIST_DIRECTORY )
 +#define ACE4_POSIX_MODE_WRITE ( \
 +      ACE4_WRITE_DATA | ACE4_ADD_FILE | \
 +      ACE4_APPEND_DATA | ACE4_ADD_SUBDIRECTORY | \
 +      ACE4_DELETE_CHILD )
 +#define ACE4_POSIX_MODE_EXEC ( \
 +      ACE4_EXECUTE)
 +
 +static int
 +nfs4acl_mask_to_mode(unsigned int mask)
 +{
 +      int mode = 0;
 +
 +      if (mask & ACE4_POSIX_MODE_READ)
 +              mode |= MAY_READ;
 +      if (mask & ACE4_POSIX_MODE_WRITE)
 +              mode |= MAY_WRITE;
 +      if (mask & ACE4_POSIX_MODE_EXEC)
 +              mode |= MAY_EXEC;
 +
 +      return mode;
 +}
 +
 +/**
 + * nfs4acl_masks_to_mode  -  compute file mode permission bits from file masks
 + *
 + * Compute the file mode permission bits from the file masks in the acl.
 + */
 +int
 +nfs4acl_masks_to_mode(const struct nfs4acl *acl)
 +{
 +      return nfs4acl_mask_to_mode(acl->a_owner_mask) << 6 |
 +             nfs4acl_mask_to_mode(acl->a_group_mask) << 3 |
 +             nfs4acl_mask_to_mode(acl->a_other_mask);
 +}
 +EXPORT_SYMBOL(nfs4acl_masks_to_mode);
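As a worked sketch of the mapping above (mask values chosen for illustration; all identifiers come from this file): owner read/write, group read, and nothing for others yields mode 0640, since MAY_EXEC, MAY_WRITE, and MAY_READ are 1, 2, and 4:

static int example_masks_to_mode(void)
{
	struct nfs4acl acl = {
		.a_owner_mask = ACE4_POSIX_MODE_READ | ACE4_POSIX_MODE_WRITE,
		.a_group_mask = ACE4_POSIX_MODE_READ,
		.a_other_mask = 0,
	};

	/* (MAY_READ | MAY_WRITE) << 6 | MAY_READ << 3 == 0640 */
	return nfs4acl_masks_to_mode(&acl);
}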
 +
 +static unsigned int
 +nfs4acl_mode_to_mask(mode_t mode)
 +{
 +      unsigned int mask = ACE4_POSIX_ALWAYS_ALLOWED;
 +
 +      if (mode & MAY_READ)
 +              mask |= ACE4_POSIX_MODE_READ;
 +      if (mode & MAY_WRITE)
 +              mask |= ACE4_POSIX_MODE_WRITE;
 +      if (mode & MAY_EXEC)
 +              mask |= ACE4_POSIX_MODE_EXEC;
 +
 +      return mask;
 +}
 +
 +/**
 + * nfs4acl_chmod  -  update the file masks to reflect the new mode
 + * @mode:     file mode permission bits to apply to the @acl
 + *
 + * Converts the mask flags corresponding to the owner, group, and other file
 + * permissions and computes the file masks. Returns @acl if it already has the
 + * appropriate file masks, or updates the flags in a copy of @acl. Takes over
 + * @acl.
 + */
 +struct nfs4acl *
 +nfs4acl_chmod(struct nfs4acl *acl, mode_t mode)
 +{
 +      unsigned int owner_mask, group_mask, other_mask;
 +      struct nfs4acl *clone;
 +
 +      owner_mask = nfs4acl_mode_to_mask(mode >> 6);
 +      group_mask = nfs4acl_mode_to_mask(mode >> 3);
 +      other_mask = nfs4acl_mode_to_mask(mode);
 +
 +      if (acl->a_owner_mask == owner_mask &&
 +          acl->a_group_mask == group_mask &&
 +          acl->a_other_mask == other_mask &&
 +          (!nfs4acl_is_auto_inherit(acl) || nfs4acl_is_protected(acl)))
 +              return acl;
 +
 +      clone = nfs4acl_clone(acl);
 +      nfs4acl_put(acl);
 +      if (!clone)
 +              return ERR_PTR(-ENOMEM);
 +
 +      clone->a_owner_mask = owner_mask;
 +      clone->a_group_mask = group_mask;
 +      clone->a_other_mask = other_mask;
 +      if (nfs4acl_is_auto_inherit(clone))
 +              clone->a_flags |= ACL4_PROTECTED;
 +
 +      if (nfs4acl_write_through(&clone)) {
 +              nfs4acl_put(clone);
 +              clone = ERR_PTR(-ENOMEM);
 +      }
 +      return clone;
 +}
 +EXPORT_SYMBOL(nfs4acl_chmod);
 +
 +/**
 + * nfs4acl_want_to_mask  - convert permission want argument to a mask
 + * @want:     @want argument of the permission inode operation
 + *
 + * When checking for append, @want is (MAY_WRITE | MAY_APPEND).
 + */
 +unsigned int
 +nfs4acl_want_to_mask(int want)
 +{
 +      unsigned int mask = 0;
 +
 +      if (want & MAY_READ)
 +              mask |= ACE4_READ_DATA;
 +      if (want & MAY_APPEND)
 +              mask |= ACE4_APPEND_DATA;
 +      else if (want & MAY_WRITE)
 +              mask |= ACE4_WRITE_DATA;
 +      if (want & MAY_EXEC)
 +              mask |= ACE4_EXECUTE;
 +
 +      return mask;
 +}
 +EXPORT_SYMBOL(nfs4acl_want_to_mask);
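Note the else-if above: an append request is deliberately not widened into a full write. A minimal sanity check of that behavior:

static void example_append_mask(void)
{
	/* MAY_WRITE | MAY_APPEND maps to ACE4_APPEND_DATA alone, so an
	 * append never silently requires ACE4_WRITE_DATA as well. */
	BUG_ON(nfs4acl_want_to_mask(MAY_WRITE | MAY_APPEND) != ACE4_APPEND_DATA);
	BUG_ON(nfs4acl_want_to_mask(MAY_WRITE) != ACE4_WRITE_DATA);
}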
 +
 +/**
 + * nfs4acl_capability_check  -  check for capabilities overriding read/write access
 + * @inode:    inode to check
 + * @mask:     requested access (ACE4_* bitmask)
 + *
 + * Capabilities other than CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH must be checked
 + * separately.
 + */
 +static inline int nfs4acl_capability_check(struct inode *inode, unsigned int mask)
 +{
 +      /*
 +       * Read/write DACs are always overridable.
 +       * Executable DACs are overridable if at least one exec bit is set.
 +       */
 +      if (!(mask & (ACE4_WRITE_ACL | ACE4_WRITE_OWNER)) &&
 +          (!(mask & ACE4_EXECUTE) ||
 +          (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)))
 +              if (capable(CAP_DAC_OVERRIDE))
 +                      return 0;
 +
 +      /*
 +       * Searching includes executable on directories, else just read.
 +       */
 +      if (!(mask & ~(ACE4_READ_DATA | ACE4_EXECUTE)) &&
 +          (S_ISDIR(inode->i_mode) || !(mask & ACE4_EXECUTE)))
 +              if (capable(CAP_DAC_READ_SEARCH))
 +                      return 0;
 +
 +      return -EACCES;
 +}
 +
 +/**
 + * nfs4acl_permission  -  permission check algorithm with masking
 + * @inode:    inode to check
 + * @acl:      nfs4 acl of the inode
 + * @mask:     requested access (ACE4_* bitmask)
 + *
 + * Checks if the current process is granted @mask flags in @acl. With
 + * write-through, the OWNER@ is always granted the owner file mask, the
 + * GROUP@ is always granted the group file mask, and EVERYONE@ is always
 + * granted the other file mask. Otherwise, processes are only granted
 + * @mask flags which they are granted in the @acl as well as in their
 + * file mask.
 + */
 +int nfs4acl_permission(struct inode *inode, const struct nfs4acl *acl,
 +                     unsigned int mask)
 +{
 +      const struct nfs4ace *ace;
 +      unsigned int file_mask, requested = mask, denied = 0;
 +      int in_owning_group = in_group_p(inode->i_gid);
 +      int owner_or_group_class = in_owning_group;
 +
 +      /*
 +       * A process is in the
 +       *   - owner file class if it owns the file, in the
 +       *   - group file class if it is in the file's owning group or
 +       *     it matches any of the user or group entries, and in the
 +       *   - other file class otherwise.
 +       */
 +
 +      nfs4acl_for_each_entry(ace, acl) {
 +              unsigned int ace_mask = ace->e_mask;
 +
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_owner(ace)) {
 +                      if (current_fsuid() != inode->i_uid)
 +                              continue;
 +                      goto is_owner;
 +              } else if (nfs4ace_is_group(ace)) {
 +                      if (!in_owning_group)
 +                              continue;
 +              } else if (nfs4ace_is_unix_id(ace)) {
 +                      if (ace->e_flags & ACE4_IDENTIFIER_GROUP) {
 +                              if (!in_group_p(ace->u.e_id))
 +                                      continue;
 +                      } else {
 +                              if (current_fsuid() != ace->u.e_id)
 +                                      continue;
 +                      }
 +              } else
 +                      goto is_everyone;
 +
 +              /*
 +               * Apply the group file mask to entries other than OWNER@ and
 +               * EVERYONE@. This is not required for correct access checking
 +               * but ensures that we grant the same permissions as the acl
 +               * computed by nfs4acl_apply_masks().
 +               *
 +               * For example, without this restriction, 'group@:rw::allow'
 +               * with mode 0600 would grant rw access to owner processes
 +               * which are also in the owning group. This cannot be expressed
 +               * in an acl.
 +               */
 +              if (nfs4ace_is_allow(ace))
 +                      ace_mask &= acl->a_group_mask;
 +
 +          is_owner:
 +              /* The process is in the owner or group file class. */
 +              owner_or_group_class = 1;
 +
 +          is_everyone:
 +              /* Check which mask flags the ACE allows or denies. */
 +              if (nfs4ace_is_deny(ace))
 +                      denied |= ace_mask & mask;
 +              mask &= ~ace_mask;
 +
 +              /* Keep going until we know which file class the process is in. */
 +              if (!mask && owner_or_group_class)
 +                      break;
 +      }
 +      denied |= mask;
 +
 +      /*
 +       * Figure out which file mask applies.
 +       * Clear write-through if the process is in the file group class but
 +       * not in the owning group, and so the denied permissions apply.
 +       */
 +      if (current_fsuid() == inode->i_uid)
 +              file_mask = acl->a_owner_mask;
 +      else if (in_owning_group || owner_or_group_class)
 +              file_mask = acl->a_group_mask;
 +      else
 +              file_mask = acl->a_other_mask;
 +
 +      denied |= requested & ~file_mask;
 +      if (!denied)
 +              return 0;
 +      return nfs4acl_capability_check(inode, requested);
 +}
 +EXPORT_SYMBOL(nfs4acl_permission);
 +
 +/**
 + * nfs4acl_generic_permission  -  permission check algorithm without explicit acl
 + * @inode:    inode to check permissions for
 + * @mask:     requested access (ACE4_* bitmask)
 + *
 + * The file mode of a file without ACL corresponds to an ACL with a single
 + * "EVERYONE:~0::ALLOW" entry, with file masks that correspond to the file mode
 + * permissions. Instead of constructing a temporary ACL and applying
 + * nfs4acl_permission() to it, compute the identical result directly from the file
 + * mode.
 + */
 +int nfs4acl_generic_permission(struct inode *inode, unsigned int mask)
 +{
 +      int mode = inode->i_mode;
 +
 +      if (current_fsuid() == inode->i_uid)
 +              mode >>= 6;
 +      else if (in_group_p(inode->i_gid))
 +              mode >>= 3;
 +      if (!(mask & ~nfs4acl_mode_to_mask(mode)))
 +              return 0;
 +      return nfs4acl_capability_check(inode, mask);
 +}
 +EXPORT_SYMBOL(nfs4acl_generic_permission);
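A hedged sketch of how a filesystem might wire the two checks above into its ->permission() inode operation; example_get_nfs4acl() is a hypothetical helper standing in for however the filesystem looks up its cached acl:

static struct nfs4acl *example_get_nfs4acl(struct inode *inode); /* hypothetical */

static int example_permission(struct inode *inode, int want)
{
	struct nfs4acl *acl = example_get_nfs4acl(inode);
	unsigned int mask = nfs4acl_want_to_mask(want);
	int error;

	if (!acl)
		return nfs4acl_generic_permission(inode, mask);
	error = nfs4acl_permission(inode, acl, mask);
	nfs4acl_put(acl);
	return error;
}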
 +
 +/*
 + * nfs4ace_is_same_who  -  do both acl entries refer to the same identifier?
 + */
 +int
 +nfs4ace_is_same_who(const struct nfs4ace *a, const struct nfs4ace *b)
 +{
 +#define WHO_FLAGS (ACE4_SPECIAL_WHO | ACE4_IDENTIFIER_GROUP)
 +      if ((a->e_flags & WHO_FLAGS) != (b->e_flags & WHO_FLAGS))
 +              return 0;
 +      if (a->e_flags & ACE4_SPECIAL_WHO)
 +              return a->u.e_who == b->u.e_who;
 +      else
 +              return a->u.e_id == b->u.e_id;
 +#undef WHO_FLAGS
 +}
 +
 +/**
 + * nfs4ace_set_who  -  set a special who value
 + * @ace:      acl entry
 + * @who:      who value to use
 + */
 +int
 +nfs4ace_set_who(struct nfs4ace *ace, const char *who)
 +{
 +      if (!strcmp(who, nfs4ace_owner_who))
 +              who = nfs4ace_owner_who;
 +      else if (!strcmp(who, nfs4ace_group_who))
 +              who = nfs4ace_group_who;
 +      else if (!strcmp(who, nfs4ace_everyone_who))
 +              who = nfs4ace_everyone_who;
 +      else
 +              return -EINVAL;
 +
 +      ace->u.e_who = who;
 +      ace->e_flags |= ACE4_SPECIAL_WHO;
 +      ace->e_flags &= ~ACE4_IDENTIFIER_GROUP;
 +      return 0;
 +}
 +EXPORT_SYMBOL(nfs4ace_set_who);
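Because the special who strings are interned through nfs4ace_set_who(), identity tests elsewhere reduce to pointer comparisons. This sketch presumably mirrors what the nfs4ace_is_owner() helper used above does:

static int example_is_owner_ace(const struct nfs4ace *ace)
{
	return (ace->e_flags & ACE4_SPECIAL_WHO) &&
	       ace->u.e_who == nfs4ace_owner_who;	/* pointer compare, no strcmp */
}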
 +
 +/**
 + * nfs4acl_allowed_to_who  -  mask flags allowed to a specific who value
 + *
 + * Computes the mask values allowed to a specific who value, taking
 + * EVERYONE@ entries into account.
 + */
 +static unsigned int
 +nfs4acl_allowed_to_who(struct nfs4acl *acl, struct nfs4ace *who)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed = 0;
 +
 +      nfs4acl_for_each_entry_reverse(ace, acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_same_who(ace, who) ||
 +                  nfs4ace_is_everyone(ace)) {
 +                      if (nfs4ace_is_allow(ace))
 +                              allowed |= ace->e_mask;
 +                      else if (nfs4ace_is_deny(ace))
 +                              allowed &= ~ace->e_mask;
 +              }
 +      }
 +      return allowed;
 +}
 +
 +/**
 + * nfs4acl_compute_max_masks  -  compute upper bound masks
 + *
 + * Computes upper bound owner, group, and other masks so that none of
 + * the mask flags allowed by the acl are disabled (for any choice of the
 + * file owner or group membership).
 + */
 +static void
 +nfs4acl_compute_max_masks(struct nfs4acl *acl)
 +{
 +      struct nfs4ace *ace;
 +
 +      acl->a_owner_mask = 0;
 +      acl->a_group_mask = 0;
 +      acl->a_other_mask = 0;
 +
 +      nfs4acl_for_each_entry_reverse(ace, acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +
 +              if (nfs4ace_is_owner(ace)) {
 +                      if (nfs4ace_is_allow(ace))
 +                              acl->a_owner_mask |= ace->e_mask;
 +                      else if (nfs4ace_is_deny(ace))
 +                              acl->a_owner_mask &= ~ace->e_mask;
 +              } else if (nfs4ace_is_everyone(ace)) {
 +                      if (nfs4ace_is_allow(ace)) {
 +                              struct nfs4ace who = {
 +                                      .e_flags = ACE4_SPECIAL_WHO,
 +                                      .u.e_who = nfs4ace_group_who,
 +                              };
 +
 +                              acl->a_other_mask |= ace->e_mask;
 +                              acl->a_group_mask |=
 +                                      nfs4acl_allowed_to_who(acl, &who);
 +                              acl->a_owner_mask |= ace->e_mask;
 +                      } else if (nfs4ace_is_deny(ace)) {
 +                              acl->a_other_mask &= ~ace->e_mask;
 +                              acl->a_group_mask &= ~ace->e_mask;
 +                              acl->a_owner_mask &= ~ace->e_mask;
 +                      }
 +              } else {
 +                      if (nfs4ace_is_allow(ace)) {
 +                              unsigned int mask =
 +                                      nfs4acl_allowed_to_who(acl, ace);
 +
 +                              acl->a_group_mask |= mask;
 +                              acl->a_owner_mask |= mask;
 +                      }
 +              }
 +      }
 +}
 +
 +/**
 + * nfs4acl_inherit  -  compute the acl a new file will inherit
 + * @dir_acl:  acl of the containing directory
 + * @mode:     file type and create mode of the new file
 + *
 + * Given the containing directory's acl, this function will compute the
 + * acl that new files in that directory will inherit, or %NULL if
 + * @dir_acl does not contain acl entries inheritable by this file.
 + *
 + * Without write-through, the file masks in the returned acl are set to
 + * the intersection of the create mode and the maximum permissions
 + * allowed to each file class. With write-through, the file masks are
 + * set to the create mode.
 + */
 +struct nfs4acl *
 +nfs4acl_inherit(const struct nfs4acl *dir_acl, mode_t mode)
 +{
 +      const struct nfs4ace *dir_ace;
 +      struct nfs4acl *acl;
 +      struct nfs4ace *ace;
 +      int count = 0;
 +
 +      if (S_ISDIR(mode)) {
 +              nfs4acl_for_each_entry(dir_ace, dir_acl) {
 +                      if (!nfs4ace_is_inheritable(dir_ace))
 +                              continue;
 +                      count++;
 +              }
 +              if (!count)
 +                      return NULL;
 +              acl = nfs4acl_alloc(count);
 +              if (!acl)
 +                      return ERR_PTR(-ENOMEM);
 +              ace = acl->a_entries;
 +              nfs4acl_for_each_entry(dir_ace, dir_acl) {
 +                      if (!nfs4ace_is_inheritable(dir_ace))
 +                              continue;
 +                      memcpy(ace, dir_ace, sizeof(struct nfs4ace));
 +                      if (dir_ace->e_flags & ACE4_NO_PROPAGATE_INHERIT_ACE)
 +                              nfs4ace_clear_inheritance_flags(ace);
 +                      if ((dir_ace->e_flags & ACE4_FILE_INHERIT_ACE) &&
 +                          !(dir_ace->e_flags & ACE4_DIRECTORY_INHERIT_ACE))
 +                              ace->e_flags |= ACE4_INHERIT_ONLY_ACE;
 +                      ace++;
 +              }
 +      } else {
 +              nfs4acl_for_each_entry(dir_ace, dir_acl) {
 +                      if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
 +                              continue;
 +                      count++;
 +              }
 +              if (!count)
 +                      return NULL;
 +              acl = nfs4acl_alloc(count);
 +              if (!acl)
 +                      return ERR_PTR(-ENOMEM);
 +              ace = acl->a_entries;
 +              nfs4acl_for_each_entry(dir_ace, dir_acl) {
 +                      if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
 +                              continue;
 +                      memcpy(ace, dir_ace, sizeof(struct nfs4ace));
 +                      nfs4ace_clear_inheritance_flags(ace);
 +                      ace++;
 +              }
 +      }
 +
 +      /* The maximum mask flags that the owner, group, and other classes
 +         are allowed. */
 +      if (dir_acl->a_flags & ACL4_WRITE_THROUGH) {
 +              acl->a_owner_mask = ACE4_VALID_MASK;
 +              acl->a_group_mask = ACE4_VALID_MASK;
 +              acl->a_other_mask = ACE4_VALID_MASK;
 +
 +              mode &= ~current->fs->umask;
 +      } else
 +              nfs4acl_compute_max_masks(acl);
 +
 +      /* Apply the create mode. */
 +      acl->a_owner_mask &= nfs4acl_mode_to_mask(mode >> 6);
 +      acl->a_group_mask &= nfs4acl_mode_to_mask(mode >> 3);
 +      acl->a_other_mask &= nfs4acl_mode_to_mask(mode);
 +
 +      if (nfs4acl_write_through(&acl)) {
 +              nfs4acl_put(acl);
 +              return ERR_PTR(-ENOMEM);
 +      }
 +
 +      acl->a_flags = (dir_acl->a_flags & ~ACL4_PROTECTED);
 +      if (nfs4acl_is_auto_inherit(acl)) {
 +              nfs4acl_for_each_entry(ace, acl)
 +                      ace->e_flags |= ACE4_INHERITED_ACE;
 +              acl->a_flags |= ACL4_PROTECTED;
 +      }
 +
 +      return acl;
 +}
 +EXPORT_SYMBOL(nfs4acl_inherit);
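Callers must distinguish the three possible results. A minimal sketch of create-time usage, with example_set_nfs4acl() as a hypothetical persistence helper:

static void example_set_nfs4acl(struct inode *inode, struct nfs4acl *acl); /* hypothetical */

static int example_init_acl(struct inode *inode, struct nfs4acl *dir_acl,
			    mode_t mode)
{
	struct nfs4acl *acl = nfs4acl_inherit(dir_acl, mode);

	if (!acl)
		return 0;		/* nothing inheritable: mode bits suffice */
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	example_set_nfs4acl(inode, acl);
	nfs4acl_put(acl);
	return 0;
}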
index 296859e,0000000..3ecd404
mode 100644,000000..100644
--- /dev/null
@@@ -1,757 -1,0 +1,758 @@@
 +/*
 + * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
 + *
 + * This program is free software; you can redistribute it and/or modify it
 + * under the terms of the GNU General Public License as published by the
 + * Free Software Foundation; either version 2, or (at your option) any
 + * later version.
 + *
 + * This program is distributed in the hope that it will be useful, but
 + * WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + */
 +
++#include <linux/slab.h>
 +#include <linux/module.h>
 +#include <linux/fs.h>
 +#include <linux/nfs4acl.h>
 +
 +/**
 + * struct nfs4acl_alloc  -  remember how many entries are actually allocated
 + * @acl:      acl with a_count <= @count
 + * @count:    the actual number of entries allocated in @acl
 + *
 + * We pass around this structure while modifying an acl, so that we do
 + * not have to reallocate when we remove existing entries followed by
 + * adding new entries.
 + */
 +struct nfs4acl_alloc {
 +      struct nfs4acl *acl;
 +      unsigned int count;
 +};
 +
 +/**
 + * nfs4acl_delete_entry  -  delete an entry in an acl
 + * @x:                acl and number of allocated entries
 + * @ace:      an entry in @x->acl
 + *
 + * Updates @ace so that it points to the entry before the deleted entry
 + * on return. (When deleting the first entry, @ace will point to the
 + * (non-existent) entry before the first entry). This behavior is the
 + * expected behavior when deleting entries while forward iterating over
 + * an acl.
 + */
 +static void
 +nfs4acl_delete_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace)
 +{
 +      void *end = x->acl->a_entries + x->acl->a_count;
 +
 +      memmove(*ace, *ace + 1, end - (void *)(*ace + 1));
 +      (*ace)--;
 +      x->acl->a_count--;
 +}
 +
 +/**
 + * nfs4acl_insert_entry  -  insert an entry in an acl
 + * @x:                acl and number of allocated entries
 + * @ace:      entry before which the new entry shall be inserted
 + *
 + * Insert a new entry in @x->acl at position @ace, and zero-initialize
 + * it.  This may require reallocating @x->acl.
 + */
 +static int
 +nfs4acl_insert_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace)
 +{
 +      if (x->count == x->acl->a_count) {
 +              int n = *ace - x->acl->a_entries;
 +              struct nfs4acl *acl2;
 +
 +              acl2 = nfs4acl_alloc(x->acl->a_count + 1);
 +              if (!acl2)
 +                      return -1;
 +              acl2->a_flags = x->acl->a_flags;
 +              acl2->a_owner_mask = x->acl->a_owner_mask;
 +              acl2->a_group_mask = x->acl->a_group_mask;
 +              acl2->a_other_mask = x->acl->a_other_mask;
 +              memcpy(acl2->a_entries, x->acl->a_entries,
 +                     n * sizeof(struct nfs4ace));
 +              memcpy(acl2->a_entries + n + 1, *ace,
 +                     (x->acl->a_count - n) * sizeof(struct nfs4ace));
 +              kfree(x->acl);
 +              x->acl = acl2;
 +              x->count = acl2->a_count;
 +              *ace = acl2->a_entries + n;
 +      } else {
 +              void *end = x->acl->a_entries + x->acl->a_count;
 +
 +              memmove(*ace + 1, *ace, end - (void *)*ace);
 +              x->acl->a_count++;
 +      }
 +      memset(*ace, 0, sizeof(struct nfs4ace));
 +      return 0;
 +}
 +
 +/**
 + * nfs4ace_change_mask  -  change the mask in @ace to @mask
 + * @x:                acl and number of allocated entries
 + * @ace:      entry to modify
 + * @mask:     new mask for @ace
 + *
 + * Set the effective mask of @ace to @mask. This will require splitting
 + * off a separate acl entry if @ace is inheritable. In that case, the
 + * effective-only acl entry is inserted after the inheritable acl
 + * entry, and the inheritable acl entry is set to inheritable-only. If
 + * @mask is 0, either set the original acl entry to inheritable-only if
 + * it was inheritable, or remove it otherwise.  The returned @ace points
 + * to the modified or inserted effective-only acl entry if that entry
 + * exists, to the entry that has become inheritable-only, or else to the
 + * previous entry in the acl. This is the expected behavior when
 + * modifying masks while forward iterating over an acl.
 + */
 +static int
 +nfs4ace_change_mask(struct nfs4acl_alloc *x, struct nfs4ace **ace,
 +                         unsigned int mask)
 +{
 +      if (mask && (*ace)->e_mask == mask)
 +              return 0;
 +      if (mask & ~ACE4_POSIX_ALWAYS_ALLOWED) {
 +              if (nfs4ace_is_inheritable(*ace)) {
 +                      if (nfs4acl_insert_entry(x, ace))
 +                              return -1;
 +                      memcpy(*ace, *ace + 1, sizeof(struct nfs4ace));
 +                      (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE;
 +                      (*ace)++;
 +                      nfs4ace_clear_inheritance_flags(*ace);
 +              }
 +              (*ace)->e_mask = mask;
 +      } else {
 +              if (nfs4ace_is_inheritable(*ace))
 +                      (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE;
 +              else
 +                      nfs4acl_delete_entry(x, ace);
 +      }
 +      return 0;
 +}
 +
 +/**
 + * nfs4acl_move_everyone_aces_down  -  move everyone@ acl entries to the end
 + * @x:                acl and number of allocated entries
 + *
 + * Move all everyone acl entries to the bottom of the acl so that only a
 + * single everyone@ allow acl entry remains at the end, and update the
 + * mask fields of all acl entries on the way. If everyone@ is not
 + * granted any permissions, no empty everyone@ acl entry is inserted.
 + *
 + * This transformation does not modify the permissions that the acl
 + * grants, but we need it to simplify successive transformations.
 + */
 +static int
 +nfs4acl_move_everyone_aces_down(struct nfs4acl_alloc *x)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed = 0, denied = 0;
 +
 +      nfs4acl_for_each_entry(ace, x->acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_everyone(ace)) {
 +                      if (nfs4ace_is_allow(ace))
 +                              allowed |= (ace->e_mask & ~denied);
 +                      else if (nfs4ace_is_deny(ace))
 +                              denied |= (ace->e_mask & ~allowed);
 +                      else
 +                              continue;
 +                      if (nfs4ace_change_mask(x, &ace, 0))
 +                              return -1;
 +              } else {
 +                      if (nfs4ace_is_allow(ace)) {
 +                              if (nfs4ace_change_mask(x, &ace, allowed |
 +                                              (ace->e_mask & ~denied)))
 +                                      return -1;
 +                      } else if (nfs4ace_is_deny(ace)) {
 +                              if (nfs4ace_change_mask(x, &ace, denied |
 +                                              (ace->e_mask & ~allowed)))
 +                                      return -1;
 +                      }
 +              }
 +      }
 +      if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) {
 +              struct nfs4ace *last_ace = ace - 1;
 +
 +              if (nfs4ace_is_everyone(last_ace) &&
 +                  nfs4ace_is_allow(last_ace) &&
 +                  nfs4ace_is_inherit_only(last_ace) &&
 +                  last_ace->e_mask == allowed)
 +                      last_ace->e_flags &= ~ACE4_INHERIT_ONLY_ACE;
 +              else {
 +                      if (nfs4acl_insert_entry(x, &ace))
 +                              return -1;
 +                      ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
 +                      ace->e_flags = ACE4_SPECIAL_WHO;
 +                      ace->e_mask = allowed;
 +                      ace->u.e_who = nfs4ace_everyone_who;
 +              }
 +      }
 +      return 0;
 +}
 +
 +/**
 + * __nfs4acl_propagate_everyone  -  propagate everyone@ mask flags up for @who
 + * @x:                acl and number of allocated entries
 + * @who:      identifier to propagate mask flags for
 + * @allow:    mask flags to propagate up
 + *
 + * Propagate mask flags from the trailing everyone@ allow acl entry up
 + * for the specified @who.
 + *
 + * The idea here is to precede the trailing EVERYONE@ ALLOW entry by an
 + * additional @who ALLOW entry, but with the following optimizations:
 + * (1) we don't bother setting any flags in the new @who ALLOW entry
 + * that have already been allowed or denied by a previous @who entry, (2)
 + * we merge the new @who entry with a previous @who entry if there is
 + * such a previous @who entry and there are no intervening DENY entries
 + * with mask flags that overlap the flags we care about.
 + */
 +static int
 +__nfs4acl_propagate_everyone(struct nfs4acl_alloc *x, struct nfs4ace *who,
 +                        unsigned int allow)
 +{
 +      struct nfs4ace *allow_last = NULL, *ace;
 +
 +      /* Remove the mask flags from allow that are already determined for
 +         this who value, and figure out if there is an ALLOW entry for
 +         this who value that is "reachable" from the trailing EVERYONE@
 +         ALLOW ACE. */
 +      nfs4acl_for_each_entry(ace, x->acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_allow(ace)) {
 +                      if (nfs4ace_is_same_who(ace, who)) {
 +                              allow &= ~ace->e_mask;
 +                              allow_last = ace;
 +                      }
 +              } else if (nfs4ace_is_deny(ace)) {
 +                      if (nfs4ace_is_same_who(ace, who))
 +                              allow &= ~ace->e_mask;
 +                      if (allow & ace->e_mask)
 +                              allow_last = NULL;
 +              }
 +      }
 +
 +      if (allow) {
 +              if (allow_last)
 +                      return nfs4ace_change_mask(x, &allow_last,
 +                                                 allow_last->e_mask | allow);
 +              else {
 +                      struct nfs4ace who_copy;
 +
 +                      ace = x->acl->a_entries + x->acl->a_count - 1;
 +                      memcpy(&who_copy, who, sizeof(struct nfs4ace));
 +                      if (nfs4acl_insert_entry(x, &ace))
 +                              return -1;
 +                      memcpy(ace, &who_copy, sizeof(struct nfs4ace));
 +                      ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
 +                      nfs4ace_clear_inheritance_flags(ace);
 +                      ace->e_mask = allow;
 +              }
 +      }
 +      return 0;
 +}
 +
 +/**
 + * nfs4acl_propagate_everyone  -  propagate everyone@ mask flags up the acl
 + * @x:                acl and number of allocated entries
 + *
 + * Make sure for owner@, group@, and all other users, groups, and
 + * special identifiers that they are allowed or denied all permissions
 + * that are granted be the trailing everyone@ acl entry. If they are
 + * not, try to add the missing permissions to existing allow acl entries
 + * for those users, or introduce additional acl entries if that is not
 + * possible.
 + *
 + * We do this so that no mask flags will get lost when finally applying
 + * the file masks to the acl entries: otherwise, with an other file mask
 + * that is more restrictive than the owner and/or group file mask, mask
 + * flags that were allowed to processes in the owner and group classes
 + * and that the other mask denies would be lost. For example, the
 + * following two acls show the problem when mode 0664 is applied to
 + * them:
 + *
 + *    masking without propagation (wrong)
 + *    ===========================================================
 + *    joe:r::allow            => joe:r::allow
 + *    everyone@:rwx::allow    => everyone@:r::allow
 + *    -----------------------------------------------------------
 + *    joe:w::deny             => joe:w::deny
 + *    everyone@:rwx::allow       everyone@:r::allow
 + *
 + * Note that the permissions of joe end up being more restrictive than
 + * what the acl would allow when first computing the allowed flags and
 + * then applying the respective mask. With propagation of permissions,
 + * we get:
 + *
 + *    masking after propagation (correct)
 + *    ===========================================================
 + *    joe:r::allow            => joe:rw::allow
 + *                               owner@:rw::allow
 + *                               group@:rw::allow
 + *    everyone@:rwx::allow       everyone@:r::allow
 + *    -----------------------------------------------------------
 + *    joe:w::deny             => owner@:x::deny
 + *                               joe:w::deny
 + *                               owner@:rw::allow
 + *                               group@:rw::allow
 + *                               joe:r::allow
 + *    everyone@:rwx::allow       everyone@:r::allow
 + *
 + * The examples show the acls that would result from propagation with no
 + * masking performed. In fact, we do apply the respective mask to the
 + * acl entries before computing the propagation because this will save
 + * us from adding acl entries that would end up with empty mask fields
 + * after applying the masks.
 + *
 + * It is ensured that no more than one entry will be inserted for each
 + * who value, no matter how many entries each who value has already.
 + */
 +static int
 +nfs4acl_propagate_everyone(struct nfs4acl_alloc *x)
 +{
 +      int write_through = (x->acl->a_flags & ACL4_WRITE_THROUGH);
 +      struct nfs4ace who = { .e_flags = ACE4_SPECIAL_WHO };
 +      struct nfs4ace *ace;
 +      unsigned int owner_allow, group_allow;
 +      int retval;
 +
 +      if (!((x->acl->a_owner_mask | x->acl->a_group_mask) &
 +            ~x->acl->a_other_mask))
 +              return 0;
 +      if (!x->acl->a_count)
 +              return 0;
 +      ace = x->acl->a_entries + x->acl->a_count - 1;
 +      if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace))
 +              return 0;
 +      if (!(ace->e_mask & ~x->acl->a_other_mask)) {
 +              /* None of the allowed permissions will get masked. */
 +              return 0;
 +      }
 +      owner_allow = ace->e_mask & x->acl->a_owner_mask;
 +      group_allow = ace->e_mask & x->acl->a_group_mask;
 +
 +      /* Propagate everyone@ permissions through to owner@. */
 +      if (owner_allow && !write_through &&
 +          (x->acl->a_owner_mask & ~x->acl->a_other_mask)) {
 +              who.u.e_who = nfs4ace_owner_who;
 +              retval = __nfs4acl_propagate_everyone(x, &who, owner_allow);
 +              if (retval)
 +                      return -1;
 +      }
 +
 +      if (group_allow && (x->acl->a_group_mask & ~x->acl->a_other_mask)) {
 +              int n;
 +
 +              if (!write_through) {
 +                      /* Propagate everyone@ permissions through to group@. */
 +                      who.u.e_who = nfs4ace_group_who;
 +                      retval = __nfs4acl_propagate_everyone(x, &who,
 +                                                            group_allow);
 +                      if (retval)
 +                              return -1;
 +              }
 +
 +              /* Start from the entry before the trailing EVERYONE@ ALLOW
 +                 entry. We will not hit EVERYONE@ entries in the loop. */
 +              for (n = x->acl->a_count - 2; n != -1; n--) {
 +                      ace = x->acl->a_entries + n;
 +
 +                      if (nfs4ace_is_inherit_only(ace) ||
 +                          nfs4ace_is_owner(ace) ||
 +                          nfs4ace_is_group(ace))
 +                              continue;
 +                      if (nfs4ace_is_allow(ace) || nfs4ace_is_deny(ace)) {
 +                              /* Any inserted entry will end up below the
 +                                 current entry. */
 +                              retval = __nfs4acl_propagate_everyone(x, ace,
 +                                                                 group_allow);
 +                              if (retval)
 +                                      return -1;
 +                      }
 +              }
 +      }
 +      return 0;
 +}
 +
 +/**
 + * __nfs4acl_apply_masks  -  apply the masks to the acl entries
 + * @x:                acl and number of allocated entries
 + *
 + * Apply the owner file mask to owner@ entries, the intersection of the
 + * group and other file masks to everyone@ entries, and the group file
 + * mask to all other entries.
 + */
 +static int
 +__nfs4acl_apply_masks(struct nfs4acl_alloc *x)
 +{
 +      struct nfs4ace *ace;
 +
 +      nfs4acl_for_each_entry(ace, x->acl) {
 +              unsigned int mask;
 +
 +              if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_allow(ace))
 +                      continue;
 +              if (nfs4ace_is_owner(ace))
 +                      mask = x->acl->a_owner_mask;
 +              else if (nfs4ace_is_everyone(ace))
 +                      mask = x->acl->a_other_mask;
 +              else
 +                      mask = x->acl->a_group_mask;
 +              if (nfs4ace_change_mask(x, &ace, ace->e_mask & mask))
 +                      return -1;
 +      }
 +      return 0;
 +}
 +
 +/**
 + * nfs4acl_max_allowed  -  maximum mask flags that anybody is allowed
 + */
 +static unsigned int
 +nfs4acl_max_allowed(struct nfs4acl *acl)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed = 0;
 +
 +      nfs4acl_for_each_entry_reverse(ace, acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_allow(ace))
 +                      allowed |= ace->e_mask;
 +              else if (nfs4ace_is_deny(ace)) {
 +                      if (nfs4ace_is_everyone(ace))
 +                              allowed &= ~ace->e_mask;
 +              }
 +      }
 +      return allowed;
 +}
 +
 +/**
 + * nfs4acl_isolate_owner_class  -  limit the owner class to the owner file mask
 + * @x:                acl and number of allocated entries
 + *
 + * Make sure the owner class (owner@) is granted no more than the owner
 + * mask by first checking which permissions anyone is granted, and then
 + * denying owner@ all permissions beyond that.
 + */
 +static int
 +nfs4acl_isolate_owner_class(struct nfs4acl_alloc *x)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed = 0;
 +
 +      allowed = nfs4acl_max_allowed(x->acl);
 +      if (allowed & ~x->acl->a_owner_mask) {
 +              /* Figure out if we can update an existing OWNER@ DENY entry. */
 +              nfs4acl_for_each_entry(ace, x->acl) {
 +                      if (nfs4ace_is_inherit_only(ace))
 +                              continue;
 +                      if (nfs4ace_is_deny(ace)) {
 +                              if (nfs4ace_is_owner(ace))
 +                                      break;
 +                      } else if (nfs4ace_is_allow(ace)) {
 +                              ace = x->acl->a_entries + x->acl->a_count;
 +                              break;
 +                      }
 +              }
 +              if (ace != x->acl->a_entries + x->acl->a_count) {
 +                      if (nfs4ace_change_mask(x, &ace, ace->e_mask |
 +                                      (allowed & ~x->acl->a_owner_mask)))
 +                              return -1;
 +              } else {
 +                      /* Insert an owner@ deny entry at the front. */
 +                      ace = x->acl->a_entries;
 +                      if (nfs4acl_insert_entry(x, &ace))
 +                              return -1;
 +                      ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
 +                      ace->e_flags = ACE4_SPECIAL_WHO;
 +                      ace->e_mask = allowed & ~x->acl->a_owner_mask;
 +                      ace->u.e_who = nfs4ace_owner_who;
 +              }
 +      }
 +      return 0;
 +}
 +
 +/**
 + * __nfs4acl_isolate_who  -  isolate entry from EVERYONE@ ALLOW entry
 + * @x:                acl and number of allocated entries
 + * @who:      identifier to isolate
 + * @deny:     mask flags this identifier should not be allowed
 + *
 + * Make sure that @who is not allowed any mask flags in @deny by checking
 + * which mask flags this identifier is allowed, and adding excess allowed
 + * mask flags to an existing DENY entry before the trailing EVERYONE@ ALLOW
 + * entry, or inserting such an entry.
 + */
 +static int
 +__nfs4acl_isolate_who(struct nfs4acl_alloc *x, struct nfs4ace *who,
 +                    unsigned int deny)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed = 0, n;
 +
 +      /* Compute the mask flags granted to this who value. */
 +      nfs4acl_for_each_entry_reverse(ace, x->acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_same_who(ace, who)) {
 +                      if (nfs4ace_is_allow(ace))
 +                              allowed |= ace->e_mask;
 +                      else if (nfs4ace_is_deny(ace))
 +                              allowed &= ~ace->e_mask;
 +                      deny &= ~ace->e_mask;
 +              }
 +      }
 +      if (!deny)
 +              return 0;
 +
 +      /* Figure out if we can update an existing DENY entry.  Start
 +         from the entry before the trailing EVERYONE@ ALLOW entry. We
 +         will not hit EVERYONE@ entries in the loop. */
 +      for (n = x->acl->a_count - 2; n != -1; n--) {
 +              ace = x->acl->a_entries + n;
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_deny(ace)) {
 +                      if (nfs4ace_is_same_who(ace, who))
 +                              break;
 +              } else if (nfs4ace_is_allow(ace) &&
 +                         (ace->e_mask & deny)) {
 +                      n = -1;
 +                      break;
 +              }
 +      }
 +      if (n != -1) {
 +              if (nfs4ace_change_mask(x, &ace, ace->e_mask | deny))
 +                      return -1;
 +      } else {
 +              /* Insert a deny entry before the trailing EVERYONE@ ALLOW
 +                 entry. */
 +              struct nfs4ace who_copy;
 +
 +              ace = x->acl->a_entries + x->acl->a_count - 1;
 +              memcpy(&who_copy, who, sizeof(struct nfs4ace));
 +              if (nfs4acl_insert_entry(x, &ace))
 +                      return -1;
 +              memcpy(ace, &who_copy, sizeof(struct nfs4ace));
 +              ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
 +              nfs4ace_clear_inheritance_flags(ace);
 +              ace->e_mask = deny;
 +      }
 +      return 0;
 +}
 +
 +/**
 + * nfs4acl_isolate_group_class  -  limit the group class to the group file mask
 + * @x:                acl and number of allocated entries
 + *
 + * Make sure the group class (all entries except owner@ and everyone@) is
 + * granted no more than the group mask by inserting DENY entries for group
 + * class entries where necessary.
 + */
 +static int
 +nfs4acl_isolate_group_class(struct nfs4acl_alloc *x)
 +{
 +      struct nfs4ace who = {
 +              .e_flags = ACE4_SPECIAL_WHO,
 +              .u.e_who = nfs4ace_group_who,
 +      };
 +      struct nfs4ace *ace;
 +      unsigned int deny;
 +
 +      if (!x->acl->a_count)
 +              return 0;
 +      ace = x->acl->a_entries + x->acl->a_count - 1;
 +      if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace))
 +              return 0;
 +      deny = ace->e_mask & ~x->acl->a_group_mask;
 +
 +      if (deny) {
 +              unsigned int n;
 +
 +              if (__nfs4acl_isolate_who(x, &who, deny))
 +                      return -1;
 +
 +              /* Start from the entry before the trailing EVERYONE@ ALLOW
 +                 entry. We will not hit EVERYONE@ entries in the loop. */
 +              for (n = x->acl->a_count - 2; n != -1; n--) {
 +                      ace = x->acl->a_entries + n;
 +
 +                      if (nfs4ace_is_inherit_only(ace) ||
 +                          nfs4ace_is_owner(ace) ||
 +                          nfs4ace_is_group(ace))
 +                              continue;
 +                      if (__nfs4acl_isolate_who(x, ace, deny))
 +                              return -1;
 +              }
 +      }
 +      return 0;
 +}
 +
 +/**
 + * __nfs4acl_write_through  -  grant the full masks to owner@, group@, everyone@
 + *
 + * Make sure that owner@, group@, and everyone@ are allowed the full mask
 + * permissions, and not only the permissions granted both by the acl and
 + * the masks.
 + */
 +static int
 +__nfs4acl_write_through(struct nfs4acl_alloc *x)
 +{
 +      struct nfs4ace *ace;
 +      unsigned int allowed;
 +
 +      /* Remove all owner@ and group@ ACEs: we re-insert them at the
 +         top. */
 +      nfs4acl_for_each_entry(ace, x->acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if ((nfs4ace_is_owner(ace) || nfs4ace_is_group(ace)) &&
 +                  nfs4ace_change_mask(x, &ace, 0))
 +                      return -1;
 +      }
 +
 +      /* Insert the everyone@ allow entry at the end, or update the
 +         existing entry. */
 +      allowed = x->acl->a_other_mask;
 +      if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) {
 +              ace = x->acl->a_entries + x->acl->a_count - 1;
 +              if (x->acl->a_count && nfs4ace_is_everyone(ace) &&
 +                  !nfs4ace_is_inherit_only(ace)) {
 +                      if (nfs4ace_change_mask(x, &ace, allowed))
 +                              return -1;
 +              } else {
 +                      ace = x->acl->a_entries + x->acl->a_count;
 +                      if (nfs4acl_insert_entry(x, &ace))
 +                              return -1;
 +                      ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
 +                      ace->e_flags = ACE4_SPECIAL_WHO;
 +                      ace->e_mask = allowed;
 +                      ace->u.e_who = nfs4ace_everyone_who;
 +              }
 +      }
 +
 +      /* Compute the permissions that owner@ and group@ are already granted
 +         through the everyone@ allow entry at the end. Note that the acl
 +         contains no owner@ or group@ entries at this point. */
 +      allowed = 0;
 +      nfs4acl_for_each_entry_reverse(ace, x->acl) {
 +              if (nfs4ace_is_inherit_only(ace))
 +                      continue;
 +              if (nfs4ace_is_allow(ace)) {
 +                      if (nfs4ace_is_everyone(ace))
 +                              allowed |= ace->e_mask;
 +              } else if (nfs4ace_is_deny(ace))
 +                              allowed &= ~ace->e_mask;
 +      }
 +
 +      /* Insert the appropriate group@ allow entry at the front. */
 +      if (x->acl->a_group_mask & ~allowed) {
 +              ace = x->acl->a_entries;
 +              if (nfs4acl_insert_entry(x, &ace))
 +                      return -1;
 +              ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
 +              ace->e_flags = ACE4_SPECIAL_WHO;
 +              ace->e_mask = x->acl->a_group_mask /*& ~allowed*/;
 +              ace->u.e_who = nfs4ace_group_who;
 +      }
 +
 +      /* Insert the appropriate owner@ allow entry at the front. */
 +      if (x->acl->a_owner_mask & ~allowed) {
 +              ace = x->acl->a_entries;
 +              if (nfs4acl_insert_entry(x, &ace))
 +                      return -1;
 +              ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
 +              ace->e_flags = ACE4_SPECIAL_WHO;
 +              ace->e_mask = x->acl->a_owner_mask /*& ~allowed*/;
 +              ace->u.e_who = nfs4ace_owner_who;
 +      }
 +
 +      /* Insert the appropriate owner@ deny entry at the front. */
 +      allowed = nfs4acl_max_allowed(x->acl);
 +      if (allowed & ~x->acl->a_owner_mask) {
 +              nfs4acl_for_each_entry(ace, x->acl) {
 +                      if (nfs4ace_is_inherit_only(ace))
 +                              continue;
 +                      if (nfs4ace_is_allow(ace)) {
 +                              ace = x->acl->a_entries + x->acl->a_count;
 +                              break;
 +                      }
 +                      if (nfs4ace_is_deny(ace) && nfs4ace_is_owner(ace))
 +                              break;
 +              }
 +              if (ace != x->acl->a_entries + x->acl->a_count) {
 +                      if (nfs4ace_change_mask(x, &ace, ace->e_mask |
 +                                      (allowed & ~x->acl->a_owner_mask)))
 +                              return -1;
 +              } else {
 +                      ace = x->acl->a_entries;
 +                      if (nfs4acl_insert_entry(x, &ace))
 +                              return -1;
 +                      ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE;
 +                      ace->e_flags = ACE4_SPECIAL_WHO;
 +                      ace->e_mask = allowed & ~x->acl->a_owner_mask;
 +                      ace->u.e_who = nfs4ace_owner_who;
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +/**
 + * nfs4acl_apply_masks  -  apply the masks to the acl
 + *
 + * Apply the masks so that the acl allows no more flags than the
 + * intersection between the flags that the original acl allows and the
 + * mask matching the process.
 + *
 + * Note: this algorithm may push the number of entries in the acl above
 + * ACL4_XATTR_MAX_COUNT, so a read-modify-write cycle would fail.
 + */
 +int
 +nfs4acl_apply_masks(struct nfs4acl **acl)
 +{
 +      struct nfs4acl_alloc x = {
 +              .acl = *acl,
 +              .count = (*acl)->a_count,
 +      };
 +      int retval = 0;
 +
 +      if (nfs4acl_move_everyone_aces_down(&x) ||
 +          nfs4acl_propagate_everyone(&x) ||
 +          __nfs4acl_apply_masks(&x) ||
 +          nfs4acl_isolate_owner_class(&x) ||
 +          nfs4acl_isolate_group_class(&x))
 +              retval = -ENOMEM;
 +
 +      *acl = x.acl;
 +      return retval;
 +}
 +EXPORT_SYMBOL(nfs4acl_apply_masks);
 +
 +int nfs4acl_write_through(struct nfs4acl **acl)
 +{
 +      struct nfs4acl_alloc x = {
 +              .acl = *acl,
 +              .count = (*acl)->a_count,
 +      };
 +      int retval = 0;
 +
 +      if (!((*acl)->a_flags & ACL4_WRITE_THROUGH))
 +              goto out;
 +
 +      if (nfs4acl_move_everyone_aces_down(&x) ||
 +          nfs4acl_propagate_everyone(&x) ||
 +          __nfs4acl_write_through(&x))
 +              retval = -ENOMEM;
 +
 +      *acl = x.acl;
 +out:
 +      return retval;
 +}
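Taken together, a hedged sketch of the pipeline these two entry points implement, assuming the caller holds the only reference to the acl (nfs4acl_chmod() comes from the companion file above):

static int example_chmod_and_flatten(struct nfs4acl **acl, mode_t mode)
{
	/* nfs4acl_chmod() consumes the old reference, recomputes the file
	 * masks, and applies write-through; nfs4acl_apply_masks() then
	 * folds the masks into the individual entries. */
	*acl = nfs4acl_chmod(*acl, mode);
	if (IS_ERR(*acl))
		return PTR_ERR(*acl);
	return nfs4acl_apply_masks(acl);
}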
Simple merge
Simple merge
diff --cc fs/nfsd/vfs.c
Simple merge
Simple merge
Simple merge
diff --cc fs/proc/array.c
Simple merge
diff --cc fs/proc/base.c
Simple merge
diff --cc fs/proc/kcore.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -395,8 -396,8 +399,9 @@@ static inline int module_is_live(struc
  struct module *__module_text_address(unsigned long addr);
  struct module *__module_address(unsigned long addr);
  bool is_module_address(unsigned long addr);
+ bool is_module_percpu_address(unsigned long addr);
  bool is_module_text_address(unsigned long addr);
 +const char *supported_printable(int taint);
  
  static inline int within_module_core(unsigned long addr, struct module *mod)
  {
Simple merge
@@@ -106,9 -106,8 +106,10 @@@ int kmem_cache_shrink(struct kmem_cach
  void kmem_cache_free(struct kmem_cache *, void *);
  unsigned int kmem_cache_size(struct kmem_cache *);
  const char *kmem_cache_name(struct kmem_cache *);
+ int kern_ptr_validate(const void *ptr, unsigned long size);
  int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 +unsigned kmem_alloc_estimate(struct kmem_cache *cachep,
 +                      gfp_t flags, int objects);
  
  /*
   * Please use this macro to create slab caches. Simply specify the
@@@ -51,7 -51,7 +51,8 @@@
  #include <linux/skbuff.h>     /* struct sk_buff */
  #include <linux/mm.h>
  #include <linux/security.h>
+ #include <linux/slab.h>
 +#include <linux/reserve.h>
  
  #include <linux/filter.h>
  #include <linux/rculist_nulls.h>
@@@ -39,7 -39,7 +39,8 @@@
  #include <linux/mutex.h>
  #include <linux/completion.h>
  #include <linux/init.h>
+ #include <linux/slab.h>
 +#include <linux/err.h>
  #include <xen/interface/xen.h>
  #include <xen/interface/grant_table.h>
  #include <xen/interface/io/xenbus.h>
Simple merge
diff --cc init/main.c
Simple merge
index 8c6b8fe,0000000..9f7655d
mode 100644,000000..100644
--- /dev/null
@@@ -1,7257 -1,0 +1,7258 @@@
 +/*
 + *
 + * Most of this code is borrowed and adapted from the lkcd command "lcrash"
 + * and its supporting library.
 + *
 + * This provides kdb commands for casting memory structures.
 + * It provides
 + *  the "print", "px", and "pd" commands.
 + *
 + * Be careful when porting the klib KL_XXX functions (they call through a
 + * jump table that we don't use here).
 + *
 + * The kernel type information is added by insmod'ing the kdb debuginfo
 + * module. It loads symbolic debugging info (produced by lcrash -o; this
 + * information originally comes from the lcrash "kerntypes" file).
 + *
 + */
 +
 +#define VMALLOC_START_IA64 0xa000000200000000
 +#include <linux/kernel.h>
 +#include <linux/module.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/fs.h>
 +#include <asm/processor.h>
 +#include <asm/uaccess.h>
 +#include <asm/fcntl.h>
 +#include <linux/vmalloc.h>
 +#include <linux/ctype.h>
 +#include <linux/file.h>
 +#include <linux/err.h>
++#include <linux/slab.h>
 +#include "modules/lcrash/klib.h"
 +#include "modules/lcrash/kl_stringtab.h"
 +#include "modules/lcrash/kl_btnode.h"
 +#include "modules/lcrash/lc_eval.h"
 +
 +#undef next_node /* collision with nodemask.h */
 +int           have_debug_file = 0;
 +dbg_sym_t *types_tree_head;
 +dbg_sym_t *typedefs_tree_head;
 +kltype_t      *kltype_array;
 +dbg_sym_t     *dsym_types_array;
 +
 +
 +EXPORT_SYMBOL(types_tree_head);
 +EXPORT_SYMBOL(typedefs_tree_head);
 +EXPORT_SYMBOL(kltype_array);
 +EXPORT_SYMBOL(dsym_types_array);
 +
 +#define C_HEX         0x0002
 +#define C_WHATIS      0x0004
 +#define C_NOVARS      0x0008
 +#define C_SIZEOF      0x0010
 +#define C_SHOWOFFSET  0x0020
 +#define       C_LISTHEAD      0x0040
 +#define C_LISTHEAD_N    0x0080 /* walk using list_head.next */
 +#define C_LISTHEAD_P    0x0100 /* walk using list_head.prev */
 +#define C_BINARY      0x0200
 +#define MAX_LONG_LONG 0xffffffffffffffffULL
 +klib_t   kdb_klib;
 +klib_t   *KLP = &kdb_klib;
 +k_error_t klib_error = 0;
 +dbg_sym_t *type_tree = (dbg_sym_t *)NULL;
 +dbg_sym_t *typedef_tree = (dbg_sym_t *)NULL;
 +dbg_sym_t *func_tree = (dbg_sym_t *)NULL;
 +dbg_sym_t *srcfile_tree = (dbg_sym_t *)NULL;
 +dbg_sym_t *var_tree = (dbg_sym_t *)NULL;
 +dbg_sym_t *xtype_tree = (dbg_sym_t *)NULL;
 +dbg_hashrec_t *dbg_hash[TYPE_NUM_SLOTS];
 +int all_count, deall_count;
 +void single_type(char *str);
 +void sizeof_type(char *str);
 +typedef struct chunk_s {
 +      struct chunk_s  *next;  /* Must be first */
 +      struct chunk_s  *prev;  /* Must be second */
 +      void            *addr;
 +      struct bucket_s *bucketp;
 +      uint32_t        chunksz;  /* size of memory chunk (via malloc()) */
 +      uint32_t        blksz;  /* Not including header */
 +      short           blkcount; /* Number of blksz blocks in chunk */
 +} chunk_t;
 +
 +typedef struct blkhdr_s {
 +      struct blkhdr_s *next;
 +      union {
 +              struct blkhdr_s *prev;
 +      chunk_t *chunkp;
 +      } b_un;
 +      int     flg;
 +      int     size;
 +} blkhdr_t;
 +
 +int ptrsz64 = ((int)sizeof(void *) == 8);
 +alloc_functions_t alloc_functions;
 +
 +/*
 + * return 1 if addr is invalid
 + */
 +static int
 +invalid_address(kaddr_t addr, int count)
 +{
 +      unsigned char c;
 +
 +      /* FIXME: untested? */
 +      /* FIXME: use kdb_verify_area */
 +      while (count--) {
 +              if (kdb_getarea(c, addr))
 +                      return 1;
 +              addr++;
 +      }
 +      return 0;
 +}
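 +
 +/* A minimal illustrative sketch (deliberately not compiled) of the intended
 + * calling pattern: probe a candidate address with invalid_address() before
 + * copying from it.  The fixed size of 16 bytes is an arbitrary example.
 + */
 +#if 0
 +static int example_fetch(kaddr_t addr, void *buf)
 +{
 +      if (invalid_address(addr, 16)) {
 +              return 1;       /* not safely readable */
 +      }
 +      return kl_get_block(addr, 16, buf, NULL);
 +}
 +#endif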
 +
 +/*
 + * wrappers for calls to kernel-style allocation/deallocation
 + */
 +static void *
 +kl_alloc_block(int size)
 +{
 +      void    *vp;
 +
 +      vp = kmalloc(size, GFP_KERNEL);
 +      if (!vp) {
 +              kdb_printf("kmalloc of %d bytes failed\n", size);
 +              return NULL;
 +      }
 +      /* important: the lcrash code sometimes assumes that the
 +       *            allocation is zeroed out
 +       */
 +      memset(vp, 0, size);
 +      all_count++;
 +      return vp;
 +}
 +static void
 +kl_free_block(void *vp)
 +{
 +      kfree(vp);
 +      deall_count++;
 +      return;
 +}
 +
 +int
 +get_value(char *s, uint64_t *value)
 +{
 +      return kl_get_value(s, NULL, 0, value);
 +}
 +
 +/*
 + * kl_get_block()
 + *
 + *   Read size bytes from virtual address addr in the system memory image.
 + */
 +k_error_t
 +kl_get_block(kaddr_t addr, unsigned size, void *bp, void *mmap)
 +{
 +      if (!bp) {
 +              return(KLE_NULL_BUFF);
 +      } else if (!size) {
 +              return(KLE_ZERO_SIZE);
 +      }
 +
 +      memcpy(bp, (void *)addr, size);
 +
 +      return(0);
 +}
 +
 +/*
 + * print_value()
 + */
 +void
 +print_value(char *ldstr, uint64_t value, int width)
 +{
 +      int w = 0;
 +      char fmtstr[12], f, s[2]="\000\000";
 +
 +      if (ldstr) {
 +              kdb_printf("%s", ldstr);
 +      }
 +        s[0] = '#';
 +      f = 'x';
 +      if (width) {
 +              if (ptrsz64) {
 +                      w = 18; /* due to leading "0x" */
 +              } else {
 +                      w = 10; /* due to leading "0x" */
 +              }
 +      }
 +      if (w) {
 +              sprintf(fmtstr, "%%%s%d"FMT64"%c", s, w, f);
 +      } else {
 +              sprintf(fmtstr, "%%%s"FMT64"%c", s, f);
 +      }
 +      kdb_printf(fmtstr, value);
 +}
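 +
 +/* Illustrative calls (not compiled): assuming FMT64 expands to "ll" on a
 + * 64-bit kernel, the first call below builds the format string "%#18llx"
 + * (fixed width), the second "%#llx" (natural width).
 + */
 +#if 0
 +      print_value("addr: ", 0xdeadbeefULL, 8);        /* padded to 18 columns */
 +      print_value("addr: ", 0xdeadbeefULL, 0);        /* natural width */
 +#endif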
 +
 +/*
 + * print_list_head()
 + */
 +void
 +print_list_head(kaddr_t saddr)
 +{
 +      print_value("STRUCT ADDR: ", (uint64_t)saddr, 8);
 +      kdb_printf("\n");
 +}
 +
 +/*
 + * check_prev_ptr()
 + */
 +void
 +check_prev_ptr(kaddr_t ptr, kaddr_t prev)
 +{
 +      if(ptr != prev) {
 +              kdb_printf("\nWARNING: Pointer broken. %#"FMTPTR"x,"
 +                      " SHOULD BE: %#"FMTPTR"x\n", prev, ptr);
 +      }
 +}
 +
 +/*
 + * kl_kaddr() -- Return a kernel virtual address stored in a structure
 + *
 + *   Pointer 'p' points to a kernel structure
 + *   of type 's'.  Get the kernel address located in member 'm'.
 + */
 +kaddr_t
 +kl_kaddr(void *p, char *s, char *m)
 +{
 +      uint64_t *u64p;
 +      int     offset;
 +
 +      offset = kl_member_offset(s, m);
 +      u64p = (uint64_t *)(p + offset);
 +      return((kaddr_t)*u64p);
 +}
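 +
 +/* Illustrative sketch (not compiled): given a local copy of a list_head,
 + * pull out the kernel addresses of its neighbours.  "lh_buf" is an assumed
 + * caller-supplied buffer, not something defined in this file.
 + */
 +#if 0
 +      kaddr_t next, prev;
 +
 +      next = kl_kaddr(lh_buf, "list_head", "next");
 +      prev = kl_kaddr(lh_buf, "list_head", "prev");
 +#endif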
 +
 +/*
 + * walk_structs() -- walk linked lists of kernel data structures
 + */
 +int
 +walk_structs(char *s, char *f, char *member, kaddr_t addr, int flags)
 +{
 +      int size, offset, mem_offset=0;
 +      kaddr_t last = 0, next;
 +      kltype_t *klt = (kltype_t *)NULL, *memklt=(kltype_t *)NULL;
 +      unsigned long long iter_threshold = 10000;
 +
 +      int counter = 0;
 +      kaddr_t head=0, head_next=0, head_prev=0, entry=0;
 +      kaddr_t entry_next=0, entry_prev;
 +
 +      /* field name of link pointer, determine its offset in the struct.  */
 +      if ((offset = kl_member_offset(s, f)) == -1) {
 +              kdb_printf("Could not determine offset for member %s of %s.\n",
 +                      f, s);
 +              return 0;
 +      }
 +
 +      /* Get the type of the enclosing structure */
 +      if (!(klt = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
 +              kdb_printf("Could not find the type of %s\n", s);
 +              return(1);
 +      }
 +
 +      /* Get the struct size */
 +      if ((size = kl_struct_len(s)) == 0) {
 +              kdb_printf ("could not get the length of %s\n", s);
 +              return(1);
 +      }
 +
 +      /* test for a named member of the structure that should be displayed */
 +      if (member) {
 +              memklt = kl_get_member(klt, member);
 +              if (!memklt) {
 +                      kdb_printf ("%s has no member %s\n", s, member);
 +                      return 1;
 +              }
 +              mem_offset = kl_get_member_offset(klt, member);
 +      }
 +
 +      if ((next = addr)) {
 +              /* get head of list (anchor) when struct list_head is used */
 +              if (flags & C_LISTHEAD) {
 +                      head = next;
 +                      if (invalid_address(head, sizeof(head))) {
 +                              kdb_printf ("invalid address %#lx\n",
 +                                      head);
 +                              return 1;
 +                      }
 +                      /* get next field of anchor */
 +                      head_next = kl_kaddr((void *)head, "list_head", "next");
 +                      if (invalid_address(head_next, sizeof(head_next))) {
 +                              kdb_printf ("invalid address %#lx\n",
 +                                      head_next);
 +                              return 1;
 +                      }
 +                      /* get prev field of anchor */
 +                      head_prev = kl_kaddr((void *)head, "list_head", "prev");
 +                      if (invalid_address(head_prev, sizeof(head_prev))) {
 +                              kdb_printf ("invalid address %#lx\n",
 +                                      head_prev);
 +                              return 1;
 +                      }
 +                      entry = 0;
 +              }
 +      }
 +
 +      while (next && counter <= iter_threshold) {
 +              counter++;
 +              if (counter > iter_threshold) {
 +                      kdb_printf("\nWARNING: Iteration threshold reached.\n");
 +                      kdb_printf("Current threshold: %llu\n", iter_threshold);
 +                      break;
 +              }
 +              if(flags & C_LISTHEAD) {
 +                      if(!(entry)){
 +                              if(flags & C_LISTHEAD_N){
 +                                      entry = head_next;
 +                              } else {
 +                                      entry = head_prev;
 +                              }
 +                              last = head;
 +                      }
 +
 +                      if(head == entry) {
 +                              if(flags & C_LISTHEAD_N){
 +                                      check_prev_ptr(last, head_prev);
 +                              } else {
 +                                      check_prev_ptr(last, head_next);
 +                              }
 +                              break;
 +                      }
 +
 +                      next = entry - offset; /* next structure */
 +                      /* check that the whole structure can be addressed */
 +                      if (invalid_address(next, size)) {
 +                              kdb_printf(
 +                              "invalid struct address %#lx\n", next);
 +                              return 1;
 +                      }
 +                      /* and validate that it points to valid addresses */
 +                      entry_next = kl_kaddr((void *)entry,"list_head","next");
 +                      if (invalid_address(entry_next, sizeof(entry_next))) {
 +                              kdb_printf("invalid address %#lx\n",
 +                                      entry_next);
 +                              return 1;
 +                      }
 +                      entry_prev = kl_kaddr((void *)entry,"list_head","prev");
 +                      if (invalid_address(entry_prev, sizeof(entry_prev))) {
 +                              kdb_printf("invalid address %#lx\n",
 +                                      entry_prev);
 +                              return 1;
 +                      }
 +                      if(flags & C_LISTHEAD_N){
 +                              check_prev_ptr(last, entry_prev);
 +                      } else {
 +                              check_prev_ptr(last, entry_next);
 +                      }
 +                      print_list_head(next);
 +                      last = entry;
 +                      if(flags & C_LISTHEAD_N){
 +                              entry = entry_next; /* next list_head */
 +                      } else {
 +                              entry = entry_prev; /* next list_head */
 +                      }
 +              }
 +
 +              if (memklt) {
 +                      /* print named sub-structure in C-like struct format. */
 +                      kl_print_member(
 +                              (void *)((unsigned long)next+mem_offset),
 +                              memklt, 0, C_HEX);
 +              } else {
 +                      /* print entire structure in C-like struct format. */
 +                      kl_print_type((void *)next, klt, 0, C_HEX);
 +              }
 +
 +              if(!(flags & C_LISTHEAD)) {
 +                      last = next;
 +                      next = (kaddr_t) (*(uint64_t*)(next + offset));
 +              }
 +      }
 +
 +      return(0);
 +}
 +
 +/*
 + * Implement the lcrash walk -s command
 + *  see lcrash cmd_walk.c
 + */
 +int
 +kdb_walk(int argc, const char **argv)
 +{
 +      int     i, nonoptc=0, optc=0, flags=0, init_len=0;
 +      char    *cmd, *arg, *structp=NULL, *forwp=NULL, *memberp=NULL;
 +      char    *addrp=NULL;
 +      uint64_t value;
 +      kaddr_t start_addr;
 +
 +      all_count=0;
 +      deall_count=0;
 +      if (!have_debug_file) {
 +              kdb_printf("no debuginfo file\n");
 +              return 0;
 +      }
 +      /* If there is nothing to evaluate, just return */
 +      if (argc == 0) {
 +              return 0;
 +      }
 +      cmd = (char *)*argv; /* s/b "walk" */
 +      if (strcmp(cmd,"walk")) {
 +              kdb_printf("got %s, not \"walk\"\n", cmd);
 +              return 0;
 +      }
 +
 +      for (i=1; i<=argc; i++) {
 +              arg = (char *)*(argv+i);
 +              if (*arg == '-') {
 +                      optc++;
 +                      if (optc > 2) {
 +                              kdb_printf("too many options\n");
 +                              kdb_printf("see 'walkhelp'\n");
 +                              return 0;
 +                      }
 +                      if (*(arg+1) == 's') {
 +                              continue; /* ignore -s */
 +                      } else if (*(arg+1) == 'h') {
 +                              if ((init_len=kl_struct_len("list_head"))
 +                                                              == 0) {
 +                                      kdb_printf(
 +                                              "could not find list_head\n");
 +                                      return 0;
 +                              }
 +                              if (*(arg+2) == 'p') {
 +                                      flags = C_LISTHEAD;
 +                                      flags |= C_LISTHEAD_P;
 +                              } else if (*(arg+2) == 'n') {
 +                                      flags = C_LISTHEAD;
 +                                      flags |= C_LISTHEAD_N;
 +                              } else {
 +                                      kdb_printf("invalid -h option <%s>\n",
 +                                              arg);
 +                                      kdb_printf("see 'walkhelp'\n");
 +                                      return 0;
 +                              }
 +                      } else {
 +                              kdb_printf("invalid option <%s>\n", arg);
 +                              kdb_printf("see 'walkhelp'\n");
 +                              return 0;
 +                      }
 +              }  else {
 +                      nonoptc++;
 +                      if (nonoptc > 4) {
 +                              kdb_printf("too many arguments\n");
 +                              kdb_printf("see 'walkhelp'\n");
 +                              return 0;
 +                      }
 +                      if (nonoptc == 1) {
 +                              structp = arg;
 +                      } else if (nonoptc == 2) {
 +                              forwp = arg;
 +                      } else if (nonoptc == 3) {
 +                              addrp = arg;
 +                      } else if (nonoptc == 4) {
 +                              /* the member is optional; if we get
 +                                 a fourth, the previous was the member */
 +                              memberp = addrp;
 +                              addrp = arg;
 +                      } else {
 +                              kdb_printf("invalid argument <%s>\n", arg);
 +                              kdb_printf("see 'walkhelp'\n");
 +                              return 0;
 +                      }
 +              }
 +      }
 +      if (nonoptc < 3) {
 +              kdb_printf("too few arguments\n");
 +              kdb_printf("see 'walkhelp'\n");
 +              return 0;
 +      }
 +      if (!(flags & C_LISTHEAD)) {
 +              if ((init_len=kl_struct_len(structp)) == 0) {
 +                      kdb_printf("could not find %s\n", structp);
 +                      return 0;
 +              }
 +      }
 +
 +      /* Get the start address of the structure */
 +      if (get_value(addrp, &value)) {
 +              kdb_printf ("address %s invalid\n", addrp);
 +              return 0;
 +      }
 +      start_addr = (kaddr_t)value;
 +      if (invalid_address(start_addr, init_len)) {
 +              kdb_printf ("address %#lx invalid\n", start_addr);
 +              return 0;
 +      }
 +
 +      if (walk_structs(structp, forwp, memberp, start_addr, flags)) {
 +              kdb_printf ("walk_structs failed\n");
 +              return 0;
 +      }
 +      /* kdb_printf("ptc allocated:%d deallocated:%d\n",
 +               all_count, deall_count); */
 +      return 0;
 +}
 +
 +/*
 + * Implement the lcrash px (print, pd) command
 + *  see lcrash cmd_print.c
 + *
 + *     px <expression>
 + *       e.g. px *(task_struct *) <address>
 + */
 +int
 +kdb_debuginfo_print(int argc, const char **argv)
 +{
 +      /* argc does not count the command itself, which is argv[0] */
 +      char            *cmd, *next, *end, *exp, *cp;
 +      char            *buf;
 +      int             i, j, iflags;
 +      node_t          *np;
 +      uint64_t        flags = 0;
 +
 +      /* If there is nothing to evaluate, just return */
 +      if (argc == 0) {
 +              return 0;
 +      }
 +      all_count=0;
 +      deall_count=0;
 +
 +      cmd = (char *)*argv;
 +
 +      /* Set up the flags value. If this command was invoked via
 +       * "pd" or "px", then make sure the appropriate flag is set.
 +       */
 +      flags = 0;
 +      if (!strcmp(cmd, "pd") || !strcmp(cmd, "print")) {
 +              flags = 0;
 +      } else if (!strcmp(cmd, "px")) {
 +              flags |= C_HEX;
 +      } else if (!strcmp(cmd, "whatis")) {
 +              if (argc != 1) {
 +                      kdb_printf("usage: whatis <symbol | type>\n");
 +                      return 0;
 +              }
 +              cp = (char *)*(argv+1);
 +              single_type(cp);
 +              /* kdb_printf("allocated:%d deallocated:%d\n",
 +                       all_count, deall_count); */
 +              return 0;
 +      } else if (!strcmp(cmd, "sizeof")) {
 +              if (!have_debug_file) {
 +                      kdb_printf("no debuginfo file\n");
 +                      return 0;
 +              }
 +              if (argc != 1) {
 +                      kdb_printf("usage: sizeof type\n");
 +                      return 0;
 +              }
 +              cp = (char *)*(argv+1);
 +              sizeof_type(cp);
 +              return 0;
 +      } else {
 +              kdb_printf("command error: %s\n", cmd);
 +              return 0;
 +      }
 +
 +      /*
 +       * Count the number of bytes necessary to hold the entire expression
 +       * string.
 +       */
 +      for (i=1, j=0; i <= argc; i++) {
 +              j += (strlen(*(argv+i)) + 1);
 +      }
 +
 +      /*
 +       * Allocate space for the expression string and copy the individual
 +       * arguments into it.
 +       */
 +      buf = kl_alloc_block(j);
 +      if (!buf) {
 +              return 0;
 +      }
 +
 +      for (i=1; i <= argc; i++) {
 +              strcat(buf, *(argv+i));
 +              /* put spaces between arguments */
 +              if (i < argc) {
 +                      strcat(buf, " ");
 +              }
 +      }
 +
 +      /* Walk through the expression string, expression by expression.
 +       * Note that a comma (',') is the delimiting character between
 +       * expressions.
 +       */
 +      next = buf;
 +      while (next) {
 +              if ((end = strchr(next, ','))) {
 +                      *end = (char)0;
 +              }
 +
 +              /* Copy the next expression to a separate expression string.
 +               * A separate expression string is necessary because it is
 +               * likely to get freed up in eval() when variables get expanded.
 +               */
 +              i = strlen(next)+1;
 +              exp = (char *)kl_alloc_block(i);
 +              if (!exp) {
 +                      kl_free_block(buf);
 +                      return 0;
 +              }
 +              strcpy(exp, next);
 +
 +              /* Evaluate the expression */
 +              np = eval(&exp, 0);
 +              if (!np || eval_error) {
 +                      print_eval_error(cmd, exp,
 +                              (error_token ? error_token : (char*)NULL),
 +                              eval_error, CMD_NAME_FLG);
 +                      if (np) {
 +                              free_nodes(np);
 +                      }
 +                      kl_free_block(buf);
 +                      kl_free_block(exp);
 +                      free_eval_memory();
 +                      return 0;
 +              }
 +              iflags = flags;
 +              if (print_eval_results(np, iflags)) {
 +                      free_nodes(np);
 +                      kl_free_block(buf);
 +                      kl_free_block(exp);
 +                      free_eval_memory();
 +                      return 0;
 +              }
 +              kl_free_block(exp);
 +
 +              if (end) {
 +                      next = end + 1;
 +                      kdb_printf(" ");
 +              } else {
 +                      next = (char*)NULL;
 +                      kdb_printf("\n");
 +              }
 +              free_nodes(np);
 +      }
 +      free_eval_memory();
 +      kl_free_block(buf);
 +      /* kdb_printf("allocated:%d deallocated:%d\n",
 +                       all_count, deall_count); */
 +      return 0;
 +}
 +
 +/*
 + * Display help for the px command
 + */
 +int
 +kdb_pxhelp(int argc, const char **argv)
 +{
 +      if (have_debug_file) {
 +              kdb_printf ("Some examples of using the px command:\n");
 +              kdb_printf (" the whole structure:\n");
 +              kdb_printf ("  px *(task_struct *)0xe0000...\n");
 +              kdb_printf (" one member:\n");
 +              kdb_printf ("  px (*(task_struct *)0xe0000...)->comm\n");
 +              kdb_printf (" the address of a member:\n");
 +              kdb_printf ("  px &((task_struct *)0xe0000...)->children\n");
 +              kdb_printf (" a structure pointed to by a member:\n");
 +              kdb_printf ("  px ((*(class_device *)0xe0000...)->class)->name\n");
 +              kdb_printf (" array element:\n");
 +              kdb_printf ("  px (cache_sizes *)0xa0000...[0]\n");
 +              kdb_printf ("  px (task_struct *)(0xe0000...)->cpus_allowed.bits[0]\n");
 +      } else {
 +              kdb_printf ("There is no debug info file.\n");
 +              kdb_printf ("The px/pd/print commands can only evaluate ");
 +              kdb_printf ("arithmetic expressions.\n");
 +      }
 +      return 0;
 +}
 +
 +/*
 + * Display help for the walk command
 + */
 +int
 +kdb_walkhelp(int argc, const char **argv)
 +{
 +      if (!have_debug_file) {
 +              kdb_printf("no debuginfo file\n");
 +              return 0;
 +      }
 +      kdb_printf ("Using the walk command:\n");
 +      kdb_printf (" (only the -s (symbolic) form is supported, so -s is ignored)\n");
 +      kdb_printf ("\n");
 +      kdb_printf (" If the list is not linked with list_head structures:\n");
 +      kdb_printf ("  walk [-s] struct name-of-forward-pointer address\n");
 +      kdb_printf ("  example: walk xyz_struct next 0xe00....\n");
 +      kdb_printf ("\n");
 +      kdb_printf (" If the list is linked with list_head structures, use -hn\n");
 +      kdb_printf (" to walk the 'next' list, -hp for the 'prev' list\n");
 +      kdb_printf ("  walk -h[n|p] struct name-of-forward-pointer [member-to-show] address-of-list-head\n");
 +      kdb_printf ("  example, to show the entire task_struct:\n");
 +      kdb_printf ("   walk -hn task_struct tasks 0xe000....\n");
 +      kdb_printf ("  example, to show the task_struct member comm:\n");
 +      kdb_printf ("   walk -hn task_struct tasks comm 0xe000....\n");
 +      kdb_printf ("  (the address is not the address of the first member's list_head,\n");
 +      kdb_printf ("   but of the anchoring list_head)\n");
 +      return 0;
 +}
 +
 +/*
 + * dup_block()
 + */
 +void *
 +dup_block(void *b, int len)
 +{
 +      void *b2;
 +
 +      if ((b2 = kl_alloc_block(len))) {
 +              memcpy(b2, b, len); /* dst, src, sz */
 +      }
 +      return(b2);
 +}
 +
 +/*
 + * kl_reset_error()
 + */
 +void
 +kl_reset_error(void)
 +{
 +        klib_error = 0;
 +}
 +
 +/*
 + * given a symbol name, look up its address
 + *
 + * in lcrash, this would return a pointer to the syment_t in
 + * a binary tree of them
 + *
 + * Here, look up the symbol in the standard kdb way, which fills in
 + * a kdb_symtab_t, and copy the address into a freshly allocated
 + * syment_t.
 + *
 + * kl_lkup_symname returns the address of the syment_t if the symbol
 + * is found, else NULL.
 + *
 + * Note: we allocate the syment_t; the caller should free it with
 + * kl_free_block().
 + */
 +syment_t *
 +kl_lkup_symname (char *cp)
 +{
 +      syment_t  *sp;
 +      kdb_symtab_t kdb_symtab;
 +
 +      if (kdbgetsymval(cp, &kdb_symtab)) {
 +              sp = (syment_t *)kl_alloc_block(sizeof(syment_t));
 +              sp->s_addr = (kaddr_t)kdb_symtab.sym_start;
 +              KL_ERROR = 0;
 +              return (sp);
 +      } else {
 +              /* returns 0 if the symbol is not found */
 +              KL_ERROR = KLE_INVALID_VALUE;
 +              return ((syment_t *)0);
 +      }
 +}
 +
 +/*
 + * kl_get_ra()
 + *
 + * This function returns its own return address.
 + * Useful when trying to capture where we came from.
 + */
 +void*
 +kl_get_ra(void)
 +{
 +      return (__builtin_return_address(0));
 +}
 +
 +/* start kl_util.c */
 +/*
 + * Definitions for the do_math() routine.
 + */
 +#define M_ADD      '+'
 +#define M_SUBTRACT '-'
 +#define M_MULTIPLY '*'
 +#define M_DIVIDE   '/'
 +
 +/*
 + * do_math() -- Calculate a value from a math-expression string
 + *              passed into the function.  For example, passing:
 + *
 + *              0xffffc000*2+6/5-3*19-8
 + *
 + *              returns the value 0xffff7fc0.  This could probably
 + *              be optimized a bit more, but right now it works,
 + *              which is good enough.
 + */
 +static uint64_t
 +do_math(char *str)
 +{
 +      int i = 0;
 +      char *buf, *loc;
 +      uint64_t value1, value2;
 +      syment_t *sp;
 +
 +      buf = (char *)kl_alloc_block((strlen(str) + 1));
 +      sprintf(buf, "%s", str);
 +      for (i = strlen(str); i >= 0; i--) {
 +              if ((str[i] == M_ADD) || (str[i] == M_SUBTRACT)) {
 +                      buf[i] = '\0';
 +                      value1 = do_math(buf);
 +                      value2 = do_math(&str[i+1]);
 +                      kl_free_block((void *)buf);
 +                      if (str[i] == M_SUBTRACT) {
 +                              return value1 - value2;
 +                      } else {
 +                              return value1 + value2;
 +                      }
 +              }
 +      }
 +
 +      for (i = strlen(str); i >= 0; i--) {
 +              if ((str[i] == M_MULTIPLY) || (str[i] == M_DIVIDE)) {
 +                      buf[i] = '\0';
 +                      value1 = do_math(buf);
 +                      value2 = do_math(&str[i+1]);
 +                      kl_free_block((void *)buf);
 +                      if (str[i] == M_MULTIPLY) {
 +                              return (value1 * value2);
 +                      } else {
 +                              if (value2 == 0) {
 +                                      /* handle divide by zero */
 +                                      /* XXX -- set proper error code */
 +                                      klib_error = 1;
 +                                      return (0);
 +                              } else {
 +                                      return (value1 / value2);
 +                              }
 +                      }
 +              }
 +      }
 +
 +      /*
 +       * Otherwise, just process the value, and return it.
 +       */
 +      sp = kl_lkup_symname(buf);
 +      if (KL_ERROR) {
 +              KL_ERROR = 0;
 +              value2 = kl_strtoull(buf, &loc, 10);
 +              if (((!value2) && (buf[0] != '0')) || (*loc) ||
 +                      (!strncmp(buf, "0x", 2)) || (!strncmp(buf, "0X", 2))) {
 +                      value1 = (kaddr_t)kl_strtoull(buf, (char**)NULL, 16);
 +              } else {
 +                      value1 = (unsigned)kl_strtoull(buf, (char**)NULL, 10);
 +              }
 +      } else {
 +              value1 = (kaddr_t)sp->s_addr;
 +              kl_free_block((void *)sp);
 +      }
 +      kl_free_block((void *)buf);
 +      return (value1);
 +}
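 +
 +/* Worked example of the recursion above (illustrative): do_math("8+2*3-4")
 + * first splits at the rightmost '-' into do_math("8+2*3") - do_math("4");
 + * the left half then splits at '+' into do_math("8") + do_math("2*3").
 + * Because '+'/'-' splits are tried before '*'/'/', the usual operator
 + * precedence falls out naturally: (8 + (2*3)) - 4 = 10.
 + */
 +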
 +/*
 + * kl_get_value() -- Translate numeric input strings
 + *
 + *   A generic routine for translating an input string (param) in a
 + *   number of different ways. If the input string is an equation
 + *   (contains the characters '+', '-', '/', and '*'), then perform
 + *   the math evaluation and return one of the following modes (if
 + *   mode is passed):
 + *
 + *   0 -- if the resulting value is <= elements, if elements (number
 + *        of elements in a table) is passed.
 + *
 + *   1 -- if the first character in param is a pound sign ('#').
 + *
 + *   3 -- the numeric result of an equation.
 + *
 + *   If the input string is NOT an equation, mode (if passed) will be
 + *   set in one of the following ways (depending on the contents of
 + *   param and elements).
 + *
 + *   o When the first character of param is a pound sign ('#'), mode
 + *     is set equal to one and the trailing numeric value (assumed to
 + *     be decimal) is returned.
 + *
 + *   o When the first two characters in param are "0x" or "0X," or
 + *     when param contains one of the characters "abcdef", or when
 + *     the length of the input value is eight characters, mode is set
 + *     equal to two and the numeric value contained in param is
 + *     translated as hexadecimal and returned.
 + *
 + *   o The value contained in param is translated as decimal and mode
 + *     is set equal to zero. The resulting value is then tested to see
 + *     if it exceeds elements (if passed). If it does, then value is
 + *     translated as hexadecimal and mode is set equal to two.
 + *
 + *   Note that mode is only set when a pointer is passed in the mode
 + *   parameter. Also note that when elements is set equal to zero, any
 + *   non-hex (as determined above) value not starting with a pound sign
 + *   will be translated as hexadecimal (mode will be set equal to two) --
 + *   IF the string is exactly 16 characters long (a full kaddr_t).
 + *
 + */
 +int
 +kl_get_value(char *param, int *mode, int elements, uint64_t *value)
 +{
 +      char *loc;
 +      uint64_t v;
 +
 +      kl_reset_error();
 +
 +      /* Check to see if we are going to need to do any math
 +       */
 +      if (strpbrk(param, "+-/*")) {
 +              if (!strncmp(param, "#", 1)) {
 +                      v = do_math(&param[1]);
 +                      if (mode) {
 +                              *mode = 1;
 +                      }
 +              } else {
 +                      v = do_math(param);
 +                      if (mode) {
 +                      if (elements && (v <= elements)) {
 +                                      *mode = 0;
 +                              } else {
 +                                      *mode = 3;
 +                              }
 +                      }
 +              }
 +      } else {
 +              if (!strncmp(param, "#", 1)) {
 +                      if (!strncmp(param, "0x", 2)
 +                                      || !strncmp(param, "0X", 2)
 +                                      || strpbrk(param, "abcdef")) {
 +                              v = kl_strtoull(&param[1], &loc, 16);
 +                      } else {
 +                              v = kl_strtoull(&param[1], &loc, 10);
 +                      }
 +                      if (loc) {
 +                              KL_ERROR = KLE_INVALID_VALUE;
 +                              return (1);
 +                      }
 +                      if (mode) {
 +                              *mode = 1;
 +                      }
 +              } else if (!strncmp(param, "0x", 2) || !strncmp(param, "0X", 2)
 +                                      || strpbrk(param, "abcdef")) {
 +                      v = kl_strtoull(param, &loc, 16);
 +                      if (loc) {
 +                              KL_ERROR = KLE_INVALID_VALUE;
 +                              return (1);
 +                      }
 +                      if (mode) {
 +                              *mode = 2; /* HEX VALUE */
 +                      }
 +              } else if (elements || (strlen(param) < 16) ||
 +                              (strlen(param) > 16)) {
 +                      v = kl_strtoull(param, &loc, 10);
 +                      if (loc) {
 +                              KL_ERROR = KLE_INVALID_VALUE;
 +                              return (1);
 +                      }
 +                      if (elements && (v >= elements)) {
 +                              v = (kaddr_t)kl_strtoull(param,
 +                                              (char**)NULL, 16);
 +                              if (mode) {
 +                                      *mode = 2; /* HEX VALUE */
 +                              }
 +                      } else if (mode) {
 +                              *mode = 0;
 +                      }
 +              } else {
 +                      v = kl_strtoull(param, &loc, 16);
 +                      if (loc) {
 +                              KL_ERROR = KLE_INVALID_VALUE;
 +                              return (1);
 +                      }
 +                      if (mode) {
 +                              *mode = 2; /* ASSUME HEX VALUE */
 +                      }
 +              }
 +      }
 +      *value = v;
 +      return (0);
 +}
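 +
 +/* Illustrative calls (not compiled) showing the modes described above;
 + * the expected results follow from the rules in the comment:
 + *
 + *      kl_get_value("#10", &mode, 0, &v);     v = 10, mode = 1
 + *      kl_get_value("0x10", &mode, 0, &v);    v = 16, mode = 2
 + *      kl_get_value("1+2*3", &mode, 0, &v);   v = 7,  mode = 3
 + */
 +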
 +/* end kl_util.c */
 +
 +/* start kl_libutil.c */
 +static int
 +valid_digit(char c, int base)
 +{
 +      switch(base) {
 +              case 2:
 +                      if ((c >= '0') && (c <= '1')) {
 +                              return(1);
 +                      } else {
 +                              return(0);
 +                      }
 +              case 8:
 +                      if ((c >= '0') && (c <= '7')) {
 +                              return(1);
 +                      } else {
 +                              return(0);
 +                      }
 +              case 10:
 +                      if ((c >= '0') && (c <= '9')) {
 +                              return(1);
 +                      } else {
 +                              return(0);
 +                      }
 +              case 16:
 +                      if (((c >= '0') && (c <= '9'))
 +                                      || ((c >= 'a') && (c <= 'f'))
 +                                      || ((c >= 'A') && (c <= 'F'))) {
 +                              return(1);
 +                      } else {
 +                              return(0);
 +                      }
 +      }
 +      return(0);
 +}
 +
 +static int
 +digit_value(char c, int base, int *val)
 +{
 +      if (!valid_digit(c, base)) {
 +              return(1);
 +      }
 +      switch (base) {
 +              case 2:
 +              case 8:
 +              case 10:
 +                      *val = (int)((int)(c - 48));
 +                      break;
 +              case 16:
 +                      if ((c >= 'a') && (c <= 'f')) {
 +                              *val = ((int)(c - 87));
 +                      } else if ((c >= 'A') && (c <= 'F')) {
 +                              *val = ((int)(c - 55));
 +                      } else {
 +                              *val = ((int)(c - 48));
 +                      }
 +      }
 +      return(0);
 +}
 +
 +uint64_t
 +kl_strtoull(char *str, char **loc, int base)
 +{
 +      int dval;
 +      uint64_t i = 1, v, value = 0;
 +      char *c, *cp = str;
 +
 +      *loc = (char *)NULL;
 +      if (base == 0) {
 +              if (!strncmp(cp, "0x", 2) || !strncmp(cp, "0X", 2)) {
 +                      base = 16;
 +              } else if (cp[0] == '0') {
 +                      if (cp[1] == 'b') {
 +                              base = 2;
 +                      } else {
 +                              base = 8;
 +                      }
 +              } else if (strpbrk(cp, "abcdefABCDEF")) {
 +                      base = 16;
 +              } else {
 +                      base = 10;
 +              }
 +      }
 +      if ((base == 8) && (*cp == '0')) {
 +              cp += 1;
 +      } else if ((base == 2) && !strncmp(cp, "0b", 2)) {
 +              cp += 2;
 +      } else if ((base == 16) &&
 +                      (!strncmp(cp, "0x", 2) || !strncmp(cp, "0X", 2))) {
 +              cp += 2;
 +      }
 +      c = &cp[strlen(cp) - 1];
 +      while (c >= cp) {
 +
 +              if (digit_value(*c, base, &dval)) {
 +                      if (loc) {
 +                              *loc = c;
 +                      }
 +                      return(value);
 +              }
 +              v = dval * i;
 +              if ((MAX_LONG_LONG - value) < v) {
 +                      return(MAX_LONG_LONG);
 +              }
 +              value += v;
 +              i *= (uint64_t)base;
 +              c--;
 +      }
 +      return(value);
 +}
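 +
 +/* Illustrative conversions (base passed as 0, so the prefix picks it):
 + *
 + *      kl_strtoull("0x1f", &loc, 0)  returns 31  (leading "0x" -> base 16)
 + *      kl_strtoull("0b101", &loc, 0) returns 5   (leading "0b" -> base 2)
 + *      kl_strtoull("0755", &loc, 0)  returns 493 (leading '0'  -> base 8)
 + */
 +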
 +/* end kl_libutil.c */
 +
 +/*
 + * dbg_hash_sym()
 + */
 +void
 +dbg_hash_sym(uint64_t typenum, dbg_sym_t *stp)
 +{
 +      dbg_hashrec_t *shp, *hshp;
 +
 +      if ((typenum == 0) || (!stp)) {
 +              return;
 +      }
 +      shp = (dbg_hashrec_t *)kl_alloc_block(sizeof(dbg_hashrec_t));
 +      shp->h_typenum = typenum;
 +      shp->h_ptr = stp;
 +      shp->h_next = (dbg_hashrec_t *)NULL;
 +      if ((hshp = dbg_hash[TYPE_NUM_HASH(typenum)])) {
 +              while (hshp->h_next) {
 +                      hshp = hshp->h_next;
 +              }
 +              hshp->h_next = shp;
 +      } else {
 +              dbg_hash[TYPE_NUM_HASH(typenum)] = shp;
 +      }
 +}
 +
 +/*
 + * dbg_find_sym()
 + */
 +dbg_sym_t *
 +dbg_find_sym(char *name, int type, uint64_t typenum)
 +{
 +      dbg_sym_t *stp = (dbg_sym_t *)NULL;
 +
 +      if (name && strlen(name)) {
 +              /* Cycle through the type flags and see if any records are
 +               * present. Note that if multiple type flags or DBG_ALL is
 +               * passed in, only the first occurrence of 'name' will be
 +               * found and returned. If name exists in multiple trees,
 +               * then multiple searches are necessary to find them.
 +               */
 +              if (type & DBG_TYPE) {
 +                      if ((stp = (dbg_sym_t *)kl_find_btnode((btnode_t *)
 +                                      type_tree, name, (int *)NULL))) {
 +                              goto found_sym;
 +                      }
 +              }
 +              if (type & DBG_TYPEDEF) {
 +                      if ((stp = (dbg_sym_t *)kl_find_btnode((btnode_t *)
 +                                      typedef_tree, name, (int *)NULL))) {
 +                              goto found_sym;
 +                      }
 +              }
 +              if (!stp) {
 +                      return((dbg_sym_t*)NULL);
 +              }
 +      }
 +found_sym:
 +      if (typenum) {
 +              dbg_hashrec_t *hshp;
 +
 +              if (stp) {
 +                      if (stp->sym_typenum == typenum) {
 +                              return(stp);
 +                      }
 +              } else if ((hshp = dbg_hash[TYPE_NUM_HASH(typenum)])) {
 +                      while (hshp) {
 +                              if (hshp->h_typenum == typenum) {
 +                                      return(hshp->h_ptr);
 +                              }
 +                              hshp = hshp->h_next;
 +                      }
 +              }
 +      }
 +      return(stp);
 +}
 +
 +/*
 + * kl_find_type() -- find a KLT type by name.
 + */
 +kltype_t *
 +kl_find_type(char *name, int tnum)
 +{
 +      dbg_sym_t *stp;
 +      kltype_t *kltp = (kltype_t *)NULL;
 +
 +      if (!have_debug_file) {
 +              kdb_printf("no debuginfo file\n");
 +              return kltp;
 +      }
 +
 +      if (!tnum || IS_TYPE(tnum)) {
 +              if ((stp = dbg_find_sym(name, DBG_TYPE, 0))) {
 +                      kltp = (kltype_t *)stp->sym_kltype;
 +                      if (tnum && !(kltp->kl_type & tnum)) {
 +                              /* We have found a type by this name
 +                               * but it does not have the right
 +                               * type number (e.g., we're looking
 +                               * for a struct and we don't find
 +                               * a KLT_STRUCT type by this name).
 +                               */
 +                              return((kltype_t *)NULL);
 +                      }
 +              }
 +      }
 +      if (!tnum || IS_TYPEDEF(tnum)) {
 +              if ((stp = dbg_find_sym(name, DBG_TYPEDEF, 0))) {
 +                      kltp = (kltype_t *)stp->sym_kltype;
 +              }
 +      }
 +      return(kltp);
 +}
 +
 +/*
 + * kl_first_btnode() -- non-recursive implementation.
 + */
 +btnode_t *
 +kl_first_btnode(btnode_t *np)
 +{
 +      if (!np) {
 +              return((btnode_t *)NULL);
 +      }
 +
 +      /* Walk down the left side 'til the end...
 +       */
 +      while (np->bt_left) {
 +              np = np->bt_left;
 +      }
 +      return(np);
 +}
 +
 +/*
 + * kl_next_btnode() -- non-recursive implementation.
 + */
 +btnode_t *
 +kl_next_btnode(btnode_t *node)
 +{
 +      btnode_t *np = node, *parent;
 +
 +      if (np) {
 +              if (np->bt_right) {
 +                      return(kl_first_btnode(np->bt_right));
 +              } else {
 +                      parent = np->bt_parent;
 +next:
 +                      if (parent) {
 +                              if (parent->bt_left == np) {
 +                                      return(parent);
 +                              }
 +                              np = parent;
 +                              parent = parent->bt_parent;
 +                              goto next;
 +                      }
 +              }
 +      }
 +      return((btnode_t *)NULL);
 +}
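 +
 +/* The two helpers above give a non-recursive in-order walk; a typical
 + * (illustrative, not compiled) traversal of a tree rooted at "root":
 + */
 +#if 0
 +      btnode_t *np;
 +
 +      for (np = kl_first_btnode(root); np; np = kl_next_btnode(np)) {
 +              kdb_printf("%s\n", np->bt_key);
 +      }
 +#endif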
 +
 +/*
 + * dbg_next_sym()
 + */
 +dbg_sym_t *
 +dbg_next_sym(dbg_sym_t *stp)
 +{
 +      dbg_sym_t *next_stp;
 +
 +      next_stp = (dbg_sym_t *)kl_next_btnode((btnode_t *)stp);
 +      return(next_stp);
 +}
 +
 +/*
 + * kl_prev_btnode() -- non-recursive implementation.
 + */
 +btnode_t *
 +kl_prev_btnode(btnode_t *node)
 +{
 +      btnode_t *np = node, *parent;
 +
 +      if (np) {
 +              if (np->bt_left) {
 +                      np = np->bt_left;
 +                      while (np->bt_right) {
 +                              np = np->bt_right;
 +                      }
 +                      return(np);
 +              }
 +              parent = np->bt_parent;
 +next:
 +              if (parent) {
 +                      if (parent->bt_right == np) {
 +                              return(parent);
 +                      }
 +                      np = parent;
 +                      parent = parent->bt_parent;
 +                      goto next;
 +              }
 +      }
 +      return((btnode_t *)NULL);
 +}
 +
 +/*
 + * dbg_prev_sym()
 + */
 +dbg_sym_t *
 +dbg_prev_sym(dbg_sym_t *stp)
 +{
 +      dbg_sym_t *prev_stp;
 +
 +      prev_stp = (dbg_sym_t *)kl_prev_btnode((btnode_t *)stp);
 +      return(prev_stp);
 +}
 +
 +/*
 + * kl_find_next_type() -- find next KLT type
 + */
 +kltype_t *
 +kl_find_next_type(kltype_t *kltp, int type)
 +{
 +      kltype_t *nkltp = NULL;
 +      dbg_sym_t *nstp;
 +
 +      if (kltp && kltp->kl_ptr) {
 +              nstp = (dbg_sym_t *)kltp->kl_ptr;
 +              nkltp = (kltype_t *)nstp->sym_kltype;
 +              if (type) {
 +                      while(nkltp && !(nkltp->kl_type & type)) {
 +                              if ((nstp = dbg_next_sym(nstp))) {
 +                                      nkltp = (kltype_t *)nstp->sym_kltype;
 +                              } else {
 +                                      nkltp = (kltype_t *)NULL;
 +                              }
 +                      }
 +              }
 +      }
 +      return(nkltp);
 +}
 +
 +/*
 + * dbg_first_sym()
 + */
 +dbg_sym_t *
 +dbg_first_sym(int type)
 +{
 +      dbg_sym_t *stp = (dbg_sym_t *)NULL;
 +
 +      switch(type) {
 +              case DBG_TYPE:
 +                      stp = (dbg_sym_t *)
 +                              kl_first_btnode((btnode_t *)type_tree);
 +                      break;
 +              case DBG_TYPEDEF:
 +                      stp = (dbg_sym_t *)
 +                              kl_first_btnode((btnode_t *)typedef_tree);
 +                      break;
 +      }
 +      return(stp);
 +}
 +
 +/*
 + * kl_first_type()
 + */
 +kltype_t *
 +kl_first_type(int tnum)
 +{
 +      kltype_t *kltp = NULL;
 +      dbg_sym_t *stp;
 +
 +      if (IS_TYPE(tnum)) {
 +              /* If (tnum == KLT_TYPE), then return the first type
 +               * record, regardless of the type. Otherwise, search
 +               * for the first type that maps into tnum.
 +               */
 +              if ((stp = dbg_first_sym(DBG_TYPE))) {
 +                      kltp = (kltype_t *)stp->sym_kltype;
 +                      if (tnum != KLT_TYPE) {
 +                              while (kltp && !(kltp->kl_type & tnum)) {
 +                                      if ((stp = dbg_next_sym(stp))) {
 +                                              kltp = (kltype_t *)stp->sym_kltype;
 +                                      } else {
 +                                              kltp = (kltype_t *)NULL;
 +                                      }
 +                              }
 +                      }
 +              }
 +      } else if (IS_TYPEDEF(tnum)) {
 +              if ((stp = dbg_first_sym(DBG_TYPEDEF))) {
 +                      kltp = (kltype_t *)stp->sym_kltype;
 +              }
 +      }
 +      return(kltp);
 +}
 +
 +/*
 + * kl_next_type()
 + */
 +kltype_t *
 +kl_next_type(kltype_t *kltp)
 +{
 +      dbg_sym_t *stp, *nstp;
 +      kltype_t *nkltp = (kltype_t *)NULL;
 +
 +      if (!kltp) {
 +              return((kltype_t *)NULL);
 +      }
 +      stp = (dbg_sym_t *)kltp->kl_ptr;
 +      if ((nstp = dbg_next_sym(stp))) {
 +              nkltp = (kltype_t *)nstp->sym_kltype;
 +      }
 +      return(nkltp);
 +}
 +
 +/*
 + * kl_prev_type()
 + */
 +kltype_t *
 +kl_prev_type(kltype_t *kltp)
 +{
 +      dbg_sym_t *stp, *pstp;
 +      kltype_t *pkltp = (kltype_t *)NULL;
 +
 +      if (!kltp) {
 +              return((kltype_t *)NULL);
 +      }
 +      stp = (dbg_sym_t *)kltp->kl_ptr;
 +      if ((pstp = dbg_prev_sym(stp))) {
 +              pkltp = (kltype_t *)pstp->sym_kltype;
 +      }
 +      return(pkltp);
 +}
 +
 +/*
 + * kl_realtype()
 + */
 +kltype_t *
 +kl_realtype(kltype_t *kltp, int tnum)
 +{
 +      kltype_t *rkltp = kltp;
 +
 +      while (rkltp) {
 +              if (tnum && (rkltp->kl_type == tnum)) {
 +                      break;
 +              }
 +              if (!rkltp->kl_realtype) {
 +                      break;
 +              }
 +              if (rkltp->kl_realtype == rkltp) {
 +                      break;
 +              }
 +              rkltp = rkltp->kl_realtype;
 +              if (rkltp == kltp) {
 +                      break;
 +              }
 +      }
 +      return(rkltp);
 +}
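 +
 +/* Illustrative sketch (not compiled): resolve a typedef name down to its
 + * underlying type before sizing it.  "pid_t" is just an example name.
 + */
 +#if 0
 +      kltype_t *kltp, *rkltp;
 +
 +      if ((kltp = kl_find_type("pid_t", KLT_TYPEDEF))) {
 +              rkltp = kl_realtype(kltp, 0);   /* e.g. the int base type */
 +      }
 +#endif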
 +
 +/*
 + * dbg_find_typenum()
 + */
 +dbg_type_t *
 +dbg_find_typenum(uint64_t typenum)
 +{
 +        dbg_sym_t *stp;
 +        dbg_type_t *sp = (dbg_type_t *)NULL;
 +
 +        if ((stp = dbg_find_sym(0, DBG_TYPE, typenum))) {
 +                sp = (dbg_type_t *)stp->sym_kltype;
 +        }
 +        return(sp);
 +}
 +
 +/*
 + * find type by typenum
 + */
 +kltype_t *
 +kl_find_typenum(uint64_t typenum)
 +{
 +      kltype_t *kltp;
 +
 +      kltp = (kltype_t *)dbg_find_typenum(typenum);
 +      return(kltp);
 +}
 +
 +/*
 + * kl_find_btnode() -- non-recursive implementation.
 + */
 +btnode_t *
 +_kl_find_btnode(btnode_t *np, char *key, int *max_depth, size_t len)
 +{
 +      int ret;
 +        btnode_t *next, *prev;
 +
 +      if (np) {
 +              if (max_depth) {
 +                      (*max_depth)++;
 +              }
 +              next = np;
 +again:
 +              if (len) {
 +                      ret = strncmp(key, next->bt_key, len);
 +              } else {
 +                      ret = strcmp(key, next->bt_key);
 +              }
 +              if (ret == 0) {
 +                      if ((prev = kl_prev_btnode(next))) {
 +                              if (len) {
 +                                      ret = strncmp(key, prev->bt_key, len);
 +                              } else {
 +                                      ret = strcmp(key, prev->bt_key);
 +                              }
 +                              if (ret == 0) {
 +                                      next = prev;
 +                                      goto again;
 +                              }
 +                      }
 +                      return(next);
 +              } else if (ret < 0) {
 +                      if ((next = next->bt_left)) {
 +                              goto again;
 +                      }
 +              } else {
 +                      if ((next = next->bt_right)) {
 +                              goto again;
 +                      }
 +              }
 +      }
 +      return((btnode_t *)NULL);
 +}
 +
 +/*
 + * kl_type_size()
 + */
 +int
 +kl_type_size(kltype_t *kltp)
 +{
 +      kltype_t *rkltp;
 +
 +      if (!kltp) {
 +              return(0);
 +      }
 +      if (!(rkltp = kl_realtype(kltp, 0))) {
 +              return(0);
 +      }
 +      return(rkltp->kl_size);
 +}
 +
 +/*
 + * kl_struct_len()
 + */
 +int
 +kl_struct_len(char *s)
 +{
 +      kltype_t *kltp;
 +
 +      if ((kltp = kl_find_type(s, (KLT_TYPES)))) {
 +              return kl_type_size(kltp);
 +      }
 +      return(0);
 +}
 +
 +/*
 + * kl_get_member()
 + */
 +kltype_t *
 +kl_get_member(kltype_t *kltp, char *f)
 +{
 +      kltype_t *mp;
 +
 +      if ((mp = kltp->kl_member)) {
 +              while (mp) {
 +                      if (mp->kl_flags & TYP_ANONYMOUS_FLG) {
 +                              kltype_t *amp;
 +
 +                              if ((amp = kl_get_member(mp->kl_realtype, f))) {
 +                                      return(amp);
 +                              }
 +                      } else if (!strcmp(mp->kl_name, f)) {
 +                              break;
 +                      }
 +                      mp = mp->kl_member;
 +              }
 +      }
 +      return(mp);
 +}
 +
 +/*
 + * kl_member()
 + */
 +kltype_t *
 +kl_member(char *s, char *f)
 +{
 +      kltype_t *kltp, *mp = NULL;
 +
 +      if (!(kltp = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
 +              if ((kltp = kl_find_type(s, KLT_TYPEDEF))) {
 +                      kltp = kl_realtype(kltp, 0);
 +              }
 +      }
 +      if (kltp) {
 +              mp = kl_get_member(kltp, f);
 +      }
 +      return(mp);
 +}
 +
 +
 +/*
 + * kl_get_member_offset()
 + */
 +int
 +kl_get_member_offset(kltype_t *kltp, char *f)
 +{
 +      kltype_t *mp;
 +
 +      if ((mp = kltp->kl_member)) {
 +              while (mp) {
 +                      if (mp->kl_flags & TYP_ANONYMOUS_FLG) {
 +                              int off;
 +
 +                              /* Drill down to see if the member we are looking for is in
 +                               * an anonymous union or struct. Since this call is recursive,
 +                               * the drill down may actually be multi-layer.
 +                               */
 +                              off = kl_get_member_offset(mp->kl_realtype, f);
 +                              if (off >= 0) {
 +                                      return(mp->kl_offset + off);
 +                              }
 +                      } else if (!strcmp(mp->kl_name, f)) {
 +                              return(mp->kl_offset);
 +                      }
 +                      mp = mp->kl_member;
 +              }
 +      }
 +      return(-1);
 +}
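 +
 +/* Worked example of the drill-down above (names are illustrative): if a
 + * struct embeds an anonymous union at offset 8, and member "bar" sits 4
 + * bytes into that union, kl_get_member_offset() finds "bar" through the
 + * recursive call and returns 8 + 4 = 12.
 + */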
 +
 +/*
 + * kl_member_offset()
 + */
 +int
 +kl_member_offset(char *s, char *f)
 +{
 +      int off = -1;
 +      kltype_t *kltp;
 +
 +      if (!(kltp = kl_find_type(s, (KLT_STRUCT|KLT_UNION)))) {
 +              if ((kltp = kl_find_type(s, KLT_TYPEDEF))) {
 +                      kltp = kl_realtype(kltp, 0);
 +              }
 +      }
 +      if (kltp) {
 +              off = kl_get_member_offset(kltp, f);
 +      }
 +      return(off);
 +}
 +
 +/*
 + * kl_is_member()
 + */
 +int
 +kl_is_member(char *s, char *f)
 +{
 +      kltype_t *mp;
 +
 +      if ((mp = kl_member(s, f))) {
 +              return(1);
 +      }
 +      return(0);
 +}
 +
 +/*
 + * kl_member_size()
 + */
 +int
 +kl_member_size(char *s, char *f)
 +{
 +      kltype_t *mp;
 +
 +      if ((mp = kl_member(s, f))) {
 +              return(mp->kl_size);
 +      }
 +      return(0);
 +}
 +
 +#define TAB_SPACES                 8
 +#define LEVEL_INDENT(level, flags) {\
 +      int i, j; \
 +      if (!(flags & NO_INDENT)) { \
 +              for (i = 0; i < level; i++) { \
 +                      for (j = 0; j < TAB_SPACES; j++) { \
 +                              kdb_printf(" "); \
 +                      } \
 +              }\
 +      } \
 +}
 +#define PRINT_NL(flags) \
 +      if (!(flags & SUPPRESS_NL)) { \
 +              kdb_printf("\n"); \
 +      }
 +#define PRINT_SEMI_COLON(level, flags) \
 +      if (level && (!(flags & SUPPRESS_SEMI_COLON))) { \
 +              kdb_printf(";"); \
 +      }
 +
 +/*
 + * print_realtype()
 + */
 +static void
 +print_realtype(kltype_t *kltp)
 +{
 +      kltype_t *rkltp;
 +
 +      if ((rkltp = kltp->kl_realtype)) {
 +              while (rkltp && rkltp->kl_realtype) {
 +                      rkltp = rkltp->kl_realtype;
 +              }
 +              if (rkltp->kl_type == KLT_BASE) {
 +                      kdb_printf(" (%s)", rkltp->kl_name);
 +              }
 +      }
 +}
 +
 +int align_chk = 0;
 +/*
 + *  kl_print_uint16()
 + *
 + */
 +void
 +kl_print_uint16(void *ptr, int flags)
 +{
 +      unsigned long long a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 16) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(unsigned long long *) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("%#llx", a);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(a);
 +      } else {
 +              kdb_printf("%llu", a);
 +      }
 +}
 +
 +#if 0
 +/*
 + * kl_print_float16()
 + *
 + */
 +void
 +kl_print_float16(void *ptr, int flags)
 +{
 +      double a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 16) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(double*) ptr;
 +      kdb_printf("%f", a);
 +}
 +#endif
 +
 +/*
 + * kl_print_int16()
 + *
 + */
 +void
 +kl_print_int16(void *ptr, int flags)
 +{
 +      long long a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 16) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(long long *) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("%#llx", a);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(a);
 +      } else {
 +              kdb_printf("%lld", a);
 +      }
 +}
 +
 +/*
 + * kl_print_int8()
 + */
 +void
 +kl_print_int8(void *ptr, int flags)
 +{
 +      long long a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 8) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(long long *) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("%#llx", a);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(a);
 +      } else {
 +              kdb_printf("%lld", a);
 +      }
 +}
 +
 +#if 0
 +/*
 + * kl_print_float8()
 + */
 +void
 +kl_print_float8(void *ptr, int flags)
 +{
 +      double a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 8) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(double*) ptr;
 +      kdb_printf("%f", a);
 +}
 +#endif
 +
 +/*
 + * kl_print_uint8()
 + */
 +void
 +kl_print_uint8(void *ptr, int flags)
 +{
 +      unsigned long long a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 8) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(unsigned long long *) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("%#llx", a);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(a);
 +      } else {
 +              kdb_printf("%llu", a);
 +      }
 +}
 +
 +/*
 + * kl_print_int4()
 + */
 +void
 +kl_print_int4(void *ptr, int flags)
 +{
 +      int32_t a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 4) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(int32_t*) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%x", a);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = a & 0xffffffff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%d", a);
 +      }
 +}
 +
 +#if 0
 +/*
 + * kl_print_float4()
 + */
 +void
 +kl_print_float4(void *ptr, int flags)
 +{
 +      float a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 4) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(float*) ptr;
 +      kdb_printf("%f", a);
 +}
 +#endif
 +
 +/*
 + * kl_print_uint4()
 + */
 +void
 +kl_print_uint4(void *ptr, int flags)
 +{
 +      uint32_t a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 4) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(uint32_t*) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%x", a);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = a & 0xffffffff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%u", a);
 +      }
 +}
 +
 +/*
 + * kl_print_int2()
 + */
 +void
 +kl_print_int2(void *ptr, int flags)
 +{
 +      int16_t a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 2) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(int16_t*) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%hx", a);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = a & 0xffff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%hd", a);
 +      }
 +}
 +
 +/*
 + * kl_print_uint2()
 + */
 +void
 +kl_print_uint2(void *ptr, int flags)
 +{
 +      uint16_t a;
 +
 +      /* Make sure the pointer is properly aligned (or we will
 +       * dump core)
 +       */
 +      if (align_chk && (uaddr_t)ptr % 2) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      a = *(uint16_t*) ptr;
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%hx", a);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = a & 0xffff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%hu", a);
 +      }
 +}
 +
 +/*
 + * kl_print_char()
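 + *
 + * Example (illustrative): the default path prints the octal code
 + * and, for printable characters, the character itself: 'A' prints
 + * as '\101' = 'A', a newline as '\012' = '\n'.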
 + */
 +void
 +kl_print_char(void *ptr, int flags)
 +{
 +      char c;
 +
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%x", (*(char *)ptr) & 0xff);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = (*(char *)ptr) & 0xff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              c = *(char *)ptr;
 +
 +              kdb_printf("\'\\%03o\'", (unsigned char)c);
 +              switch (c) {
 +                      case '\a' :
 +                              kdb_printf(" = \'\\a\'");
 +                              break;
 +                      case '\b' :
 +                              kdb_printf(" = \'\\b\'");
 +                              break;
 +                      case '\t' :
 +                              kdb_printf(" = \'\\t\'");
 +                              break;
 +                      case '\n' :
 +                              kdb_printf(" = \'\\n\'");
 +                              break;
 +                      case '\f' :
 +                              kdb_printf(" = \'\\f\'");
 +                              break;
 +                      case '\r' :
 +                              kdb_printf(" = \'\\r\'");
 +                              break;
 +                      case '\e' :
 +                              kdb_printf(" = \'\\e\'");
 +                              break;
 +                      default :
 +                              if (!iscntrl((unsigned char)c)) {
 +                                      kdb_printf(" = \'%c\'", c);
 +                              }
 +                              break;
 +              }
 +      }
 +}
 +
 +/*
 + * kl_print_uchar()
 + */
 +void
 +kl_print_uchar(void *ptr, int flags)
 +{
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%x", *(unsigned char *)ptr);
 +      } else if (flags & C_BINARY) {
 +              uint64_t value = (*(unsigned char *)ptr) & 0xff;
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%u", *(unsigned char *)ptr);
 +      }
 +}
 +
 +/*
 + * kl_print_base()
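 + *
 + * Dispatches on size (1, 2, 4, 8 or 16 bytes) and encoding. The
 + * float printers are compiled out, so float encodings report an
 + * error instead of printing a value.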
 + */
 +void
 +kl_print_base(void *ptr, int size, int encoding, int flags)
 +{
 +      /* FIXME: untested */
 +      if (invalid_address((kaddr_t)ptr, size)) {
 +              kdb_printf("ILLEGAL ADDRESS (%lx)", (uaddr_t)ptr);
 +              return;
 +      }
 +      switch (size) {
 +
 +              case 1:
 +                      if (encoding == ENC_UNSIGNED) {
 +                              kl_print_uchar(ptr, flags);
 +                      } else {
 +                              kl_print_char(ptr, flags);
 +                      }
 +                      break;
 +
 +              case 2:
 +                      if (encoding == ENC_UNSIGNED) {
 +                              kl_print_uint2(ptr, flags);
 +                      } else {
 +                              kl_print_int2(ptr, flags);
 +                      }
 +                      break;
 +
 +              case 4:
 +                      if (encoding == ENC_UNSIGNED) {
 +                              kl_print_uint4(ptr, flags);
 +                      } else if (encoding == ENC_FLOAT) {
 +                              printk("error: print of 4-byte float\n");
 +                              /* kl_print_float4(ptr, flags); */
 +                      } else {
 +                              kl_print_int4(ptr, flags);
 +                      }
 +                      break;
 +
 +              case 8:
 +                      if (encoding == ENC_UNSIGNED) {
 +                              kl_print_uint8(ptr, flags);
 +                      } else if (encoding == ENC_FLOAT) {
 +                              printk("error: print of 8-byte float\n");
 +                              /* kl_print_float8(ptr, flags); */
 +                      } else {
 +                              kl_print_int8(ptr, flags);
 +                      }
 +                      break;
 +
 +              case 16:
 +                      if (encoding == ENC_UNSIGNED) {
 +                              /* Ex: unsigned long long */
 +                              kl_print_uint16(ptr, flags);
 +                      } else if (encoding == ENC_FLOAT) {
 +                              printk("error: print of 16-byte float\n");
 +                              /* Ex: long double */
 +                              /* kl_print_float16(ptr, flags); */
 +                      } else {
 +                              /* Ex: long long */
 +                              kl_print_int16(ptr, flags);
 +                      }
 +                      break;
 +
 +              default:
 +                      break;
 +      }
 +}
 +
 +/*
 + * kl_print_base_value()
 + */
 +void
 +kl_print_base_value(void *ptr, kltype_t *kltp, int flags)
 +{
 +      kltype_t *rkltp = NULL;
 +
 +      if (kltp->kl_type != KLT_BASE) {
 +              if (!(rkltp = kltp->kl_realtype)) {
 +                      return;
 +              }
 +              if (rkltp->kl_type != KLT_BASE) {
 +                      return;
 +              }
 +      } else {
 +              rkltp = kltp;
 +      }
 +      kl_print_base(ptr, rkltp->kl_size, rkltp->kl_encoding, flags);
 +}
 +
 +/*
 + * kl_print_typedef_type()
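 + *
 + * Follows the kl_realtype chain through nested typedefs, then
 + * dispatches on the underlying type (pointer, base, struct/union,
 + * array or enumeration).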
 + */
 +void
 +kl_print_typedef_type(
 +      void *ptr,
 +      kltype_t *kltp,
 +      int level,
 +      int flags)
 +{
 +      char *name;
 +      kltype_t *rkltp;
 +
 +      if (ptr) {
 +              rkltp = kltp->kl_realtype;
 +              while (rkltp->kl_type == KLT_TYPEDEF) {
 +                      if (!rkltp->kl_realtype) {
 +                              break;
 +                      }
 +                      rkltp = rkltp->kl_realtype;
 +              }
 +              if (rkltp->kl_type == KLT_POINTER) {
 +                      kl_print_pointer_type(ptr, kltp, level, flags);
 +                      return;
 +              }
 +              switch (rkltp->kl_type) {
 +                      case KLT_BASE:
 +                              kl_print_base_type(ptr, kltp,
 +                                      level, flags);
 +                              break;
 +
 +                      case KLT_UNION:
 +                      case KLT_STRUCT:
 +                              kl_print_struct_type(ptr, kltp,
 +                                      level, flags);
 +                              break;
 +
 +                      case KLT_ARRAY:
 +                              kl_print_array_type(ptr, kltp,
 +                                      level, flags);
 +                              break;
 +
 +                      case KLT_ENUMERATION:
 +                              kl_print_enumeration_type(ptr,
 +                                      kltp, level, flags);
 +                              break;
 +
 +                      default:
 +                              kl_print_base_type(ptr, kltp,
 +                                      level, flags);
 +                              break;
 +              }
 +      } else {
 +              LEVEL_INDENT(level, flags);
 +              if (flags & NO_REALTYPE) {
 +                      rkltp = kltp;
 +              } else {
 +                      rkltp = kltp->kl_realtype;
 +                      while (rkltp && rkltp->kl_type == KLT_POINTER) {
 +                              rkltp = rkltp->kl_realtype;
 +                      }
 +              }
 +              if (!rkltp) {
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf("<UNKNOWN>");
 +                      } else {
 +                              kdb_printf( "typedef <UNKNOWN>%s;",
 +                                      kltp->kl_name);
 +                      }
 +                      return;
 +              }
 +              if (rkltp->kl_type == KLT_FUNCTION) {
 +                      if (kltp->kl_realtype->kl_type == KLT_POINTER) {
 +                              kdb_printf("typedef %s(*%s)();",
 +                                      kltp->kl_typestr, kltp->kl_name);
 +                      } else {
 +                              kdb_printf( "typedef %s(%s)();",
 +                                      kltp->kl_typestr, kltp->kl_name);
 +                      }
 +              } else if (rkltp->kl_type == KLT_ARRAY) {
 +                      kl_print_array_type(ptr, rkltp, level, flags);
 +              } else if (rkltp->kl_type == KLT_TYPEDEF) {
 +                      if (!(name = rkltp->kl_name)) {
 +                              name = rkltp->kl_typestr;
 +                      }
 +
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf("%s", name);
 +                      } else {
 +                              kdb_printf("typedef %s%s;",
 +                                      name, kltp->kl_name);
 +                      }
 +                      print_realtype(rkltp);
 +              } else {
 +                      kl_print_type(ptr, rkltp, level, flags);
 +              }
 +              PRINT_NL(flags);
 +      }
 +}
 +
 +/*
 + * kl_print_pointer_type()
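 + *
 + * Example (illustrative): with data, a named non-NULL pointer
 + * prints as "name = 0x<addr>", a NULL one as "name = (nil)";
 + * pointers to functions are handed to kl_print_function_type().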
 + */
 +void
 +kl_print_pointer_type(
 +      void *ptr,
 +      kltype_t *kltp,
 +      int level,
 +      int flags)
 +{
 +      kltype_t *itp;
 +
 +      if (kltp->kl_type == KLT_MEMBER) {
 +              itp = kltp->kl_realtype;
 +      } else {
 +              itp = kltp;
 +      }
 +
 +      /* See if this is a pointer to a function. If it is, then it
 +       * has to be handled differently...
 +       */
 +      while (itp->kl_type == KLT_POINTER) {
 +              if ((itp = itp->kl_realtype)) {
 +                      if (itp->kl_type == KLT_FUNCTION) {
 +                              kl_print_function_type(ptr,
 +                                      kltp, level, flags);
 +                              return;
 +                      }
 +              } else {
 +                      LEVEL_INDENT(level, flags);
 +                      kdb_printf("%s%s;\n",
 +                              kltp->kl_typestr, kltp->kl_name);
 +                      return;
 +              }
 +      }
 +
 +      LEVEL_INDENT(level, flags);
 +      if (ptr) {
 +              kaddr_t tmp;
 +              tmp = *(kaddr_t *)ptr;
 +              flags |= SUPPRESS_SEMI_COLON;
 +              if (kltp->kl_name) {
 +                      if (*(kaddr_t *)ptr) {
 +                              kdb_printf("%s = 0x%"FMTPTR"x",
 +                                      kltp->kl_name, tmp);
 +                      } else {
 +                              kdb_printf("%s = (nil)", kltp->kl_name);
 +                      }
 +              } else {
 +                      if (tmp != 0) {
 +                              kdb_printf("0x%"FMTPTR"x", tmp);
 +                      } else {
 +                              kdb_printf( "(nil)");
 +                      }
 +              }
 +      } else {
 +              if (kltp->kl_typestr) {
 +                      if (kltp->kl_name && !(flags & SUPPRESS_NAME)) {
 +                              kdb_printf("%s%s",
 +                                      kltp->kl_typestr, kltp->kl_name);
 +                      } else {
 +                              kdb_printf("%s", kltp->kl_typestr);
 +                      }
 +              } else {
 +                      kdb_printf("<UNKNOWN>");
 +              }
 +      }
 +      PRINT_SEMI_COLON(level, flags);
 +      PRINT_NL(flags);
 +}
 +
 +/*
 + * kl_print_function_type()
 + */
 +void
 +kl_print_function_type(
 +      void *ptr,
 +      kltype_t *kltp,
 +      int level,
 +      int flags)
 +{
 +      LEVEL_INDENT(level, flags);
 +      if (ptr) {
 +              kaddr_t a;
 +
 +              a = *(kaddr_t *)ptr;
 +              kdb_printf("%s = 0x%"FMTPTR"x", kltp->kl_name, a);
 +      } else {
 +              if (flags & SUPPRESS_NAME) {
 +                      kdb_printf("%s(*)()", kltp->kl_typestr);
 +              } else {
 +                      kdb_printf("%s(*%s)();",
 +                              kltp->kl_typestr, kltp->kl_name);
 +              }
 +      }
 +      PRINT_NL(flags);
 +}
 +
 +/*
 + * kl_print_array_type()
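 + *
 + * Example (illustrative): a char array (ENC_CHAR) prints as a
 + * quoted string, e.g. name = "text"; other element types print
 + * one "[index] value" entry per element inside braces.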
 + */
 +void
 +kl_print_array_type(void *ptr, kltype_t *kltp, int level, int flags)
 +{
 +      int i, count = 0, anon = 0, size, low, high, multi = 0;
 +      char typestr[128], *name, *p;
 +      kltype_t *rkltp, *etp, *retp;
 +
 +      if (kltp->kl_type != KLT_ARRAY) {
 +              if ((rkltp = kltp->kl_realtype)) {
 +                      while (rkltp->kl_type != KLT_ARRAY) {
 +                              if (!(rkltp = rkltp->kl_realtype)) {
 +                                      break;
 +                              }
 +                      }
 +              }
 +              if (!rkltp) {
 +                      LEVEL_INDENT(level, flags);
 +                      kdb_printf("<ARRAY_TYPE>");
 +                      PRINT_SEMI_COLON(level, flags);
 +                      PRINT_NL(flags);
 +                      return;
 +              }
 +      } else {
 +              rkltp = kltp;
 +      }
 +
 +      etp = rkltp->kl_elementtype;
 +      if (!etp) {
 +              LEVEL_INDENT(level, flags);
 +              kdb_printf("<BAD_ELEMENT_TYPE> %s", rkltp->kl_name);
 +              PRINT_SEMI_COLON(level, flags);
 +              PRINT_NL(flags);
 +              return;
 +      }
 +
 +      /* Set retp to point to the actual element type. This is necessary
 +       * for multi-dimensional arrays, which link using the kl_elementtype
 +       * member.
 +       */
 +      retp = etp;
 +      while (retp->kl_type == KLT_ARRAY) {
 +              retp = retp->kl_elementtype;
 +      }
 +      low = rkltp->kl_low_bounds + 1;
 +      high = rkltp->kl_high_bounds;
 +
 +      if (ptr) {
 +
 +              p = ptr;
 +
 +              if ((retp->kl_size == 1) && (retp->kl_encoding == ENC_CHAR)) {
 +                      if (kltp->kl_type == KLT_MEMBER) {
 +                              LEVEL_INDENT(level, flags);
 +                      }
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf("\"");
 +                              flags &= ~SUPPRESS_NAME;
 +                      } else {
 +                              kdb_printf("%s = \"", kltp->kl_name);
 +                      }
 +                      for (i = 0; i < high; i++) {
 +                              if (*(char*)p == 0) {
 +                                      break;
 +                              }
 +                              kdb_printf("%c", *(char *)p);
 +                              p++;
 +                      }
 +                      kdb_printf("\"");
 +                      PRINT_NL(flags);
 +              } else {
 +                      if (kltp->kl_type == KLT_MEMBER) {
 +                              LEVEL_INDENT(level, flags);
 +                      }
 +
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf("{\n");
 +                              flags &= ~SUPPRESS_NAME;
 +                      } else {
 +                              kdb_printf("%s = {\n", kltp->kl_name);
 +                      }
 +
 +                      if (retp->kl_type == KLT_POINTER) {
 +                              size = sizeof(void *);
 +                      } else {
 +                              while (retp->kl_realtype) {
 +                                      retp = retp->kl_realtype;
 +                              }
 +                              size = retp->kl_size;
 +                      }
 +                      if ((retp->kl_type != KLT_STRUCT) &&
 +                                      (retp->kl_type != KLT_UNION)) {
 +                              /* Turn off the printing of names for all
 +                               * but structs and unions.
 +                               */
 +                              flags |= SUPPRESS_NAME;
 +                      }
 +                      for (i = low; i <= high; i++) {
 +
 +                              LEVEL_INDENT(level + 1, flags);
 +                              kdb_printf("[%d] ", i);
 +
 +                              switch (retp->kl_type) {
 +                                      case KLT_POINTER :
 +                                              kl_print_pointer_type(
 +                                                      p, retp, level,
 +                                                      flags|NO_INDENT);
 +                                              break;
 +
 +                                      case KLT_TYPEDEF:
 +                                              kl_print_typedef_type(
 +                                                      p, retp, level,
 +                                                      flags|NO_INDENT);
 +                                              break;
 +
 +                                      case KLT_BASE:
 +                                              kl_print_base_value(p,
 +                                                      retp, flags|NO_INDENT);
 +                                              kdb_printf("\n");
 +                                              break;
 +
 +                                      case KLT_ARRAY:
 +                                              kl_print_array_type(p, retp,
 +                                                      level + 1,
 +                                                      flags|SUPPRESS_NAME);
 +                                              break;
 +
 +                                      case KLT_STRUCT:
 +                                      case KLT_UNION:
 +                                              kl_print_struct_type(p,
 +                                                      retp, level + 1,
 +                                                      flags|NO_INDENT);
 +                                              break;
 +
 +                                      default:
 +                                              kl_print_base_value(
 +                                                      p, retp,
 +                                                      flags|NO_INDENT);
 +                                              kdb_printf("\n");
 +                                              break;
 +                              }
 +                              p = (void *)((uaddr_t)p + size);
 +                      }
 +                      LEVEL_INDENT(level, flags);
 +                      kdb_printf("}");
 +                      PRINT_SEMI_COLON(level, flags);
 +                      PRINT_NL(flags);
 +              }
 +      } else {
 +              if (rkltp) {
 +                      count = (rkltp->kl_high_bounds -
 +                                      rkltp->kl_low_bounds) + 1;
 +              } else {
 +                      count = 1;
 +              }
 +
 +              if (!strcmp(retp->kl_typestr, "struct ") ||
 +                              !strcmp(retp->kl_typestr, "union ")) {
 +                      anon = 1;
 +              }
 +next_dimension:
 +              switch (retp->kl_type) {
 +
 +                      case KLT_UNION:
 +                      case KLT_STRUCT:
 +                              if (anon) {
 +                                      if (multi) {
 +                                              kdb_printf("[%d]", count);
 +                                              break;
 +                                      }
 +                                      kl_print_struct_type(ptr, retp, level,
 +                                              flags|
 +                                              SUPPRESS_NL|
 +                                              SUPPRESS_SEMI_COLON);
 +                                      if (kltp->kl_type == KLT_MEMBER) {
 +                                              kdb_printf(" %s[%d]",
 +                                                      kltp->kl_name, count);
 +                                      } else {
 +                                              kdb_printf(" [%d]", count);
 +                                      }
 +                                      break;
 +                              }
 +                              /* else drop through */
 +
 +                      default:
 +                              LEVEL_INDENT(level, flags);
 +                              if (multi) {
 +                                      kdb_printf("[%d]", count);
 +                                      break;
 +                              }
 +                              name = kltp->kl_name;
 +                              if (retp->kl_type == KLT_TYPEDEF) {
 +                                      strcpy(typestr, retp->kl_name);
 +                                      strcat(typestr, " ");
 +                              } else {
 +                                      strcpy(typestr, retp->kl_typestr);
 +                              }
 +                              if (!name || (flags & SUPPRESS_NAME)) {
 +                                      kdb_printf("%s[%d]", typestr, count);
 +                              } else {
 +                                      kdb_printf("%s%s[%d]",
 +                                              typestr, name, count);
 +                              }
 +              }
 +              if (etp->kl_type == KLT_ARRAY) {
 +                      count = etp->kl_high_bounds - etp->kl_low_bounds + 1;
 +                      etp = etp->kl_elementtype;
 +                      multi++;
 +                      goto next_dimension;
 +              }
 +              PRINT_SEMI_COLON(level, flags);
 +              PRINT_NL(flags);
 +      }
 +}
 +
 +/*
 + * kl_print_enumeration_type()
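 + *
 + * Example (illustrative): for a hypothetical member RUNNING with
 + * value 1 and a stored value of 1, the data form prints
 + * "name = (RUNNING=1)"; a value with no matching member prints
 + * as a plain number.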
 + */
 +void
 +kl_print_enumeration_type(
 +      void *ptr,
 +      kltype_t *kltp,
 +      int level,
 +      int flags)
 +{
 +      unsigned long long val = 0;
 +      kltype_t *mp, *rkltp;
 +
 +      rkltp = kl_realtype(kltp, KLT_ENUMERATION);
 +      if (ptr) {
 +              switch (kltp->kl_size) {
 +                      case 1:
 +                              val = *(uint8_t *)ptr;
 +                              break;
 +
 +                      case 2:
 +                              val = *(uint16_t *)ptr;
 +                              break;
 +
 +                      case 4:
 +                              val = *(uint32_t *)ptr;
 +                              break;
 +
 +                      case 8:
 +                              val = *(uint64_t *)ptr;
 +                              break;
 +              }
 +              mp = rkltp->kl_member;
 +              while (mp) {
 +                      if (mp->kl_value == val) {
 +                              break;
 +                      }
 +                      mp = mp->kl_member;
 +              }
 +              LEVEL_INDENT(level, flags);
 +              if (mp) {
 +                      kdb_printf("%s = (%s=%lld)",
 +                              kltp->kl_name, mp->kl_name, val);
 +              } else {
 +                      kdb_printf("%s = %lld", kltp->kl_name, val);
 +              }
 +              PRINT_NL(flags);
 +      } else {
 +              LEVEL_INDENT(level, flags);
 +              kdb_printf ("%s {", kltp->kl_typestr);
 +              mp = rkltp->kl_member;
 +              while (mp) {
 +                      kdb_printf("%s = %d", mp->kl_name, mp->kl_value);
 +                      if ((mp = mp->kl_member)) {
 +                              kdb_printf(", ");
 +                      }
 +              }
 +              mp = kltp;
 +              if (level) {
 +                      kdb_printf("} %s;", mp->kl_name);
 +              } else {
 +                      kdb_printf("};");
 +              }
 +              PRINT_NL(flags);
 +      }
 +}
 +
 +/*
 + * kl_binary_print()
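 + *
 + * Prints num in binary with leading zeroes suppressed: 6 prints
 + * as "110", 0 as a single "0".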
 + */
 +void
 +kl_binary_print(uint64_t num)
 +{
 +      int i, pre = 1;
 +
 +      for (i = 63; i >= 0; i--) {
 +              if (num & ((uint64_t)1 << i)) {
 +                      kdb_printf("1");
 +                      if (pre) {
 +                              pre = 0;
 +                      }
 +              } else {
 +                      if (!pre) {
 +                              kdb_printf("0");
 +                      }
 +              }
 +      }
 +      if (pre) {
 +              kdb_printf("0");
 +      }
 +}
 +
 +/*
 + * kl_get_bit_value()
 + *
 + * x = byte_size, y = bit_size, z = bit_offset
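 + *
 + * Example (illustrative): with the 4-byte value 0x00abcd12 at ptr,
 + * kl_get_bit_value(ptr, 4, 8, 8) returns (0x00abcd12 >> 8) & 0xff,
 + * i.e. 0xcd.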
 + */
 +uint64_t
 +kl_get_bit_value(void *ptr, unsigned int x, unsigned int y, unsigned int z)
 +{
 +      uint64_t value = 0, mask;
 +
 +      /* handle x bytes of buffer -- doing just memcpy won't work
 +       * on big endian architectures
 +       */
 +      switch (x) {
 +      case 5:
 +      case 6:
 +      case 7:
 +      case 8:
 +              x = 8;
 +              value = *(uint64_t*) ptr;
 +              break;
 +      case 3:
 +      case 4:
 +              x = 4;
 +              value = *(uint32_t*) ptr;
 +              break;
 +      case 2:
 +              value = *(uint16_t*) ptr;
 +              break;
 +      case 1:
 +              value = *(uint8_t *)ptr;
 +              break;
 +      default:
 +              /* FIXME: set KL_ERROR */
 +              return(0);
 +      }
 +      /* FIXME: correct handling of overlapping fields */
 +
 +      /* goto bit offset */
 +      value = value >> z;
 +
 +      /* mask bit size bits */
 +      mask = (((uint64_t)1 << y) - 1);
 +      return (value & mask);
 +}
 +
 +/*
 + * kl_print_bit_value()
 + *
 + * x = byte_size, y = bit_size, z = bit_offset
 + */
 +void
 +kl_print_bit_value(void *ptr, int x, int y, int z, int flags)
 +{
 +      unsigned long long value;
 +
 +      value = kl_get_bit_value(ptr, x, y, z);
 +      if (flags & C_HEX) {
 +              kdb_printf("%#llx", value);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              kdb_printf("%lld", value);
 +      }
 +}
 +
 +/*
 + * kl_print_base_type()
 + */
 +void
 +kl_print_base_type(void *ptr, kltype_t *kltp, int level, int flags)
 +{
 +      LEVEL_INDENT(level, flags);
 +      if (ptr) {
 +              if (!(flags & SUPPRESS_NAME))  {
 +                      kdb_printf ("%s = ", kltp->kl_name);
 +              }
 +      }
 +      if (kltp->kl_type == KLT_MEMBER) {
 +              if (kltp->kl_bit_size < (kltp->kl_size * 8)) {
 +                      if (ptr) {
 +                              kl_print_bit_value(ptr, kltp->kl_size,
 +                                      kltp->kl_bit_size,
 +                                      kltp->kl_bit_offset, flags);
 +                      } else {
 +                              if (kltp->kl_name) {
 +                                      kdb_printf ("%s%s :%d;",
 +                                              kltp->kl_typestr,
 +                                              kltp->kl_name,
 +                                              kltp->kl_bit_size);
 +                              } else {
 +                                      kdb_printf ("%s :%d;",
 +                                              kltp->kl_typestr,
 +                                              kltp->kl_bit_size);
 +                              }
 +                      }
 +                      PRINT_NL(flags);
 +                      return;
 +              }
 +      }
 +      if (ptr) {
 +              kltype_t *rkltp;
 +
 +              rkltp = kl_realtype(kltp, 0);
 +              if (rkltp->kl_encoding == ENC_UNDEFINED) {
 +                      /* This is a void value
 +                       */
 +                      kdb_printf("<VOID>");
 +              } else {
 +                      kl_print_base(ptr, kltp->kl_size,
 +                              rkltp->kl_encoding, flags);
 +              }
 +      } else {
 +              if (kltp->kl_type == KLT_MEMBER) {
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf ("%s", kltp->kl_typestr);
 +                      } else {
 +                              if (kltp->kl_name) {
 +                                      kdb_printf("%s%s;", kltp->kl_typestr,
 +                                              kltp->kl_name);
 +                              } else {
 +                                      kdb_printf ("%s :%d;",
 +                                              kltp->kl_typestr,
 +                                              kltp->kl_bit_size);
 +                              }
 +                      }
 +              } else {
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf("%s", kltp->kl_name);
 +                      } else {
 +                              kdb_printf("%s;", kltp->kl_name);
 +                      }
 +              }
 +      }
 +      PRINT_NL(flags);
 +}
 +
 +/*
 + * kl_print_member()
 + */
 +void
 +kl_print_member(void *ptr, kltype_t *mp, int level, int flags)
 +{
 +      int kl_type = 0;
 +      kltype_t *rkltp;
 +
 +      if (flags & C_SHOWOFFSET) {
 +              kdb_printf("%#x ", mp->kl_offset);
 +      }
 +
 +      if ((rkltp = mp->kl_realtype)) {
 +              kl_type = rkltp->kl_type;
 +      } else
 +              kl_type = mp->kl_type;
 +      switch (kl_type) {
 +              case KLT_STRUCT:
 +              case KLT_UNION:
 +                      kl_print_struct_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_ARRAY:
 +                      kl_print_array_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_POINTER:
 +                      kl_print_pointer_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_FUNCTION:
 +                      kl_print_function_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_BASE:
 +                      kl_print_base_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_ENUMERATION:
 +                      kl_print_enumeration_type(ptr, mp, level, flags);
 +                      break;
 +              case KLT_TYPEDEF:
 +                      while (rkltp && rkltp->kl_realtype) {
 +                              if (rkltp->kl_realtype == rkltp) {
 +                                      break;
 +                              }
 +                              rkltp = rkltp->kl_realtype;
 +                      }
 +                      if (ptr) {
 +                              kl_print_typedef_type(ptr, mp,
 +                                      level, flags);
 +                              break;
 +                      }
 +                      LEVEL_INDENT(level, flags);
 +                      if (flags & SUPPRESS_NAME) {
 +                              if (rkltp && (mp->kl_bit_size <
 +                                              (rkltp->kl_size * 8))) {
 +                                      kdb_printf ("%s :%d",
 +                                              mp->kl_typestr,
 +                                              mp->kl_bit_size);
 +                              } else {
 +                                      kdb_printf("%s",
 +                                              mp->kl_realtype->kl_name);
 +                              }
 +                              print_realtype(mp->kl_realtype);
 +                      } else {
 +                              if (rkltp && (mp->kl_bit_size <
 +                                              (rkltp->kl_size * 8))) {
 +                                      if (mp->kl_name) {
 +                                              kdb_printf ("%s%s :%d;",
 +                                                      mp->kl_typestr,
 +                                                      mp->kl_name,
 +                                                      mp->kl_bit_size);
 +                                      } else {
 +                                              kdb_printf ("%s :%d;",
 +                                                      mp->kl_typestr,
 +                                                      mp->kl_bit_size);
 +                                      }
 +                              } else {
 +                                      kdb_printf("%s %s;",
 +                                              mp->kl_realtype->kl_name,
 +                                              mp->kl_name);
 +                              }
 +                      }
 +                      PRINT_NL(flags);
 +                      break;
 +
 +              default:
 +                      LEVEL_INDENT(level, flags);
 +                      if (mp->kl_typestr) {
 +                              kdb_printf("%s%s;",
 +                                      mp->kl_typestr, mp->kl_name);
 +                      } else {
 +                              kdb_printf("<\?\?\? kl_type:%d> %s;",
 +                                      kl_type, mp->kl_name);
 +                      }
 +                      PRINT_NL(flags);
 +                      break;
 +      }
 +}
 +
 +/*
 + * kl_print_struct_type()
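 + *
 + * Example (illustrative): for a hypothetical struct with two int
 + * members a and b, the data form (buf != NULL) prints
 + *
 + *     struct foo {
 + *       a = 1
 + *       b = 2
 + *     }
 + *
 + * while the declaration form (buf == NULL) prints each member as
 + * a type/name pair followed by a semi-colon.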
 + */
 +void
 +kl_print_struct_type(void *buf, kltype_t *kltp, int level, int flags)
 +{
 +      int orig_flags = flags;
 +      void *ptr = NULL;
 +      kltype_t *mp, *rkltp;
 +
 +      /* If we are printing out an actual struct, then don't print any
 +       * semi colons.
 +       */
 +      if (buf) {
 +              flags |= SUPPRESS_SEMI_COLON;
 +      }
 +
 +      LEVEL_INDENT(level, flags);
 +      if ((level == 0) || (flags & NO_INDENT)) {
 +              kdb_printf("%s{\n", kltp->kl_typestr);
 +      } else {
 +              if (buf) {
 +                      if (level && !(kltp->kl_flags & TYP_ANONYMOUS_FLG)) {
 +                              kdb_printf("%s = %s{\n",
 +                                      kltp->kl_name, kltp->kl_typestr);
 +                      } else {
 +                              kdb_printf("%s{\n", kltp->kl_typestr);
 +                      }
 +                      flags &= (~SUPPRESS_NL);
 +              } else {
 +                      if (kltp->kl_typestr) {
 +                              kdb_printf("%s{\n", kltp->kl_typestr);
 +                      } else {
 +                              kdb_printf("<UNKNOWN> {\n");
 +                      }
 +              }
 +      }
 +
 +      /* If the SUPPRESS_NL, SUPPRESS_SEMI_COLON, and SUPPRESS_NAME flags
 +       * are set and buf is NULL, then turn them off as they only apply
 +       * at the end of the struct. We save the original flags for that
 +       * purpose.
 +       */
 +      if (!buf) {
 +              flags &= ~(SUPPRESS_NL|SUPPRESS_SEMI_COLON|SUPPRESS_NAME);
 +      }
 +
 +      /* If the NO_INDENT is set, we need to turn it off at this
 +       * point -- just in case we come across a member of this struct
 +       * that is also a struct.
 +       */
 +      if (flags & NO_INDENT) {
 +              flags &= ~(NO_INDENT);
 +      }
 +
 +      if (kltp->kl_type == KLT_MEMBER) {
 +              rkltp = kl_realtype(kltp, 0);
 +      } else {
 +              rkltp = kltp;
 +      }
 +      level++;
 +      if ((mp = rkltp->kl_member)) {
 +              while (mp) {
 +                      if (buf) {
 +                              ptr = buf + mp->kl_offset;
 +                      }
 +                      kl_print_member(ptr, mp, level, flags);
 +                      mp = mp->kl_member;
 +              }
 +      } else {
 +              if (kltp->kl_flags & TYP_INCOMPLETE_FLG) {
 +                      LEVEL_INDENT(level, flags);
 +                      kdb_printf("<INCOMPLETE TYPE>\n");
 +              }
 +      }
 +      level--;
 +      LEVEL_INDENT(level, flags);
 +
 +      /* kl_size = 0 for empty structs */
 +      if (ptr || ((kltp->kl_size == 0) && buf)) {
 +              kdb_printf("}");
 +      } else if ((kltp->kl_type == KLT_MEMBER) &&
 +                      !(orig_flags & SUPPRESS_NAME) &&
 +                      !(kltp->kl_flags & TYP_ANONYMOUS_FLG)) {
 +              kdb_printf("} %s", kltp->kl_name);
 +      } else {
 +              kdb_printf("}");
 +      }
 +      PRINT_SEMI_COLON(level, orig_flags);
 +      PRINT_NL(orig_flags);
 +}
 +
 +/*
 + * kl_print_type()
 + */
 +void
 +kl_print_type(void *buf, kltype_t *kltp, int level, int flags)
 +{
 +      void *ptr;
 +
 +      if (buf) {
 +              if (kltp->kl_offset) {
 +                      ptr = (void *)((uaddr_t)buf + kltp->kl_offset);
 +              } else {
 +                      ptr = buf;
 +              }
 +      } else {
 +              ptr = 0;
 +      }
 +
 +      /* Only allow binary printing for base types
 +       */
 +      if (kltp->kl_type != KLT_BASE) {
 +              flags &= (~C_BINARY);
 +      }
 +      switch (kltp->kl_type) {
 +
 +              case KLT_TYPEDEF:
 +                      kl_print_typedef_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_STRUCT:
 +              case KLT_UNION:
 +                      kl_print_struct_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_MEMBER:
 +                      kl_print_member(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_POINTER:
 +                      kl_print_pointer_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_FUNCTION:
 +                      LEVEL_INDENT(level, flags);
 +                      kl_print_function_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_ARRAY:
 +                      kl_print_array_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              case KLT_ENUMERATION:
 +                      kl_print_enumeration_type(ptr,
 +                              kltp, level, flags);
 +                      break;
 +
 +              case KLT_BASE:
 +                      kl_print_base_type(ptr, kltp, level, flags);
 +                      break;
 +
 +              default:
 +                      LEVEL_INDENT(level, flags);
 +                      if (flags & SUPPRESS_NAME) {
 +                              kdb_printf ("%s", kltp->kl_name);
 +                      } else {
 +                              kdb_printf ("%s %s;",
 +                                      kltp->kl_typestr, kltp->kl_name);
 +                      }
 +                      PRINT_NL(flags);
 +      }
 +}
 +
 +/*
 + * eval is from lcrash eval.c
 + */
 +
 +/* Forward declarations */
 +static void free_node(node_t *);
 +static node_t *make_node(token_t *, int);
 +static node_t *get_node_list(token_t *, int);
 +static node_t *do_eval(int);
 +static int is_unary(int);
 +static int is_binary(int);
 +static int precedence(int);
 +static node_t *get_sizeof(void);
 +static int replace_cast(node_t *, int);
 +static int replace_unary(node_t *, int);
 +static node_t *replace(node_t *, int);
 +static void array_to_element(node_t*, node_t*);
 +static int type_to_number(node_t *);
 +kltype_t *number_to_type(node_t *);
 +static type_t *eval_type(node_t *);
 +static type_t *get_type(char *, int);
 +static int add_rchild(node_t *, node_t *);
 +static void free_nodelist(node_t *);
 +
 +/* Global variables
 + */
 +static int logical_flag;
 +static node_t *node_list = (node_t *)NULL;
 +uint64_t eval_error;
 +char *error_token;
 +
 +/*
 + * set_eval_error()
 + */
 +static void
 +set_eval_error(uint64_t ecode)
 +{
 +      eval_error = ecode;
 +}
 +
 +/*
 + * is_typestr()
 + *
 + * We check for "struct", "union", etc. separately because they
 + * would not be an actual part of the type name. We also assume
 + * that the string passed in
 + *
 + * - does not have any leading blanks or tabs
 + * - is NULL terminated
 + * - contains only one type name to check
 + * - does not contain any '*' characters
 + */
 +static int
 +is_typestr(char *str)
 +{
 +      int len;
 +
 +      len = strlen(str);
 +      if ((len >= 6) && !strncmp(str, "struct", 6)) {
 +              return(1);
 +      } else if ((len >= 5) && !strncmp(str, "union", 5)) {
 +              return(1);
 +      } else if ((len >= 5) && !strncmp(str, "short", 5)) {
 +              return(1);
 +      } else if ((len >= 8) && !strncmp(str, "unsigned", 8)) {
 +              return(1);
 +      } else if ((len >= 6) && !strncmp(str, "signed", 6)) {
 +              return(1);
 +      } else if ((len >= 4) && !strncmp(str, "long", 4)) {
 +              return(1);
 +      }
 +      /* Strip off any trailing blanks
 +       */
 +      while(*str && ((str[strlen(str) - 1] == ' ')
 +                      || (str[strlen(str) - 1] == '\t'))) {
 +              str[strlen(str) - 1] = 0;
 +      }
 +      if (kl_find_type(str, KLT_TYPES)) {
 +              return (1);
 +      }
 +      return(0);
 +}
 +
 +/*
 + * free_tokens()
 + */
 +static void
 +free_tokens(token_t *tp)
 +{
 +      token_t *t, *tnext;
 +
 +      t = tp;
 +      while (t) {
 +              tnext = t->next;
 +              if (t->string) {
 +                      kl_free_block((void *)t->string);
 +              }
 +              kl_free_block((void *)t);
 +              t = tnext;
 +      }
 +}
 +
 +/*
 + * process_text()
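 + *
 + * Example (illustrative): for the input 'A' the token type becomes
 + * CHARACTER; for "abc" it becomes TEXT with tok->string set to
 + * "abc".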
 + */
 +static int
 +process_text(char **str, token_t *tok)
 +{
 +      char *cp = *str;
 +      char *s = NULL;
 +      int len = 0;
 +
 +      /* Check and see if this token is a STRING or CHARACTER
 +       * type (beginning with a single or double quote).
 +       */
 +      if (*cp == '\'') {
 +              /* make sure that only a single character is between
 +               * the single quotes (it can be an escaped character
 +               * too).
 +               */
 +              s = strpbrk((cp + 1), "\'");
 +              if (!s) {
 +                      set_eval_error(E_SINGLE_QUOTE);
 +                      error_token = tok->ptr;
 +                      return(1);
 +              }
 +              len = (uaddr_t)s - (uaddr_t)cp;
 +              if ((*(cp+1) == '\\')) {
 +                      if (*(cp+2) == '0') {
 +                              long int val;
 +                              unsigned long uval;
 +                              char *ep;
 +
 +                              uval = kl_strtoull((char*)(cp+2),
 +                                              (char **)&ep, 8);
 +                              val = uval;
 +                              if ((val > 255) || (*ep != '\'')) {
 +                                      set_eval_error(E_BAD_CHAR);
 +                                      error_token = tok->ptr;
 +                                      return(1);
 +                              }
 +                      } else if (*(cp+3) != '\'') {
 +                              set_eval_error(E_BAD_CHAR);
 +                              error_token = tok->ptr;
 +                              return(1);
 +                      }
 +                      tok->type = CHARACTER;
 +              } else if (len == 2) {
 +                      tok->type = CHARACTER;
 +              } else {
 +
 +                      /* Treat as a single token entry. It's possible
 +                       * that what's between the single quotes is a
 +                       * type name. That will be determined later on.
 +                       */
 +                      tok->type = STRING;
 +              }
 +              *str = cp + len;
 +      } else if (*cp == '\"') {
 +              s = strpbrk((cp + 1), "\"");
 +              if (!s) {
 +                      set_eval_error(E_BAD_STRING);
 +                      error_token = tok->ptr;
 +                      return(1);
 +              }
 +              len = (uaddr_t)s - (uaddr_t)cp;
 +              tok->type = TEXT;
 +              *str = cp + len;
 +      }
 +      if ((tok->type == STRING) || (tok->type == TEXT)) {
 +
 +              if ((tok->type == TEXT) && (strlen(cp) > (len + 1))) {
 +
 +                      /* Check to see if there is a comma or semi-colon
 +                       * directly following the string. If there is,
 +                       * then the string is OK (the following characters
 +                       * are part of the next expression). Also, it's OK
 +                       * to have trailing blanks as long as that's all
 +                       * there is.
 +                       */
 +                      char *c;
 +
 +                      c = s + 1;
 +                      while (*c) {
 +                              if ((*c == ',') || (*c == ';')) {
 +                                      break;
 +                              } else if (*c != ' ') {
 +                                      set_eval_error(E_END_EXPECTED);
 +                                      tok->ptr = c;
 +                                      error_token = tok->ptr;
 +                                      return(1);
 +                              }
 +                              c++;
 +                      }
 +                      /* Truncate the trailing blanks (they are not
 +                       * part of the string).
 +                       */
 +                      if (c != (s + 1)) {
 +                              *(s + 1) = 0;
 +                      }
 +              }
 +              tok->string = (char *)kl_alloc_block(len);
 +              memcpy(tok->string, (cp + 1), len - 1);
 +              tok->string[len - 1] = 0;
 +      }
 +      return(0);
 +}
 +
 +/*
 + * get_token_list()
 + */
 +static token_t *
 +get_token_list(char *str)
 +{
 +      int paren_count = 0;
 +      char *cp;
 +      token_t *tok = (token_t*)NULL, *tok_head = (token_t*)NULL;
 +      token_t *tok_last = (token_t*)NULL;
 +
 +      cp = str;
 +      eval_error = 0;
 +
 +      while (*cp) {
 +
 +              /* Skip past any "white space" (spaces and tabs).
 +               */
 +              switch (*cp) {
 +                      case ' ' :
 +                      case '\t' :
 +                      case '`' :
 +                              cp++;
 +                              continue;
 +                      default :
 +                              break;
 +              }
 +
 +              /* Allocate space for the next token */
 +              tok = (token_t *)kl_alloc_block(sizeof(token_t));
 +              tok->ptr = cp;
 +
 +              switch(*cp) {
 +
 +                      /* Check for operators
 +                       */
 +                      case '+' :
 +                              if (*((char*)cp + 1) == '+') {
 +
 +                                      /* We aren't doing assignment here,
 +                                       * so the ++ operator is not
 +                                       * considered valid.
 +                                       */
 +                                      set_eval_error(E_BAD_OPERATOR);
 +                                      error_token = tok->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              } else if (!tok_last ||
 +                                      (tok_last->operator &&
 +                                      (tok_last->operator != CLOSE_PAREN))) {
 +                                      tok->operator = UNARY_PLUS;
 +                              } else {
 +                                      tok->operator = ADD;
 +                              }
 +                              break;
 +
 +                      case '-' :
 +                              if (*((char*)cp + 1) == '-') {
 +
 +                                      /* We aren't doing assignment here, so
 +                                       * the -- operator is not considered
 +                                       * valid.
 +                                       */
 +                                      set_eval_error(E_BAD_OPERATOR);
 +                                      error_token = tok->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              } else if (*((char*)cp + 1) == '>') {
 +                                      tok->operator = RIGHT_ARROW;
 +                                      cp++;
 +                              } else if (!tok_last || (tok_last->operator &&
 +                                      (tok_last->operator != CLOSE_PAREN))) {
 +                                      tok->operator = UNARY_MINUS;
 +                              } else {
 +                                      tok->operator = SUBTRACT;
 +                              }
 +                              break;
 +
 +                      case '.' :
 +                              /* XXX - need to check to see if this is a
 +                               * decimal point in the middle of a floating
 +                               * point value.
 +                               */
 +                              tok->operator = DOT;
 +                              break;
 +
 +                      case '*' :
 +                              /* XXX - need a better way to tell if this is
 +                               * an INDIRECTION. perhaps check the next
 +                               * an INDIRECTION. Perhaps check the next
 +                               */
 +                              if (!tok_last || (tok_last->operator &&
 +                                      ((tok_last->operator != CLOSE_PAREN) &&
 +                                      (tok_last->operator != CAST)))) {
 +                                      tok->operator = INDIRECTION;
 +                              } else {
 +                                      tok->operator = MULTIPLY;
 +                              }
 +                              break;
 +
 +                      case '/' :
 +                              tok->operator = DIVIDE;
 +                              break;
 +
 +                      case '%' :
 +                              tok->operator = MODULUS;
 +                              break;
 +
 +                      case '(' : {
 +                              char *s, *s1, *s2;
 +                              int len;
 +
 +                              /* Make sure the previous token is an operator
 +                               */
 +                              if (tok_last && !tok_last->operator) {
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = tok_last->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              }
 +
 +                              if (tok_last &&
 +                                      ((tok_last->operator == RIGHT_ARROW) ||
 +                                              (tok_last->operator == DOT))) {
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = tok_last->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              }
 +
 +                              /* Check here to see if following tokens
 +                               * constitute a cast.
 +                               */
 +
 +                              /* Skip past any "white space" (spaces
 +                               * and tabs)
 +                               */
 +                              while ((*(cp+1) == ' ') || (*(cp+1) == '\t')) {
 +                                      cp++;
 +                              }
 +                              if ((*(cp+1) == '(') || isdigit(*(cp+1)) ||
 +                                      (*(cp+1) == '+') || (*(cp+1) == '-') ||
 +                                      (*(cp+1) == '*') || (*(cp+1) == '&') ||
 +                                              (*(cp+1) == ')')){
 +                                      tok->operator = OPEN_PAREN;
 +                                      paren_count++;
 +                                      break;
 +                              }
 +
 +                              /* Make sure we have a CLOSE_PAREN.
 +                               */
 +                              if (!(s1 = strchr(cp+1, ')'))) {
 +                                      set_eval_error(E_OPEN_PAREN);
 +                                      error_token = tok->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              }
 +                              /* Check to see if this is NOT a simple
 +                               * typecast.
 +                               */
 +                              if (!(s2 = strchr(cp+1, '.'))) {
 +                                      s2 = strstr(cp+1, "->");
 +                              }
 +                              if (s2 && (s2 < s1)) {
 +                                      tok->operator = OPEN_PAREN;
 +                                      paren_count++;
 +                                      break;
 +                              }
 +
 +                              if ((s = strpbrk(cp+1, "*)"))) {
 +                                      char str[128];
 +
 +                                      len = (uaddr_t)s - (uaddr_t)(cp+1);
 +                                      strncpy(str, cp+1, len);
 +                                      str[len] = 0;
 +                                      if (!is_typestr(str)) {
 +                                              set_eval_error(E_BAD_TYPE);
 +                                              error_token = tok->ptr;
 +                                              free_tokens(tok_head);
 +                                              free_tokens(tok);
 +                                              return ((token_t*)NULL);
 +                                      }
 +                                      if (!(s = strpbrk((cp+1), ")"))) {
 +                                              set_eval_error(E_OPEN_PAREN);
 +                                              error_token = tok->ptr;
 +                                              free_tokens(tok_head);
 +                                              free_tokens(tok);
 +                                              return ((token_t*)NULL);
 +                                      }
 +                                      len = (uaddr_t)s - (uaddr_t)(cp+1);
 +                                      tok->string = (char *)
 +                                              kl_alloc_block(len + 1);
 +                                      memcpy(tok->string, (cp+1), len);
 +                                      tok->string[len] = 0;
 +                                      tok->operator = CAST;
 +                                      cp = (char *)((uaddr_t)(cp+1) + len);
 +                                      break;
 +                              }
 +                              tok->operator = OPEN_PAREN;
 +                              paren_count++;
 +                              break;
 +                      }
 +
 +                      case ')' :
 +                              if (tok_last && ((tok_last->operator ==
 +                                              RIGHT_ARROW) ||
 +                                              (tok_last->operator == DOT))) {
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = tok_last->ptr;
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return ((token_t*)NULL);
 +                              }
 +                              tok->operator = CLOSE_PAREN;
 +                              paren_count--;
 +                              break;
 +
 +                      case '&' :
 +                              if (*((char*)cp + 1) == '&') {
 +                                      tok->operator = LOGICAL_AND;
 +                                      cp++;
 +                              } else if (!tok_last ||
 +                                      (tok_last->operator &&
 +                                              tok_last->operator !=
 +                                              CLOSE_PAREN)) {
 +                                      tok->operator = ADDRESS;
 +                              } else {
 +                                      tok->operator = BITWISE_AND;
 +                              }
 +                              break;
 +
 +                      case '|' :
 +                              if (*((char*)cp + 1) == '|') {
 +                                      tok->operator = LOGICAL_OR;
 +                                      cp++;
 +                              } else {
 +                                      tok->operator = BITWISE_OR;
 +                              }
 +                              break;
 +
 +                      case '=' :
 +                              if (*((char*)cp + 1) == '=') {
 +                                      tok->operator = EQUAL;
 +                                      cp++;
 +                              } else {
 +                                      /* ASSIGNMENT -- NOT IMPLEMENTED
 +                                       */
 +                                      tok->operator = NOT_YET;
 +                              }
 +                              break;
 +
 +                      case '<' :
 +                              if (*((char*)cp + 1) == '<') {
 +                                      tok->operator = LEFT_SHIFT;
 +                                      cp++;
 +                              } else if (*((char*)cp + 1) == '=') {
 +                                      tok->operator = LESS_THAN_OR_EQUAL;
 +                                      cp++;
 +                              } else {
 +                                      tok->operator = LESS_THAN;
 +                              }
 +                              break;
 +
 +                      case '>' :
 +                              if (*((char*)(cp + 1)) == '>') {
 +                                      tok->operator = RIGHT_SHIFT;
 +                                      cp++;
 +                              } else if (*((char*)cp + 1) == '=') {
 +                                      tok->operator = GREATER_THAN_OR_EQUAL;
 +                                      cp++;
 +                              } else {
 +                                      tok->operator = GREATER_THAN;
 +                              }
 +                              break;
 +
 +                      case '!' :
 +                              if (*((char*)cp + 1) == '=') {
 +                                      tok->operator = NOT_EQUAL;
 +                                      cp++;
 +                              } else {
 +                                      tok->operator = LOGICAL_NEGATION;
 +                              }
 +                              break;
 +
 +                      case '$' :
 +                              set_eval_error(E_NOT_IMPLEMENTED);
 +                              error_token = tok->ptr;
 +                              free_tokens(tok_head);
 +                              free_tokens(tok);
 +                              return((token_t*)NULL);
 +                      case '~' :
 +                              tok->operator = ONES_COMPLEMENT;
 +                              break;
 +
 +                      case '^' :
 +                              tok->operator = BITWISE_EXCLUSIVE_OR;
 +                              break;
 +
 +                      case '?' :
 +                              set_eval_error(E_NOT_IMPLEMENTED);
 +                              error_token = tok->ptr;
 +                              free_tokens(tok_head);
 +                              free_tokens(tok);
 +                              return((token_t*)NULL);
 +                      case ':' :
 +                              set_eval_error(E_NOT_IMPLEMENTED);
 +                              error_token = tok->ptr;
 +                              free_tokens(tok_head);
 +                              free_tokens(tok);
 +                              return((token_t*)NULL);
 +                      case '[' :
 +                              tok->operator = OPEN_SQUARE_BRACKET;
 +                              break;
 +
 +                      case ']' :
 +                              tok->operator = CLOSE_SQUARE_BRACKET;
 +                              break;
 +
 +                      default: {
 +
 +                              char *s;
 +                              int len;
 +
 +                              /* See if the last token is a RIGHT_ARROW
 +                               * or a DOT. If it is, then this token must
 +                               * be the name of a struct/union member.
 +                               */
 +                              if (tok_last &&
 +                                      ((tok_last->operator == RIGHT_ARROW) ||
 +                                               (tok_last->operator == DOT))) {
 +                                      tok->type = MEMBER;
 +                              } else if (process_text(&cp, tok)) {
 +                                      free_tokens(tok_head);
 +                                      free_tokens(tok);
 +                                      return((token_t*)NULL);
 +                              }
 +                              if (tok->type == TEXT) {
 +                                      return(tok);
 +                              } else if (tok->type == STRING) {
 +                                      if (is_typestr(tok->string)) {
 +                                              tok->type = TYPE_DEF;
 +                                      } else {
 +                                              tok->type = TEXT;
 +                                              return(tok);
 +                                      }
 +                                      break;
 +                              } else if (tok->type == CHARACTER) {
 +                                      break;
 +                              }
 +
 +                              /* Check to see if the entire string is
 +                               * a typename (valid only for the whatis case).
 +                               */
 +                              s = strpbrk(cp,
 +                                      ".\t+-*/()[]|~!$&%^<>?:&=^\"\'");
 +                              if (!s && !tok->type && is_typestr(cp)) {
 +                                      tok->type = TYPE_DEF;
 +                                      len = strlen(cp) + 1;
 +                                      tok->string = (char *)
 +                                              kl_alloc_block(len);
 +                                      memcpy(tok->string, cp, len - 1);
 +                                      tok->string[len - 1] = 0;
 +                                      cp = (char *)((uaddr_t)cp + len - 2);
 +                                      break;
 +                              }
 +
 +                              /* Now check for everything else
 +                               */
 +                              if ((s = strpbrk(cp,
 +                                      " .\t+-*/()[]|~!$&%^<>?:&=^\"\'"))) {
 +                                      len = (uaddr_t)s - (uaddr_t)cp + 1;
 +                              } else {
 +                                      len = strlen(cp) + 1;
 +                              }
 +
 +                              tok->string =
 +                                      (char *)kl_alloc_block(len);
 +                              memcpy(tok->string, cp, len - 1);
 +                              tok->string[len - 1] = 0;
 +
 +                              cp = (char *)((uaddr_t)cp + len - 2);
 +
 +                              /* Check to see if this is the keyword
 +                               * "sizeof". If not, then check to see if
 +                               * the string is a member name.
 +                               */
 +                              if (!strcmp(tok->string, "sizeof")) {
 +                                      tok->operator = SIZEOF;
 +                                      kl_free_block((void *)tok->string);
 +                                      tok->string = 0;
 +                              } else if (tok_last &&
 +                                      ((tok_last->operator == RIGHT_ARROW) ||
 +                                       (tok_last->operator == DOT))) {
 +                                      tok->type = MEMBER;
 +                              } else {
 +                                      tok->type = STRING;
 +                              }
 +                              break;
 +                      }
 +              }
 +              if (!(tok->type)) {
 +                      tok->type = OPERATOR;
 +              }
 +              if (!tok_head) {
 +                      tok_head = tok_last = tok;
 +              } else {
 +                      tok_last->next = tok;
 +                      tok_last = tok;
 +              }
 +              cp++;
 +      }
 +      if (paren_count < 0) {
 +              set_eval_error(E_CLOSE_PAREN);
 +              error_token = tok->ptr;
 +              free_tokens(tok_head);
 +              return((token_t*)NULL);
 +      } else if (paren_count > 0) {
 +              set_eval_error(E_OPEN_PAREN);
 +              error_token = tok->ptr;
 +              free_tokens(tok_head);
 +              return((token_t*)NULL);
 +      }
 +      return(tok_head);
 +}
 +
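 +/* Example (a sketch, assuming is_typestr() accepts the name): the
 + * input "(struct task_struct *)0xa0000000" tokenizes to a CAST token
 + * whose string is "struct task_struct *", followed by a STRING token
 + * for the hex constant; the '(' handler above folds a recognized
 + * type name into a single CAST token.
 + */
 +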
 +/*
 + * valid_binary_args()
 + */
 +int
 +valid_binary_args(node_t *np, node_t *left, node_t *right)
 +{
 +      int op = np->operator;
 +
 +      if ((op == RIGHT_ARROW) || (op == DOT)) {
 +              if (!left) {
 +                      set_eval_error(E_MISSING_STRUCTURE);
 +                      error_token = np->tok_ptr;
 +                      return(0);
 +              } else if (!(left->node_type == TYPE_DEF) &&
 +                              !(left->node_type == MEMBER) &&
 +                              !(left->operator == CLOSE_PAREN) &&
 +                              !(left->operator == CLOSE_SQUARE_BRACKET)) {
 +                      set_eval_error(E_BAD_STRUCTURE);
 +                      error_token = left->tok_ptr;
 +                      return(0);
 +              }
 +              if (!right || (!(right->node_type == MEMBER))) {
 +                      set_eval_error(E_BAD_MEMBER);
 +                      error_token = np->tok_ptr;
 +                      return(0);
 +              }
 +              return(1);
 +      }
 +      if (!left || !right) {
 +              set_eval_error(E_MISSING_OPERAND);
 +              error_token = np->tok_ptr;
 +              return(0);
 +      }
 +      switch (left->operator) {
 +              case CLOSE_PAREN:
 +              case CLOSE_SQUARE_BRACKET:
 +                      break;
 +              default:
 +                      switch(left->node_type) {
 +                              case NUMBER:
 +                              case STRING:
 +                              case TEXT:
 +                              case CHARACTER:
 +                              case EVAL_VAR:
 +                              case MEMBER:
 +                                      break;
 +                              default:
 +                                      set_eval_error(E_BAD_OPERAND);
 +                                      error_token = np->tok_ptr;
 +                                      return(0);
 +                      }
 +      }
 +      switch (right->operator) {
 +              case OPEN_PAREN:
 +                      break;
 +              default:
 +                      switch(right->node_type) {
 +                              case NUMBER:
 +                              case STRING:
 +                              case TEXT:
 +                              case CHARACTER:
 +                              case EVAL_VAR:
 +                              case MEMBER:
 +                                      break;
 +                              default:
 +                                      set_eval_error(E_BAD_OPERAND);
 +                                      error_token = np->tok_ptr;
 +                                      return(0);
 +                      }
 +      }
 +      return(1);
 +}
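 +
 +/* For example, a dangling "->" with nothing on its left is rejected
 + * above with E_MISSING_STRUCTURE, and "p->x" is only accepted when
 + * the right-hand node was tokenized as a MEMBER.
 + */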
 +
 +/*
 + * get_node_list()
 + */
 +static node_t *
 +get_node_list(token_t *tp, int flags)
 +{
 +      node_t *root = (node_t *)NULL;
 +      node_t *np = (node_t *)NULL;
 +      node_t *last = (node_t *)NULL;
 +
 +      /* Loop through the tokens and convert them to nodes.
 +       */
 +      while (tp) {
 +              np = make_node(tp, flags);
 +              if (eval_error) {
 +                      return((node_t *)NULL);
 +              }
 +              if (root) {
 +                      last->next = np;
 +                      last = np;
 +              } else {
 +                      root = last = np;
 +              }
 +              tp = tp->next;
 +      }
 +      last->next = (node_t *)NULL; /* cpw patch */
 +      last = (node_t *)NULL;
 +      for (np = root; np; np = np->next) {
 +              if (is_binary(np->operator)) {
 +                      if (!valid_binary_args(np, last, np->next)) {
 +                              free_nodelist(root);
 +                              return((node_t *)NULL);
 +                      }
 +              }
 +              last = np;
 +      }
 +      return(root);
 +}
 +
 +/*
 + * next_node()
 + */
 +static node_t *
 +next_node(void)
 +{
 +      node_t *np;
 +      if ((np = node_list)) {
 +              node_list = node_list->next;
 +              np->next = (node_t*)NULL;
 +      }
 +      return(np);
 +}
 +
 +/*
 + * eval_unary()
 + */
 +static node_t *
 +eval_unary(node_t *curnp, int flags)
 +{
 +      node_t *n0, *n1;
 +
 +      n0 = curnp;
 +
 +      /* Peek ahead and make sure there is a next node.
 +       * Also check to see if the next node requires
 +       * a recursive call to do_eval(). If it does, we'll
 +       * let the do_eval() call take care of pulling it
 +       * off the list.
 +       */
 +      if (!node_list) {
 +              set_eval_error(E_SYNTAX_ERROR);
 +              error_token = n0->tok_ptr;
 +              free_nodes(n0);
 +              return((node_t*)NULL);
 +      }
 +      if (n0->operator == CAST) {
 +              if (node_list->operator == CLOSE_PAREN) {
 +
 +                      /* Free the CLOSE_PAREN and return
 +                       */
 +                      free_node(next_node());
 +                      return(n0);
 +              }
 +              if (!(node_list->node_type == NUMBER) &&
 +                              !(node_list->node_type == VADDR) &&
 +                              !((node_list->operator == ADDRESS) ||
 +                              (node_list->operator == CAST) ||
 +                              (node_list->operator == UNARY_MINUS) ||
 +                              (node_list->operator == UNARY_PLUS) ||
 +                              (node_list->operator == INDIRECTION) ||
 +                              (node_list->operator == OPEN_PAREN))) {
 +                      set_eval_error(E_SYNTAX_ERROR);
 +                      error_token = node_list->tok_ptr;
 +                      free_nodes(n0);
 +                      return((node_t*)NULL);
 +              }
 +      }
 +      if ((n0->operator == INDIRECTION) ||
 +                      (n0->operator == ADDRESS) ||
 +                      (n0->operator == OPEN_PAREN) ||
 +                      is_unary(node_list->operator)) {
 +              n1 = do_eval(flags);
 +              if (eval_error) {
 +                      free_nodes(n0);
 +                      free_nodes(n1);
 +                      return((node_t*)NULL);
 +              }
 +      } else {
 +              n1 = next_node();
 +      }
 +
 +      if (n1->operator == OPEN_PAREN) {
 +              /* Get the value contained within the parentheses.
 +               * If there was an error, just return.
 +               */
 +              free_node(n1);
 +              n1 = do_eval(flags);
 +              if (eval_error) {
 +                      free_nodes(n1);
 +                      free_nodes(n0);
 +                      return((node_t*)NULL);
 +              }
 +      }
 +
 +      n0->right = n1;
 +      if (replace_unary(n0, flags) == -1) {
 +              if (!eval_error) {
 +                      set_eval_error(E_SYNTAX_ERROR);
 +                      error_token = n0->tok_ptr;
 +              }
 +              free_nodes(n0);
 +              return((node_t*)NULL);
 +      }
 +      return(n0);
 +}
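 +
 +/* A sketch of the recursion above: for "*(&x)", eval_unary() sees
 + * INDIRECTION, recurses into do_eval() to reduce "(&x)" to a single
 + * node, links it as the right child, and replace_unary() collapses
 + * the pair into one value node.
 + */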
 +
 +/*
 + * do_eval() -- Reduces an equation to a single value.
 + *
 + *   Any parentheses (nested ones included) within the equation are
 + *   evaluated first via recursive calls to do_eval().
 + */
 +static node_t *
 +do_eval(int flags)
 +{
 +      node_t *root = (node_t*)NULL, *curnp, *n0, *n1;
 +
 +      /* Loop through the list of nodes until we run out of nodes
 +       * or we hit a CLOSE_PAREN. If we hit an OPEN_PAREN, make a
 +       * recursive call to do_eval().
 +       */
 +      curnp = next_node();
 +      while (curnp) {
 +              n0 = n1 = (node_t *)NULL;
 +
 +              if (curnp->operator == OPEN_PAREN) {
 +                      /* Get the value contained within the parentheses.
 +                       * If there was an error, just return.
 +                       */
 +                      free_node(curnp);
 +                      n0 = do_eval(flags);
 +                      if (eval_error) {
 +                              free_nodes(n0);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +
 +              } else if (curnp->operator == SIZEOF) {
 +                      /* Call the get_sizeof() function (which will
 +                       * pull the next node off the list), then free
 +                       * the SIZEOF node.
 +                       */
 +                      n0 = get_sizeof();
 +                      if (eval_error) {
 +                              if (!error_token) {
 +                                      error_token = curnp->tok_ptr;
 +                              }
 +                              free_node(curnp);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      free_node(curnp);
 +                      curnp = (node_t *)NULL;
 +              } else if (is_unary(curnp->operator)) {
 +                      n0 = eval_unary(curnp, flags);
 +              } else {
 +                      n0 = curnp;
 +                      curnp = (node_t *)NULL;
 +              }
 +              if (eval_error) {
 +                      free_nodes(n0);
 +                      free_nodes(root);
 +                      return((node_t *)NULL);
 +              }
 +
 +              /* n0 should now contain a non-operator node. Check to see if
 +               * there is a next token. If there isn't, just add the last
 +               * rchild and return.
 +               */
 +              if (!node_list) {
 +                      if (root) {
 +                              add_rchild(root, n0);
 +                      } else {
 +                              root = n0;
 +                      }
 +                      replace(root, flags);
 +                      if (eval_error) {
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      return(root);
 +              }
 +
 +              /* Make sure the next token is an operator.
 +               */
 +              if (!node_list->operator) {
 +                      free_nodes(root);
 +                      free_node(n0);
 +                      set_eval_error(E_SYNTAX_ERROR);
 +                      error_token = node_list->tok_ptr;
 +                      return((node_t *)NULL);
 +              } else if ((node_list->operator == CLOSE_PAREN) ||
 +                      (node_list->operator == CLOSE_SQUARE_BRACKET)) {
 +
 +                      if (root) {
 +                              add_rchild(root, n0);
 +                      } else {
 +                              root = n0;
 +                      }
 +
 +                      /* Reduce the resulting tree to a single value
 +                       */
 +                      replace(root, flags);
 +                      if (eval_error) {
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +
 +                      /* Step over the CLOSE_PAREN or CLOSE_SQUARE_BRACKET
 +                       * and then return.
 +                       */
 +                      free_node(next_node());
 +                      return(root);
 +              } else if (node_list->operator == OPEN_SQUARE_BRACKET) {
 +next_dimension1:
 +                      /* skip over the OPEN_SQUARE_BRACKET token
 +                       */
 +                      free_node(next_node());
 +
 +                      /* Get the value contained within the brackets. This
 +                       * value must represent an array index (value or
 +                       * equation).
 +                       */
 +                      n1 = do_eval(0);
 +                      if (eval_error) {
 +                              free_nodes(root);
 +                              free_node(n0);
 +                              free_node(n1);
 +                              return((node_t *)NULL);
 +                      }
 +
 +                      /* Convert the array (or pointer type) to an
 +                       * element type using the index value obtained
 +                       * above. Make sure that n0 contains some sort
 +                       * of type definition first, however.
 +                       */
 +                      if (n0->node_type != TYPE_DEF) {
 +                              set_eval_error(E_BAD_TYPE);
 +                              error_token = n0->tok_ptr;
 +                              free_nodes(n0);
 +                              free_nodes(n1);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      array_to_element(n0, n1);
 +                      free_node(n1);
 +                      if (eval_error) {
 +                              free_nodes(root);
 +                              free_nodes(n0);
 +                              return((node_t *)NULL);
 +                      }
 +
 +                      /* If there aren't any more nodes, just
 +                       * return.
 +                       */
 +                      if (!node_list) {
 +                              return(n0);
 +                      }
 +                      if (node_list->operator == OPEN_SQUARE_BRACKET) {
 +                              goto next_dimension1;
 +                      }
 +              } else if (!is_binary(node_list->operator)) {
 +                      set_eval_error(E_BAD_OPERATOR);
 +                      error_token = node_list->tok_ptr;
 +                      free_nodes(root);
 +                      free_nodes(n0);
 +                      return((node_t *)NULL);
 +              }
 +
 +              /* Now get the operator node
 +               */
 +              if (!(n1 = next_node())) {
 +                      set_eval_error(E_SYNTAX_ERROR);
 +                      error_token = n0->tok_ptr;
 +                      free_nodes(n0);
 +                      free_nodes(root);
 +                      return((node_t *)NULL);
 +              }
 +
 +              /* Check to see if this binary operator is RIGHT_ARROW or DOT.
 +               * If it is, we need to reduce it to a single value node now.
 +               */
 +              while ((n1->operator == RIGHT_ARROW) || (n1->operator == DOT)) {
 +
 +                      /* The next node must contain the name of the
 +                       * struct|union member.
 +                       */
 +                      if (!node_list || (node_list->node_type != MEMBER)) {
 +                              set_eval_error(E_BAD_MEMBER);
 +                              error_token = n1->tok_ptr;
 +                              free_nodes(n0);
 +                              free_nodes(n1);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      n1->left = n0;
 +
 +                      /* Now get the next node and link it as the
 +                       * right child.
 +                       */
 +                      if (!(n0 = next_node())) {
 +                              set_eval_error(E_SYNTAX_ERROR);
 +                              error_token = n1->tok_ptr;
 +                              free_nodes(n1);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      n1->right = n0;
 +                      if (!(n0 = replace(n1, flags))) {
 +                              if (!(eval_error)) {
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = n1->tok_ptr;
 +                              }
 +                              free_nodes(n1);
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      n1 = (node_t *)NULL;
 +
 +                      /* Check to see if there is a next node. If there
 +                       * is, check to see if it is the operator CLOSE_PAREN.
 +                       * If it is, then return (skipping over the
 +                       * CLOSE_PAREN first).
 +                       */
 +                      if (node_list && ((node_list->operator == CLOSE_PAREN)
 +                                              || (node_list->operator ==
 +                                              CLOSE_SQUARE_BRACKET))) {
 +                              if (root) {
 +                                      add_rchild(root, n0);
 +                              } else {
 +                                      root = n0;
 +                              }
 +
 +                              /* Reduce the resulting tree to a single
 +                               * value
 +                               */
 +                              replace(root, flags);
 +                              if (eval_error) {
 +                                      free_nodes(root);
 +                                      return((node_t *)NULL);
 +                              }
 +
 +                              /* Advance the token pointer past the
 +                               * CLOSE_PAREN and then return.
 +                               */
 +                              free_node(next_node());
 +                              return(root);
 +                      }
 +
 +                      /* Check to see if the next node is an
 +                       * OPEN_SQUARE_BRACKET. If it is, then we have to
 +                       * reduce the contents of the square brackets to
 +                       * an index array.
 +                       */
 +                      if (node_list && (node_list->operator
 +                                              == OPEN_SQUARE_BRACKET)) {
 +
 +                              /* Advance the token pointer and call
 +                               * do_eval() again.
 +                               */
 +                              free_node(next_node());
 +next_dimension2:
 +                              n1 = do_eval(0);
 +                              if (eval_error) {
 +                                      free_node(n0);
 +                                      free_node(n1);
 +                                      free_nodes(root);
 +                                      return((node_t *)NULL);
 +                              }
 +
 +                              /* Convert the array (or pointer type) to
 +                               * an element type using the index value
 +                               * obtained above. Make sure that n0
 +                               * contains some sort of type definition
 +                               * first, however.
 +                               */
 +                              if (n0->node_type != TYPE_DEF) {
 +                                      set_eval_error(E_BAD_TYPE);
 +                                      error_token = n0->tok_ptr;
 +                                      free_nodes(n0);
 +                                      free_nodes(n1);
 +                                      free_nodes(root);
 +                                      return((node_t *)NULL);
 +                              }
 +                              array_to_element(n0, n1);
 +                              free_node(n1);
 +                              if (eval_error) {
 +                                      free_nodes(n0);
 +                                      free_nodes(root);
 +                                      return((node_t *)NULL);
 +                              }
 +                      }
 +
 +                      /* Now get the next operator node (if there is one).
 +                       */
 +                      if (!node_list) {
 +                              if (root) {
 +                                      add_rchild(root, n0);
 +                              } else {
 +                                      root = n0;
 +                              }
 +                              return(root);
 +                      }
 +                      n1 = next_node();
 +                      if (n1->operator == OPEN_SQUARE_BRACKET) {
 +                              goto next_dimension2;
 +                      }
 +              }
 +
 +              if (n1 && ((n1->operator == CLOSE_PAREN) ||
 +                              (n1->operator == CLOSE_SQUARE_BRACKET))) {
 +                      free_node(n1);
 +                      if (root) {
 +                              add_rchild(root, n0);
 +                      } else {
 +                              root = n0;
 +                      }
 +                      replace(root, flags);
 +                      if (eval_error) {
 +                              free_nodes(root);
 +                              return((node_t *)NULL);
 +                      }
 +                      return(root);
 +              }
 +
 +              if (!root) {
 +                      root = n1;
 +                      n1->left = n0;
 +              } else if (precedence(root->operator)
 +                              >= precedence(n1->operator)) {
 +                      add_rchild(root, n0);
 +                      n1->left = root;
 +                      root = n1;
 +              } else {
 +                      if (!root->right) {
 +                              n1->left = n0;
 +                              root->right = n1;
 +                      } else {
 +                              add_rchild(root, n0);
 +                              n1->left = root->right;
 +                              root->right = n1;
 +                      }
 +              }
 +              curnp = next_node();
 +      } /* while(curnp) */
 +      return(root);
 +}
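 +
 +/* Tree-building sketch: for "a + b * c" the loop above first makes
 + * ADD the root with "a" as its left child; since MULTIPLY has higher
 + * precedence it becomes the right child of ADD, so the tree reduces
 + * as a + (b * c).
 + */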
 +
 +/*
 + * is_unary()
 + */
 +static int
 +is_unary(int op)
 +{
 +      switch (op) {
 +              case LOGICAL_NEGATION :
 +              case ADDRESS :
 +              case INDIRECTION :
 +              case UNARY_MINUS :
 +              case UNARY_PLUS :
 +              case ONES_COMPLEMENT :
 +              case CAST :
 +                      return(1);
 +
 +              default :
 +                      return(0);
 +      }
 +}
 +
 +
 +/*
 + * is_binary()
 + */
 +static int
 +is_binary(int op)
 +{
 +      switch (op) {
 +
 +              case BITWISE_OR :
 +              case BITWISE_EXCLUSIVE_OR :
 +              case BITWISE_AND :
 +              case RIGHT_SHIFT :
 +              case LEFT_SHIFT :
 +              case ADD :
 +              case SUBTRACT :
 +              case MULTIPLY :
 +              case DIVIDE :
 +              case MODULUS :
 +              case LOGICAL_OR :
 +              case LOGICAL_AND :
 +              case EQUAL :
 +              case NOT_EQUAL :
 +              case LESS_THAN :
 +              case GREATER_THAN :
 +              case LESS_THAN_OR_EQUAL :
 +              case GREATER_THAN_OR_EQUAL :
 +              case RIGHT_ARROW :
 +              case DOT :
 +                      return(1);
 +
 +              default :
 +                      return(0);
 +      }
 +}
 +
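 +/* The levels returned below mirror C operator precedence (13 binds
 + * tightest: "->" and "."; 1 is loosest). The range tests assume each
 + * operator group occupies a contiguous run of enum values.
 + */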
 +/*
 + * precedence()
 + */
 +static int
 +precedence(int a)
 +{
 +      if ((a >= CONDITIONAL) && (a <= CONDITIONAL_ELSE)) {
 +              return(1);
 +      } else if (a == LOGICAL_OR) {
 +              return(2);
 +      } else if (a == LOGICAL_AND) {
 +              return(3);
 +      } else if (a == BITWISE_OR) {
 +              return(4);
 +      } else if (a == BITWISE_EXCLUSIVE_OR) {
 +              return(5);
 +      } else if (a == BITWISE_AND) {
 +              return(6);
 +      } else if ((a >= EQUAL) && (a <= NOT_EQUAL)) {
 +              return(7);
 +      } else if ((a >= LESS_THAN) && (a <= GREATER_THAN_OR_EQUAL)) {
 +              return(8);
 +      } else if ((a >= RIGHT_SHIFT) && (a <= LEFT_SHIFT)) {
 +              return(9);
 +      } else if ((a >= ADD) && (a <= SUBTRACT)) {
 +              return(10);
 +      } else if ((a >= MULTIPLY) && (a <= MODULUS)) {
 +              return(11);
 +      } else if ((a >= LOGICAL_NEGATION) && (a <= SIZEOF)) {
 +              return(12);
 +      } else if ((a >= RIGHT_ARROW) && (a <= DOT)) {
 +              return(13);
 +      } else {
 +              return(0);
 +      }
 +}
 +
 +/*
 + * esc_char()
 + */
 +char
 +esc_char(char *str)
 +{
 +      long int val;
 +      unsigned long uval;
 +      char ch;
 +
 +      if (strlen(str) > 1) {
 +              uval = kl_strtoull(str, (char **)NULL, 8);
 +              val = uval;
 +              ch = (char)val;
 +      } else {
 +              ch = str[0];
 +      }
 +      switch (ch) {
 +              case 'a' :
 +                      return((char)7);
 +              case 'b' :
 +                      return((char)8);
 +              case 't' :
 +                      return((char)9);
 +              case 'n' :
 +                      return((char)10);
 +              case 'f' :
 +                      return((char)12);
 +              case 'r' :
 +                      return((char)13);
 +              case 'e' :
 +                      return((char)27);
 +              default:
 +                      return(ch);
 +      }
 +}
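 +
 +/* Examples: esc_char("n") returns '\n' (10); esc_char("101") is
 + * parsed as octal and returns 'A' (65).
 + */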
 +
 +/*
 + * make_node()
 + */
 +static node_t *
 +make_node(token_t *t, int flags)
 +{
 +      node_t *np;
 +
 +      set_eval_error(0);
 +      np = (node_t*)kl_alloc_block(sizeof(*np));
 +
 +      if (t->type == OPERATOR) {
 +
 +              /* Check to see if this token represents a typecast
 +               */
 +              if (t->operator == CAST) {
 +                      type_t *tp;
 +
 +                      if (!(np->type = get_type(t->string, flags))) {
 +                              set_eval_error(E_BAD_CAST);
 +                              error_token = t->ptr;
 +                              free_nodes(np);
 +                              return((node_t*)NULL);
 +                      }
 +
 +                      /* Determine if this is a pointer to a type
 +                       */
 +                      tp = np->type;
 +                      if (tp->flag == POINTER_FLAG) {
 +                              np->flags = POINTER_FLAG;
 +                              tp = tp->t_next;
 +                              while (tp->flag == POINTER_FLAG) {
 +                                      tp = tp->t_next;
 +                              }
 +                      }
 +                      switch(tp->flag) {
 +                              case KLTYPE_FLAG:
 +                                      np->flags |= KLTYPE_FLAG;
 +                                      break;
 +
 +                              default:
 +                                      free_nodes(np);
 +                                      set_eval_error(E_BAD_CAST);
 +                                      error_token = t->ptr;
 +                                      return((node_t*)NULL);
 +                      }
 +                      if (!t->next) {
 +                              if (flags & C_WHATIS) {
 +                                      np->node_type = TYPE_DEF;
 +                              } else {
 +                                      set_eval_error(E_BAD_CAST);
 +                                      error_token = t->ptr;
 +                                      return((node_t*)NULL);
 +                              }
 +                      } else {
 +                              np->node_type = OPERATOR;
 +                              np->operator = CAST;
 +                      }
 +              } else {
 +                      np->node_type = OPERATOR;
 +                      np->operator = t->operator;
 +              }
 +      } else if (t->type == MEMBER) {
 +              np->name = (char *)dup_block((void *)t->string, strlen(t->string)+1);
 +              np->node_type = MEMBER;
 +      } else if ((t->type == STRING) || (t->type == TYPE_DEF)) {
 +              syment_t *sp;
 +              dbg_sym_t *stp;
 +              dbg_type_t *sttp;
 +
 +              if ((sp = kl_lkup_symname(t->string))) {
 +                  if (!(flags & C_NOVARS)) {
 +                      int has_type = 0;
 +
 +                      /* The string is a symbol name. We'll treat it as
 +                       * a global kernel variable and, at a minimum, record
 +                       * the address of the symbol and the value it points
 +                       * to.
 +                       */
 +                      np->address = sp->s_addr;
 +                      np->flags |= ADDRESS_FLAG;
 +                      np->name = t->string;
 +                      t->string = (char*)NULL;
 +
 +                      /* Need to see if there is type information available
 +                       * for this variable. Since this mapping is not
 +                       * available yet, we will just attach a type struct
 +                       * for either uint32_t or uint64_t (depending on the
 +                       * size of a kernel pointer).  That will at least let
 +                       * us do something and will prevent the scenario where
 +                       * we have a type node without a pointer to a type
 +                       * struct.
 +                       */
 +                      np->node_type = TYPE_DEF;
 +                      np->flags |= KLTYPE_FLAG;
 +                      np->value = *((kaddr_t *)np->address);
 +                      /* Try to get the actual type info for the variable */
 +                      if ((stp = dbg_find_sym(sp->s_name, DBG_VAR,
 +                                              (uint64_t)0)) != NULL) {
 +                              if ((sttp = (dbg_type_t *)
 +                                      kl_find_typenum(stp->sym_typenum))
 +                                              != NULL) {
 +                                      /* kl_get_typestring(sttp); */
 +                                      has_type = 1;
 +                                      if (sttp->st_klt.kl_type ==
 +                                                      KLT_POINTER) {
 +                                              np->flags ^= KLTYPE_FLAG;
 +                                              np->flags |= POINTER_FLAG;
 +                                              np->type =
 +                                                get_type(sttp->st_typestr,
 +                                                              flags);
 +                                      } else {
 +                                              np->type =
 +                                               kl_alloc_block(sizeof(type_t));
 +                                              np->type->un.kltp =
 +                                                      &sttp->st_klt;
 +                                      }
 +                              }
 +                      }
 +                      /* No type info was found for the variable */
 +                      if (!has_type) {
 +                              if (ptrsz64) {
 +                                      np->type = get_type("uint64_t", flags);
 +                              } else {
 +                                      np->type = get_type("uint32_t", flags);
 +                              }
 +                      }
 +                  }
 +                  kl_free_block((void *)sp);
 +              } else if (flags & (C_WHATIS|C_SIZEOF)) {
 +
 +                      kltype_t *kltp;
 +
 +                      if ((kltp = kl_find_type(t->string, KLT_TYPES))) {
 +
 +                              np->node_type = TYPE_DEF;
 +                              np->flags = KLTYPE_FLAG;
 +                              np->type = (type_t*)
 +                                      kl_alloc_block(sizeof(type_t));
 +                              np->type->flag = KLTYPE_FLAG;
 +                              np->type->t_kltp = kltp;
 +                      } else {
 +                              if (get_value(t->string,
 +                                      (uint64_t *)&np->value)) {
 +                                      set_eval_error(E_BAD_VALUE);
 +                                      error_token = t->ptr;
 +                                      free_nodes(np);
 +                                      return((node_t*)NULL);
 +                              }
 +                              if (!strncmp(t->string, "0x", 2) ||
 +                                              !strncmp(t->string, "0X", 2)) {
 +                                      np->flags |= UNSIGNED_FLAG;
 +                              }
 +                              np->node_type = NUMBER;
 +                      }
 +                      np->tok_ptr = t->ptr;
 +                      return(np);
 +              } else {
 +                      if (get_value(t->string, (uint64_t *)&np->value)) {
 +                              set_eval_error(E_BAD_VALUE);
 +                              error_token = t->ptr;
 +                              free_nodes(np);
 +                              return((node_t*)NULL);
 +                      }
 +                      if (np->value > 0xffffffff) {
 +                              np->byte_size = 8;
 +                      } else {
 +                              np->byte_size = 4;
 +                      }
 +                      if (!strncmp(t->string, "0x", 2) ||
 +                                      !strncmp(t->string, "0X", 2)) {
 +                              np->flags |= UNSIGNED_FLAG;
 +                      }
 +                      np->node_type = NUMBER;
 +              }
 +      } else if (t->type == CHARACTER) {
 +              char *cp;
 +
 +              /* Step over the single quote
 +               */
 +              cp = (t->ptr + 1);
 +              if (*cp == '\\') {
 +                      int i = 0;
 +                      char str[16];
 +
 +                      /* Step over the back slash
 +                       */
 +                      cp++;
 +                      while (*cp != '\'') {
 +                              str[i++] = *cp++;
 +                      }
 +                      str[i] = 0;
 +                      np->value = esc_char(str);
 +              } else {
 +                      np->value = *cp;
 +              }
 +              np->type = get_type("char", flags);
 +              np->node_type = TYPE_DEF;
 +              np->flags |= KLTYPE_FLAG;
 +      } else if (t->type == TEXT) {
 +              np->node_type = TEXT;
 +              np->name = t->string;
 +              /* So the block doesn't get freed twice */
 +              t->string = (char*)NULL;
 +      } else {
 +              set_eval_error(E_SYNTAX_ERROR);
 +              error_token = t->ptr;
 +              return((node_t*)NULL);
 +      }
 +      np->tok_ptr = t->ptr;
 +      return(np);
 +}
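 +
 +/* For example, a STRING token "0x10" that is not a symbol or type
 + * name becomes a NUMBER node above (in the non-whatis path) with
 + * value 16, byte_size 4, and UNSIGNED_FLAG set.
 + */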
 +
 +/*
 + * add_node()
 + */
 +static int
 +add_node(node_t *root, node_t *new_node)
 +{
 +      node_t *n = root;
 +
 +      /* Find the bottom-most, right-most node
 +       */
 +      while (n->right) {
 +              n = n->right;
 +      }
 +
 +      /* If the node we found is a leaf node, return an error (we will
 +       * have to insert the node instead).
 +       */
 +      if (n->node_type == NUMBER) {
 +              return(-1);
 +      } else {
 +              n->right = new_node;
 +      }
 +      return(0);
 +}
 +
 +/*
 + * add_rchild()
 + */
 +static int
 +add_rchild(node_t *root, node_t *new_node)
 +{
 +      if (add_node(root, new_node) == -1) {
 +              return(-1);
 +      }
 +      return(0);
 +}
 +
 +/*
 + * free_type()
 + */
 +static void
 +free_type(type_t *head)
 +{
 +      type_t *t0, *t1;
 +
 +      t0 = head;
 +      while(t0) {
 +              if (t0->flag == POINTER_FLAG) {
 +                      t1 = t0->t_next;
 +                      kl_free_block((void *)t0);
 +                      t0 = t1;
 +              } else {
 +                      if (t0->flag != KLTYPE_FLAG) {
 +                              kl_free_block((void *)t0->t_kltp);
 +                      }
 +                      kl_free_block((void *)t0);
 +                      t0 = (type_t *)NULL;
 +              }
 +      }
 +      return;
 +}
 +
 +/*
 + * get_type() -- Convert a typecast string into a type.
 + *
 + *   Returns a pointer to a chain of type structs. Each asterisk
 + *   in the typecast adds a POINTER_FLAG link at the head of the
 + *   chain; the final link holds the type-specific record.
 + */
 +static type_t *
 +get_type(char *s, int flags)
 +{
 +      int len, type = 0;
 +      char *cp, typename[128];
 +      type_t *t, *head, *last;
 +      kltype_t *kltp;
 +
 +      head = last = (type_t *)NULL;
 +
 +      /* Get the type string
 +       */
 +      if (!strncmp(s, "struct", 6)) {
 +              if ((cp = strpbrk(s + 7, " \t*"))) {
 +                      len = cp - (s + 7);
 +              } else {
 +                      len = strlen(s + 7);
 +              }
 +              memcpy(typename, s + 7, len);
 +      } else if (!strncmp(s, "union", 5)) {
 +              if ((cp = strpbrk(s + 6, " \t*"))) {
 +                      len = cp - (s + 6);
 +              } else {
 +                      len = strlen(s + 6);
 +              }
 +              memcpy(typename, s + 6, len);
 +      } else {
 +              if ((cp = strpbrk(s, "*)"))) {
 +                      len = cp - s;
 +              } else {
 +                      len = strlen(s);
 +              }
 +              memcpy(typename, s, len);
 +      }
 +
 +      /* Strip off any trailing spaces
 +       */
 +      while (len && ((typename[len - 1] == ' ') ||
 +                      (typename[len - 1] == '\t'))) {
 +              len--;
 +      }
 +      typename[len] = 0;
 +
 +      if (!(kltp = kl_find_type(typename, KLT_TYPES))) {
 +              return ((type_t *)NULL);
 +      }
 +      type = KLTYPE_FLAG;
 +
 +      /* check to see if this cast is a pointer to a type, a pointer
 +       * to a pointer to a type, etc.
 +       */
 +      cp = s;
 +      while ((cp = strpbrk(cp, "*"))) {
 +              t = (type_t *)kl_alloc_block(sizeof(type_t));
 +              t->flag = POINTER_FLAG;
 +              if (last) {
 +                      last->t_next = t;
 +                      last = t;
 +              } else {
 +                      head = last = t;
 +              }
 +              cp++;
 +      }
 +
 +      /* Allocate a type block that will point to the type specific
 +       * record.
 +       */
 +      t = (type_t *)kl_alloc_block(sizeof(type_t));
 +      t->flag = type;
 +
 +      switch (t->flag) {
 +
 +              case KLTYPE_FLAG:
 +                      t->t_kltp = kltp;
 +                      break;
 +
 +              default:
 +                      free_type(head);
 +                      return((type_t*)NULL);
 +      }
 +      if (last) {
 +              last->t_next = t;
 +      } else {
 +              head = t;
 +      }
 +      return(head);
 +}
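 +
 +/* For example (a sketch), get_type("struct page **", flags) returns a
 + * three-link chain: POINTER_FLAG -> POINTER_FLAG -> KLTYPE_FLAG,
 + * where the last link holds the kltype record found for "page".
 + */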
 +
 +/*
 + * free_node()
 + */
 +static void
 +free_node(node_t *np)
 +{
 +      /* If there is nothing to free, just return.
 +       */
 +      if (!np) {
 +              return;
 +      }
 +      if (np->name) {
 +              kl_free_block((void *)np->name);
 +      }
 +      free_type(np->type);
 +      kl_free_block((void *)np);
 +}
 +
 +/*
 + * free_nodes()
 + */
 +void
 +free_nodes(node_t *np)
 +{
 +      node_t *q;
 +
 +      /* If there is nothing to free, just return.
 +       */
 +      if (!np) {
 +              return;
 +      }
 +      if ((q = np->left)) {
 +              free_nodes(q);
 +      }
 +      if ((q = np->right)) {
 +              free_nodes(q);
 +      }
 +      if (np->name) {
 +              kl_free_block((void *)np->name);
 +      }
 +      free_type(np->type);
 +      kl_free_block((void *)np);
 +}
 +
 +/*
 + * free_nodelist()
 + */
 +static void
 +free_nodelist(node_t *np)
 +{
 +      node_t *nnp;
 +
 +      while(np) {
 +              nnp = np->next;
 +              free_node(np);
 +              np = nnp;
 +      }
 +}
 +
 +extern int alloc_debug;
 +
 +/*
 + * free_eval_memory()
 + */
 +void
 +free_eval_memory(void)
 +{
 +      free_nodelist(node_list);
 +      node_list = (node_t*)NULL;
 +}
 +
 +/*
 + * get_sizeof()
 + */
 +static node_t *
 +get_sizeof()
 +{
 +      node_t *curnp, *n0 = NULL;
 +
 +      if (!(curnp = next_node())) {
 +              set_eval_error(E_SYNTAX_ERROR);
 +              return((node_t*)NULL);
 +      }
 +
 +      /* The next token should be a CAST or an open paren.
 +       * If it's something else, then return an error.
 +       */
 +      if (curnp->operator == OPEN_PAREN) {
 +              free_nodes(curnp);
 +              n0 = do_eval(C_SIZEOF);
 +              if (eval_error) {
 +                      /* do_eval() returns NULL on error */
 +                      if (n0) {
 +                              error_token = n0->tok_ptr;
 +                      }
 +                      free_nodes(n0);
 +                      return((node_t*)NULL);
 +              }
 +      } else if (curnp->operator == CAST) {
 +              n0 = curnp;
 +      } else {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = curnp->tok_ptr;
 +              free_nodes(curnp);
 +              return((node_t*)NULL);
 +      }
 +
 +      if (!n0->type) {
 +              set_eval_error(E_NOTYPE);
 +              error_token = n0->tok_ptr;
 +              free_nodes(n0);
 +              return((node_t*)NULL);
 +      }
 +
 +      if (n0->type->flag & POINTER_FLAG) {
 +              n0->value = sizeof(void *);
 +      } else if (n0->type->flag & KLTYPE_FLAG) {
 +              kltype_t *kltp;
 +
 +              kltp = kl_realtype(n0->type->t_kltp, 0);
 +
 +              if (kltp->kl_bit_size) {
 +                      n0->value = kltp->kl_bit_size / 8;
 +                      if (kltp->kl_bit_size % 8) {
 +                              n0->value += 1;
 +                      }
 +              } else {
 +                      n0->value = kltp->kl_size;
 +              }
 +      } else {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n0->tok_ptr;
 +              free_nodes(n0);
 +              return((node_t*)NULL);
 +      }
 +      n0->node_type = NUMBER;
 +      n0->flags = 0;
 +      n0->operator = 0;
 +      n0->byte_size = 0;
 +      n0->address = 0;
 +      if (n0->type) {
 +              free_type(n0->type);
 +              n0->type = 0;
 +      }
 +      return(n0);
 +}
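 +
 +/* Note: kl_bit_size is rounded up to whole bytes above, so a type
 + * recorded as 12 bits reports a size of 2.
 + */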
 +
 +/*
 + * apply_unary()
 + */
 +static int
 +apply_unary(node_t *n, uint64_t *value)
 +{
 +      if (!n || !n->right) {
 +              return(-1);
 +      }
 +
 +      switch (n->operator) {
 +
 +              case UNARY_MINUS :
 +                      *value = (0 - n->right->value);
 +                      break;
 +
 +              case UNARY_PLUS :
 +                      *value = (n->right->value);
 +                      break;
 +
 +              case ONES_COMPLEMENT :
 +                      *value = ~(n->right->value);
 +                      break;
 +
 +              case LOGICAL_NEGATION :
 +                      if (n->right->value) {
 +                              *value = 0;
 +                      } else {
 +                              *value = 1;
 +                      }
 +                      logical_flag++;
 +                      break;
 +
 +              default :
 +                      break;
 +      }
 +      return(0);
 +}
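 +
 +/* Note: the arithmetic above is done in uint64_t, so UNARY_MINUS on
 + * 1 stores 0xffffffffffffffff; signedness is left to the caller.
 + */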
 +
 +/*
 + * pointer_math()
 + */
 +static int
 +pointer_math(node_t *np, uint64_t *value, int type, int flags)
 +{
 +      int size;
 +      uint64_t lvalue, rvalue;
 +      type_t *tp = NULL, *tp1;
 +
 +      if (type < 0) {
 +              if (np->left->flags & POINTER_FLAG) {
 +
 +                      /* Since we only allow pointer math,
 +                       * anything other than a pointer causes
 +                       * failure.
 +                       */
 +                      tp = (type_t*)np->left->type;
 +                      if (tp->flag != POINTER_FLAG) {
 +                              set_eval_error(E_SYNTAX_ERROR);
 +                              error_token = np->left->tok_ptr;
 +                              return(-1);
 +                      }
 +
 +                      tp = tp->t_next;
 +
 +                      switch (tp->flag) {
 +
 +                              case POINTER_FLAG :
 +                                      size = sizeof(void *);
 +                                      break;
 +
 +                              case KLTYPE_FLAG : {
 +                                      /* Get the size of the real type,
 +                                       * not just the size of a pointer
 +                                       * If there isn't any type info,
 +                                       * then just set size equal to the
 +                                       * size of a pointer.
 +                                       */
 +                                      kltype_t *kltp, *rkltp;
 +
 +                                      kltp = tp->t_kltp;
 +                                      rkltp = kl_realtype(kltp, 0);
 +                                      if (!(size = rkltp->kl_size)) {
 +                                              if (kltp != rkltp) {
 +                                                      size = kltp->kl_size;
 +                                              } else {
 +                                                      size = sizeof(void *);
 +                                              }
 +                                      }
 +                                      break;
 +                              }
 +
 +                              default :
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = np->left->tok_ptr;
 +                                      return(-1);
 +                      }
 +                      lvalue = np->left->value;
 +              } else {
 +                      size = sizeof(void *);
 +                      lvalue = np->left->address;
 +              }
 +              switch (np->operator) {
 +                      case ADD :
 +                              *value = lvalue + (np->right->value * size);
 +                              break;
 +
 +                      case SUBTRACT :
 +                              *value = lvalue - (np->right->value * size);
 +                              break;
 +
 +                      default :
 +                              set_eval_error(E_BAD_OPERATOR);
 +                              error_token = np->tok_ptr;
 +                              return(-1);
 +              }
 +      } else if (type > 0) {
 +              if (np->right->flags & POINTER_FLAG) {
 +
 +                      /* Since we only allow pointer math,
 +                       * anything other than a pointer causes
 +                       * failure.
 +                       */
 +                      tp = (type_t*)np->right->type;
 +                      if (tp->flag != POINTER_FLAG) {
 +                              set_eval_error(E_SYNTAX_ERROR);
 +                              error_token = np->right->tok_ptr;
 +                              return(-1);
 +                      }
 +
 +                      tp = tp->t_next;
 +
 +                      switch (tp->flag) {
 +
 +                              case POINTER_FLAG :
 +                                      size = sizeof(void *);
 +                                      break;
 +
 +                              case KLTYPE_FLAG :
 +                                      size = tp->t_kltp->kl_size;
 +                                      break;
 +
 +                              default :
 +                                      set_eval_error(E_SYNTAX_ERROR);
 +                                      error_token = np->right->tok_ptr;
 +                                      return(-1);
 +                      }
 +                      rvalue = np->right->value;
 +              } else {
 +                      size = sizeof(void *);
 +                      rvalue = np->right->address;
 +              }
 +              switch (np->operator) {
 +                      case ADD :
 +                              *value = rvalue + (np->left->value * size);
 +                              break;
 +
 +                      case SUBTRACT :
 +                              *value = rvalue - (np->left->value * size);
 +                              break;
 +
 +                      default :
 +                              set_eval_error(E_BAD_OPERATOR);
 +                              error_token = np->tok_ptr;
 +                              return(-1);
 +              }
 +      } else {
 +              return(-1);
 +      }
 +      tp1 = (type_t *)kl_alloc_block(sizeof(type_t));
 +      tp1->flag = POINTER_FLAG;
 +      np->type = tp1;
 +      /* Mirror the operand's chain of pointer records, flagging
 +       * each new record as a pointer.
 +       */
 +      while (tp && (tp->flag == POINTER_FLAG)) {
 +              tp1->t_next = (type_t *)kl_alloc_block(sizeof(type_t));
 +              tp1 = tp1->t_next;
 +              tp1->flag = POINTER_FLAG;
 +              tp = tp->t_next;
 +      }
 +      if (tp) {
 +              tp1->t_next = (type_t *)kl_alloc_block(sizeof(type_t));
 +              tp1 = tp1->t_next;
 +              tp1->flag = KLTYPE_FLAG;
 +              tp1->t_kltp = tp->t_kltp;
 +              if (type < 0) {
 +                      if (np->left->flags & POINTER_FLAG) {
 +                              np->flags |= POINTER_FLAG;
 +                      } else {
 +                              np->flags |= VADDR;
 +                      }
 +              } else {
 +                      if (np->right->flags & POINTER_FLAG) {
 +                              np->flags |= POINTER_FLAG;
 +                      } else {
 +                              np->flags |= VADDR;
 +                      }
 +              }
 +      }
 +      return(0);
 +}
 +
 +/*
 + * check_unsigned()
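 + *
 + * Return 1 if the node is already flagged unsigned or if its type
 + * resolves to a base type with unsigned encoding (in which case
 + * UNSIGNED_FLAG is set on the node); return 0 otherwise.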
 + */
 +int
 +check_unsigned(node_t *np)
 +{
 +      kltype_t *kltp, *rkltp;
 +
 +      if (np->flags & UNSIGNED_FLAG) {
 +              return(1);
 +      }
 +      if (!np->type) {
 +              return(0);
 +      }
 +      if (np->type->flag == POINTER_FLAG) {
 +              return(0);
 +      }
 +      kltp = np->type->t_kltp;
 +      if ((rkltp = kl_realtype(kltp, 0))) {
 +              if (rkltp->kl_encoding == ENC_UNSIGNED) {
 +                      np->flags |= UNSIGNED_FLAG;
 +                      return(1);
 +              }
 +      }
 +      return(0);
 +}
 +
 +/*
 + * apply()
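 + *
 + * Apply a binary operator to the values of the left and right
 + * children. Operands that carry type information are first either
 + * converted to numeric values or handed off to pointer_math().
 + * The operation is performed signed unless one of the operands
 + * is unsigned. Returns 0 on success, -1 on error.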
 + */
 +static int
 +apply(node_t *np, uint64_t *value, int flags)
 +{
 +      int ltype, rtype, do_signed = 0;
 +
 +      /* There must be two operands
 +       */
 +      if (!np->right || !np->left) {
 +              set_eval_error(E_MISSING_OPERAND);
 +              error_token = np->tok_ptr;
 +              return(-1);
 +      }
 +
 +      if (np->right->node_type == OPERATOR) {
 +              replace(np->right, flags);
 +              if (eval_error) {
 +                      return(-1);
 +              }
 +      }
 +
 +      ltype = np->left->node_type;
 +      rtype = np->right->node_type;
 +      if ((ltype == TYPE_DEF) || (ltype == VADDR)) {
 +              if ((rtype == TYPE_DEF) || (rtype == VADDR)) {
 +                      set_eval_error(E_NO_VALUE);
 +                      error_token = np->tok_ptr;
 +                      return(-1);
 +              }
 +              if (check_unsigned(np->left)) {
 +                      np->flags |= UNSIGNED_FLAG;
 +              } else {
 +                      do_signed++;
 +              }
 +              if (!type_to_number(np->left)) {
 +                      return(pointer_math(np, value, -1, flags));
 +              }
 +              np->byte_size = np->left->byte_size;
 +      } else if ((rtype == TYPE_DEF) || (rtype == VADDR)) {
 +              if ((ltype == TYPE_DEF) || (ltype == VADDR)) {
 +                      error_token = np->tok_ptr;
 +                      set_eval_error(E_NO_VALUE);
 +                      return(-1);
 +              }
 +              if (check_unsigned(np->right)) {
 +                      np->flags |= UNSIGNED_FLAG;
 +              } else {
 +                      do_signed++;
 +              }
 +              if (!type_to_number(np->right)) {
 +                      return(pointer_math(np, value, 1, flags));
 +              }
 +              np->byte_size = np->right->byte_size;
 +      } else if ((np->left->flags & UNSIGNED_FLAG) ||
 +                      (np->right->flags & UNSIGNED_FLAG)) {
 +              np->flags |= UNSIGNED_FLAG;
 +      } else {
 +              do_signed++;
 +      }
 +
 +      if (do_signed) {
 +              switch (np->operator) {
 +                      case ADD :
 +                              *value = (int64_t)np->left->value +
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case SUBTRACT :
 +                              *value = (int64_t)np->left->value -
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case MULTIPLY :
 +                              *value = (int64_t)np->left->value *
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case DIVIDE :
 +                              if ((int64_t)np->right->value == 0) {
 +                                      set_eval_error(E_DIVIDE_BY_ZERO);
 +                                      error_token = np->right->tok_ptr;
 +                                      return(-1);
 +                              }
 +                              *value = (int64_t)np->left->value /
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case BITWISE_OR :
 +                              *value = (int64_t)np->left->value |
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case BITWISE_AND :
 +                              *value = (int64_t)np->left->value &
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case MODULUS :
 +                              if ((int64_t)np->right->value == 0) {
 +                                      set_eval_error(E_DIVIDE_BY_ZERO);
 +                                      error_token = np->right->tok_ptr;
 +                                      return(-1);
 +                              }
 +                              *value = (int64_t)np->left->value %
 +                                      (int64_t)np->right->value;
 +                              break;
 +
 +                      case RIGHT_SHIFT :
 +                              *value =
 +                                      (int64_t)np->left->value >>
 +                                              (int64_t)np->right->value;
 +                              break;
 +
 +                      case LEFT_SHIFT :
 +                              *value =
 +                                      (int64_t)np->left->value <<
 +                                              (int64_t)np->right->value;
 +                              break;
 +
 +                      case LOGICAL_OR :
 +                              if ((int64_t)np->left->value ||
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LOGICAL_AND :
 +                              if ((int64_t)np->left->value &&
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case EQUAL :
 +                              if ((int64_t)np->left->value ==
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case NOT_EQUAL :
 +                              if ((int64_t)np->left->value !=
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LESS_THAN :
 +                              if ((int64_t)np->left->value <
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case GREATER_THAN :
 +                              if ((int64_t)np->left->value >
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LESS_THAN_OR_EQUAL :
 +                              if ((int64_t)np->left->value <=
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case GREATER_THAN_OR_EQUAL :
 +                              if ((int64_t)np->left->value >=
 +                                              (int64_t)np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      default :
 +                              break;
 +              }
 +      } else {
 +              switch (np->operator) {
 +                      case ADD :
 +                              *value = np->left->value + np->right->value;
 +                              break;
 +
 +                      case SUBTRACT :
 +                              *value = np->left->value - np->right->value;
 +                              break;
 +
 +                      case MULTIPLY :
 +                              *value = np->left->value * np->right->value;
 +                              break;
 +
 +                      case DIVIDE :
 +                              if (np->right->value == 0) {
 +                                      set_eval_error(E_DIVIDE_BY_ZERO);
 +                                      error_token = np->right->tok_ptr;
 +                                      return(-1);
 +                              }
 +                              *value = np->left->value / np->right->value;
 +                              break;
 +
 +                      case BITWISE_OR :
 +                              *value = np->left->value | np->right->value;
 +                              break;
 +
 +                      case BITWISE_AND :
 +                              *value = np->left->value & np->right->value;
 +                              break;
 +
 +                      case MODULUS :
 +                              if (np->right->value == 0) {
 +                                      set_eval_error(E_DIVIDE_BY_ZERO);
 +                                      error_token = np->right->tok_ptr;
 +                                      return(-1);
 +                              }
 +                              *value = np->left->value % np->right->value;
 +                              break;
 +
 +                      case RIGHT_SHIFT :
 +                              *value = np->left->value >> np->right->value;
 +                              break;
 +
 +                      case LEFT_SHIFT :
 +                              *value = np->left->value << np->right->value;
 +                              break;
 +
 +                      case LOGICAL_OR :
 +                              if (np->left->value || np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LOGICAL_AND :
 +                              if (np->left->value && np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case EQUAL :
 +                              if (np->left->value == np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case NOT_EQUAL :
 +                              if (np->left->value != np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LESS_THAN :
 +                              if (np->left->value < np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case GREATER_THAN :
 +                              if (np->left->value > np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case LESS_THAN_OR_EQUAL :
 +                              if (np->left->value <= np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      case GREATER_THAN_OR_EQUAL :
 +                              if (np->left->value >= np->right->value) {
 +                                      *value = 1;
 +                              } else {
 +                                      *value = 0;
 +                              }
 +                              logical_flag++;
 +                              break;
 +
 +                      default :
 +                              break;
 +              }
 +      }
 +      return(0);
 +}
 +
 +/*
 + * member_to_type()
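 + *
 + * Build a type_t chain for a struct/union member, adding one
 + * POINTER_FLAG record per level of indirection. A member that is
 + * a pointer to no real type is treated as a void pointer.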
 + */
 +static type_t *
 +member_to_type(kltype_t *kltp, int flags)
 +{
 +      kltype_t *rkltp;
 +      type_t *tp, *head = (type_t *)NULL, *last = (type_t *)NULL;
 +
 +      /* Make sure this is a member
 +       */
 +      if (kltp->kl_type != KLT_MEMBER) {
 +              return((type_t *)NULL);
 +      }
 +
 +      rkltp = kltp->kl_realtype;
 +      while (rkltp && rkltp->kl_type == KLT_POINTER) {
 +              tp = (type_t *)kl_alloc_block(sizeof(type_t));
 +              tp->flag = POINTER_FLAG;
 +              if (last) {
 +                      last->t_next = tp;
 +                      last = tp;
 +              } else {
 +                      head = last = tp;
 +              }
 +              rkltp = rkltp->kl_realtype;
 +      }
 +
 +      /* If we step past all the pointer records and don't point
 +       * at anything, this must be a void pointer. Setup a VOID
 +       * type struct so that we can maintain a pointer to some
 +       * type info.
 +       */
 +      if (!rkltp) {
 +              tp = (type_t *)kl_alloc_block(sizeof(type_t));
 +              tp->flag = VOID_FLAG;
 +              tp->t_kltp = kltp;
 +              if (last) {
 +                      last->t_next = tp;
 +                      last = tp;
 +              } else {
 +                      head = last = tp;
 +              }
 +              return(head);
 +      }
 +
 +      tp = (type_t *)kl_alloc_block(sizeof(type_t));
 +      tp->flag = KLTYPE_FLAG;
 +      tp->t_kltp = kltp;
 +      if (last) {
 +              last->t_next = tp;
 +      } else {
 +              head = tp;
 +      }
 +      return(head);
 +}
 +
 +/*
 + * replace() --
 + *
 + * Replace the tree with a node containing the numerical result of
 + * the equation. If pointer math is performed, the result will have
 + * the same type as the pointer.
 + */
 +static node_t *
 +replace(node_t *np, int flags)
 +{
 +      int offset;
 +      uint64_t value;
 +      node_t *q;
 +
 +      if (!np) {
 +              return((node_t *)NULL);
 +      }
 +
 +      if (np->node_type == OPERATOR) {
 +              if (!(q = np->left)) {
 +                      return((node_t *)NULL);
 +              }
 +              while (q) {
 +                      if (!replace(q, flags)) {
 +                              return((node_t *)NULL);
 +                      }
 +                      q = q->right;
 +              }
 +
 +              if ((np->operator == RIGHT_ARROW) || (np->operator == DOT)) {
 +                      kaddr_t addr = 0;
 +                      type_t *tp;
 +
 +                      if (!have_debug_file) {
 +                              kdb_printf("no debuginfo file\n");
 +                              return 0;
 +                      }
 +
 +                      /* The left node must point to a TYPE_DEF
 +                       */
 +                      if (np->left->node_type != TYPE_DEF) {
 +                              if (np->left->flags & NOTYPE_FLAG) {
 +                                      set_eval_error(E_NOTYPE);
 +                                      error_token = np->left->tok_ptr;
 +                              } else {
 +                                      set_eval_error(E_BAD_TYPE);
 +                                      error_token = np->left->tok_ptr;
 +                              }
 +                              return((node_t *)NULL);
 +                      }
 +
 +                      /* Get the type information.  Check to see if we
 +                       * have a pointer to a type. If we do, we need
 +                       * to strip off the pointer and get the type info.
 +                       */
 +                      if (np->left->type->flag == POINTER_FLAG) {
 +                              tp = np->left->type->t_next;
 +                              kl_free_block((void *)np->left->type);
 +                      } else {
 +                              tp = np->left->type;
 +                      }
 +
 +                      /* We need to zero out the left child's type pointer
 +                       * to prevent the type structs from being prematurely
 +                       * freed (upon success). We have to remember, however,
 +                       * to free the type information before we return.
 +                       */
 +                      np->left->type = (type_t*)NULL;
 +
 +                      /* tp should now point at a type_t struct that
 +                       * references a kltype_t struct. If it points
 +                       * to anything else, return failure.
 +                       *
 +                       */
 +                      if (tp->flag != KLTYPE_FLAG) {
 +                              set_eval_error(E_BAD_TYPE);
 +                              error_token = np->left->tok_ptr;
 +                              free_type(tp);
 +                              return((node_t *)NULL);
 +                      }
 +
 +                      switch (tp->flag) {
 +                              case KLTYPE_FLAG: {
 +                                      /* Make sure that the type referenced
 +                                       * is a struct, union, or pointer to
 +                                       * a struct or union. If it isn't one
 +                                       * of these, then return failure.
 +                                       */
 +                                      kltype_t *kltp, *kltmp;
 +
 +                                      kltp = kl_realtype(tp->t_kltp, 0);
 +                                      if ((kltp->kl_type != KLT_STRUCT) &&
 +                                              (kltp->kl_type != KLT_UNION)) {
 +                                              error_token =
 +                                                      np->left->tok_ptr;
 +                                              set_eval_error(E_BAD_TYPE);
 +                                              free_type(tp);
 +                                              return((node_t *)NULL);
 +                                      }
 +
 +                                      /* Get type information for member.
 +                                       * If member is a pointer to a type,
 +                                       * get the pointer address and load
 +                                       * it into value. In any event, load
 +                                       * the struct/union address plus the
 +                                       * offset of the member.
 +                                       */
 +                                      kltmp = kl_get_member(kltp,
 +                                                      np->right->name);
 +                                      if (!kltmp) {
 +                                              set_eval_error(E_BAD_MEMBER);
 +                                              error_token =
 +                                                      np->right->tok_ptr;
 +                                              free_type(tp);
 +                                              return((node_t *)NULL);
 +                                      }
 +
 +                                      /* We can't just use the offset value
 +                                       * for the member. That's because it
 +                                       * may be from an anonymous struct or
 +                                       * union within another struct
 +                                       * definition.
 +                                       */
 +                                      offset = kl_get_member_offset(kltp,
 +                                              np->right->name);
 +                                      np->type = member_to_type(kltmp, flags);
 +                                      if (!np->type) {
 +                                              set_eval_error(E_BAD_MEMBER);
 +                                              error_token =
 +                                                      np->right->tok_ptr;
 +                                              free_type(tp);
 +                                              return((node_t *)NULL);
 +                                      }
 +
 +                                      /* Now free the struct type information
 +                                       */
 +                                      free_type(tp);
 +                                      np->node_type = TYPE_DEF;
 +                                      np->flags |= KLTYPE_FLAG;
 +                                      np->operator = 0;
 +                                      addr = 0;
 +                                      if (np->left->flags & POINTER_FLAG) {
 +                                              addr =  np->left->value +
 +                                                      offset;
 +                                      } else if (np->left->flags &
 +                                                      ADDRESS_FLAG) {
 +                                              addr =  np->left->address +
 +                                                      offset;
 +                                      }
 +                                      if (addr) {
 +                                              np->address = addr;
 +                                              np->flags |= ADDRESS_FLAG;
 +                                      }
 +
 +                                      if (np->type->flag == POINTER_FLAG) {
 +                                              np->flags |= POINTER_FLAG;
 +                                              np->value = *((kaddr_t *)addr);
 +                                      } else {
 +                                              np->value = addr;
 +                                      }
 +                                      break;
 +                              }
 +                      }
 +                      free_nodes(np->left);
 +                      free_nodes(np->right);
 +                      np->left = np->right = (node_t*)NULL;
 +                      return(np);
 +              } else {
 +                      if (!np->left || !np->right) {
 +                              set_eval_error(E_MISSING_OPERAND);
 +                              error_token = np->tok_ptr;
 +                              return((node_t *)NULL);
 +                      }
 +                      if (np->left->byte_size && np->right->byte_size) {
 +                              if (np->left->byte_size >
 +                                              np->right->byte_size) {
 +
 +                                      /* Left byte_size is greater than right
 +                                       */
 +                                      np->byte_size = np->left->byte_size;
 +                                      np->type = np->left->type;
 +                                      np->flags = np->left->flags;
 +                                      free_type(np->right->type);
 +                              } else if (np->left->byte_size <
 +                                              np->right->byte_size) {
 +
 +                                      /* Right byte_size is greater than left
 +                                       */
 +                                      np->byte_size = np->right->byte_size;
 +                                      np->type = np->right->type;
 +                                      np->flags = np->right->flags;
 +                                      free_type(np->left->type);
 +                              } else {
 +
 +                                      /* Left and right byte_size is equal
 +                                       */
 +                                      if (np->left->flags & UNSIGNED_FLAG) {
 +                                              np->byte_size =
 +                                                      np->left->byte_size;
 +                                              np->type = np->left->type;
 +                                              np->flags = np->left->flags;
 +                                              free_type(np->right->type);
 +                                      } else if (np->right->flags &
 +                                                      UNSIGNED_FLAG) {
 +                                              np->byte_size =
 +                                                      np->right->byte_size;
 +                                              np->type = np->right->type;
 +                                              np->flags = np->right->flags;
 +                                              free_type(np->left->type);
 +                                      } else {
 +                                              np->byte_size =
 +                                                      np->left->byte_size;
 +                                              np->type = np->left->type;
 +                                              np->flags = np->left->flags;
 +                                              free_type(np->right->type);
 +                                      }
 +                              }
 +                      } else if (np->left->byte_size) {
 +                              np->byte_size = np->left->byte_size;
 +                              np->type = np->left->type;
 +                              np->flags = np->left->flags;
 +                              free_type(np->right->type);
 +                      } else if (np->right->byte_size) {
 +                              np->byte_size = np->right->byte_size;
 +                              np->type = np->right->type;
 +                              np->flags = np->right->flags;
 +                      } else {
 +                              /* XXX - No byte sizes
 +                               */
 +                      }
 +
 +                      if (apply(np, &value, flags)) {
 +                              return((node_t *)NULL);
 +                      }
 +              }
 +              np->right->type = np->left->type = (type_t*)NULL;
 +
 +              /* Flesh out the rest of the node struct.
 +               */
 +              if (np->type) {
 +                      np->node_type = TYPE_DEF;
 +                      np->flags |= KLTYPE_FLAG;
 +              } else {
 +                      np->node_type = NUMBER;
 +                      np->flags &= ~(KLTYPE_FLAG);
 +              }
 +              np->operator = 0;
 +              np->value = value;
 +              kl_free_block((void *)np->left);
 +              kl_free_block((void *)np->right);
 +              np->left = np->right = (node_t*)NULL;
 +      }
 +      return(np);
 +}
 +
 +/*
 + * replace_cast()
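 + *
 + * Fold a CAST node into a TYPE_DEF node that carries the value of
 + * the operand being cast. Only casts to pointer types and base
 + * types are allowed.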
 + */
 +static int
 +replace_cast(node_t *n, int flags)
 +{
 +      type_t *t;
 +
 +      if (!n) {
 +              set_eval_error(E_SYNTAX_ERROR);
 +              return(-1);
 +      } else if (!n->right) {
 +              set_eval_error(E_SYNTAX_ERROR);
 +              error_token = n->tok_ptr;
 +              return(-1);
 +      }
 +      if (n->flags & POINTER_FLAG) {
 +              if (n->right->node_type == VADDR) {
 +                      if (n->right->flags & ADDRESS_FLAG) {
 +                              n->value = n->right->address;
 +                      } else {
 +                              set_eval_error(E_SYNTAX_ERROR);
 +                              error_token = n->right->tok_ptr;
 +                              return(-1);
 +                      }
 +
 +              } else {
 +                      n->value = n->right->value;
 +                      n->address = 0;
 +              }
 +      } else if (n->right->flags & ADDRESS_FLAG) {
 +              n->flags |= ADDRESS_FLAG;
 +              n->address = n->right->address;
 +              n->value = n->right->value;
 +      } else {
 +              kltype_t *kltp;
 +
 +              if (!(t = eval_type(n))) {
 +                      set_eval_error(E_BAD_TYPE);
 +                      error_token = n->tok_ptr;
 +                      return(-1);
 +              }
 +              if (t->t_kltp->kl_type != KLT_BASE) {
 +
 +                      kltp = kl_realtype(t->t_kltp, 0);
 +                      if (kltp->kl_type != KLT_BASE) {
 +                              set_eval_error(E_BAD_CAST);
 +                              error_token = n->tok_ptr;
 +                              return(-1);
 +                      }
 +              }
 +              n->value = n->right->value;
 +              n->type = t;
 +      }
 +      n->node_type = TYPE_DEF;
 +      n->operator = 0;
 +      free_node(n->right);
 +      n->right = (node_t *)NULL;
 +      return(0);
 +}
 +
 +/*
 + * replace_indirection()
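 + *
 + * Fold an INDIRECTION (unary '*') node by stripping one level of
 + * pointer type from the right child and loading the value found
 + * at the pointer address. A resulting pointer to char is flagged
 + * as a string.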
 + */
 +static int
 +replace_indirection(node_t *n, int flags)
 +{
 +      kaddr_t addr;
 +      type_t *t, *tp, *rtp;
 +
 +      /* Make sure there is a right child and that it is a TYPE_DEF.
 +       */
 +      if (!n->right) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->tok_ptr;
 +              return(-1);
 +      } else if (n->right->node_type != TYPE_DEF) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      /* Make sure the right node contains a pointer or address value.
 +       * Note that it's possible for the whatis command to generate
 +       * this case without any actual pointer/address value.
 +       */
 +      if (!(n->right->flags & (POINTER_FLAG|ADDRESS_FLAG))) {
 +              set_eval_error(E_BAD_POINTER);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      /* Get the pointer to the first type struct and make sure
 +       * it's a pointer.
 +       */
 +      if (!(tp = n->right->type) || (tp->flag != POINTER_FLAG)) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      /* Make sure we have a pointer to a type structure.
 +       */
 +      if (!(n->right->flags & KLTYPE_FLAG)) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      n->node_type = TYPE_DEF;
 +      n->flags = KLTYPE_FLAG;
 +      n->operator = 0;
 +
 +      if (!(t = tp->t_next)) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      if (!(rtp = eval_type(n->right))) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = n->right->tok_ptr;
 +              return(-1);
 +      }
 +
 +      /* Zero out the type field in the right child so
 +       * it won't accidentally be freed when the right child
 +       * is freed (upon success).
 +       */
 +      n->right->type = (type_t*)NULL;
 +
 +      n->type = t;
 +
 +      /* Free the pointer struct
 +       */
 +      kl_free_block((void *)tp);
 +
 +      /* Get the pointer address
 +       */
 +      addr = n->address = n->right->value;
 +      n->flags |= ADDRESS_FLAG;
 +
 +      if (rtp->t_kltp->kl_type == KLT_MEMBER) {
 +              /* If this is a member, we have to step over the KLT_MEMBER
 +               * struct and then make sure we have a KLT_POINTER struct.
 +               * If we do, we step over it too...otherwise return an
 +               * error.
 +               */
 +              if (rtp->t_kltp->kl_realtype->kl_type != KLT_POINTER) {
 +                      set_eval_error(E_BAD_TYPE);
 +                      error_token = n->right->tok_ptr;
 +                      return(-1);
 +              }
 +              rtp->t_kltp = rtp->t_kltp->kl_realtype;
 +      }
 +
 +      if (rtp->t_kltp->kl_type == KLT_POINTER) {
 +              /* Strip off the pointer type record so that
 +               * we pick up the actual type definition with
 +               * our indirection.
 +               */
 +              rtp->t_kltp = rtp->t_kltp->kl_realtype;
 +              if (rtp->t_kltp->kl_name &&
 +                              !strcmp(rtp->t_kltp->kl_name, "char")) {
 +                      n->flags |= STRING_FLAG;
 +              }
 +      }
 +
 +      /* If this is a pointer to a pointer, get the next
 +       * pointer value.
 +       */
 +      if (n->type->flag == POINTER_FLAG) {
 +              n->value = *((kaddr_t *)addr);
 +
 +              /* Set the appropriate node flag values
 +               */
 +              n->flags |= POINTER_FLAG;
 +              free_node(n->right);
 +              n->left = n->right = (node_t *)NULL;
 +              return(0);
 +      }
 +      /* Zero out the type field in the right child so it doesn't
 +       * accidentally get freed up when the right child is freed
 +       * (upon success).
 +       */
 +      n->right->type = (type_t*)NULL;
 +      free_node(n->right);
 +      n->left = n->right = (node_t *)NULL;
 +      return(0);
 +}
 +
 +/*
 + * replace_unary()
 + *
 + * Convert a unary operator node that contains a pointer to a value
 + * with a node containing the numerical result. Free the node that
 + * originally contained the value.
 + */
 +static int
 +replace_unary(node_t *n, int flags)
 +{
 +      uint64_t value;
 +
 +      if (!n->right) {
 +              set_eval_error(E_MISSING_OPERAND);
 +              error_token = n->tok_ptr;
 +              return(-1);
 +      }
 +      if (is_unary(n->right->operator)) {
 +              if (replace_unary(n->right, flags) == -1) {
 +                      return(-1);
 +              }
 +      }
 +      if (n->operator == CAST) {
 +              return(replace_cast(n, flags));
 +      } else if (n->operator == INDIRECTION) {
 +              return(replace_indirection(n, flags));
 +      } else if (n->operator == ADDRESS) {
 +              type_t *t;
 +
 +              if (n->right->node_type == TYPE_DEF) {
 +                      if (!(n->right->flags & ADDRESS_FLAG)) {
 +                              set_eval_error(E_NO_ADDRESS);
 +                              error_token = n->right->tok_ptr;
 +                              return(-1);
 +                      }
 +                      t = n->right->type;
 +              } else {
 +                      set_eval_error(E_BAD_TYPE);
 +                      error_token = n->right->tok_ptr;
 +                      return(-1);
 +              }
 +              n->type = (type_t*)kl_alloc_block(sizeof(type_t));
 +              n->type->flag = POINTER_FLAG;
 +              n->type->t_next = t;
 +              n->node_type = TYPE_DEF;
 +              n->operator = 0;
 +              n->value = n->right->address;
 +              n->flags = POINTER_FLAG;
 +              if (!(t = eval_type(n))) {
 +                      set_eval_error(E_BAD_TYPE);
 +                      error_token = n->tok_ptr;
 +                      return(-1);
 +              }
 +              n->flags |= t->flag;
 +              n->right->type = 0;
 +              free_nodes(n->right);
 +              n->left = n->right = (node_t *)NULL;
 +              return(0);
 +      } else if (apply_unary(n, &value) == -1) {
 +              return(-1);
 +      }
 +      free_nodes(n->right);
 +      n->node_type = NUMBER;
 +      n->operator = 0;
 +      n->left = n->right = (node_t *)NULL;
 +      memcpy(&n->value, &value, sizeof(uint64_t));
 +      return(0);
 +}
 +
 +/*
 + * pointer_to_element()
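 + *
 + * Index a pointer as if it were an array: compute the address of
 + * element n1->value from the pointer value in n0 and the size of
 + * the pointed-to type, and update n0 in place to describe that
 + * element.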
 + */
 +static void
 +pointer_to_element(node_t *n0, node_t *n1)
 +{
 +      int size;
 +      kltype_t *kltp, *rkltp;
 +      type_t *tp;
 +
 +      if (!(tp = n0->type)) {
 +              set_eval_error(E_BAD_INDEX);
 +              error_token = n0->tok_ptr;
 +              return;
 +      }
 +      if (tp->t_next->flag == POINTER_FLAG) {
 +              size = sizeof(void *);
 +      } else {
 +              kltp = tp->t_next->t_kltp;
 +              if (!(rkltp = kl_realtype(kltp, 0))) {
 +                      set_eval_error(E_BAD_INDEX);
 +                      error_token = n0->tok_ptr;
 +                      return;
 +              }
 +              size = rkltp->kl_size;
 +      }
 +
 +      /* Get the details on the array element
 +       */
 +      n0->flags |= ADDRESS_FLAG;
 +      n0->address = n0->value + (n1->value * size);
 +      n0->type = tp->t_next;
 +      if (n0->type->flag == POINTER_FLAG) {
 +              n0->flags |= POINTER_FLAG;
 +              n0->value = *((kaddr_t *)n0->address);
 +      } else {
 +              n0->flags &= (~POINTER_FLAG);
 +              n0->value = 0;
 +      }
 +      /* Free the old pointer record only now that we are done
 +       * referencing its t_next field.
 +       */
 +      kl_free_block((char *)tp);
 +}
 +
 +/*
 + * array_to_element()
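 + *
 + * Index an array: compute the address of element n1->value from
 + * the element type's size and update n0 in place to describe that
 + * element. Indexing of pointers is handed off to
 + * pointer_to_element().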
 + */
 +static void
 +array_to_element(node_t *n0, node_t *n1)
 +{
 +      kltype_t *kltp, *rkltp, *ip, *ep;
 +      type_t *tp, *troot = (type_t *)NULL;
 +
 +      if (!(tp = n0->type)) {
 +              set_eval_error(E_BAD_INDEX);
 +              error_token = n0->tok_ptr;
 +              return;
 +      }
 +
 +      /* If we are indexing a pointer, then make a call to the
 +       * pointer_to_element() and return.
 +       */
 +      if (tp->flag == POINTER_FLAG) {
 +              return(pointer_to_element(n0, n1));
 +      }
 +
 +      if (!(kltp = n0->type->t_kltp)) {
 +              set_eval_error(E_BAD_INDEX);
 +              error_token = n0->tok_ptr;
 +              return;
 +      }
 +      if (!(rkltp = kl_realtype(kltp, KLT_ARRAY))) {
 +              set_eval_error(E_BAD_INDEX);
 +              error_token = n0->tok_ptr;
 +              return;
 +      }
 +      ip = rkltp->kl_indextype;
 +      ep = rkltp->kl_elementtype;
 +      if (!ip || !ep) {
 +              set_eval_error(E_BAD_INDEX);
 +              error_token = n1->tok_ptr;
 +              return;
 +      }
 +      /* Get the details on the array element
 +       */
 +      n0->address = n0->address + (n1->value * ep->kl_size);
 +      if (ep->kl_type == KLT_POINTER) {
 +              n0->flags |= POINTER_FLAG;
 +              n0->value = *((kaddr_t *)n0->address);
 +      } else {
 +              n0->value = 0;
 +      }
 +      n0->flags |= ADDRESS_FLAG;
 +      kltp = ep;
 +      while (kltp->kl_type == KLT_POINTER) {
 +              if (troot) {
 +                      tp->t_next = (type_t*)kl_alloc_block(sizeof(type_t));
 +                      tp = tp->t_next;
 +              } else {
 +                      tp = (type_t*)kl_alloc_block(sizeof(type_t));
 +                      troot = tp;
 +              }
 +              tp->flag = POINTER_FLAG;
 +              kltp = kltp->kl_realtype;
 +      }
 +      if (troot) {
 +              tp->t_next = (type_t*)kl_alloc_block(sizeof(type_t));
 +              tp = tp->t_next;
 +              n0->type = troot;
 +      } else {
 +              tp = (type_t*)kl_alloc_block(sizeof(type_t));
 +              n0->type = tp;
 +      }
 +      tp->flag = KLTYPE_FLAG;
 +      tp->t_kltp = ep;
 +}
 +
 +/*
 + * number_to_size()
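 + *
 + * Return the byte size (4 or 8) needed to hold the value of a
 + * NUMBER node, treating a value whose upper 32 bits are all set
 + * as a possible sign-extended 32-bit negative. Returns 0 on error.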
 + */
 +int
 +number_to_size(node_t *np)
 +{
 +      int unsigned_flag = 0;
 +
 +      if (np->node_type != NUMBER) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = np->tok_ptr;
 +              return(0);
 +      }
 +      if (np->flags & UNSIGNED_FLAG) {
 +              unsigned_flag = 1;
 +      }
 +      if ((np->value >= 0) && (np->value <= 0xffffffff)) {
 +              return(4);
 +      } else if (((np->value >> 32) & 0xffffffff) == 0xffffffff) {
 +              if (unsigned_flag) {
 +                      return(8);
 +              } else if (sizeof(void *) == 4) {
 +                      return(4);
 +              } else {
 +                      return(8);
 +              }
 +      }
 +      return(8);
 +}
 +
 +/*
 + * number_to_type()
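 + *
 + * Map the value of a NUMBER node to the real type record of the
 + * best-fit fixed-width typedef (int32_t, uint32_t, int64_t or
 + * uint64_t). Returns NULL on error.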
 + */
 +kltype_t *
 +number_to_type(node_t *np)
 +{
 +      int unsigned_flag = 0;
 +      kltype_t *kltp, *rkltp = (kltype_t *)NULL;
 +
 +      if (np->node_type != NUMBER) {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = np->tok_ptr;
 +              return((kltype_t *)NULL);
 +      }
 +      if (np->flags & UNSIGNED_FLAG) {
 +              unsigned_flag = 1;
 +      }
 +      if ((np->value >= 0) && (np->value <= 0xffffffff)) {
 +              if (unsigned_flag) {
 +                      kltp = kl_find_type("uint32_t", KLT_TYPEDEF);
 +              } else {
 +                      kltp = kl_find_type("int32_t", KLT_TYPEDEF);
 +              }
 +      } else if (((np->value >> 32) & 0xffffffff) == 0xffffffff) {
 +              if (unsigned_flag) {
 +                      kltp = kl_find_type("uint64_t", KLT_TYPEDEF);
 +              } else if (sizeof(void *) == 4) {
 +                      kltp = kl_find_type("int32_t", KLT_TYPEDEF);
 +              } else {
 +                      kltp = kl_find_type("int64_t", KLT_TYPEDEF);
 +              }
 +      } else {
 +              if (unsigned_flag) {
 +                      kltp = kl_find_type("uint64_t", KLT_TYPEDEF);
 +              } else {
 +                      kltp = kl_find_type("int64_t", KLT_TYPEDEF);
 +              }
 +      }
 +      if (kltp) {
 +              if (!(rkltp = kl_realtype(kltp, 0))) {
 +                      rkltp = kltp;
 +              }
 +      } else {
 +              set_eval_error(E_BAD_TYPE);
 +              error_token = np->tok_ptr;
 +      }
 +      return(rkltp);
 +}
 +
 +/*
 + * type_to_number()
 + *
 + * Convert a base type to a numeric value. Return 1 on successful
 + * conversion, 0 if nothing was done.
 + */
 +static int
 +type_to_number(node_t *np)
 +{
 +      int byte_size, bit_offset, bit_size, encoding;
 +      uint64_t value, value1;
 +      kltype_t *kltp, *rkltp;
 +
 +      /* Sanity check...
 +       */
 +      if (np->node_type != TYPE_DEF) {
 +              set_eval_error(E_NOTYPE);
 +              error_token = np->tok_ptr;
 +              return(0);
 +      }
 +      if (!np->type) {
 +              set_eval_error(E_NOTYPE);
 +              error_token = np->tok_ptr;
 +              return(0);
 +      }
 +      if (np->type->flag == POINTER_FLAG) {
 +              return(0);
 +      }
 +
 +      /* Get the real type record and make sure that it is
 +       * for a base type.
 +       */
 +      kltp = np->type->t_kltp;
 +      rkltp = kl_realtype(kltp, 0);
 +      if (rkltp->kl_type != KLT_BASE) {
 +              set_eval_error(E_NOTYPE);
 +              error_token = np->tok_ptr;
 +              return(0);
 +      }
 +
 +      byte_size = rkltp->kl_size;
 +      bit_offset = rkltp->kl_bit_offset;
 +      if (!(bit_size = rkltp->kl_bit_size)) {
 +              bit_size = byte_size * 8;
 +      }
 +      encoding = rkltp->kl_encoding;
 +      if (np->flags & ADDRESS_FLAG) {
 +              /* FIXME: untested */
 +              if (invalid_address(np->address, byte_size)) {
 +                      kdb_printf("ILLEGAL ADDRESS (%lx)",
 +                                              (uaddr_t)np->address);
 +                      return (0);
 +              }
 +              kl_get_block(np->address, byte_size,(void *)&value1,(void *)0);
 +      } else {
 +              value1 = np->value;
 +      }
 +      value = kl_get_bit_value(&value1, byte_size, bit_size, bit_offset);
 +      switch (byte_size) {
 +
 +              case 1 :
 +                      if (encoding == ENC_UNSIGNED) {
 +                              np->value = (unsigned char)value;
 +                              np->flags |= UNSIGNED_FLAG;
 +                      } else if (encoding == ENC_SIGNED) {
 +                              np->value = (signed char)value;
 +                      } else {
 +                              np->value = (char)value;
 +                      }
 +                      break;
 +
 +              case 2 :
 +                      if (encoding == ENC_UNSIGNED) {
 +                              np->value = (uint16_t)value;
 +                              np->flags |= UNSIGNED_FLAG;
 +                      } else {
 +                              np->value = (int16_t)value;
 +                      }
 +                      break;
 +
 +              case 4 :
 +                      if (encoding == ENC_UNSIGNED) {
 +                              np->value = (uint32_t)value;
 +                              np->flags |= UNSIGNED_FLAG;
 +                      } else {
 +                              np->value = (int32_t)value;
 +                      }
 +                      break;
 +
 +              case 8 :
 +                      if (encoding == ENC_UNSIGNED) {
 +                              np->value = (uint64_t)value;
 +                              np->flags |= UNSIGNED_FLAG;
 +                      } else {
 +                              np->value = (int64_t)value;
 +                      }
 +                      break;
 +
 +              default :
 +                      set_eval_error(E_BAD_TYPE);
 +                      error_token = np->tok_ptr;
 +                      return(0);
 +      }
 +      np->byte_size = byte_size;
 +      np->node_type = NUMBER;
 +      return(1);
 +}
 +
 +/*
 + * eval_type()
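 + *
 + * Walk past any pointer records in the node's type chain and
 + * return the underlying KLTYPE_FLAG record, or NULL if there
 + * isn't one.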
 + */
 +static type_t *
 +eval_type(node_t *n)
 +{
 +      type_t *t;
 +
 +      if (!(t = n->type)) {
 +              return((type_t*)NULL);
 +      }
 +      while (t->flag == POINTER_FLAG) {
 +              t = t->t_next;
 +
 +              /* If, for some reason, there is no type pointer (this
 +               * shouldn't happen, but...), we have to make sure that we
 +               * don't try to reference a NULL pointer and get a SEGV.
 +               * Return an error if 't' is NULL.
 +               */
 +              if (!t) {
 +                      return((type_t*)NULL);
 +              }
 +      }
 +      if (t->flag == KLTYPE_FLAG) {
 +              return (t);
 +      }
 +      return((type_t*)NULL);
 +}
 +
 +/*
 + * expand_variables()
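 + *
 + * Placeholder for variable expansion in expression strings.
 + * Currently a stub that always returns NULL (no expansion).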
 + */
 +static char *
 +expand_variables(char *exp, int flags)
 +{
 +      return((char *)NULL);
 +}
 +
 +/*
 + * eval()
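 + *
 + * Evaluate an expression string: expand variables, tokenize the
 + * string, build a node list, evaluate it into a tree, and collapse
 + * the tree into a single result node, which is returned. Returns
 + * NULL and sets eval_error on failure.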
 + */
 +node_t *
 +eval(char **exp, int flags)
 +{
 +      token_t *tok;
 +      node_t *n, *root;
 +      char *e, *s;
 +
 +      eval_error = 0;
 +      logical_flag = 0;
 +
 +      /* Make sure there is an expression to evaluate
 +       */
 +      if (!(*exp)) {
 +              return ((node_t*)NULL);
 +      }
 +
 +      /* Expand any variables that are in the expression string. If
 +       * a new string is allocated by the expand_variables() function,
 +       * we need to make sure the original expression string gets
 +       * freed. In any event, point s at the current expression string
 +       * so that it gets freed up when we are done.
 +       */
 +      if ((e = expand_variables(*exp, 0))) {
 +              kl_free_block((void *)*exp);
 +              *exp = e;
 +      } else if (eval_error) {
 +              eval_error |= E_BAD_EVAR;
 +              error_token = *exp;
 +      }
 +      s = *exp;
 +      tok = get_token_list(s);
 +      if (eval_error) {
 +              return((node_t*)NULL);
 +      }
 +
 +      /* Get the node_list and evaluate the expression.
 +       */
 +      node_list = get_node_list(tok, flags);
 +      if (eval_error) {
 +              free_nodelist(node_list);
 +              node_list = (node_t*)NULL;
 +              free_tokens(tok);
 +              return((node_t*)NULL);
 +      }
 +      if (!(n = do_eval(flags))) {
 +              if (!eval_error) {
 +                      set_eval_error(E_SYNTAX_ERROR);
 +                      error_token = s + strlen(s) - 1;
 +              }
 +              free_nodes(n);
 +              free_tokens(tok);
 +              return((node_t*)NULL);
 +      }
 +
 +      if (!(root = replace(n, flags))) {
 +              if (eval_error) {
 +                      free_nodes(n);
 +                      free_tokens(tok);
 +                      return((node_t*)NULL);
 +              }
 +              root = n;
 +      }
 +
 +      /* Check to see if the result should
 +       * be interpreted as 'true' or 'false'
 +       */
 +      if (logical_flag && ((root->value == 0) || (root->value == 1))) {
 +              root->flags |= BOOLIAN_FLAG;
 +      }
 +      free_tokens(tok);
 +      return(root);
 +}
 +
 +/*
 + * print_number()
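 + *
 + * Print the value of a NUMBER node in hex, binary, or decimal
 + * (signed or unsigned), depending on the command flags.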
 + */
 +void
 +print_number(node_t *np, int flags)
 +{
 +      int size;
 +      unsigned long long value;
 +
 +      if ((size = number_to_size(np)) && (size != sizeof(uint64_t))) {
 +              value = np->value & (((uint64_t)1 << (uint64_t)(size*8))-1);
 +      } else {
 +              value = np->value;
 +      }
 +      if (flags & C_HEX) {
 +              kdb_printf("0x%llx", value);
 +      } else if (flags & C_BINARY) {
 +              kdb_printf("0b");
 +              kl_binary_print(value);
 +      } else {
 +              if (np->flags & UNSIGNED_FLAG) {
 +                      kdb_printf("%llu", value);
 +              } else {
 +                      kdb_printf("%lld", np->value);
 +              }
 +      }
 +}
 +
 +/*
 + * print_string()
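 + *
 + * Print up to 'size' bytes (255 if size is zero) from the given
 + * kernel address as a quoted string, appending "..." if no
 + * terminating NUL was found.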
 + */
 +void
 +print_string(kaddr_t addr, int size)
 +{
 +      int i;
 +      char *str;
 +
 +      if (!size) {
 +              size = 255;
 +      }
 +      /* FIXME: untested */
 +      if (invalid_address(addr, size)) {
 +              klib_error = KLE_INVALID_PADDR;
 +              return;
 +      }
 +      str = (char*)kl_alloc_block(size);
 +      kl_get_block(addr, size, (void *)str, (void *)0);
 +      kdb_printf("\"%s", str);
 +      for (i = 0; i < size; i++) {
 +              if (!str[i]) {
 +                      break;
 +              }
 +      }
 +      if (KL_ERROR || (i == size)) {
 +              kdb_printf("...");
 +      }
 +      kdb_printf("\"");
 +      kl_free_block(str);
 +}
 +
 +/*
 + * kl_print_error()
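 + *
 + * Print a message describing the error code currently stored in
 + * klib_error.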
 + */
 +void
 +kl_print_error(void)
 +{
 +      int ecode;
 +
 +      ecode = klib_error & 0xffffffff;
 +      switch(ecode) {
 +
 +              /** General klib error codes
 +               **/
 +              case KLE_NO_MEMORY:
 +                      kdb_printf("insufficient memory");
 +                      break;
 +              case KLE_OPEN_ERROR:
 +                      kdb_printf("unable to open file");
 +                      break;
 +              case KLE_ZERO_BLOCK:
 +                      kdb_printf("tried to allocate a zero-sized block");
 +                      break;
 +              case KLE_INVALID_VALUE:
 +                      kdb_printf("invalid input value");
 +                      break;
 +              case KLE_NULL_BUFF:
 +                      kdb_printf( "NULL buffer pointer");
 +                      break;
 +              case KLE_ZERO_SIZE:
 +                      kdb_printf("zero sized block requested");
 +                      break;
 +              case KLE_ACTIVE:
 +                      kdb_printf("operation not supported on a live system");
 +                      break;
 +              case KLE_UNSUPPORTED_ARCH:
 +                      kdb_printf("unsupported architecture");
 +                      break;
 +              case KLE_MISC_ERROR:
 +                      kdb_printf("KLIB error");
 +                      break;
 +              case KLE_NOT_SUPPORTED:
 +                      kdb_printf("operation not supported");
 +                      break;
 +              case KLE_UNKNOWN_ERROR:
 +                      kdb_printf("unknown error");
 +                      break;
 +
 +              /** memory error codes
 +               **/
 +              case KLE_BAD_MAP_FILE:
 +                      kdb_printf("bad map file");
 +                      break;
 +              case KLE_BAD_DUMP:
 +                      kdb_printf("bad dump file");
 +                      break;
 +              case KLE_BAD_DUMPTYPE:
 +                      kdb_printf("bad dumptype");
 +                      break;
 +              case KLE_INVALID_LSEEK:
 +                      kdb_printf("lseek error");
 +                      break;
 +              case KLE_INVALID_READ:
 +                      kdb_printf("not found in dump file");
 +                      break;
 +              case KLE_BAD_KERNINFO:
 +                      kdb_printf("bad kerninfo struct");
 +                      break;
 +              case KLE_INVALID_PADDR:
 +                      kdb_printf("invalid physical address");
 +                      break;
 +              case KLE_INVALID_VADDR:
 +                      kdb_printf("invalid virtual address");
 +                      break;
 +              case KLE_INVALID_VADDR_ALIGN:
 +                      kdb_printf("invalid vaddr alignment");
 +                      break;
 +              case KLE_INVALID_MAPPING:
 +                      kdb_printf("invalid address mapping");
 +                      break;
 +              case KLE_PAGE_NOT_PRESENT:
 +                      kdb_printf("page not present");
 +                      break;
 +              case KLE_BAD_ELF_FILE:
 +                      kdb_printf("bad elf file");
 +                      break;
 +              case KLE_ARCHIVE_FILE:
 +                      kdb_printf("archive file");
 +                      break;
 +              case KLE_MAP_FILE_PRESENT:
 +                      kdb_printf("map file present");
 +                      break;
 +              case KLE_BAD_MAP_FILENAME:
 +                      kdb_printf("bad map filename");
 +                      break;
 +              case KLE_BAD_DUMP_FILENAME:
 +                      kdb_printf("bad dump filename");
 +                      break;
 +              case KLE_BAD_NAMELIST_FILE:
 +                      kdb_printf("bad namelist file");
 +                      break;
 +              case KLE_BAD_NAMELIST_FILENAME:
 +                      kdb_printf("bad namelist filename");
 +                      break;
 +
 +              /** symbol error codes
 +               **/
 +              case KLE_NO_SYMTAB:
 +                      kdb_printf("no symtab");
 +                      break;
 +              case KLE_NO_SYMBOLS:
 +                      kdb_printf("no symbol information");
 +                      break;
 +              case KLE_NO_MODULE_LIST:
 +                      kdb_printf("kernel without module support");
 +                      break;
 +
 +              /** kernel data error codes
 +               **/
 +              case KLE_INVALID_KERNELSTACK:
 +                      kdb_printf("invalid kernel stack");
 +                      break;
 +              case KLE_INVALID_STRUCT_SIZE:
 +                      kdb_printf("invalid struct size");
 +                      break;
 +              case KLE_BEFORE_RAM_OFFSET:
 +                      kdb_printf("physical address proceeds start of RAM");
 +                      break;
 +              case KLE_AFTER_MAXPFN:
 +                      kdb_printf("PFN exceeds maximum PFN");
 +                      break;
 +              case KLE_AFTER_PHYSMEM:
 +                      kdb_printf("address exceeds physical memory");
 +                      break;
 +              case KLE_AFTER_MAXMEM:
 +                      kdb_printf("address exceeds maximum physical address");
 +                      break;
 +              case KLE_PHYSMEM_NOT_INSTALLED:
 +                      kdb_printf("physical memory not installed");
 +                      break;
 +              case KLE_NO_DEFTASK:
 +                      kdb_printf("default task not set");
 +                      break;
 +              case KLE_PID_NOT_FOUND:
 +                      kdb_printf("PID not found");
 +                      break;
 +              case KLE_DEFTASK_NOT_ON_CPU:
 +                      kdb_printf("default task not running on a cpu");
 +                      break;
 +              case KLE_NO_CURCPU:
 +                      kdb_printf("current cpu could not be determined");
 +                      break;
 +
 +              case KLE_KERNEL_MAGIC_MISMATCH:
 +                      kdb_printf("kernel_magic mismatch "
 +                              "of map and memory image");
 +                      break;
 +
 +              case KLE_INVALID_DUMP_HEADER:
 +                      kdb_printf("invalid dump header in dump");
 +                      break;
 +
 +              case KLE_DUMP_INDEX_CREATION:
 +                      kdb_printf("cannot create index file");
 +                      break;
 +
 +              case KLE_DUMP_HEADER_ONLY:
 +                      kdb_printf("dump only has a dump header");
 +                      break;
 +
 +              case KLE_NO_END_SYMBOL:
 +                      kdb_printf("no _end symbol in kernel");
 +                      break;
 +
 +              case KLE_NO_CPU:
 +                      kdb_printf("CPU not installed");
 +                      break;
 +
 +              default:
 +                      break;
 +      }
 +      kdb_printf("\n");
 +}
 +
 +/*
 + * kl_print_string()
 + *
 + *   print out a string, translating all embedded control characters
 + *   (e.g., '\n' for newline, '\t' for tab, etc.)
 + */
 +void
 +kl_print_string(char *s)
 +{
 +      char *sp, *cp;
 +
 +      kl_reset_error();
 +
 +      if (!(sp = s)) {
 +              klib_error = KLE_BAD_STRING;
 +              return;
 +      }
 +      /* FIXME: untested */
 +      if (invalid_address((kaddr_t)sp, 1)) {
 +              klib_error = KLE_INVALID_PADDR;
 +              return;
 +      }
 +
 +      while (sp) {
 +              if ((cp = strchr(sp, '\\'))) {
 +                      switch (*(cp + 1)) {
 +
 +                              case 'n' :
 +                                      *cp++ = '\n';
 +                                      *cp++ = 0;
 +                                      break;
 +
 +                              case 't' :
 +                                      *cp++ = '\t';
 +                                      *cp++ = 0;
 +                                      break;
 +
 +                              default :
 +                                      if (*(cp + 1) == 0) {
 +                                              klib_error = KLE_BAD_STRING;
 +                                              return;
 +                                      }
 +                                      /* Change the '\' character to a zero
 +                                       * and then print the string (the rest
 +                                       * of the string will be picked
 +                                       * up on the next pass).
 +                                       */
 +                                      *cp++ = 0;
 +                                      break;
 +                      }
 +                      kdb_printf("%s", sp);
 +                      sp = cp;
 +              } else {
 +                      kdb_printf("%s", sp);
 +                      sp = 0;
 +              }
 +      }
 +}
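 +
 +/* Example (illustrative): given the input "one\ntwo" -- a literal
 + * backslash followed by 'n' -- the loop above rewrites the pair in
 + * place to a real newline, printing "one" and "two" on separate
 + * lines; an unrecognized escape such as "\q" simply drops the
 + * backslash.
 + */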
 +
 +/*
 + * print_eval_results()
 + */
 +int
 +print_eval_results(node_t *np, int flags)
 +{
 +      int size, i, count, ptr_cnt = 0;
 +      kaddr_t addr;
 +      char *typestr;
 +      kltype_t *kltp, *rkltp = NULL, *nkltp;
 +      type_t *tp;
 +
 +      /* Print the results
 +       */
 +      switch (np->node_type) {
 +
 +              case NUMBER:
 +                      print_number(np, flags);
 +                      break;
 +
 +              case TYPE_DEF: {
 +
 +                      /* First, determine the number of levels of indirection
 +                       * by determining the number of pointer type records.
 +                       */
 +                      if ((tp = np->type)) {
 +                              while (tp && (tp->flag == POINTER_FLAG)) {
 +                                      ptr_cnt++;
 +                                      tp = tp->t_next;
 +                              }
 +                              if (tp) {
 +                                      rkltp = tp->t_kltp;
 +                              }
 +                      }
 +                      if (!rkltp) {
 +                              kdb_printf("Type information not available\n");
 +                              return(1);
 +                      }
 +
 +                      if (ptr_cnt) {
 +
 +                              /* If this is a member, we need to get the
 +                               * first type record.
 +                               */
 +                              if (rkltp->kl_type == KLT_MEMBER) {
 +                                      /* We need to get down to the first
 +                                       * real type record...
 +                                       */
 +                                      rkltp = rkltp->kl_realtype;
 +                              }
 +
 +                              /* step over any KLT_POINTER type records.
 +                               */
 +                              while (rkltp && rkltp->kl_type == KLT_POINTER) {
 +                                      rkltp = rkltp->kl_realtype;
 +                              }
 +                              if (!rkltp) {
 +                                      kdb_printf("Bad type information\n");
 +                                      return(1);
 +                              }
 +                              typestr = rkltp->kl_typestr;
 +                              if (rkltp->kl_type == KLT_FUNCTION) {
 +                                      kdb_printf("%s(", typestr);
 +                              } else if (rkltp->kl_type == KLT_ARRAY) {
 +                                      kdb_printf("(%s(", typestr);
 +                              } else {
 +                                      kdb_printf("(%s", typestr);
 +                              }
 +                              for (i = 0; i < ptr_cnt; i++) {
 +                                      kdb_printf("*");
 +                              }
 +                              if (rkltp->kl_type == KLT_FUNCTION) {
 +                                      kdb_printf(")(");
 +                              } else if (rkltp->kl_type == KLT_ARRAY) {
 +                                      kdb_printf(")");
 +
 +                                      nkltp = rkltp;
 +                                      while (nkltp->kl_type == KLT_ARRAY) {
 +                                              count = nkltp->kl_high_bounds -
 +                                                nkltp->kl_low_bounds + 1;
 +                                              kdb_printf("[%d]", count);
 +                                              nkltp = nkltp->kl_elementtype;
 +                                      }
 +                              }
 +                              kdb_printf(") ");
 +                              kdb_printf("0x%llx", np->value);
 +
 +                              if (ptr_cnt > 1) {
 +                                      break;
 +                              }
 +
 +                              if ((rkltp->kl_type == KLT_BASE) &&
 +                                      rkltp->kl_encoding == ENC_CHAR) {
 +                                      kdb_printf(" = ");
 +                                      print_string(np->value, 0);
 +                              }
 +                              break;
 +                      }
 +                      if (np->flags & KLTYPE_FLAG) {
 +                              void * ptr;
 +
 +                              /* Get the type information. It's possible
 +                               * that the type is a member, in which case
 +                               * the size may only be in this record
 +                               * (which would be the case if this is an
 +                               * array). We must check the original type
 +                               * record first, and try the realtype record
 +                               * if the value is zero.
 +                               */
 +                              kltp = np->type->t_kltp;
 +
 +                              if (kltp->kl_type == KLT_MEMBER) {
 +                                      rkltp = kltp->kl_realtype;
 +                              } else {
 +                                      rkltp = kltp;
 +                              }
 +
 +                              /* Check to see if this is a typedef. If
 +                               * it is, then it might be a typedef for
 +                               * a pointer type. Don't walk to the last
 +                               * type record.
 +                               */
 +                              while (rkltp->kl_type == KLT_TYPEDEF) {
 +                                      rkltp = rkltp->kl_realtype;
 +                              }
 +
 +                              if (rkltp->kl_type == KLT_POINTER) {
 +                                      kdb_printf("0x%llx", np->value);
 +                                      break;
 +                              }
 +                              if ((rkltp->kl_name != 0) &&
 +                                      !(strcmp(rkltp->kl_name, "void"))) {
 +                                      /* we are about to dereference
 +                                       * a void pointer.
 +                                       */
 +                                      kdb_printf("Can't dereference a "
 +                                              "generic pointer.\n");
 +                                      return(1);
 +                              }
 +
 +                              size = rkltp->kl_size;
 +                              if (size <= 0) {
 +                                      size = kltp->kl_size;
 +                              }
 +
 +                              if (rkltp->kl_type == KLT_ARRAY) {
 +                                      size = rkltp->kl_high_bounds -
 +                                              rkltp->kl_low_bounds + 1;
 +                                      if (rkltp->kl_elementtype == NULL) {
 +                                              kdb_printf("Incomplete array"
 +                                                      " type.\n");
 +                                              return(1);
 +                                      }
 +                                      if (rkltp->kl_elementtype->kl_type ==
 +                                                      KLT_POINTER) {
 +                                              size *= sizeof(void *);
 +                                      } else {
 +                                              size *= rkltp->kl_elementtype->kl_size;
 +                                      }
 +                              }
 +                              if (size) {
 +                                      ptr = kl_alloc_block(size);
 +                              } else {
 +                                      ptr = NULL;
 +                              }
 +                              if ((rkltp->kl_type == KLT_BASE) &&
 +                                              !(np->flags & ADDRESS_FLAG)) {
 +                                      switch (size) {
 +                                              case 1:
 +                                                      *(unsigned char *)ptr =
 +                                                              np->value;
 +                                                      break;
 +
 +                                              case 2:
 +                                                      *(unsigned short *)ptr =
 +                                                              np->value;
 +                                                      break;
 +
 +                                              case 4:
 +                                                      *(unsigned int *)ptr =
 +                                                              np->value;
 +                                                      break;
 +
 +                                              case 8:
 +                                                      *(unsigned long long *)
 +                                                              ptr = np->value;
 +                                                      break;
 +                                      }
 +                                      kl_print_type(ptr, rkltp, 0,
 +                                              flags|SUPPRESS_NAME);
 +                                      kl_free_block(ptr);
 +                                      return(1);
 +                              }
 +
 +                              if (size) {
 +                                      addr = np->address;
 +                                      if (invalid_address(addr, size)) {
 +                                              kdb_printf (
 +                                               "invalid address %#lx\n",
 +                                                       addr);
 +                                              return 1;
 +                                      }
 +                                      kl_get_block(addr, size, (void *)ptr,
 +                                                      (void *)0);
 +                                      if (KL_ERROR) {
 +                                              kl_print_error();
 +                                              kl_free_block(ptr);
 +                                              return(1);
 +                                      }
 +                              }
 +                              /* Print out the actual type
 +                               */
 +                              switch (rkltp->kl_type) {
 +                                      case KLT_STRUCT:
 +                                      case KLT_UNION:
 +                                              kl_print_type(ptr, rkltp, 0,
 +                                                      flags);
 +                                              break;
 +
 +                                      case KLT_ARRAY:
 +                                              kl_print_type(ptr, rkltp, 0,
 +                                                      flags| SUPPRESS_NAME);
 +                                              break;
 +
 +                                      default:
 +                                              kl_print_type(ptr, rkltp, 0,
 +                                                      (flags|
 +                                                      SUPPRESS_NAME|
 +                                                      SUPPRESS_NL));
 +                                              break;
 +                              }
 +                              if (ptr) {
 +                                      kl_free_block(ptr);
 +                              }
 +                      }
 +                      break;
 +              }
 +
 +              case VADDR:
 +                      /* If we get here, there was no type info available.
 +                       * The ADDRESS_FLAG should be set (otherwise we
 +                       * would have returned an error). So, print out
 +                       * the address.
 +                       */
 +                      kdb_printf("0x%lx", np->address);
 +                      break;
 +
 +              default:
 +                      if (np->node_type == TEXT) {
 +                              kl_print_string(np->name);
 +                              if (KL_ERROR) {
 +                                      kl_print_error();
 +                                      return(1);
 +                              }
 +                      } else if (np->node_type == CHARACTER) {
 +                              kdb_printf("\'%c\'", (char)np->value);
 +                      }
 +                      break;
 +      }
 +      return(0);
 +}
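 +
 +/* Illustrative output (hypothetical address): a result typed as
 + * "struct task_struct *" prints as a cast plus value, e.g.
 + *   (struct task_struct *) 0xe000000001234000
 + * while a single-level "char *" result additionally prints " = "
 + * followed by the referenced string via print_string() above.
 + */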
 +
 +/*
 + * print_eval_error()
 + */
 +void
 +print_eval_error(
 +      char *cmdname,
 +      char *s,
 +      char *bad_ptr,
 +      uint64_t error,
 +      int flags)
 +{
 +      int i, cmd_len;
 +
 +      kdb_printf("%s %s\n", cmdname, s);
 +      cmd_len = strlen(cmdname);
 +
 +      if (!bad_ptr) {
 +              for (i = 0; i < (strlen(s) + cmd_len); i++) {
 +                      kdb_printf(" ");
 +              }
 +      } else {
 +              for (i = 0; i < (bad_ptr - s + 1 + cmd_len); i++) {
 +                      kdb_printf(" ");
 +              }
 +      }
 +      kdb_printf("^ ");
 +      switch (error) {
 +              case E_OPEN_PAREN :
 +                      kdb_printf("Too many open parenthesis\n");
 +                      break;
 +
 +              case E_CLOSE_PAREN :
 +                      kdb_printf("Too many close parenthesis\n");
 +                      break;
 +
 +              case E_BAD_STRUCTURE :
 +                      kdb_printf("Invalid structure\n");
 +                      break;
 +
 +              case E_MISSING_STRUCTURE :
 +                      kdb_printf("Missing structure\n");
 +                      break;
 +
 +              case E_BAD_MEMBER :
 +                      kdb_printf("No such member\n");
 +                      break;
 +
 +              case E_BAD_OPERATOR :
 +                      kdb_printf("Invalid operator\n");
 +                      break;
 +
 +              case E_MISSING_OPERAND :
 +                      kdb_printf("Missing operand\n");
 +                      break;
 +
 +              case E_BAD_OPERAND :
 +                      kdb_printf("Invalid operand\n");
 +                      break;
 +
 +              case E_BAD_TYPE :
 +                      kdb_printf("Invalid type\n");
 +                      if (!have_debug_file) {
 +                              kdb_printf("no debuginfo file\n");
 +                              return;
 +                      }
 +                      break;
 +
 +              case E_NOTYPE :
 +                      kdb_printf("Could not find type information\n");
 +                      break;
 +
 +              case E_BAD_POINTER :
 +                      kdb_printf("Invalid pointer\n");
 +                      break;
 +
 +              case E_BAD_INDEX :
 +                      kdb_printf("Invalid array index\n");
 +                      break;
 +
 +              case E_BAD_CHAR :
 +                      kdb_printf("Invalid character value\n");
 +                      break;
 +
 +              case E_BAD_STRING :
 +                      kdb_printf("Non-termining string\n");
 +                      break;
 +
 +              case E_END_EXPECTED :
 +                      kdb_printf(
 +                              "Expected end of print statement\n");
 +                      break;
 +
 +              case E_BAD_EVAR :
 +                      kdb_printf("Invalid eval variable\n");
 +                      break;
 +
 +              case E_BAD_VALUE :
 +                      kdb_printf("Invalid value\n");
 +                      break;
 +
 +              case E_NO_VALUE :
 +                      kdb_printf("No value supplied\n");
 +                      break;
 +
 +              case E_DIVIDE_BY_ZERO :
 +                      kdb_printf("Divide by zero\n");
 +                      break;
 +
 +              case E_BAD_CAST :
 +                      kdb_printf("Invalid cast\n");
 +                      break;
 +
 +              case E_NO_ADDRESS :
 +                      kdb_printf("Not an address\n");
 +                      break;
 +
 +              case E_SINGLE_QUOTE :
 +                      kdb_printf("Missing single quote\n");
 +                      break;
 +
 +              case E_BAD_WHATIS :
 +                      kdb_printf("Invalid whatis Operation\n");
 +                      break;
 +
 +              case E_NOT_IMPLEMENTED :
 +                      kdb_printf("Not implemented\n");
 +                      break;
 +
 +              default :
 +                      kdb_printf("Syntax error\n");
 +                      break;
 +      }
 +}
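 +
 +/* Example (illustrative): for an expression containing an unknown
 + * member, the caret lands under the offending token:
 + *   print foo->nosuch
 + *              ^ No such member
 + */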
 +
 +/*
 + * single_type()
 + */
 +void
 +single_type(char *str)
 +{
 +      char            buffer[256], *type_name;
 +      kltype_t        *kltp;
 +      syment_t        *sp;
 +
 +      type_name = buffer;
 +      strncpy(type_name, str, sizeof(buffer) - 1);
 +      buffer[sizeof(buffer) - 1] = '\0';
 +
 +      if (have_debug_file) {
 +              if ((kltp = kl_find_type(type_name, KLT_TYPE))) {
 +                      kl_print_type((void *)NULL, kltp, 0, C_SHOWOFFSET);
 +                      return;
 +              }
 +              if ((kltp = kl_find_type(type_name, KLT_TYPEDEF))) {
 +                      kdb_printf ("typedef %s:\n", type_name);
 +                      kl_print_type((void *)NULL, kltp, 0, C_SHOWOFFSET);
 +                      return;
 +              }
 +      }
 +      if ((sp = kl_lkup_symname(type_name))) {
 +              kdb_printf ("symbol %s value: %#lx\n", str, sp->s_addr);
 +              kl_free_block((void *)sp);
 +              return;
 +      }
 +      kdb_printf("could not find type or symbol information for %s\n",
 +              type_name);
 +      return;
 +}
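 +
 +/* Illustrative usage (assuming debuginfo is loaded and this is
 + * wired to the whatis command):
 + *   [0]kdb> whatis task_struct
 + * prints the structure layout with member offsets (C_SHOWOFFSET);
 + * a name that only matches a symbol falls back to printing the
 + * symbol's address.
 + */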
 +
 +/*
 + * sizeof_type()
 + */
 +void
 +sizeof_type(char *str)
 +{
 +      char            buffer[256], *type_name;
 +      kltype_t        *kltp;
 +
 +      type_name = buffer;
 +      strncpy(type_name, str, sizeof(buffer) - 1);
 +      buffer[sizeof(buffer) - 1] = '\0';
 +
 +      if ((kltp = kl_find_type(type_name, KLT_TYPE))) {
 +              kdb_printf ("%s %d %#x\n", kltp->kl_typestr,
 +                              kltp->kl_size, kltp->kl_size);
 +              return;
 +      }
 +      if ((kltp = kl_find_type(type_name, KLT_TYPEDEF))) {
 +              kdb_printf ("%s %d %#x\n", kltp->kl_typestr,
 +                              kltp->kl_size, kltp->kl_size);
 +              return;
 +      }
 +      kdb_printf("could not find type information for %s\n", type_name);
 +}
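 +
 +/* Illustrative usage (the size shown is configuration dependent):
 + *   [0]kdb> sizeof semaphore
 + *   struct semaphore 16 0x10
 + * per the "%s %d %#x" format above, the size is reported in both
 + * decimal and hexadecimal.
 + */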
 +
 +EXPORT_SYMBOL(have_debug_file);
 +EXPORT_SYMBOL(type_tree);
 +EXPORT_SYMBOL(typedef_tree);
 +
 +#if defined(CONFIG_X86_32)
 +/* needed for i386: */
 +#include <linux/types.h>
 +#include <asm/div64.h>
 +/*
 + * Generic C version of full 64 bit by 64 bit division
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * version 2 as published by the Free Software Foundation.
 + *
 + * Code generated for this function might be very inefficient
 + * for some CPUs; it can be overridden by linking arch-specific
 + * assembly versions such as arch/sparc/lib/udivdi.S
 + */
 +uint64_t
 +__udivdi3(uint64_t dividend, uint64_t divisor)
 +{
 +      uint32_t d = divisor;
 +      /* Scale divisor to 32 bits */
 +      if (divisor > 0xffffffffULL) {
 +              unsigned int shift = fls(divisor >> 32);
 +              d = divisor >> shift;
 +              dividend >>= shift;
 +      }
 +      /* avoid 64 bit division if possible */
 +      if (dividend >> 32)
 +              do_div(dividend, d);
 +      else
 +              dividend = (uint32_t) dividend / d;
 +      return dividend;
 +}
 +
 +int64_t
 +__divdi3(int64_t dividend, int64_t divisor)
 +{
 +      int32_t d = divisor;
 +      /* Scale divisor to 32 bits */
 +      if (divisor > 0xffffffffLL) {
 +              unsigned int shift = fls(divisor >> 32);
 +              d = divisor >> shift;
 +              dividend >>= shift;
 +      }
 +      /* avoid 64 bit division if possible */
 +      if (dividend >> 32)
 +              do_div(dividend, d);
 +      else
 +              dividend = (int32_t) dividend / d;
 +      return dividend;
 +}
 +
 +uint64_t
 +__umoddi3(uint64_t dividend, uint64_t divisor)
 +{
 +      return dividend - (__udivdi3(dividend, divisor) * divisor);
 +}
 +
 +int64_t
 +__moddi3(int64_t dividend, int64_t divisor)
 +{
 +      return dividend - (__divdi3(dividend, divisor) * divisor);
 +}
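 +
 +/* Note on precision: when the divisor does not fit in 32 bits, both
 + * operands are scaled down by the same shift before dividing, so the
 + * quotient is an approximation -- e.g. 2^63 / (2^32 + 1) is computed
 + * as 2^62 / 2^31 = 0x80000000 where the exact quotient is 0x7fffffff
 + * -- and the modulo helpers inherit the same error.  The signed
 + * variants appear exact only when negative operands fit in 32 bits.
 + * For debugger expression evaluation this tradeoff is acceptable.
 + */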
 +#endif /* CONFIG_X86_32 */
diff --cc kdb/kdbmain.c
index 0a2e8f9,0000000..e38fbe9
mode 100644,000000..100644
--- /dev/null
@@@ -1,4332 -1,0 +1,4333 @@@
 +/*
 + * Kernel Debugger Architecture Independent Main Code
 + *
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (C) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
 + * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
 + * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
 + */
 +
 +/*
 + * Updated for Xscale (R) architecture support
 + * Eddie Dong <eddie.dong@intel.com> 8 Jan 03
 + */
 +
 +#include <linux/ctype.h>
 +#include <linux/string.h>
 +#include <linux/kernel.h>
 +#include <linux/reboot.h>
 +#include <linux/sched.h>
 +#include <linux/sysrq.h>
 +#include <linux/smp.h>
 +#include <linux/utsname.h>
 +#include <linux/vmalloc.h>
 +#include <linux/module.h>
 +#include <linux/mm.h>
 +#include <linux/init.h>
 +#include <linux/kallsyms.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/notifier.h>
 +#include <linux/interrupt.h>
 +#include <linux/delay.h>
 +#include <linux/nmi.h>
 +#include <linux/ptrace.h>
 +#include <linux/sysctl.h>
 +#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
 +#include <linux/dump.h>
 +#endif
 +#include <linux/cpu.h>
 +#include <linux/kdebug.h>
 +#ifdef CONFIG_KDB_KDUMP
 +#include <linux/kexec.h>
 +#endif
 +
 +#include <acpi/acpi_bus.h>
 +
 +#include <asm/system.h>
 +#include <asm/kdebug.h>
 +#include <linux/proc_fs.h>
 +#include <asm/uaccess.h>
++#include <linux/slab.h>
 +char kdb_debug_info_filename[256] = {""};
 +EXPORT_SYMBOL(kdb_debug_info_filename);
 +#define GREP_LEN 256
 +char kdb_grep_string[GREP_LEN];
 +int kdb_grepping_flag;
 +EXPORT_SYMBOL(kdb_grepping_flag);
 +int kdb_grep_leading;
 +int kdb_grep_trailing;
 +
 +/*
 + * Kernel debugger state flags
 + */
 +volatile int kdb_flags;
 +atomic_t kdb_event;
 +atomic_t kdb_8250;
 +
 +/*
 + * kdb_lock protects updates to kdb_initial_cpu.  Used to
 + * single thread processors through the kernel debugger.
 + */
 +static DEFINE_SPINLOCK(kdb_lock);
 +volatile int kdb_initial_cpu = -1;            /* cpu number that owns kdb */
 +int kdb_seqno = 2;                            /* how many times kdb has been entered */
 +
 +volatile int kdb_nextline = 1;
 +static volatile int kdb_new_cpu;              /* Which cpu to switch to */
 +
 +volatile int kdb_state[NR_CPUS];              /* Per cpu state */
 +
 +const struct task_struct *kdb_current_task;
 +EXPORT_SYMBOL(kdb_current_task);
 +struct pt_regs *kdb_current_regs;
 +
 +#ifdef        CONFIG_KDB_OFF
 +int kdb_on = 0;                               /* Default is off */
 +#else
 +int kdb_on = 1;                               /* Default is on */
 +#endif        /* CONFIG_KDB_OFF */
 +
 +const char *kdb_diemsg;
 +static int kdb_go_count;
 +#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
 +static unsigned int kdb_continue_catastrophic = CONFIG_KDB_CONTINUE_CATASTROPHIC;
 +#else
 +static unsigned int kdb_continue_catastrophic = 0;
 +#endif
 +
 +#ifdef kdba_setjmp
 +      /*
 +       * Must have a setjmp buffer per CPU.  Switching cpus will
 +       * cause the jump buffer to be setup for the new cpu, and
 +       * subsequent switches (and pager aborts) will use the
 +       * appropriate per-processor values.
 +       */
 +kdb_jmp_buf *kdbjmpbuf;
 +#endif        /* kdba_setjmp */
 +
 +      /*
 +       * kdb_commands describes the available commands.
 +       */
 +static kdbtab_t *kdb_commands;
 +static int kdb_max_commands;
 +
 +typedef struct _kdbmsg {
 +      int     km_diag;        /* kdb diagnostic */
 +      char    *km_msg;        /* Corresponding message text */
 +} kdbmsg_t;
 +
 +#define KDBMSG(msgnum, text) \
 +      { KDB_##msgnum, text }
 +
 +static kdbmsg_t kdbmsgs[] = {
 +      KDBMSG(NOTFOUND,"Command Not Found"),
 +      KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
 +      KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD; use 1, 2, 4 or 8 (8 is only allowed on 64 bit systems)"),
 +      KDBMSG(BADRADIX, "Illegal value for RADIX use 8, 10 or 16"),
 +      KDBMSG(NOTENV, "Cannot find environment variable"),
 +      KDBMSG(NOENVVALUE, "Environment variable should have value"),
 +      KDBMSG(NOTIMP, "Command not implemented"),
 +      KDBMSG(ENVFULL, "Environment full"),
 +      KDBMSG(ENVBUFFULL, "Environment buffer full"),
 +      KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
 +#ifdef  CONFIG_CPU_XSCALE
 +      KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
 +#else
 +      KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
 +#endif
 +      KDBMSG(DUPBPT, "Duplicate breakpoint address"),
 +      KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
 +      KDBMSG(BADMODE, "Invalid IDMODE"),
 +      KDBMSG(BADINT, "Illegal numeric value"),
 +      KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
 +      KDBMSG(BADREG, "Invalid register name"),
 +      KDBMSG(BADCPUNUM, "Invalid cpu number"),
 +      KDBMSG(BADLENGTH, "Invalid length field"),
 +      KDBMSG(NOBP, "No Breakpoint exists"),
 +      KDBMSG(BADADDR, "Invalid address"),
 +};
 +#undef KDBMSG
 +
 +static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
 +
 +
 +/*
 + * Initial environment.   This is all kept static and local to
 + * this file.   We don't want to rely on the memory allocation
 + * mechanisms in the kernel, so we use a very limited allocate-only
 + * heap for new and altered environment variables.  The entire
 + * environment is limited to a fixed number of entries (add more
 + * to __env[] if required) and a fixed amount of heap (add more to
 + * KDB_ENVBUFSIZE if required).
 + */
 +
 +static char *__env[] = {
 +#if defined(CONFIG_SMP)
 + "PROMPT=[%d]kdb> ",
 + "MOREPROMPT=[%d]more> ",
 +#else
 + "PROMPT=kdb> ",
 + "MOREPROMPT=more> ",
 +#endif
 + "RADIX=16",
 + "LINES=24",
 + "COLUMNS=80",
 + "MDCOUNT=8",                 /* lines of md output */
 + "BTARGS=9",                  /* 9 possible args in bt */
 + KDB_PLATFORM_ENV,
 + "DTABCOUNT=30",
 + "NOSECT=1",
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 + (char *)0,
 +};
 +
 +static const int __nenv = (sizeof(__env) / sizeof(char *));
 +
 +/* external commands: */
 +int kdb_debuginfo_print(int argc, const char **argv);
 +int kdb_pxhelp(int argc, const char **argv);
 +int kdb_walkhelp(int argc, const char **argv);
 +int kdb_walk(int argc, const char **argv);
 +
 +/*
 + * kdb_serial_str is the sequence that the user must enter on a serial
 + * console to invoke kdb.  It can be a single character such as "\001"
 + * (control-A) or multiple characters such as "\eKDB".  NOTE: All except the
 + * last character are passed through to the application reading from the serial
 + * console.
 + *
 + * I tried to make the sequence a CONFIG_ option but most of CML1 cannot cope
 + * with '\' in strings.  CML2 would have been able to do it but we lost CML2.
 + * KAO.
 + */
 +const char kdb_serial_str[] = "\eKDB";
 +EXPORT_SYMBOL(kdb_serial_str);
 +
 +struct task_struct *
 +kdb_curr_task(int cpu)
 +{
 +      struct task_struct *p = curr_task(cpu);
 +#ifdef        _TIF_MCA_INIT
 +      struct kdb_running_process *krp = kdb_running_process + cpu;
 +      if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && krp->p)
 +              p = krp->p;
 +#endif
 +      return p;
 +}
 +
 +/*
 + * kdbgetenv
 + *
 + *    This function will return the character string value of
 + *    an environment variable.
 + *
 + * Parameters:
 + *    match   A character string representing an environment variable.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    NULL    No environment variable matches 'match'
 + *    char*   Pointer to string value of environment variable.
 + * Locking:
 + *    No locking considerations required.
 + * Remarks:
 + */
 +char *
 +kdbgetenv(const char *match)
 +{
 +      char **ep = __env;
 +      int matchlen = strlen(match);
 +      int i;
 +
 +      for(i=0; i<__nenv; i++) {
 +              char *e = *ep++;
 +
 +              if (!e) continue;
 +
 +              if ((strncmp(match, e, matchlen) == 0)
 +               && ((e[matchlen] == '\0')
 +                 ||(e[matchlen] == '='))) {
 +                      char *cp = strchr(e, '=');
 +                      return (cp ? cp + 1 : "");
 +              }
 +      }
 +      return NULL;
 +}
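 +
 +/* Example (against the static defaults above): kdbgetenv("RADIX")
 + * returns the text after the '=' sign, "16"; an unknown name
 + * returns NULL; a variable stored without a value returns the
 + * empty string "".
 + */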
 +
 +/*
 + * kdballocenv
 + *
 + *    This function is used to allocate bytes for environment entries.
 + *
 + * Parameters:
 + *    bytes   The number of bytes of environment buffer space needed.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    A pointer to the allocated space, or NULL if the buffer is full.
 + * Locking:
 + *    No locking considerations required.  Must be called with all
 + *    processors halted.
 + * Remarks:
 + *    We use a static environment buffer (envbuffer) to hold the values
 + *    of dynamically generated environment variables (see kdb_set).  Buffer
 + *    space once allocated is never free'd, so over time, the amount of space
 + *    (currently 512 bytes) will be exhausted if env variables are changed
 + *    frequently.
 + */
 +static char *
 +kdballocenv(size_t bytes)
 +{
 +#define       KDB_ENVBUFSIZE  512
 +      static char envbuffer[KDB_ENVBUFSIZE];
 +      static int envbufsize;
 +      char *ep = NULL;
 +
 +      if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
 +              ep = &envbuffer[envbufsize];
 +              envbufsize += bytes;
 +      }
 +      return ep;
 +}
 +
 +/*
 + * kdbgetulenv
 + *
 + *    This function will return the value of an unsigned long-valued
 + *    environment variable.
 + *
 + * Parameters:
 + *    match   A character string representing a numeric value
 + * Outputs:
 + *    *value  the unsigned long representation of the env variable 'match'
 + * Returns:
 + *    Zero on success, a kdb diagnostic on failure.
 + * Locking:
 + *    No locking considerations required.
 + * Remarks:
 + */
 +
 +static int
 +kdbgetulenv(const char *match, unsigned long *value)
 +{
 +      char *ep;
 +
 +      ep = kdbgetenv(match);
 +      if (!ep) return KDB_NOTENV;
 +      if (strlen(ep) == 0) return KDB_NOENVVALUE;
 +
 +      *value = simple_strtoul(ep, NULL, 0);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdbgetintenv
 + *
 + *    This function will return the value of an integer-valued
 + *    environment variable.
 + *
 + * Parameters:
 + *    match   A character string representing an integer-valued env variable
 + * Outputs:
 + *    *value  the integer representation of the environment variable 'match'
 + * Returns:
 + *    Zero on success, a kdb diagnostic on failure.
 + * Locking:
 + *    No locking considerations required.
 + * Remarks:
 + */
 +
 +int
 +kdbgetintenv(const char *match, int *value) {
 +      unsigned long val;
 +      int diag;
 +
 +      diag = kdbgetulenv(match, &val);
 +      if (!diag) {
 +              *value = (int) val;
 +      }
 +      return diag;
 +}
 +
 +/*
 + * kdbgetularg
 + *
 + *    This function will convert a numeric string
 + *    into an unsigned long value.
 + *
 + * Parameters:
 + *    arg     A character string representing a numeric value
 + * Outputs:
 + *    *value  the unsigned long representation of arg.
 + * Returns:
 + *    Zero on success, a kdb diagnostic on failure.
 + * Locking:
 + *    No locking considerations required.
 + * Remarks:
 + */
 +
 +int
 +kdbgetularg(const char *arg, unsigned long *value)
 +{
 +      char *endp;
 +      unsigned long val;
 +
 +      val = simple_strtoul(arg, &endp, 0);
 +
 +      if (endp == arg) {
 +              /*
 +               * Try base 16, for us folks too lazy to type the
 +               * leading 0x...
 +               */
 +              val = simple_strtoul(arg, &endp, 16);
 +              if (endp == arg)
 +                      return KDB_BADINT;
 +      }
 +
 +      *value = val;
 +
 +      return 0;
 +}
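 +
 +/* Example (illustrative): "0x1f" parses directly to 31, while "ff"
 + * fails the base-0 parse (no leading decimal digit) and succeeds
 + * through the base-16 fallback as 255.  A string with no hex digits
 + * at all, such as "zz", returns KDB_BADINT.
 + */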
 +
 +/*
 + * kdb_set
 + *
 + *    This function implements the 'set' command.  Alter an existing
 + *    environment variable or create a new one.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_set(int argc, const char **argv)
 +{
 +      int i;
 +      char *ep;
 +      size_t varlen, vallen;
 +
 +      /*
 +       * we can be invoked two ways:
 +       *   set var=value    argv[1]="var", argv[2]="value"
 +       *   set var = value  argv[1]="var", argv[2]="=", argv[3]="value"
 +       * - if the latter, shift 'em down.
 +       */
 +      if (argc == 3) {
 +              argv[2] = argv[3];
 +              argc--;
 +      }
 +
 +      if (argc != 2)
 +              return KDB_ARGCOUNT;
 +
 +      /*
 +       * Check for internal variables
 +       */
 +      if (strcmp(argv[1], "KDBDEBUG") == 0) {
 +              unsigned int debugflags;
 +              char *cp;
 +
 +              debugflags = simple_strtoul(argv[2], &cp, 0);
 +              if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
 +                      kdb_printf("kdb: illegal debug flags '%s'\n",
 +                                  argv[2]);
 +                      return 0;
 +              }
 +              kdb_flags = (kdb_flags & ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
 +                        | (debugflags << KDB_DEBUG_FLAG_SHIFT);
 +
 +              return 0;
 +      }
 +
 +      /*
 +       * Tokenizer squashed the '=' sign.  argv[1] is variable
 +       * name, argv[2] = value.
 +       */
 +      varlen = strlen(argv[1]);
 +      vallen = strlen(argv[2]);
 +      ep = kdballocenv(varlen + vallen + 2);
 +      if (ep == (char *)0)
 +              return KDB_ENVBUFFULL;
 +
 +      sprintf(ep, "%s=%s", argv[1], argv[2]);
 +
 +      ep[varlen+vallen+1]='\0';
 +
 +      for(i=0; i<__nenv; i++) {
 +              if (__env[i]
 +               && ((strncmp(__env[i], argv[1], varlen)==0)
 +                 && ((__env[i][varlen] == '\0')
 +                  || (__env[i][varlen] == '=')))) {
 +                      __env[i] = ep;
 +                      return 0;
 +              }
 +      }
 +
 +      /*
 +       * Wasn't existing variable.  Fit into slot.
 +       */
 +      for(i=0; i<__nenv-1; i++) {
 +              if (__env[i] == (char *)0) {
 +                      __env[i] = ep;
 +                      return 0;
 +              }
 +      }
 +
 +      return KDB_ENVFULL;
 +}
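 +
 +/* Illustrative usage from the kdb prompt:
 + *   [0]kdb> set BTARGS=5
 + *   [0]kdb> set RADIX = 10
 + * Both spellings arrive here; the second form is shifted down to
 + * the first before the variable is stored.  "set KDBDEBUG=<mask>"
 + * instead updates kdb_flags directly, as handled above.
 + */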
 +
 +static int
 +kdb_check_regs(void)
 +{
 +      if (!kdb_current_regs) {
 +              kdb_printf("No current kdb registers."
 +                         "  You may need to select another task\n");
 +              return KDB_BADREG;
 +      }
 +      return 0;
 +}
 +
 +/*
 + * kdbgetaddrarg
 + *
 + *    This function is responsible for parsing an
 + *    address-expression and returning the value of
 + *    the expression, symbol name, and offset to the caller.
 + *
 + *    The argument may consist of a numeric value (decimal or
 + *    hexadecimal), a symbol name, a register name (preceded
 + *    by the percent sign), an environment variable with a numeric
 + *    value (preceded by a dollar sign) or a simple arithmetic
 + *    expression consisting of a symbol name, +/-, and a numeric
 + *    constant value (offset).
 + *
 + * Parameters:
 + *    argc    - count of arguments in argv
 + *    argv    - argument vector
 + *    *nextarg - index to next unparsed argument in argv[]
 + *    regs    - Register state at time of KDB entry
 + * Outputs:
 + *    *value  - receives the value of the address-expression
 + *    *offset - receives the offset specified, if any
 + *    *name   - receives the symbol name, if any
 + *    *nextarg - index to next unparsed argument in argv[]
 + *
 + * Returns:
 + *    zero is returned on success, a kdb diagnostic code is
 + *      returned on error.
 + *
 + * Locking:
 + *    No locking requirements.
 + *
 + * Remarks:
 + *
 + */
 +
 +int
 +kdbgetaddrarg(int argc, const char **argv, int *nextarg,
 +            kdb_machreg_t *value,  long *offset,
 +            char **name)
 +{
 +      kdb_machreg_t addr;
 +      unsigned long off = 0;
 +      int positive;
 +      int diag;
 +      int found = 0;
 +      char *symname;
 +      char symbol = '\0';
 +      char *cp;
 +      kdb_symtab_t symtab;
 +
 +      /*
 +       * Process arguments which follow the following syntax:
 +       *
 +       *  symbol | numeric-address [+/- numeric-offset]
 +       *  %register
 +       *  $environment-variable
 +       */
 +
 +      if (*nextarg > argc) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      symname = (char *)argv[*nextarg];
 +
 +      /*
 +       * If there is no whitespace between the symbol
 +       * or address and the '+' or '-' symbols, we
 +       * remember the character and replace it with a
 +       * null so the symbol/value can be properly parsed
 +       */
 +      if ((cp = strpbrk(symname, "+-")) != NULL) {
 +              symbol = *cp;
 +              *cp++ = '\0';
 +      }
 +
 +      if (symname[0] == '$') {
 +              diag = kdbgetulenv(&symname[1], &addr);
 +              if (diag)
 +                      return diag;
 +      } else if (symname[0] == '%') {
 +              if ((diag = kdb_check_regs()))
 +                      return diag;
 +              diag = kdba_getregcontents(&symname[1], kdb_current_regs, &addr);
 +              if (diag)
 +                      return diag;
 +      } else {
 +              found = kdbgetsymval(symname, &symtab);
 +              if (found) {
 +                      addr = symtab.sym_start;
 +              } else {
 +                      diag = kdbgetularg(argv[*nextarg], &addr);
 +                      if (diag)
 +                              return diag;
 +              }
 +      }
 +
 +      if (!found)
 +              found = kdbnearsym(addr, &symtab);
 +
 +      (*nextarg)++;
 +
 +      if (name)
 +              *name = symname;
 +      if (value)
 +              *value = addr;
 +      if (offset && name && *name)
 +              *offset = addr - symtab.sym_start;
 +
 +      if ((*nextarg > argc)
 +       && (symbol == '\0'))
 +              return 0;
 +
 +      /*
 +       * check for +/- and offset
 +       */
 +
 +      if (symbol == '\0') {
 +              if ((argv[*nextarg][0] != '+')
 +               && (argv[*nextarg][0] != '-')) {
 +                      /*
 +                       * Not our argument.  Return.
 +                       */
 +                      return 0;
 +              } else {
 +                      positive = (argv[*nextarg][0] == '+');
 +                      (*nextarg)++;
 +              }
 +      } else
 +              positive = (symbol == '+');
 +
 +      /*
 +       * Now there must be an offset!
 +       */
 +      if ((*nextarg > argc)
 +       && (symbol == '\0')) {
 +              return KDB_INVADDRFMT;
 +      }
 +
 +      if (!symbol) {
 +              cp = (char *)argv[*nextarg];
 +              (*nextarg)++;
 +      }
 +
 +      diag = kdbgetularg(cp, &off);
 +      if (diag)
 +              return diag;
 +
 +      if (!positive)
 +              off = -off;
 +
 +      if (offset)
 +              *offset += off;
 +
 +      if (value)
 +              *value += off;
 +
 +      return 0;
 +}
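 +
 +/* Example (hypothetical values): an argument of "schedule+0x10"
 + * resolves the symbol via kdbgetsymval(), remembers the '+' found
 + * by strpbrk() above, and adds 0x10 to both *value and *offset;
 + * "%eip" and "$BTARGS" take the register and environment-variable
 + * paths respectively.
 + */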
 +
 +static void
 +kdb_cmderror(int diag)
 +{
 +      int i;
 +
 +      if (diag >= 0) {
 +              kdb_printf("no error detected (diagnostic is %d)\n", diag);
 +              return;
 +      }
 +
 +      for(i=0; i<__nkdb_err; i++) {
 +              if (kdbmsgs[i].km_diag == diag) {
 +                      kdb_printf("diag: %d: %s\n", diag, kdbmsgs[i].km_msg);
 +                      return;
 +              }
 +      }
 +
 +      kdb_printf("Unknown diag %d\n", -diag);
 +}
 +
 +/*
 + * kdb_defcmd, kdb_defcmd2
 + *
 + *    This function implements the 'defcmd' command which defines one
 + *    command as a set of other commands, terminated by endefcmd.
 + *    kdb_defcmd processes the initial 'defcmd' command, kdb_defcmd2
 + *    is invoked from kdb_parse for the following commands until
 + *    'endefcmd'.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +struct defcmd_set {
 +      int count;
 +      int usable;
 +      char *name;
 +      char *usage;
 +      char *help;
 +      char **command;
 +};
 +static struct defcmd_set *defcmd_set;
 +static int defcmd_set_count;
 +static int defcmd_in_progress;
 +
 +/* Forward references */
 +static int kdb_exec_defcmd(int argc, const char **argv);
 +
 +static int
 +kdb_defcmd2(const char *cmdstr, const char *argv0)
 +{
 +      struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
 +      char **save_command = s->command;
 +      if (strcmp(argv0, "endefcmd") == 0) {
 +              defcmd_in_progress = 0;
 +              if (!s->count)
 +                      s->usable = 0;
 +              if (s->usable)
 +                      kdb_register(s->name, kdb_exec_defcmd, s->usage, s->help, 0);
 +              return 0;
 +      }
 +      if (!s->usable)
 +              return KDB_NOTIMP;
 +      s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
 +      if (!s->command) {
 +              kdb_printf("Could not allocate new kdb_defcmd table for %s\n", cmdstr);
 +              s->usable = 0;
 +              return KDB_NOTIMP;
 +      }
 +      memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
 +      s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
 +      kfree(save_command);
 +      return 0;
 +}
 +
 +static int
 +kdb_defcmd(int argc, const char **argv)
 +{
 +      struct defcmd_set *save_defcmd_set = defcmd_set, *s;
 +      if (defcmd_in_progress) {
 +              kdb_printf("kdb: nested defcmd detected, assuming missing endefcmd\n");
 +              kdb_defcmd2("endefcmd", "endefcmd");
 +      }
 +      if (argc == 0) {
 +              int i;
 +              for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
 +                      kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name, s->usage, s->help);
 +                      for (i = 0; i < s->count; ++i)
 +                              kdb_printf("%s", s->command[i]);
 +                      kdb_printf("endefcmd\n");
 +              }
 +              return 0;
 +      }
 +      if (argc != 3)
 +              return KDB_ARGCOUNT;
 +      defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set), GFP_KDB);
 +      if (!defcmd_set) {
 +              kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
 +              defcmd_set = save_defcmd_set;
 +              return KDB_NOTIMP;
 +      }
 +      memcpy(defcmd_set, save_defcmd_set, defcmd_set_count * sizeof(*defcmd_set));
 +      kfree(save_defcmd_set);
 +      s = defcmd_set + defcmd_set_count;
 +      memset(s, 0, sizeof(*s));
 +      s->usable = 1;
 +      s->name = kdb_strdup(argv[1], GFP_KDB);
 +      s->usage = kdb_strdup(argv[2], GFP_KDB);
 +      s->help = kdb_strdup(argv[3], GFP_KDB);
 +      if (s->usage[0] == '"') {
 +              strcpy(s->usage, s->usage+1);
 +              s->usage[strlen(s->usage)-1] = '\0';
 +      }
 +      if (s->help[0] == '"') {
 +              strcpy(s->help, s->help+1);
 +              s->help[strlen(s->help)-1] = '\0';
 +      }
 +      ++defcmd_set_count;
 +      defcmd_in_progress = 1;
 +      return 0;
 +}
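 +
 +/* Illustrative session:
 + *   [0]kdb> defcmd diag "" "collect common state"
 + *   [0]kdb> bt
 + *   [0]kdb> ps
 + *   [0]kdb> endefcmd
 + * The body lines are collected by kdb_defcmd2() until endefcmd;
 + * the new 'diag' command is then registered and is replayed by
 + * kdb_exec_defcmd() below.
 + */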
 +
 +/*
 + * kdb_exec_defcmd
 + *
 + *    Execute the set of commands associated with this defcmd name.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_exec_defcmd(int argc, const char **argv)
 +{
 +      int i, ret;
 +      struct defcmd_set *s;
 +      if (argc != 0)
 +              return KDB_ARGCOUNT;
 +      for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
 +              if (strcmp(s->name, argv[0]) == 0)
 +                      break;
 +      }
 +      if (i == defcmd_set_count) {
 +              kdb_printf("kdb_exec_defcmd: could not find commands for %s\n", argv[0]);
 +              return KDB_NOTIMP;
 +      }
 +      for (i = 0; i < s->count; ++i) {
 +              /* Recursive use of kdb_parse, do not use argv after this point */
 +              argv = NULL;
 +              kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
 +              if ((ret = kdb_parse(s->command[i])))
 +                      return ret;
 +      }
 +      return 0;
 +}
 +
 +/* Command history */
 +#define KDB_CMD_HISTORY_COUNT 32
 +#define CMD_BUFLEN            200     /* kdb_printf: max printline size == 256 */
 +static unsigned int cmd_head, cmd_tail;
 +static unsigned int cmdptr;
 +static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
 +static char cmd_cur[CMD_BUFLEN];
 +
 +/*
 + * The "str" argument may point to something like  | grep xyz
 + *
 + */
 +static void
 +parse_grep(const char *str)
 +{
 +      int     len;
 +      char    *cp = (char *)str, *cp2;
 +
 +      /* sanity check: we should have been called with the '|' first */
 +      if (*cp != '|')
 +              return;
 +      cp++;
 +      while (isspace(*cp)) cp++;
 +      if (strncmp(cp,"grep ",5)) {
 +              kdb_printf ("invalid 'pipe', see grephelp\n");
 +              return;
 +      }
 +      cp += 5;
 +      while (isspace(*cp)) cp++;
 +      cp2 = strchr(cp, '\n');
 +      if (cp2)
 +              *cp2 = '\0'; /* remove the trailing newline */
 +      len = strlen(cp);
 +      if (len == 0) {
 +              kdb_printf ("invalid 'pipe', see grephelp\n");
 +              return;
 +      }
 +      /* now cp points to a nonzero length search string */
 +      if (*cp == '"') {
 +              /* allow it to be "x y z" by removing the quotes - there
 +                 must be two of them */
 +              cp++;
 +              cp2 = strchr(cp, '"');
 +              if (!cp2) {
 +                      kdb_printf ("invalid quoted string, see grephelp\n");
 +                      return;
 +              }
 +              *cp2 = '\0'; /* end the string where the 2nd " was */
 +      }
 +      kdb_grep_leading = 0;
 +      if (*cp == '^') {
 +              kdb_grep_leading = 1;
 +              cp++;
 +      }
 +      len = strlen(cp);
 +      kdb_grep_trailing = 0;
 +      if (*(cp+len-1) == '$') {
 +              kdb_grep_trailing = 1;
 +              *(cp+len-1) = '\0';
 +      }
 +      len = strlen(cp);
 +      if (!len) return;
 +      if (len >= GREP_LEN) {
 +              kdb_printf ("search string too long\n");
 +              return;
 +      }
 +      strcpy(kdb_grep_string, cp);
 +      kdb_grepping_flag++;
 +      return;
 +}
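 +
 +/* Example (illustrative): a command such as
 + *   [0]kdb> ps | grep "^init"
 + * reaches this function as '| grep "^init"'; the quotes are
 + * stripped, the leading '^' sets kdb_grep_leading (a trailing '$'
 + * would set kdb_grep_trailing), and "init" is copied into
 + * kdb_grep_string for later output matching.
 + */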
 +
 +/*
 + * kdb_parse
 + *
 + *    Parse the command line, search the command table for a
 + *    matching command and invoke the command function.
 + *    This function may be called recursively, if it is, the second call
 + *    will overwrite argv and cbuf.  It is the caller's responsibility to
 + *    save their argv if they recursively call kdb_parse().
 + *
 + * Parameters:
 + *    cmdstr  The input command line to be parsed.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    Zero for success, a kdb diagnostic if failure.
 + * Locking:
 + *    None.
 + * Remarks:
 + *    Limited to 20 tokens.
 + *
 + *    Real rudimentary tokenization. Basically only whitespace
 + *    is considered a token delimiter (but special consideration
 + *    is taken of the '=' sign as used by the 'set' command).
 + *
 + *    The algorithm used to tokenize the input string relies on
 + *    there being at least one whitespace (or otherwise useless)
 + *    character between tokens as the character immediately following
 + *    the token is altered in-place to a null-byte to terminate the
 + *    token string.
 + */
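 +
 +/* Illustrative tokenization (input hypothetical):
 + *   "set LINES=24"  ->  argv[] = { "set", "LINES", "24" }
 + * The '=' is copied into cbuf but is immediately overwritten by the
 + * terminating '\0' below, so it acts purely as a token delimiter.
 + */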
 +
 +#define MAXARGC       20
 +
 +int
 +kdb_parse(const char *cmdstr)
 +{
 +      static char *argv[MAXARGC];
 +      static int argc = 0;
 +      static char cbuf[CMD_BUFLEN+2];
 +      char *cp;
 +      char *cpp, quoted;
 +      kdbtab_t *tp;
 +      int i, escaped, ignore_errors = 0, check_grep;
 +
 +      /*
 +       * First tokenize the command string.
 +       */
 +      cp = (char *)cmdstr;
 +      kdb_grepping_flag = check_grep = 0;
 +
 +      if (KDB_FLAG(CMD_INTERRUPT)) {
 +              /* Previous command was interrupted, newline must not repeat the command */
 +              KDB_FLAG_CLEAR(CMD_INTERRUPT);
 +              argc = 0;       /* no repeat */
 +      }
 +
 +      if (*cp != '\n' && *cp != '\0') {
 +              argc = 0;
 +              cpp = cbuf;
 +              while (*cp) {
 +                      /* skip whitespace */
 +                      while (isspace(*cp)) cp++;
 +                      if ((*cp == '\0') || (*cp == '\n') || (*cp == '#' && !defcmd_in_progress))
 +                              break;
 +                      /* special case: check for | grep pattern */
 +                      if (*cp == '|') {
 +                              check_grep++;
 +                              break;
 +                      }
 +                      if (cpp >= cbuf + CMD_BUFLEN) {
 +                              kdb_printf("kdb_parse: command buffer overflow, command ignored\n%s\n", cmdstr);
 +                              return KDB_NOTFOUND;
 +                      }
 +                      if (argc >= MAXARGC - 1) {
 +                              kdb_printf("kdb_parse: too many arguments, command ignored\n%s\n", cmdstr);
 +                              return KDB_NOTFOUND;
 +                      }
 +                      argv[argc++] = cpp;
 +                      escaped = 0;
 +                      quoted = '\0';
 +                      /* Copy to next unquoted and unescaped whitespace or '=' */
 +                      while (*cp && *cp != '\n' && (escaped || quoted || !isspace(*cp))) {
 +                              if (cpp >= cbuf + CMD_BUFLEN)
 +                                      break;
 +                              if (escaped) {
 +                                      escaped = 0;
 +                                      *cpp++ = *cp++;
 +                                      continue;
 +                              }
 +                              if (*cp == '\\') {
 +                                      escaped = 1;
 +                                      ++cp;
 +                                      continue;
 +                              }
 +                              if (*cp == quoted) {
 +                                      quoted = '\0';
 +                              } else if (*cp == '\'' || *cp == '"') {
 +                                      quoted = *cp;
 +                              }
 +                              if ((*cpp = *cp++) == '=' && !quoted)
 +                                      break;
 +                              ++cpp;
 +                      }
 +                      *cpp++ = '\0';  /* Squash a ws or '=' character */
 +              }
 +      }
 +      if (!argc)
 +              return 0;
 +      if (check_grep)
 +              parse_grep(cp);
 +      if (defcmd_in_progress) {
 +              int result = kdb_defcmd2(cmdstr, argv[0]);
 +              if (!defcmd_in_progress) {
 +                      argc = 0;       /* avoid repeat on endefcmd */
 +                      *(argv[0]) = '\0';
 +              }
 +              return result;
 +      }
 +      if (argv[0][0] == '-' && argv[0][1] && (argv[0][1] < '0' || argv[0][1] > '9')) {
 +              /* a leading '-' (that is not a negative number) means
 +               * ignore any error this command returns */
 +              ignore_errors = 1;
 +              ++argv[0];
 +      }
 +
 +      for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
 +              if (tp->cmd_name) {
 +                      /*
 +                       * If this command is allowed to be abbreviated,
 +                       * check to see if this is it.
 +                       */
 +
 +                      if (tp->cmd_minlen
 +                       && (strlen(argv[0]) <= tp->cmd_minlen)) {
 +                              if (strncmp(argv[0],
 +                                          tp->cmd_name,
 +                                          tp->cmd_minlen) == 0) {
 +                                      break;
 +                              }
 +                      }
 +
 +                      if (strcmp(argv[0], tp->cmd_name)==0) {
 +                              break;
 +                      }
 +              }
 +      }
 +
 +      /*
 +       * If we don't find a command by this name, see if the first
 +       * few characters of this match any of the known commands.
 +       * e.g., md1c20 should match md.
 +       */
 +      if (i == kdb_max_commands) {
 +              for(tp=kdb_commands, i=0; i < kdb_max_commands; i++,tp++) {
 +                      if (tp->cmd_name) {
 +                              if (strncmp(argv[0],
 +                                          tp->cmd_name,
 +                                          strlen(tp->cmd_name))==0) {
 +                                      break;
 +                              }
 +                      }
 +              }
 +      }
 +
 +      if (i < kdb_max_commands) {
 +              int result;
 +              KDB_STATE_SET(CMD);
 +              result = (*tp->cmd_func)(argc-1,
 +                                     (const char**)argv);
 +              if (result && ignore_errors && result > KDB_CMD_GO)
 +                      result = 0;
 +              KDB_STATE_CLEAR(CMD);
 +              switch (tp->cmd_repeat) {
 +              case KDB_REPEAT_NONE:
 +                      argc = 0;
 +                      if (argv[0])
 +                              *(argv[0]) = '\0';
 +                      break;
 +              case KDB_REPEAT_NO_ARGS:
 +                      argc = 1;
 +                      if (argv[1])
 +                              *(argv[1]) = '\0';
 +                      break;
 +              case KDB_REPEAT_WITH_ARGS:
 +                      break;
 +              }
 +              return result;
 +      }
 +
 +      /*
 +       * If the input with which we were presented does not
 +       * map to an existing command, attempt to parse it as an
 +       * address argument and display the result.   Useful for
 +       * obtaining the address of a variable, or the nearest symbol
 +       * to an address contained in a register.
 +       */
 +      {
 +              kdb_machreg_t value;
 +              char *name = NULL;
 +              long offset;
 +              int nextarg = 0;
 +
 +              if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
 +                                &value, &offset, &name)) {
 +                      return KDB_NOTFOUND;
 +              }
 +
 +              kdb_printf("%s = ", argv[0]);
 +              kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
 +              kdb_printf("\n");
 +              return 0;
 +      }
 +}
 +
 +
 +static int
 +handle_ctrl_cmd(char *cmd)
 +{
 +#define CTRL_P        16
 +#define CTRL_N        14
 +
 +      /* initial situation: the history is empty */
 +      if (cmd_head == cmd_tail)
 +              return 0;
 +
 +      switch(*cmd) {
 +              case CTRL_P:
 +                      if (cmdptr != cmd_tail)
 +                              cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
 +                      strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
 +                      return 1;
 +              case CTRL_N:
 +                      if (cmdptr != cmd_head)
 +                              cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
 +                      strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
 +                      return 1;
 +      }
 +      return 0;
 +}
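 +
 +/* Note on the history arithmetic above: KDB_CMD_HISTORY_COUNT is a
 + * power of two, so the unsigned wrap in (cmdptr-1) % KDB_CMD_HISTORY_COUNT
 + * is safe: for cmdptr == 0, (0u - 1) % 32 == 31, i.e. the index moves
 + * to the last slot instead of going negative.
 + */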
 +
 +/*
 + * kdb_do_dump
 + *
 + *    Call the dump() function if the kernel is configured for LKCD.
 + * Inputs:
 + *    None.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    None.  dump() may or may not return.
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static void
 +kdb_do_dump(void)
 +{
 +#if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
 +      kdb_printf("Forcing dump (if configured)\n");
 +      console_loglevel = 8;   /* to see the dump messages */
 +      dump("kdb_do_dump");
 +#endif
 +}
 +
 +/*
 + * kdb_reboot
 + *
 + *    This function implements the 'reboot' command.  Reboot the system
 + *    immediately.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    Shouldn't return from this function.
 + */
 +
 +static int
 +kdb_reboot(int argc, const char **argv)
 +{
 +      emergency_restart();
 +      kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
 +      while (1) {};
 +      /* NOTREACHED */
 +      return 0;
 +}
 +
 +#ifdef CONFIG_KDB_KDUMP
 +
 +int kdb_kdump_state = KDB_KDUMP_RESET;        /* KDB kdump state */
 +
 +static int kdb_cpu(int argc, const char **argv);
 +
 +/*
 + * kdb_kdump_check
 + *
 + *    This is where the kdump on monarch cpu is handled.
 + *
 + */
 +void kdb_kdump_check(struct pt_regs *regs)
 +{
 +      if (kdb_kdump_state != KDB_KDUMP_RESET) {
 +              crash_kexec(regs);
 +
 +              /* If the call above returned then something
 +                 didn't work */
 +              kdb_printf("kdb_kdump_check: crash_kexec failed!\n");
 +              kdb_printf("  Please check if the kdump kernel has been properly loaded\n");
 +              kdb_kdump_state = KDB_KDUMP_RESET;
 +      }
 +}
 +
 +
 +/*
 + * kdb_kdump
 + *
 + *     This function implements the 'kdump' command.
 + *
 + * Inputs:
 + *     argc    argument count
 + *     argv    argument vector
 + * Outputs:
 + *     None.
 + * Returns:
 + *     zero for success, a kdb diagnostic if error
 + * Locking:
 + *     none.
 + * Remarks:
 + *     Normally does not return: on success kdb switches to the initial
 + *     cpu and crash_kexec() takes over.
 + */
 +
 +static int
 +kdb_kdump(int argc, const char **argv)
 +{
 +      char cpu_id[6];         /* up to 99,999 cpus */
 +      const char *cpu_argv[] = {NULL, cpu_id, NULL};
 +      int ret;
 +
 +      kdb_kdump_state = KDB_KDUMP_KDUMP;
 +      /* Switch back to the initial cpu before processing the kdump command */
 +      if (smp_processor_id() != kdb_initial_cpu) {
 +              sprintf(cpu_id, "%d", kdb_initial_cpu);
 +              ret = kdb_cpu(1, cpu_argv);
 +              if (ret != KDB_CMD_CPU) {
 +                      kdb_printf("kdump: Failed to switch to initial cpu %d;"
 +                              " aborted\n", kdb_initial_cpu);
 +                      kdb_kdump_state = KDB_KDUMP_RESET;
 +              }
 +      } else
 +              ret = KDB_CMD_CPU;
 +
 +      return ret;
 +}
 +
 +#endif /* CONFIG_KDB_KDUMP */
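 +
 +/* Illustrative kdump flow (cpu numbers hypothetical):
 + *   "kdump" entered on cpu 2 while kdb_initial_cpu == 0:
 + *     kdb_kdump_state = KDB_KDUMP_KDUMP
 + *     kdb_cpu(1, {NULL, "0", NULL}) returns KDB_CMD_CPU, switching to cpu 0
 + *     on re-entry, kdb_kdump_check() calls crash_kexec(regs)
 + */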
 +
 +static int
 +kdb_quiet(int reason)
 +{
 +      return (reason == KDB_REASON_CPU_UP || reason == KDB_REASON_SILENT);
 +}
 +
 +/*
 + * kdb_local
 + *
 + *    The main code for kdb.  This routine is invoked on a specific
 + *    processor, it is not global.  The main kdb() routine ensures
 + *    that only one processor at a time is in this routine.  This
 + *    code is called with the real reason code on the first entry
 + *    to a kdb session, thereafter it is called with reason SWITCH,
 + *    even if the user goes back to the original cpu.
 + *
 + * Inputs:
 + *    reason          The reason KDB was invoked
 + *    error           The hardware-defined error code
 + *    regs            The exception frame at time of fault/breakpoint.  NULL
 + *                    for reason SILENT or CPU_UP, otherwise valid.
 + *    db_result       Result code from the break or debug point.
 + * Returns:
 + *    0       KDB was invoked for an event for which it was not responsible
 + *    1       KDB handled the event for which it was invoked.
 + *    KDB_CMD_GO      User typed 'go'.
 + *    KDB_CMD_CPU     User switched to another cpu.
 + *    KDB_CMD_SS      Single step.
 + *    KDB_CMD_SSB     Single step until branch.
 + * Locking:
 + *    none
 + * Remarks:
 + *    none
 + */
 +
 +static int
 +kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_dbtrap_t db_result)
 +{
 +      char *cmdbuf;
 +      int diag;
 +      struct task_struct *kdb_current = kdb_curr_task(smp_processor_id());
 +
 +#ifdef CONFIG_KDB_KDUMP
 +      kdb_kdump_check(regs);
 +#endif
 +
 +      /* If kdb has been entered for an event which has been/will be
 +       * recovered then silently return.  We have to get this far into kdb in
 +       * order to synchronize all the cpus, typically only one cpu (monarch)
 +       * knows that the event is recoverable but the other cpus (slaves) may
 +       * also be driven into kdb before that decision is made by the monarch.
 +       *
 +       * To pause in kdb even for recoverable events, 'set RECOVERY_PAUSE 1'
 +       */
 +      KDB_DEBUG_STATE("kdb_local 1", reason);
 +      if (reason == KDB_REASON_ENTER
 +          && KDB_FLAG(RECOVERY)
 +          && !KDB_FLAG(CATASTROPHIC)) {
 +              int recovery_pause = 0;
 +              kdbgetintenv("RECOVERY_PAUSE", &recovery_pause);
 +              if (recovery_pause == 0)
 +                      reason = KDB_REASON_SILENT;
 +              else
 +                      kdb_printf("%s: Recoverable error detected but"
 +                                 " RECOVERY_PAUSE is set, staying in KDB\n",
 +                                 __FUNCTION__);
 +      }
 +
 +      KDB_DEBUG_STATE("kdb_local 2", reason);
 +      kdb_go_count = 0;
 +      if (kdb_quiet(reason)) {
 +              /* no message */
 +      } else if (reason == KDB_REASON_DEBUG) {
 +              /* special case below */
 +      } else {
 +              kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", kdb_current, kdb_current->pid);
 +#if defined(CONFIG_SMP)
 +              kdb_printf("on processor %d ", smp_processor_id());
 +#endif
 +      }
 +
 +      switch (reason) {
 +      case KDB_REASON_DEBUG:
 +      {
 +              /*
 +               * If re-entering kdb after a single step
 +               * command, don't print the message.
 +               */
 +              switch(db_result) {
 +              case KDB_DB_BPT:
 +                      kdb_printf("\nEntering kdb (0x%p, pid %d) ", kdb_current, kdb_current->pid);
 +#if defined(CONFIG_SMP)
 +                      kdb_printf("on processor %d ", smp_processor_id());
 +#endif
 +                      kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
 +                      break;
 +              case KDB_DB_SSB:
 +                      /*
 +                       * In the midst of ssb command. Just return.
 +                       */
 +                      KDB_DEBUG_STATE("kdb_local 3", reason);
 +                      return KDB_CMD_SSB;     /* Continue with SSB command */
 +              case KDB_DB_SS:
 +                      break;
 +              case KDB_DB_SSBPT:
 +                      KDB_DEBUG_STATE("kdb_local 4", reason);
 +                      return 1;       /* kdba_db_trap did the work */
 +              default:
 +                      kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
 +                                 db_result);
 +                      break;
 +              }
 +
 +      }
 +              break;
 +      case KDB_REASON_ENTER:
 +              if (KDB_STATE(KEYBOARD))
 +                      kdb_printf("due to Keyboard Entry\n");
 +              else
 +                      kdb_printf("due to KDB_ENTER()\n");
 +              break;
 +      case KDB_REASON_KEYBOARD:
 +              KDB_STATE_SET(KEYBOARD);
 +              kdb_printf("due to Keyboard Entry\n");
 +              break;
 +      case KDB_REASON_ENTER_SLAVE:    /* drop through, slaves only get released via cpu switch */
 +      case KDB_REASON_SWITCH:
 +              kdb_printf("due to cpu switch\n");
 +              if (KDB_STATE(GO_SWITCH)) {
 +                      KDB_STATE_CLEAR(GO_SWITCH);
 +                      KDB_DEBUG_STATE("kdb_local 5", reason);
 +                      return KDB_CMD_GO;
 +              }
 +              break;
 +      case KDB_REASON_OOPS:
 +              kdb_printf("Oops: %s\n", kdb_diemsg);
 +              kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
 +              kdba_dumpregs(regs, NULL, NULL);
 +              break;
 +      case KDB_REASON_NMI:
 +              kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n",
 +                        kdba_getpc(regs));
 +              kdba_dumpregs(regs, NULL, NULL);
 +              break;
 +      case KDB_REASON_BREAK:
 +              kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
 +              /*
 +               * Determine if this breakpoint is one that we
 +               * are interested in.
 +               */
 +              if (db_result != KDB_DB_BPT) {
 +                      kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result);
 +                      KDB_DEBUG_STATE("kdb_local 6", reason);
 +                      return 0;       /* Not for us, dismiss it */
 +              }
 +              break;
 +      case KDB_REASON_RECURSE:
 +              kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
 +              break;
 +      case KDB_REASON_CPU_UP:
 +      case KDB_REASON_SILENT:
 +              KDB_DEBUG_STATE("kdb_local 7", reason);
 +              if (reason == KDB_REASON_CPU_UP)
 +                      kdba_cpu_up();
 +              return KDB_CMD_GO;      /* Silent entry, silent exit */
 +      default:
 +              kdb_printf("kdb: unexpected reason code: %d\n", reason);
 +              KDB_DEBUG_STATE("kdb_local 8", reason);
 +              return 0;       /* Not for us, dismiss it */
 +      }
 +
 +      kdba_local_arch_setup();
 +
 +      kdba_set_current_task(kdb_current);
 +
 +      while (1) {
 +              /*
 +               * Initialize pager context.
 +               */
 +              kdb_nextline = 1;
 +              KDB_STATE_CLEAR(SUPPRESS);
 +#ifdef kdba_setjmp
 +              /*
 +               * Use kdba_setjmp/kdba_longjmp to break out of
 +               * the pager early and to attempt to recover from kdb errors.
 +               */
 +              KDB_STATE_CLEAR(LONGJMP);
 +              if (kdbjmpbuf) {
 +                      if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) {
 +                              /* Command aborted (usually in pager) */
 +                              continue;
 +                      }
 +                      else
 +                              KDB_STATE_SET(LONGJMP);
 +              }
 +#endif        /* kdba_setjmp */
 +
 +              cmdbuf = cmd_cur;
 +              *cmdbuf = '\0';
 +              *(cmd_hist[cmd_head])='\0';
 +
 +              if (KDB_FLAG(ONLY_DO_DUMP)) {
 +                      /* kdb is off but a catastrophic error requires a dump.
 +                       * Take the dump and reboot.
 +                       * Turn on logging so the kdb output appears in the log
 +                       * buffer in the dump.
 +                       */
 +                      const char *setargs[] = { "set", "LOGGING", "1" };
 +                      kdb_set(2, setargs);
 +                      kdb_do_dump();
 +                      kdb_reboot(0, NULL);
 +                      /*NOTREACHED*/
 +              }
 +
 +do_full_getstr:
 +#if defined(CONFIG_SMP)
 +              snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), smp_processor_id());
 +#else
 +              snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
 +#endif
 +              if (defcmd_in_progress) {
 +                      /* bound strncat by the space remaining, not the buffer size */
 +                      strncat(kdb_prompt_str, "[defcmd]",
 +                              CMD_BUFLEN - strlen(kdb_prompt_str) - 1);
 +              }
 +
 +              /*
 +               * Fetch command from keyboard
 +               */
 +              cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
 +              if (*cmdbuf != '\n') {
 +                      if (*cmdbuf < 32) {
 +                              if (cmdptr == cmd_head) {
 +                                      strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
 +                                      *(cmd_hist[cmd_head]+strlen(cmd_hist[cmd_head])-1) = '\0';
 +                              }
 +                              if (!handle_ctrl_cmd(cmdbuf))
 +                                      *(cmd_cur+strlen(cmd_cur)-1) = '\0';
 +                              cmdbuf = cmd_cur;
 +                              goto do_full_getstr;
 +                      } else
 +                              strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
 +
 +                      cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
 +                      if (cmd_head == cmd_tail)
 +                              cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
 +              }
 +
 +              cmdptr = cmd_head;
 +              diag = kdb_parse(cmdbuf);
 +              if (diag == KDB_NOTFOUND) {
 +                      kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
 +                      diag = 0;
 +              }
 +              if (diag == KDB_CMD_GO
 +               || diag == KDB_CMD_CPU
 +               || diag == KDB_CMD_SS
 +               || diag == KDB_CMD_SSB)
 +                      break;
 +
 +              if (diag)
 +                      kdb_cmderror(diag);
 +      }
 +
 +      kdba_local_arch_cleanup();
 +
 +      KDB_DEBUG_STATE("kdb_local 9", diag);
 +      return diag;
 +}
 +
 +
 +/*
 + * kdb_print_state
 + *
 + *    Print the state data for the current processor for debugging.
 + *
 + * Inputs:
 + *    text            Identifies the debug point
 + *    value           Any integer value to be printed, e.g. reason code.
 + * Returns:
 + *    None.
 + * Locking:
 + *    none
 + * Remarks:
 + *    none
 + */
 +
 +void kdb_print_state(const char *text, int value)
 +{
 +      kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
 +              text, smp_processor_id(), value, kdb_initial_cpu, kdb_state[smp_processor_id()]);
 +}
 +
 +/*
 + * kdb_previous_event
 + *
 + *    Return a count of cpus that are leaving kdb, i.e. the number
 + *    of processors that are still handling the previous kdb event.
 + *
 + * Inputs:
 + *    None.
 + * Returns:
 + *    Count of cpus in previous event.
 + * Locking:
 + *    none
 + * Remarks:
 + *    none
 + */
 +
 +static int
 +kdb_previous_event(void)
 +{
 +      int i, leaving = 0;
 +      for (i = 0; i < NR_CPUS; ++i) {
 +              if (KDB_STATE_CPU(LEAVING, i))
 +                      ++leaving;
 +      }
 +      return leaving;
 +}
 +
 +/*
 + * kdb_wait_for_cpus
 + *
 + * Invoked once at the start of a kdb event, from the controlling cpu.  Wait a
 + * short period for the other cpus to enter kdb state.
 + *
 + * Inputs:
 + *    none
 + * Returns:
 + *    none
 + * Locking:
 + *    none
 + * Remarks:
 + *    none
 + */
 +
 +int kdb_wait_for_cpus_secs;
 +
 +static void
 +kdb_wait_for_cpus(void)
 +{
 +#ifdef        CONFIG_SMP
 +      int online = 0, kdb_data = 0, prev_kdb_data = 0, c, time;
 +      mdelay(100);
 +      for (time = 0; time < kdb_wait_for_cpus_secs; ++time) {
 +              online = 0;
 +              kdb_data = 0;
 +              for_each_online_cpu(c) {
 +                      ++online;
 +                      if (kdb_running_process[c].seqno >= kdb_seqno - 1)
 +                              ++kdb_data;
 +              }
 +              if (online == kdb_data)
 +                      break;
 +              if (prev_kdb_data != kdb_data) {
 +                      kdb_nextline = 0;       /* no prompt yet */
 +                      kdb_printf("  %d out of %d cpus in kdb, waiting for the rest, timeout in %d second(s)\n",
 +                              kdb_data, online, kdb_wait_for_cpus_secs - time);
 +                      prev_kdb_data = kdb_data;
 +              }
 +              touch_nmi_watchdog();
 +              mdelay(1000);
 +              /* Architectures may want to send a more forceful interrupt */
 +              if (time == min(kdb_wait_for_cpus_secs / 2, 5))
 +                      kdba_wait_for_cpus();
 +              if (time % 4 == 0)
 +                      kdb_printf(".");
 +      }
 +      if (time) {
 +              int wait = online - kdb_data;
 +              if (wait == 0)
 +                      kdb_printf("All cpus are now in kdb\n");
 +              else
 +                      kdb_printf("%d cpu%s not in kdb, %s state is unknown\n",
 +                                      wait,
 +                                      wait == 1 ? " is" : "s are",
 +                                      wait == 1 ? "its" : "their");
 +      }
 +      /* give back the vector we took over in smp_kdb_stop */
 +      kdba_giveback_vector(KDB_VECTOR);
 +#endif        /* CONFIG_SMP */
 +}
 +
 +/*
 + * kdb_main_loop
 + *
 + * The main kdb loop.  After initial setup and assignment of the controlling
 + * cpu, all cpus are in this loop.  One cpu is in control and will issue the kdb
 + * prompt, the others will spin until 'go' or cpu switch.
 + *
 + * To get a consistent view of the kernel stacks for all processes, this routine
 + * is invoked from the main kdb code via an architecture specific routine.
 + * kdba_main_loop is responsible for making the kernel stacks consistent for all
 + * processes, there should be no difference between a blocked process and a
 + * running process as far as kdb is concerned.
 + *
 + * Inputs:
 + *    reason          The reason KDB was invoked
 + *    error           The hardware-defined error code
 + *    reason2         kdb's current reason code.  Initially error but can change
 + *                    according to kdb state.
 + *    db_result       Result code from break or debug point.
 + *    regs            The exception frame at time of fault/breakpoint.  If reason
 + *                    is SILENT or CPU_UP then regs is NULL, otherwise it
 + *                    should always be valid.
 + * Returns:
 + *    0       KDB was invoked for an event for which it was not responsible
 + *    1       KDB handled the event for which it was invoked.
 + * Locking:
 + *    none
 + * Remarks:
 + *    none
 + */
 +
 +int
 +kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
 +            kdb_dbtrap_t db_result, struct pt_regs *regs)
 +{
 +      int result = 1;
 +      /* Stay in kdb() until 'go', 'ss[b]' or an error */
 +      while (1) {
 +              /*
 +               * All processors except the one that is in control
 +               * will spin here.
 +               */
 +              KDB_DEBUG_STATE("kdb_main_loop 1", reason);
 +              while (KDB_STATE(HOLD_CPU)) {
 +                      /* state KDB is turned off by kdb_cpu to see if the
 +                       * other cpus are still live, each cpu in this loop
 +                       * turns it back on.
 +                       */
 +                      if (!KDB_STATE(KDB)) {
 +                              KDB_STATE_SET(KDB);
 +                      }
 +
 +#if defined(CONFIG_KDB_KDUMP)
 +                      if (KDB_STATE(KEXEC)) {
 +                              struct pt_regs r;
 +                              if (regs == NULL)
 +                                      regs = &r;
 +
 +                              kdba_kdump_shutdown_slave(regs);
 +                              return 0;
 +                      }
 +#endif
 +              }
 +
 +              KDB_STATE_CLEAR(SUPPRESS);
 +              KDB_DEBUG_STATE("kdb_main_loop 2", reason);
 +              if (KDB_STATE(LEAVING))
 +                      break;  /* Another cpu said 'go' */
 +
 +              if (!kdb_quiet(reason))
 +                      kdb_wait_for_cpus();
 +              /* Still using kdb, this processor is in control */
 +              result = kdb_local(reason2, error, regs, db_result);
 +              KDB_DEBUG_STATE("kdb_main_loop 3", result);
 +
 +              if (result == KDB_CMD_CPU) {
 +                      /* Cpu switch, hold the current cpu, release the target one. */
 +                      reason2 = KDB_REASON_SWITCH;
 +                      KDB_STATE_SET(HOLD_CPU);
 +                      KDB_STATE_CLEAR_CPU(HOLD_CPU, kdb_new_cpu);
 +                      continue;
 +              }
 +
 +              if (result == KDB_CMD_SS) {
 +                      KDB_STATE_SET(DOING_SS);
 +                      break;
 +              }
 +
 +              if (result == KDB_CMD_SSB) {
 +                      KDB_STATE_SET(DOING_SS);
 +                      KDB_STATE_SET(DOING_SSB);
 +                      break;
 +              }
 +
 +              if (result && result != 1 && result != KDB_CMD_GO)
 +                      kdb_printf("\nUnexpected kdb_local return code %d\n", result);
 +
 +              KDB_DEBUG_STATE("kdb_main_loop 4", reason);
 +              break;
 +      }
 +      if (KDB_STATE(DOING_SS))
 +              KDB_STATE_CLEAR(SSBPT);
 +      return result;
 +}
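 +
 +/* Illustrative two-cpu switch through the loop above (cpu numbers
 + * hypothetical):
 + *   cpu0: kdb_local() returns KDB_CMD_CPU after "cpu 1"
 + *         -> HOLD_CPU set on cpu0, cleared on cpu1, reason2 = SWITCH
 + *   cpu1: leaves the HOLD_CPU spin and runs kdb_local(KDB_REASON_SWITCH)
 + *   cpu0: spins in the HOLD_CPU loop until released or LEAVING is set
 + */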
 +
 +/* iapc_boot_arch was defined in ACPI 2.0, FADT revision 3 onwards.  For any
 + * FADT prior to revision 3, we have to assume that we have an i8042 I/O
 + * device.  ACPI initialises after kdb initialises but before kdb is used,
 + * so check iapc_boot_arch on each entry to kdb.
 + */
 +static void
 +kdb_check_i8042(void)
 +{
 +      KDB_FLAG_CLEAR(NO_I8042);
 +#ifdef        CONFIG_ACPI
 +      if (acpi_gbl_FADT.header.revision >= 3 &&
 +          (acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) == 0)
 +              KDB_FLAG_SET(NO_I8042);
 +#endif        /* CONFIG_ACPI */
 +}
 +
 +/*
 + * kdb
 + *
 + *    This function is the entry point for the kernel debugger.  It
 + *    provides a command parser and associated support functions to
 + *    allow examination and control of an active kernel.
 + *
 + *    The breakpoint trap code should invoke this function with
 + *    one of KDB_REASON_BREAK (int 03) or KDB_REASON_DEBUG (debug register)
 + *
 + *    the die_if_kernel function should invoke this function with
 + *    KDB_REASON_OOPS.
 + *
 + *    In single step mode, one cpu is released to run without
 + *    breakpoints.   Interrupts and NMI are reset to their original values,
 + *    the cpu is allowed to do one instruction which causes a trap
 + *    into kdb with KDB_REASON_DEBUG.
 + *
 + * Inputs:
 + *    reason          The reason KDB was invoked
 + *    error           The hardware-defined error code
 + *    regs            The exception frame at time of fault/breakpoint.  If reason
 + *                    is SILENT or CPU_UP then regs is NULL, otherwise it
 + *                    should always be valid.
 + * Returns:
 + *    0       KDB was invoked for an event for which it was not responsible
 + *    1       KDB handled the event for which it was invoked.
 + * Locking:
 + *    none
 + * Remarks:
 + *    No assumptions of system state.  This function may be invoked
 + *    with arbitrary locks held.  It stops all other processors in an
 + *    SMP environment, disables all interrupts, and does not use the
 + *    operating system's keyboard driver.
 + *
 + *    This code is reentrant but only for cpu switch.  Any other
 + *    reentrancy is an error, although kdb will attempt to recover.
 + *
 + *    At the start of a kdb session the initial processor is running
 + *    kdb() and the other processors can be doing anything.  When the
 + *    initial processor calls smp_kdb_stop() the other processors are
 + *    driven through kdb_ipi which calls kdb() with reason SWITCH.
 + *    That brings all processors into this routine, one with a "real"
 + *    reason code, the other with SWITCH.
 + *
 + *    Because the other processors are driven via smp_kdb_stop(),
 + *    they enter here from the NMI handler.  Until the other
 + *    processors exit from here and exit from kdb_ipi, they will not
 + *    take any more NMI requests.  The initial cpu will still take NMI.
 + *
 + *    Multiple race and reentrancy conditions exist, each with a
 + *    different avoidance mechanism.
 + *
 + *    Two cpus hit debug points at the same time.
 + *
 + *      kdb_lock and kdb_initial_cpu ensure that only one cpu gets
 + *      control of kdb.  The others spin on kdb_initial_cpu until
 + *      they are driven through NMI into kdb_ipi.  When the initial
 + *      cpu releases the others from NMI, they resume trying to get
 + *      kdb_initial_cpu to start a new event.
 + *
 + *    A cpu is released from kdb and starts a new event before the
 + *    original event has completely ended.
 + *
 + *      kdb_previous_event() prevents any cpu from entering
 + *      kdb_initial_cpu state until the previous event has completely
 + *      ended on all cpus.
 + *
 + *    An exception occurs inside kdb.
 + *
 + *      kdb_initial_cpu detects recursive entry to kdb and attempts
 + *      to recover.  The recovery uses longjmp() which means that
 + *      recursive calls to kdb never return.  Beware of assumptions
 + *      like
 + *
 + *        ++depth;
 + *        kdb();
 + *        --depth;
 + *
 + *      If the kdb call is recursive then longjmp takes over and
 + *      --depth is never executed.
 + *
 + *    NMI handling.
 + *
 + *      NMI handling is tricky.  The initial cpu is invoked by some kdb event,
 + *      this event could be NMI driven but usually is not.  The other cpus are
 + *      driven into kdb() via kdb_ipi which uses NMI so at the start the other
 + *      cpus will not accept NMI.  Some operations such as SS release one cpu
 + *      but hold all the others.  Releasing a cpu means it drops back to
 + *      whatever it was doing before the kdb event, this means it drops out of
 + *      kdb_ipi and hence out of NMI status.  But the software watchdog uses
 + *      NMI and we do not want spurious watchdog calls into kdb.  kdba_read()
 + *      resets the watchdog counters in its input polling loop; while a kdb
 + *      command is running, it is subject to NMI watchdog events.
 + *
 + *      Another problem with NMI handling is the NMI used to drive the other
 + *      cpus into kdb cannot be distinguished from the watchdog NMI.  State
 + *      flag WAIT_IPI indicates that a cpu is waiting for NMI via kdb_ipi,
 + *      if not set then software NMI is ignored by kdb_ipi.
 + *
 + *    Cpu switching.
 + *
 + *      All cpus are in kdb (or they should be), all but one are
 + *      spinning on KDB_STATE(HOLD_CPU).  Only one cpu is not in
 + *      HOLD_CPU state, only that cpu can handle commands.
 + *
 + *    Go command entered.
 + *
 + *      If necessary, go will switch to the initial cpu first.  If the event
 + *      was caused by a software breakpoint (assumed to be global) that
 + *      requires single-step to get over the breakpoint then only release the
 + *      initial cpu, after the initial cpu has single-stepped the breakpoint
 + *      then release the rest of the cpus.  If SSBPT is not required then
 + *      release all the cpus at once.
 + */
 +
 +int
 +kdb(kdb_reason_t reason, int error, struct pt_regs *regs)
 +{
 +      kdb_intstate_t int_state;       /* Interrupt state */
 +      kdb_reason_t reason2 = reason;
 +      int result = 0; /* Default is kdb did not handle it */
 +      int ss_event, old_regs_saved = 0;
 +      struct pt_regs *old_regs = NULL;
 +      kdb_dbtrap_t db_result=KDB_DB_NOBPT;
 +      preempt_disable();
 +      atomic_inc(&kdb_event);
 +
 +      switch(reason) {
 +      case KDB_REASON_OOPS:
 +      case KDB_REASON_NMI:
 +              KDB_FLAG_SET(CATASTROPHIC);     /* kernel state is dubious now */
 +              break;
 +      default:
 +              break;
 +      }
 +      switch(reason) {
 +      case KDB_REASON_ENTER:
 +      case KDB_REASON_ENTER_SLAVE:
 +      case KDB_REASON_BREAK:
 +      case KDB_REASON_DEBUG:
 +      case KDB_REASON_OOPS:
 +      case KDB_REASON_SWITCH:
 +      case KDB_REASON_KEYBOARD:
 +      case KDB_REASON_NMI:
 +              if (regs && regs != get_irq_regs()) {
 +                      old_regs = set_irq_regs(regs);
 +                      old_regs_saved = 1;
 +              }
 +              break;
 +      default:
 +              break;
 +      }
 +      if (kdb_continue_catastrophic > 2) {
 +              kdb_printf("kdb_continue_catastrophic is out of range, setting to 2\n");
 +              kdb_continue_catastrophic = 2;
 +      }
 +      if (!kdb_on && KDB_FLAG(CATASTROPHIC) && kdb_continue_catastrophic == 2) {
 +              KDB_FLAG_SET(ONLY_DO_DUMP);
 +      }
 +      if (!kdb_on && !KDB_FLAG(ONLY_DO_DUMP))
 +              goto out;
 +
 +      KDB_DEBUG_STATE("kdb 1", reason);
 +      KDB_STATE_CLEAR(SUPPRESS);
 +
 +      /* Filter out userspace breakpoints first, no point in doing all
 +       * the kdb smp fiddling when it is really a gdb trap.
 +       * Save the single step status first, kdba_db_trap clears ss status.
 +       * kdba_b[dp]_trap sets SSBPT if required.
 +       */
 +      ss_event = KDB_STATE(DOING_SS) || KDB_STATE(SSBPT);
 +#ifdef  CONFIG_CPU_XSCALE
 +      if (KDB_STATE(A_XSC_ICH)) {
 +              /* restore changed I_BIT */
 +              KDB_STATE_CLEAR(A_XSC_ICH);
 +              kdba_restore_retirq(regs, KDB_STATE(A_XSC_IRQ));
 +              if (!ss_event)
 +                      kdb_printf("Unexpected IRQ bit change outside single step\n");
 +      }
 +#endif
 +      if (reason == KDB_REASON_BREAK) {
 +              db_result = kdba_bp_trap(regs, error);  /* Only call this once */
 +      }
 +      if (reason == KDB_REASON_DEBUG) {
 +              db_result = kdba_db_trap(regs, error);  /* Only call this once */
 +      }
 +
 +      if ((reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
 +       && db_result == KDB_DB_NOBPT) {
 +              KDB_DEBUG_STATE("kdb 2", reason);
 +              goto out;       /* Not one of mine */
 +      }
 +
 +      /* Turn off single step if it was being used */
 +      if (ss_event) {
 +              kdba_clearsinglestep(regs);
 +              /* Single step after a breakpoint removes the need for a delayed reinstall */
 +              if (reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
 +                      KDB_STATE_CLEAR(SSBPT);
 +      }
 +
 +      /* kdb can validly reenter, but only under certain well-defined conditions */
 +      if (reason == KDB_REASON_DEBUG
 +       && !KDB_STATE(HOLD_CPU)
 +       && ss_event)
 +              KDB_STATE_SET(REENTRY);
 +      else
 +              KDB_STATE_CLEAR(REENTRY);
 +
 +      /* Wait for previous kdb event to completely exit before starting
 +       * a new event.
 +       */
 +      while (kdb_previous_event())
 +              ;
 +      KDB_DEBUG_STATE("kdb 3", reason);
 +
 +      /*
 +       * If kdb is already active, print a message and try to recover.
 +       * If recovery is not possible and recursion is allowed or
 +       * forced recursion without recovery is set then try to recurse
 +       * in kdb.  Not guaranteed to work but it makes an attempt at
 +       * debugging the debugger.
 +       */
 +      if (reason != KDB_REASON_SWITCH &&
 +          reason != KDB_REASON_ENTER_SLAVE) {
 +              if (KDB_IS_RUNNING() && !KDB_STATE(REENTRY)) {
 +                      int recover = 1;
 +                      unsigned long recurse = 0;
 +                      kdb_printf("kdb: Debugger re-entered on cpu %d, new reason = %d\n",
 +                              smp_processor_id(), reason);
 +                      /* Should only re-enter from released cpu */
 +
 +                      if (KDB_STATE(HOLD_CPU)) {
 +                              kdb_printf("     Strange, cpu %d should not be running\n", smp_processor_id());
 +                              recover = 0;
 +                      }
 +                      if (!KDB_STATE(CMD)) {
 +                              kdb_printf("     Not executing a kdb command\n");
 +                              recover = 0;
 +                      }
 +                      if (!KDB_STATE(LONGJMP)) {
 +                              kdb_printf("     No longjmp available for recovery\n");
 +                              recover = 0;
 +                      }
 +                      kdbgetulenv("RECURSE", &recurse);
 +                      if (recurse > 1) {
 +                              kdb_printf("     Forced recursion is set\n");
 +                              recover = 0;
 +                      }
 +                      if (recover) {
 +                              kdb_printf("     Attempting to abort command and recover\n");
 +#ifdef kdba_setjmp
 +                              kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 0);
 +#endif        /* kdba_setjmp */
 +                      }
 +                      if (recurse) {
 +                              if (KDB_STATE(RECURSE)) {
 +                                      kdb_printf("     Already in recursive mode\n");
 +                              } else {
 +                                      kdb_printf("     Attempting recursive mode\n");
 +                                      KDB_STATE_SET(RECURSE);
 +                                      KDB_STATE_SET(REENTRY);
 +                                      reason2 = KDB_REASON_RECURSE;
 +                                      recover = 1;
 +                              }
 +                      }
 +                      if (!recover) {
 +                              kdb_printf("     Cannot recover, allowing event to proceed\n");
 +                              /* temporary: wait here until the running instance exits */
 +                              while (KDB_IS_RUNNING())
 +                                      cpu_relax();
 +                              goto out;
 +                      }
 +              }
 +      } else if (reason == KDB_REASON_SWITCH && !KDB_IS_RUNNING()) {
 +              kdb_printf("kdb: CPU switch without kdb running, I'm confused\n");
 +              goto out;
 +      }
 +
 +      /*
 +       * Disable interrupts, breakpoints etc. on this processor
 +       * during kdb command processing
 +       */
 +      KDB_STATE_SET(KDB);
 +      kdba_disableint(&int_state);
 +      if (!KDB_STATE(KDB_CONTROL)) {
 +              kdb_bp_remove_local();
 +              KDB_STATE_SET(KDB_CONTROL);
 +      }
 +
 +      /*
 +       * If not entering the debugger due to CPU switch or single step
 +       * reentry, serialize access here.
 +       * The processors may race getting to this point - if,
 +       * for example, more than one processor hits a breakpoint
 +       * at the same time.   We'll serialize access to kdb here -
 +       * other processors will loop here, and the NMI from the stop
 +       * IPI will take them into kdb as switch candidates.  Once
 +       * the initial processor releases the debugger, the rest of
 +       * the processors will race for it.
 +       *
 +       * The above describes the normal state of affairs, where two or more
 +       * cpus that are entering kdb at the "same" time are assumed to be for
 +       * separate events.  However some processes such as ia64 MCA/INIT will
 +       * drive all the cpus into error processing at the same time.  For that
 +       * case, all of the cpus entering kdb at the "same" time are really a
 +       * single event.
 +       *
 +       * That case is handled by the use of KDB_ENTER by one cpu (the
 +       * monarch) and KDB_ENTER_SLAVE on the other cpus (the slaves).
 +       * KDB_ENTER_SLAVE maps to KDB_REASON_ENTER_SLAVE.  The slave events
 +       * will be treated as if they had just responded to the kdb IPI, i.e.
 +       * as if they were KDB_REASON_SWITCH.
 +       *
 +       * Because of races across multiple cpus, ENTER_SLAVE can occur before
 +       * the main ENTER.   Hold up ENTER_SLAVE here until the main ENTER
 +       * arrives.
 +       */
 +
 +      if (reason == KDB_REASON_ENTER_SLAVE) {
 +              spin_lock(&kdb_lock);
 +              while (!KDB_IS_RUNNING()) {
 +                      spin_unlock(&kdb_lock);
 +                      while (!KDB_IS_RUNNING())
 +                              cpu_relax();
 +                      spin_lock(&kdb_lock);
 +              }
 +              reason = KDB_REASON_SWITCH;
 +              KDB_STATE_SET(HOLD_CPU);
 +              spin_unlock(&kdb_lock);
 +      }
 +
 +      if (reason == KDB_REASON_SWITCH || KDB_STATE(REENTRY))
 +              ;       /* drop through */
 +      else {
 +              KDB_DEBUG_STATE("kdb 4", reason);
 +              spin_lock(&kdb_lock);
 +              while (KDB_IS_RUNNING() || kdb_previous_event()) {
 +                      spin_unlock(&kdb_lock);
 +                      while (KDB_IS_RUNNING() || kdb_previous_event())
 +                              cpu_relax();
 +                      spin_lock(&kdb_lock);
 +              }
 +              KDB_DEBUG_STATE("kdb 5", reason);
 +
 +              kdb_initial_cpu = smp_processor_id();
 +              ++kdb_seqno;
 +              spin_unlock(&kdb_lock);
 +              if (!kdb_quiet(reason))
 +                      notify_die(DIE_KDEBUG_ENTER, "KDEBUG ENTER", regs, error, 0, 0);
 +      }
 +
 +      if (smp_processor_id() == kdb_initial_cpu
 +       && !KDB_STATE(REENTRY)) {
 +              KDB_STATE_CLEAR(HOLD_CPU);
 +              KDB_STATE_CLEAR(WAIT_IPI);
 +              kdb_check_i8042();
 +              /*
 +               * Remove the global breakpoints.  This is only done
 +               * once from the initial processor on initial entry.
 +               */
 +              if (!kdb_quiet(reason) || smp_processor_id() == 0)
 +                      kdb_bp_remove_global();
 +
 +              /*
 +               * If SMP, stop other processors.  The other processors
 +               * will enter kdb() with KDB_REASON_SWITCH and spin in
 +               * kdb_main_loop().
 +               */
 +              KDB_DEBUG_STATE("kdb 6", reason);
 +              if (NR_CPUS > 1 && !kdb_quiet(reason)) {
 +                      int i;
 +                      for (i = 0; i < NR_CPUS; ++i) {
 +                              if (!cpu_online(i))
 +                                      continue;
 +                              if (i != kdb_initial_cpu) {
 +                                      KDB_STATE_SET_CPU(HOLD_CPU, i);
 +                                      KDB_STATE_SET_CPU(WAIT_IPI, i);
 +                              }
 +                      }
 +                      KDB_DEBUG_STATE("kdb 7", reason);
 +                      smp_kdb_stop();
 +                      KDB_DEBUG_STATE("kdb 8", reason);
 +              }
 +      }
 +
 +      if (KDB_STATE(GO1)) {
 +              kdb_bp_remove_global();         /* They were set for single-step purposes */
 +              KDB_STATE_CLEAR(GO1);
 +              reason = KDB_REASON_SILENT;     /* Now silently go */
 +      }
 +
 +      /* Set up a consistent set of process stacks before talking to the user */
 +      KDB_DEBUG_STATE("kdb 9", result);
 +      result = kdba_main_loop(reason, reason2, error, db_result, regs);
 +      reason = reason2;       /* back to original event type */
 +
 +      KDB_DEBUG_STATE("kdb 10", result);
 +      kdba_adjust_ip(reason, error, regs);
 +      KDB_STATE_CLEAR(LONGJMP);
 +      KDB_DEBUG_STATE("kdb 11", result);
 +      /* go which requires single-step over a breakpoint must only release
 +       * one cpu.
 +       */
 +      if (result == KDB_CMD_GO && KDB_STATE(SSBPT))
 +              KDB_STATE_SET(GO1);
 +
 +      if (smp_processor_id() == kdb_initial_cpu &&
 +        !KDB_STATE(DOING_SS) &&
 +        !KDB_STATE(RECURSE)) {
 +              /*
 +               * (Re)install the global breakpoints and cleanup the cached
 +               * symbol table.  This is only done once from the initial
 +               * processor on go.
 +               */
 +              KDB_DEBUG_STATE("kdb 12", reason);
 +              if (!kdb_quiet(reason) || smp_processor_id() == 0) {
 +                      kdb_bp_install_global(regs);
 +                      kdbnearsym_cleanup();
 +                      debug_kusage();
 +              }
 +              if (!KDB_STATE(GO1)) {
 +                      /*
 +                       * Release all other cpus which will see KDB_STATE(LEAVING) is set.
 +                       */
 +                      int i;
 +                      for (i = 0; i < NR_CPUS; ++i) {
 +                              if (KDB_STATE_CPU(KDB, i))
 +                                      KDB_STATE_SET_CPU(LEAVING, i);
 +                              KDB_STATE_CLEAR_CPU(WAIT_IPI, i);
 +                              KDB_STATE_CLEAR_CPU(HOLD_CPU, i);
 +                      }
 +                      /* Wait until all the other processors leave kdb */
 +                      while (kdb_previous_event() != 1)
 +                              ;
 +                      if (!kdb_quiet(reason))
 +                              notify_die(DIE_KDEBUG_LEAVE, "KDEBUG LEAVE", regs, error, 0, 0);
 +                      kdb_initial_cpu = -1;   /* release kdb control */
 +                      KDB_DEBUG_STATE("kdb 13", reason);
 +              }
 +      }
 +
 +      KDB_DEBUG_STATE("kdb 14", result);
 +      kdba_restoreint(&int_state);
 +#ifdef  CONFIG_CPU_XSCALE
 +      if (smp_processor_id() == kdb_initial_cpu &&
 +          (KDB_STATE(SSBPT) | KDB_STATE(DOING_SS))) {
 +              kdba_setsinglestep(regs);
 +              /* disable IRQ in stack frame */
 +              KDB_STATE_SET(A_XSC_ICH);
 +              if (kdba_disable_retirq(regs))
 +                      KDB_STATE_SET(A_XSC_IRQ);
 +              else
 +                      KDB_STATE_CLEAR(A_XSC_IRQ);
 +      }
 +#endif
 +
 +      /* Only do this work if we are really leaving kdb */
 +      if (!(KDB_STATE(DOING_SS) || KDB_STATE(SSBPT) || KDB_STATE(RECURSE))) {
 +              KDB_DEBUG_STATE("kdb 15", result);
 +              kdb_bp_install_local(regs);
 +              if (old_regs_saved)
 +                      set_irq_regs(old_regs);
 +              KDB_STATE_CLEAR(KDB_CONTROL);
 +      }
 +
 +      KDB_DEBUG_STATE("kdb 16", result);
 +      KDB_FLAG_CLEAR(CATASTROPHIC);
 +      KDB_STATE_CLEAR(IP_ADJUSTED);   /* Re-adjust ip next time in */
 +      KDB_STATE_CLEAR(KEYBOARD);
 +      KDB_STATE_CLEAR(KDB);           /* Main kdb state has been cleared */
 +      KDB_STATE_CLEAR(RECURSE);
 +      KDB_STATE_CLEAR(LEAVING);       /* No more kdb work after this */
 +      KDB_DEBUG_STATE("kdb 17", reason);
 +out:
 +      atomic_dec(&kdb_event);
 +      preempt_enable();
 +      return result != 0;
 +}
 +
 +/*
 + * kdb_mdr
 + *
 + *    This function implements the guts of the 'mdr' command.
 + *
 + *    mdr  <addr arg>,<byte count>
 + *
 + * Inputs:
 + *    addr    Start address
 + *    count   Number of bytes
 + * Outputs:
 + *    None.
 + * Returns:
 + *    Always 0.  Any errors are detected and printed by kdb_getarea.
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_mdr(kdb_machreg_t addr, unsigned int count)
 +{
 +      unsigned char c;
 +      while (count--) {
 +              if (kdb_getarea(c, addr))
 +                      return 0;
 +              kdb_printf("%02x", c);
 +              addr++;
 +      }
 +      kdb_printf("\n");
 +      return 0;
 +}
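 +
 +/* Illustrative use of the mdr path (address and contents hypothetical):
 + *   kdb> mdr c01234f0 8
 + *   6b6b6b6b6b6b6b6b
 + * The bytes are printed back-to-back with no spacing or ASCII column.
 + */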
 +
 +/*
 + * kdb_md
 + *
 + *    This function implements the 'md', 'md1', 'md2', 'md4', 'md8'
 + *    'mdr' and 'mds' commands.
 + *
 + *    md|mds  [<addr arg> [<line count> [<radix>]]]
 + *    mdWcN   [<addr arg> [<line count> [<radix>]]]
 + *            where W is the width (1, 2, 4 or 8) and N is the count,
 + *            e.g., md1c20 reads 20 bytes, 1 at a time.
 + *    mdr  <addr arg>,<byte count>
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static void
 +kdb_md_line(const char *fmtstr, kdb_machreg_t addr,
 +          int symbolic, int nosect, int bytesperword,
 +          int num, int repeat, int phys)
 +{
 +      /* print just one line of data */
 +      kdb_symtab_t symtab;
 +      char cbuf[32];
 +      char *c = cbuf;
 +      int i;
 +      unsigned long word;
 +
 +      memset(cbuf, '\0', sizeof(cbuf));
 +      if (phys)
 +              kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
 +      else
 +              kdb_printf(kdb_machreg_fmt0 " ", addr);
 +
 +      for (i = 0; i < num && repeat--; i++) {
 +              if (phys) {
 +                      if (kdb_getphysword(&word, addr, bytesperword))
 +                              break;
 +              } else if (kdb_getword(&word, addr, bytesperword))
 +                      break;
 +              kdb_printf(fmtstr, word);
 +              if (symbolic)
 +                      kdbnearsym(word, &symtab);
 +              else
 +                      memset(&symtab, 0, sizeof(symtab));
 +              if (symtab.sym_name) {
 +                      kdb_symbol_print(word, &symtab, 0);
 +                      if (!nosect) {
 +                              kdb_printf("\n");
 +                              kdb_printf("                       %s %s "
 +                                         kdb_machreg_fmt " " kdb_machreg_fmt " " kdb_machreg_fmt,
 +                                      symtab.mod_name,
 +                                      symtab.sec_name,
 +                                      symtab.sec_start,
 +                                      symtab.sym_start,
 +                                      symtab.sym_end);
 +                      }
 +                      addr += bytesperword;
 +              } else {
 +                      union {
 +                              u64 word;
 +                              unsigned char c[8];
 +                      } wc;
 +                      unsigned char *cp;
 +#ifdef        __BIG_ENDIAN
 +                      cp = wc.c + 8 - bytesperword;
 +#else
 +                      cp = wc.c;
 +#endif
 +                      wc.word = word;
 +#define printable_char(c) ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.';})
 +                      switch (bytesperword) {
 +                      case 8:
 +                              *c++ = printable_char(*cp++);
 +                              *c++ = printable_char(*cp++);
 +                              *c++ = printable_char(*cp++);
 +                              *c++ = printable_char(*cp++);
 +                              addr += 4;
 +                              /* fall through */
 +                      case 4:
 +                              *c++ = printable_char(*cp++);
 +                              *c++ = printable_char(*cp++);
 +                              addr += 2;
 +                              /* fall through */
 +                      case 2:
 +                              *c++ = printable_char(*cp++);
 +                              addr++;
 +                              /* fall through */
 +                      case 1:
 +                              *c++ = printable_char(*cp++);
 +                              addr++;
 +                              break;
 +                      }
 +#undef printable_char
 +              }
 +      }
 +      kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1), " ", cbuf);
 +}
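 +
 +/* Illustrative single line of md output as produced above (address and
 + * contents hypothetical, 4 words of 4 bytes in radix 16):
 + *   0xc0115e00 6b6b6b6b 6b6b6b6b 6b6b6b6b 6b6b6b6b   kkkkkkkkkkkkkkkk
 + */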
 +
 +static int
 +kdb_md(int argc, const char **argv)
 +{
 +      static kdb_machreg_t last_addr;
 +      static int last_radix, last_bytesperword, last_repeat;
 +      int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
 +      int nosect = 0;
 +      char fmtchar, fmtstr[64];
 +      kdb_machreg_t addr;
 +      unsigned long word;
 +      long offset = 0;
 +      int symbolic = 0;
 +      int valid = 0;
 +      int phys = 0;
 +
 +      kdbgetintenv("MDCOUNT", &mdcount);
 +      kdbgetintenv("RADIX", &radix);
 +      kdbgetintenv("BYTESPERWORD", &bytesperword);
 +
 +      /* Assume 'md <addr>' and start with environment values */
 +      repeat = mdcount * 16 / bytesperword;
 +
 +      if (strcmp(argv[0], "mdr") == 0) {
 +              if (argc != 2)
 +                      return KDB_ARGCOUNT;
 +              valid = 1;
 +      } else if (isdigit(argv[0][2])) {
 +              bytesperword = (int)(argv[0][2] - '0');
 +              if (bytesperword == 0) {
 +                      bytesperword = last_bytesperword;
 +                      if (bytesperword == 0) {
 +                              bytesperword = 4;
 +                      }
 +              }
 +              last_bytesperword = bytesperword;
 +              repeat = mdcount * 16 / bytesperword;
 +              if (!argv[0][3])
 +                      valid = 1;
 +              else if (argv[0][3] == 'c' && argv[0][4]) {
 +                      char *p;
 +                      repeat = simple_strtoul(argv[0]+4, &p, 10);
 +                      mdcount = ((repeat * bytesperword) + 15) / 16;
 +                      valid = !*p;
 +              }
 +              last_repeat = repeat;
 +      } else if (strcmp(argv[0], "md") == 0)
 +              valid = 1;
 +      else if (strcmp(argv[0], "mds") == 0)
 +              valid = 1;
 +      else if (strcmp(argv[0], "mdp") == 0) {
 +              phys = valid = 1;
 +      }
 +      if (!valid)
 +              return KDB_NOTFOUND;
 +
 +      if (argc == 0) {
 +              if (last_addr == 0)
 +                      return KDB_ARGCOUNT;
 +              addr = last_addr;
 +              radix = last_radix;
 +              bytesperword = last_bytesperword;
 +              repeat = last_repeat;
 +              mdcount = ((repeat * bytesperword) + 15) / 16;
 +      }
 +
 +      if (argc) {
 +              kdb_machreg_t val;
 +              int diag, nextarg = 1;
 +              diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +              if (diag)
 +                      return diag;
 +              if (argc > nextarg+2)
 +                      return KDB_ARGCOUNT;
 +
 +              if (argc >= nextarg) {
 +                      diag = kdbgetularg(argv[nextarg], &val);
 +                      if (!diag) {
 +                              mdcount = (int) val;
 +                              repeat = mdcount * 16 / bytesperword;
 +                      }
 +              }
 +              if (argc >= nextarg+1) {
 +                      diag = kdbgetularg(argv[nextarg+1], &val);
 +                      if (!diag)
 +                              radix = (int) val;
 +              }
 +      }
 +
 +      if (strcmp(argv[0], "mdr") == 0) {
 +              return kdb_mdr(addr, mdcount);
 +      }
 +
 +      switch (radix) {
 +      case 10:
 +              fmtchar = 'd';
 +              break;
 +      case 16:
 +              fmtchar = 'x';
 +              break;
 +      case 8:
 +              fmtchar = 'o';
 +              break;
 +      default:
 +              return KDB_BADRADIX;
 +      }
 +
 +      last_radix = radix;
 +
 +      if (bytesperword > KDB_WORD_SIZE)
 +              return KDB_BADWIDTH;
 +
 +      switch (bytesperword) {
 +      case 8:
 +              sprintf(fmtstr, "%%16.16l%c ", fmtchar);
 +              break;
 +      case 4:
 +              sprintf(fmtstr, "%%8.8l%c ", fmtchar);
 +              break;
 +      case 2:
 +              sprintf(fmtstr, "%%4.4l%c ", fmtchar);
 +              break;
 +      case 1:
 +              sprintf(fmtstr, "%%2.2l%c ", fmtchar);
 +              break;
 +      default:
 +              return KDB_BADWIDTH;
 +      }
 +
 +      last_repeat = repeat;
 +      last_bytesperword = bytesperword;
 +
 +      if (strcmp(argv[0], "mds") == 0) {
 +              symbolic = 1;
 +              /* Do not save these changes as last_*, they are temporary mds
 +               * overrides.
 +               */
 +              bytesperword = KDB_WORD_SIZE;
 +              repeat = mdcount;
 +              kdbgetintenv("NOSECT", &nosect);
 +      }
 +
 +      /* Round address down modulo BYTESPERWORD */
 +
 +      addr &= ~(bytesperword-1);
 +
 +      while (repeat > 0) {
 +              unsigned long a;
 +              int n, z, num = (symbolic ? 1 : (16 / bytesperword));
 +
 +              for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) {
 +                      if (phys) {
 +                              if (kdb_getphysword(&word, a, bytesperword)
 +                                              || word)
 +                                      break;
 +                      } else if (kdb_getword(&word, a, bytesperword) || word)
 +                              break;
 +              }
 +              n = min(num, repeat);
 +              kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword, num, repeat, phys);
 +              addr += bytesperword * n;
 +              repeat -= n;
 +              z = (z + num - 1) / num;
 +              if (z > 2) {
 +                      int s = num * (z-2);
 +                      kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0 " zero suppressed\n",
 +                              addr, addr + bytesperword * s - 1);
 +                      addr += bytesperword * s;
 +                      repeat -= s;
 +              }
 +      }
 +      last_addr = addr;
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_mm
 + *
 + *    This function implements the 'mm' command.
 + *
 + *    mm address-expression new-value
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    mm works on machine words, mmW works on bytes.
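 + *    For example (address and value illustrative):
 + *        mm  0xc0252110 0x25    modifies one machine word
 + *        mm1 0xc0252110 0x25    modifies a single byte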
 + */
 +
 +static int
 +kdb_mm(int argc, const char **argv)
 +{
 +      int diag;
 +      kdb_machreg_t addr;
 +      long offset = 0;
 +      unsigned long contents;
 +      int nextarg;
 +      int width;
 +
 +      if (argv[0][2] && !isdigit(argv[0][2]))
 +              return KDB_NOTFOUND;
 +
 +      if (argc < 2) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              return diag;
 +
 +      if (nextarg > argc)
 +              return KDB_ARGCOUNT;
 +
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL)))
 +              return diag;
 +
 +      if (nextarg != argc + 1)
 +              return KDB_ARGCOUNT;
 +
 +      width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
 +      if ((diag = kdb_putword(addr, contents, width)))
 +              return diag;
 +
 +      kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_go
 + *
 + *    This function implements the 'go' command.
 + *
 + *    go [address-expression]
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    KDB_CMD_GO for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
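 + *    'go' with no argument continues from the current pc.  The form
 + *    with an address (illustrative), e.g. 'go 0xc01234a0', sets the pc
 + *    first and must be issued from the initial cpu.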
 + */
 +
 +static int
 +kdb_go(int argc, const char **argv)
 +{
 +      kdb_machreg_t addr;
 +      int diag;
 +      int nextarg;
 +      long offset;
 +      struct pt_regs *regs = get_irq_regs();
 +
 +      if (argc == 1) {
 +              if (smp_processor_id() != kdb_initial_cpu) {
 +                      kdb_printf("go <address> must be issued from the initial cpu, do cpu %d first\n", kdb_initial_cpu);
 +                      return KDB_ARGCOUNT;
 +              }
 +              nextarg = 1;
 +              diag = kdbgetaddrarg(argc, argv, &nextarg,
 +                                   &addr, &offset, NULL);
 +              if (diag)
 +                      return diag;
 +
 +              kdba_setpc(regs, addr);
 +      } else if (argc)
 +              return KDB_ARGCOUNT;
 +
 +      diag = KDB_CMD_GO;
 +      if (KDB_FLAG(CATASTROPHIC)) {
 +              kdb_printf("Catastrophic error detected\n");
 +              kdb_printf("kdb_continue_catastrophic=%d, ",
 +                      kdb_continue_catastrophic);
 +              if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) {
 +                      kdb_printf("type go a second time if you really want to continue\n");
 +                      return 0;
 +              }
 +              if (kdb_continue_catastrophic == 2) {
 +                      kdb_do_dump();
 +                      kdb_printf("forcing reboot\n");
 +                      kdb_reboot(0, NULL);
 +              }
 +              kdb_printf("attempting to continue\n");
 +      }
 +      if (smp_processor_id() != kdb_initial_cpu) {
 +              char buf[80];
 +              kdb_printf("go was not issued from initial cpu, switching back to cpu %d\n", kdb_initial_cpu);
 +              sprintf(buf, "cpu %d\n", kdb_initial_cpu);
 +              /* Recursive use of kdb_parse, do not use argv after this point */
 +              argv = NULL;
 +              diag = kdb_parse(buf);
 +              if (diag == KDB_CMD_CPU)
 +                      KDB_STATE_SET_CPU(GO_SWITCH, kdb_initial_cpu);
 +      }
 +      return diag;
 +}
 +
 +/*
 + * kdb_rd
 + *
 + *    This function implements the 'rd' command.
 + *
 + *    rd              display all general registers.
 + *    rd  c           display all control registers.
 + *    rd  d           display all debug registers.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_rd(int argc, const char **argv)
 +{
 +      int diag;
 +      if (argc == 0) {
 +              if ((diag = kdb_check_regs()))
 +                      return diag;
 +              return kdba_dumpregs(kdb_current_regs, NULL, NULL);
 +      }
 +
 +      if (argc > 2) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if ((diag = kdb_check_regs()))
 +              return diag;
 +      return kdba_dumpregs(kdb_current_regs, argv[1], argc==2 ? argv[2]: NULL);
 +}
 +
 +/*
 + * kdb_rm
 + *
 + *    This function implements the 'rm' (register modify)  command.
 + *
 + *    rm register-name new-contents
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    Currently doesn't allow modification of control or
 + *    debug registers.
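 + *    Register names are architecture dependent and the leading '%' is
 + *    optional; on x86, for example (value illustrative), 'rm rax 0x25'
 + *    and 'rm %rax 0x25' are equivalent.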
 + */
 +
 +static int
 +kdb_rm(int argc, const char **argv)
 +{
 +      int diag;
 +      int ind = 0;
 +      kdb_machreg_t contents;
 +
 +      if (argc != 2) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      /*
 +       * Allow presence or absence of leading '%' symbol.
 +       */
 +
 +      if (argv[1][0] == '%')
 +              ind = 1;
 +
 +      diag = kdbgetularg(argv[2], &contents);
 +      if (diag)
 +              return diag;
 +
 +      if ((diag = kdb_check_regs()))
 +              return diag;
 +      diag = kdba_setregcontents(&argv[1][ind], kdb_current_regs, contents);
 +      if (diag)
 +              return diag;
 +
 +      return 0;
 +}
 +
 +#if defined(CONFIG_MAGIC_SYSRQ)
 +/*
 + * kdb_sr
 + *
 + *    This function implements the 'sr' (SYSRQ key) command which
 + *    interfaces to the soi-disant MAGIC SYSRQ functionality.
 + *
 + *    sr <magic-sysrq-code>
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    None.
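 + *    For example, 'sr t' invokes the sysrq 't' handler to dump task
 + *    state, just as alt-sysrq-t would on a console.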
 + */
 +static int
 +kdb_sr(int argc, const char **argv)
 +{
 +      extern int __sysrq_enabled;
 +      if (argc != 1) {
 +              return KDB_ARGCOUNT;
 +      }
 +      if (!__sysrq_enabled) {
 +              kdb_printf("Auto activating sysrq\n");
 +              __sysrq_enabled = 1;
 +      }
 +
 +      handle_sysrq(*argv[1], NULL);
 +
 +      return 0;
 +}
 +#endif        /* CONFIG_MAGIC_SYSRQ */
 +
 +/*
 + * kdb_ef
 + *
 + *    This function implements the 'regs' (display exception frame)
 + *    command.  This command takes an address and expects to find
 + *    an exception frame at that address, formats and prints it.
 + *
 + *    regs address-expression
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    Not done yet.
 + */
 +
 +static int
 +kdb_ef(int argc, const char **argv)
 +{
 +      int diag;
 +      kdb_machreg_t addr;
 +      long offset;
 +      int nextarg;
 +
 +      if (argc == 1) {
 +              nextarg = 1;
 +              diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +              if (diag)
 +                      return diag;
 +
 +              return kdba_dumpregs((struct pt_regs *)addr, NULL, NULL);
 +      }
 +
 +      return KDB_ARGCOUNT;
 +}
 +
 +#if defined(CONFIG_MODULES)
 +extern struct list_head *kdb_modules;
 +extern void free_module(struct module *);
 +
 +/* modules using other modules */
 +struct module_use
 +{
 +      struct list_head list;
 +      struct module *module_which_uses;
 +};
 +
 +/*
 + * kdb_lsmod
 + *
 + *    This function implements the 'lsmod' command.  Lists currently
 + *    loaded kernel modules.
 + *
 + *    Mostly taken from userland lsmod.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *
 + */
 +
 +static int
 +kdb_lsmod(int argc, const char **argv)
 +{
 +      struct module *mod;
 +
 +      if (argc != 0)
 +              return KDB_ARGCOUNT;
 +
 +      kdb_printf("Module                  Size  modstruct     Used by\n");
 +      list_for_each_entry(mod, kdb_modules, list) {
 +
 +              kdb_printf("%-20s%8u  0x%p ", mod->name,
 +                         mod->core_size, (void *)mod);
 +#ifdef CONFIG_MODULE_UNLOAD
 +              kdb_printf("%4d ", module_refcount(mod));
 +#endif
 +              if (mod->state == MODULE_STATE_GOING)
 +                      kdb_printf(" (Unloading)");
 +              else if (mod->state == MODULE_STATE_COMING)
 +                      kdb_printf(" (Loading)");
 +              else
 +                      kdb_printf(" (Live)");
 +
 +#ifdef CONFIG_MODULE_UNLOAD
 +              {
 +                      struct module_use *use;
 +                      kdb_printf(" [ ");
 +                      list_for_each_entry(use, &mod->modules_which_use_me, list)
 +                              kdb_printf("%s ", use->module_which_uses->name);
 +                      kdb_printf("]\n");
 +              }
 +#endif
 +      }
 +
 +      return 0;
 +}
 +
 +#endif        /* CONFIG_MODULES */
 +
 +/*
 + * kdb_env
 + *
 + *    This function implements the 'env' command.  Display the current
 + *    environment variables.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_env(int argc, const char **argv)
 +{
 +      int i;
 +
 +      for(i=0; i<__nenv; i++) {
 +              if (__env[i]) {
 +                      kdb_printf("%s\n", __env[i]);
 +              }
 +      }
 +
 +      if (KDB_DEBUG(MASK))
 +              kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_dmesg
 + *
 + *    This function implements the 'dmesg' command to display the contents
 + *    of the syslog buffer.
 + *
 + *    dmesg [lines] [adjust]
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    None.
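 + *    For example: 'dmesg 10' prints the last 10 lines of the buffer,
 + *    'dmesg -10' the first 10, and 'dmesg 10 100' the 10 lines that
 + *    end 100 lines before the end of the buffer.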
 + */
 +
 +static int
 +kdb_dmesg(int argc, const char **argv)
 +{
 +      char *syslog_data[4], *start, *end, c = '\0', *p;
 +      int diag, logging, logsize, lines = 0, adjust = 0, n;
 +
 +      if (argc > 2)
 +              return KDB_ARGCOUNT;
 +      if (argc) {
 +              char *cp;
 +              lines = simple_strtol(argv[1], &cp, 0);
 +              if (*cp)
 +                      lines = 0;
 +              if (argc > 1) {
 +                      adjust = simple_strtoul(argv[2], &cp, 0);
 +                      if (*cp || adjust < 0)
 +                              adjust = 0;
 +              }
 +      }
 +
 +      /* disable LOGGING if set */
 +      diag = kdbgetintenv("LOGGING", &logging);
 +      if (!diag && logging) {
 +              const char *setargs[] = { "set", "LOGGING", "0" };
 +              kdb_set(2, setargs);
 +      }
 +
 +      /* syslog_data[0,1] physical start, end+1.  syslog_data[2,3] logical start, end+1. */
 +      debugger_syslog_data(syslog_data);
 +      if (syslog_data[2] == syslog_data[3])
 +              return 0;
 +      logsize = syslog_data[1] - syslog_data[0];
 +      start = syslog_data[2];
 +      end = syslog_data[3];
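 +      /* map a logical log pointer back into the circular physical buffer */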
 +#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
 +      for (n = 0, p = start; p < end; ++p) {
 +              if ((c = *KDB_WRAP(p)) == '\n')
 +                      ++n;
 +      }
 +      if (c != '\n')
 +              ++n;
 +      if (lines < 0) {
 +              if (adjust >= n)
 +                      kdb_printf("buffer only contains %d lines, nothing printed\n", n);
 +              else if (adjust - lines >= n)
 +                      kdb_printf("buffer only contains %d lines, last %d lines printed\n",
 +                              n, n - adjust);
 +              if (adjust) {
 +                      for (; start < end && adjust; ++start) {
 +                              if (*KDB_WRAP(start) == '\n')
 +                                      --adjust;
 +                      }
 +                      if (start < end)
 +                              ++start;
 +              }
 +              for (p = start; p < end && lines; ++p) {
 +                      if (*KDB_WRAP(p) == '\n')
 +                              ++lines;
 +              }
 +              end = p;
 +      } else if (lines > 0) {
 +              int skip = n - (adjust + lines);
 +              if (adjust >= n) {
 +                      kdb_printf("buffer only contains %d lines, nothing printed\n", n);
 +                      skip = n;
 +              } else if (skip < 0) {
 +                      lines += skip;
 +                      skip = 0;
 +                      kdb_printf("buffer only contains %d lines, first %d lines printed\n",
 +                              n, lines);
 +              }
 +              for (; start < end && skip; ++start) {
 +                      if (*KDB_WRAP(start) == '\n')
 +                              --skip;
 +              }
 +              for (p = start; p < end && lines; ++p) {
 +                      if (*KDB_WRAP(p) == '\n')
 +                              --lines;
 +              }
 +              end = p;
 +      }
 +      /* Do a line at a time (max 200 chars) to reduce protocol overhead */
 +      c = '\n';
 +      while (start != end) {
 +              char buf[201];
 +              p = buf;
 +              while (start < end && (c = *KDB_WRAP(start)) && (p - buf) < sizeof(buf)-1) {
 +                      ++start;
 +                      *p++ = c;
 +                      if (c == '\n')
 +                              break;
 +              }
 +              *p = '\0';
 +              kdb_printf("%s", buf);
 +      }
 +      if (c != '\n')
 +              kdb_printf("\n");
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_cpu
 + *
 + *    This function implements the 'cpu' command.
 + *
 + *    cpu     [<cpunum>]
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    KDB_CMD_CPU for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + *    All cpus should be spinning in kdb().  However, just in case
 + *    a cpu did not take the smp_kdb_stop NMI, check that a cpu
 + *    entered kdb() before passing control to it.
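 + *    In the status display: ' ' means the cpu is responding to kdb,
 + *    'I' it is running the idle task, '+' some kdb data exists but the
 + *    cpu is not responding, '*' no kdb data, 'F' the cpu is offline.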
 + */
 +
 +static void
 +kdb_cpu_status(void)
 +{
 +      int i, start_cpu, first_print = 1;
 +      char state, prev_state = '?';
 +
 +      kdb_printf("Currently on cpu %d\n", smp_processor_id());
 +      kdb_printf("Available cpus: ");
 +      for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
 +              if (!cpu_online(i))
 +                      state = 'F';    /* cpu is offline */
 +              else {
 +                      struct kdb_running_process *krp = kdb_running_process+i;
 +                      if (KDB_STATE_CPU(KDB, i)) {
 +                              state = ' ';    /* cpu is responding to kdb */
 +                              if (kdb_task_state_char(krp->p) == 'I')
 +                                      state = 'I';    /* running the idle task */
 +                      } else if (krp->seqno && krp->p && krp->seqno >= kdb_seqno - 1)
 +                              state = '+';    /* some kdb data, but not responding */
 +                      else
 +                              state = '*';    /* no kdb data */
 +              }
 +              if (state != prev_state) {
 +                      if (prev_state != '?') {
 +                              if (!first_print)
 +                                      kdb_printf(", ");
 +                              first_print = 0;
 +                              kdb_printf("%d", start_cpu);
 +                              if (start_cpu < i-1)
 +                                      kdb_printf("-%d", i-1);
 +                              if (prev_state != ' ')
 +                                      kdb_printf("(%c)", prev_state);
 +                      }
 +                      prev_state = state;
 +                      start_cpu = i;
 +              }
 +      }
 +      /* print the trailing cpus, ignoring them if they are all offline */
 +      if (prev_state != 'F') {
 +              if (!first_print)
 +                      kdb_printf(", ");
 +              kdb_printf("%d", start_cpu);
 +              if (start_cpu < i-1)
 +                      kdb_printf("-%d", i-1);
 +              if (prev_state != ' ')
 +                      kdb_printf("(%c)", prev_state);
 +      }
 +      kdb_printf("\n");
 +}
 +
 +static int
 +kdb_cpu(int argc, const char **argv)
 +{
 +      unsigned long cpunum;
 +      int diag, i;
 +
 +      /* ask the other cpus if they are still active */
 +      for (i=0; i<NR_CPUS; i++) {
 +              if (cpu_online(i))
 +                      KDB_STATE_CLEAR_CPU(KDB, i);
 +      }
 +      KDB_STATE_SET(KDB);
 +      barrier();
 +      /* wait for the other cpus to notice and set state KDB again,
 +       * see kdb_main_loop
 +       */
 +      udelay(1000);
 +
 +      if (argc == 0) {
 +              kdb_cpu_status();
 +              return 0;
 +      }
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      diag = kdbgetularg(argv[1], &cpunum);
 +      if (diag)
 +              return diag;
 +
 +      /*
 +       * Validate cpunum
 +       */
 +      if ((cpunum >= NR_CPUS)
 +       || !cpu_online(cpunum)
 +       || !KDB_STATE_CPU(KDB, cpunum))
 +              return KDB_BADCPUNUM;
 +
 +      kdb_new_cpu = cpunum;
 +
 +      /*
 +       * Switch to other cpu
 +       */
 +      return KDB_CMD_CPU;
 +}
 +
 +/* The user may not realize that ps/bta with no parameters does not print idle
 + * or sleeping system daemon processes, so report how many were suppressed.
 + */
 +void
 +kdb_ps_suppressed(void)
 +{
 +      int idle = 0, daemon = 0;
 +      unsigned long mask_I = kdb_task_state_string("I"),
 +                    mask_M = kdb_task_state_string("M");
 +      unsigned long cpu;
 +      const struct task_struct *p, *g;
 +      for (cpu = 0; cpu < NR_CPUS; ++cpu) {
 +              if (!cpu_online(cpu))
 +                      continue;
 +              p = kdb_curr_task(cpu);
 +              if (kdb_task_state(p, mask_I))
 +                      ++idle;
 +      }
 +      kdb_do_each_thread(g, p) {
 +              if (kdb_task_state(p, mask_M))
 +                      ++daemon;
 +      } kdb_while_each_thread(g, p);
 +      if (idle || daemon) {
 +              if (idle)
 +                      kdb_printf("%d idle process%s (state I)%s\n",
 +                                 idle, idle == 1 ? "" : "es",
 +                                 daemon ? " and " : "");
 +              if (daemon)
 +                      kdb_printf("%d sleeping system daemon (state M) process%s",
 +                                 daemon, daemon == 1 ? "" : "es");
 +              kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
 +      }
 +}
 +
 +/*
 + * kdb_ps
 + *
 + *    This function implements the 'ps' command which shows
 + *    a list of the active processes.
 + *
 + *    ps [DRSTCZEUIMA]                All processes, optionally filtered by state
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
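 + *    For example, 'ps' shows the default set of tasks, 'ps A' shows
 + *    all tasks and 'ps I' only tasks in the idle state.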
 + */
 +
 +void
 +kdb_ps1(const struct task_struct *p)
 +{
 +      struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);
 +      kdb_printf("0x%p %8d %8d  %d %4d   %c  0x%p %c%s\n",
 +                 (void *)p, p->pid, p->parent->pid,
 +                 kdb_task_has_cpu(p), kdb_process_cpu(p),
 +                 kdb_task_state_char(p),
 +                 (void *)(&p->thread),
 +                 p == kdb_curr_task(smp_processor_id()) ? '*': ' ',
 +                 p->comm);
 +      if (kdb_task_has_cpu(p)) {
 +              if (!krp->seqno || !krp->p)
 +                      kdb_printf("  Error: no saved data for this cpu\n");
 +              else {
 +                      if (krp->seqno < kdb_seqno - 1)
 +                              kdb_printf("  Warning: process state is stale\n");
 +                      if (krp->p != p)
 +                              kdb_printf("  Error: does not match running process table (0x%p)\n", krp->p);
 +              }
 +      }
 +}
 +
 +static int
 +kdb_ps(int argc, const char **argv)
 +{
 +      struct task_struct *g, *p;
 +      unsigned long mask, cpu;
 +
 +      if (argc == 0)
 +              kdb_ps_suppressed();
 +      kdb_printf("%-*s      Pid   Parent [*] cpu State %-*s Command\n",
 +              (int)(2*sizeof(void *))+2, "Task Addr",
 +              (int)(2*sizeof(void *))+2, "Thread");
 +      mask = kdb_task_state_string(argc ? argv[1] : NULL);
 +      /* Run the active tasks first */
 +      for (cpu = 0; cpu < NR_CPUS; ++cpu) {
 +              if (!cpu_online(cpu))
 +                      continue;
 +              p = kdb_curr_task(cpu);
 +              if (kdb_task_state(p, mask))
 +                      kdb_ps1(p);
 +      }
 +      kdb_printf("\n");
 +      /* Now the real tasks */
 +      kdb_do_each_thread(g, p) {
 +              if (kdb_task_state(p, mask))
 +                      kdb_ps1(p);
 +      } kdb_while_each_thread(g, p);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_pid
 + *
 + *    This function implements the 'pid' command which switches
 + *    the currently active process.
 + *
 + *    pid [<pid> | R]
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
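 + *    For example (pid illustrative), 'pid 1' switches the debugger
 + *    context to the init task and 'pid R' returns to the process that
 + *    was running on the initial cpu when kdb was entered.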
 + */
 +
 +
 +static int
 +kdb_pid(int argc, const char **argv)
 +{
 +      struct task_struct *p;
 +      unsigned long val;
 +      int diag;
 +
 +      if (argc > 1)
 +              return KDB_ARGCOUNT;
 +
 +      if (argc) {
 +              if (strcmp(argv[1], "R") == 0) {
 +                      p = KDB_RUNNING_PROCESS_ORIGINAL[kdb_initial_cpu].p;
 +              } else {
 +                      diag = kdbgetularg(argv[1], &val);
 +                      if (diag)
 +                              return KDB_BADINT;
 +
 +                      p = find_task_by_pid_ns((pid_t)val, &init_pid_ns);
 +                      if (!p) {
 +                              kdb_printf("No task with pid=%d\n", (pid_t)val);
 +                              return 0;
 +                      }
 +              }
 +
 +              kdba_set_current_task(p);
 +      }
 +
 +      kdb_printf("KDB current process is %s(pid=%d)\n", kdb_current_task->comm,
 +                 kdb_current_task->pid);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_ll
 + *
 + *    This function implements the 'll' command which follows a linked
 + *    list and executes an arbitrary command for each element.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
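 + *    For example (address and offset illustrative),
 + *        ll 0xc74327a0 0x18 md4c4
 + *    runs 'md4c4' on each element of the list whose link field is at
 + *    offset 0x18, stopping at a NULL link.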
 + */
 +
 +static int
 +kdb_ll(int argc, const char **argv)
 +{
 +      int diag;
 +      kdb_machreg_t addr;
 +      long offset = 0;
 +      kdb_machreg_t va;
 +      unsigned long linkoffset;
 +      int nextarg;
 +      const char *command;
 +
 +      if (argc != 3) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      nextarg = 1;
 +      diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +      if (diag)
 +              return diag;
 +
 +      diag = kdbgetularg(argv[2], &linkoffset);
 +      if (diag)
 +              return diag;
 +
 +      /*
 +       * Using the starting address as
 +       * the first element in the list, and assuming that
 +       * the list ends with a null pointer.
 +       */
 +
 +      va = addr;
 +      if (!(command = kdb_strdup(argv[3], GFP_KDB))) {
 +              kdb_printf("%s: cannot duplicate command\n", __FUNCTION__);
 +              return 0;
 +      }
 +      /* Recursive use of kdb_parse, do not use argv after this point */
 +      argv = NULL;
 +
 +      while (va) {
 +              char buf[80];
 +
 +              sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
 +              diag = kdb_parse(buf);
 +              if (diag) {
 +                      kfree(command); /* do not leak command on error */
 +                      return diag;
 +              }
 +
 +              addr = va + linkoffset;
 +              if (kdb_getword(&va, addr, sizeof(va)))
 +                      break;
 +      }
 +      kfree(command);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_help
 + *
 + *    This function implements the 'help' and '?' commands.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_help(int argc, const char **argv)
 +{
 +      kdbtab_t *kt;
 +      int i;
 +
 +      kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
 +      kdb_printf("----------------------------------------------------------\n");
 +      for(i=0, kt=kdb_commands; i<kdb_max_commands; i++, kt++) {
 +              if (kt->cmd_name)
 +                      kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
 +                                 kt->cmd_usage, kt->cmd_help);
 +      }
 +      return 0;
 +}
 +
 +extern int kdb_wake_up_process(struct task_struct * p);
 +
 +/*
 + * kdb_kill
 + *
 + *    This function implements the 'kill' commands.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
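 + *    The signal must be given as a negative number and the pid as a
 + *    positive one, e.g. (pid illustrative) 'kill -9 1234' sends
 + *    SIGKILL to pid 1234.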
 + */
 +
 +static int
 +kdb_kill(int argc, const char **argv)
 +{
 +      long sig, pid;
 +      char *endp;
 +      struct task_struct *p;
 +      struct siginfo info;
 +
 +      if (argc!=2)
 +              return KDB_ARGCOUNT;
 +
 +      sig = simple_strtol(argv[1], &endp, 0);
 +      if (*endp)
 +              return KDB_BADINT;
 +      if (sig >= 0) {
 +              kdb_printf("Invalid signal parameter: <-signal>\n");
 +              return 0;
 +      }
 +      sig = -sig;
 +
 +      pid = simple_strtol(argv[2], &endp, 0);
 +      if (*endp)
 +              return KDB_BADINT;
 +      if (pid <= 0) {
 +              kdb_printf("Process ID must be greater than 0.\n");
 +              return 0;
 +      }
 +
 +      /* Find the process. */
 +      if (!(p = find_task_by_pid_ns(pid, &init_pid_ns))) {
 +              kdb_printf("The specified process isn't found.\n");
 +              return 0;
 +      }
 +      p = p->group_leader;
 +      info.si_signo = sig;
 +      info.si_errno = 0;
 +      info.si_code = SI_USER;
 +      info.si_pid = pid;      /* use same capabilities as process being signalled */
 +      info.si_uid = 0;        /* kdb has root authority */
 +      kdb_send_sig_info(p, &info, kdb_seqno);
 +      return 0;
 +}
 +
 +struct kdb_tm {
 +      int tm_sec;     /* seconds */
 +      int tm_min;     /* minutes */
 +      int tm_hour;    /* hours */
 +      int tm_mday;    /* day of the month */
 +      int tm_mon;     /* month */
 +      int tm_year;    /* year */
 +};
 +
 +static void
 +kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
 +{
 +      /* This will work from 1970-2099, 2100 is not a leap year */
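 +      /* for example, tv_sec == 0 decodes to 1970-01-01 00:00:00 */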
 +      static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
 +      memset(tm, 0, sizeof(*tm));
 +      tm->tm_sec  = tv->tv_sec % (24 * 60 * 60);
 +      tm->tm_mday = tv->tv_sec / (24 * 60 * 60) + (2 * 365 + 1); /* shift base from 1970 to 1968 */
 +      tm->tm_min =  tm->tm_sec / 60 % 60;
 +      tm->tm_hour = tm->tm_sec / 60 / 60;
 +      tm->tm_sec =  tm->tm_sec % 60;
 +      tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
 +      tm->tm_mday %= (4*365+1);
 +      mon_day[1] = 29;
 +      while (tm->tm_mday >= mon_day[tm->tm_mon]) {
 +              tm->tm_mday -= mon_day[tm->tm_mon];
 +              if (++tm->tm_mon == 12) {
 +                      tm->tm_mon = 0;
 +                      ++tm->tm_year;
 +                      mon_day[1] = 28;
 +              }
 +      }
 +      ++tm->tm_mday;
 +}
 +
 +/*
 +/*
 + * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
 + * That code cannot be called directly from kdb: it has an unconditional
 + * cli()/sti() pair and calls routines that take locks, which can hang
 + * the debugger.
 + */
 +
 +static void
 +kdb_sysinfo(struct sysinfo *val)
 +{
 +      struct timespec uptime;
 +      do_posix_clock_monotonic_gettime(&uptime);
 +      memset(val, 0, sizeof(*val));
 +      val->uptime = uptime.tv_sec;
 +      val->loads[0] = avenrun[0];
 +      val->loads[1] = avenrun[1];
 +      val->loads[2] = avenrun[2];
 +      val->procs = nr_threads-1;
 +      si_meminfo(val);
 +      kdb_si_swapinfo(val);
 +
 +      return;
 +}
 +
 +/*
 + * kdb_summary
 + *
 + *    This function implements the 'summary' command.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
 + */
 +
 +static int
 +kdb_summary(int argc, const char **argv)
 +{
 +      extern struct timespec xtime;
 +      extern struct timezone sys_tz;
 +      struct kdb_tm tm;
 +      struct sysinfo val;
 +
 +      if (argc)
 +              return KDB_ARGCOUNT;
 +
 +      kdb_printf("sysname    %s\n", init_uts_ns.name.sysname);
 +      kdb_printf("release    %s\n", init_uts_ns.name.release);
 +      kdb_printf("version    %s\n", init_uts_ns.name.version);
 +      kdb_printf("machine    %s\n", init_uts_ns.name.machine);
 +      kdb_printf("nodename   %s\n", init_uts_ns.name.nodename);
 +      kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
 +      kdb_printf("ccversion  %s\n", __stringify(CCVERSION));
 +
 +      kdb_gmtime(&xtime, &tm);
 +      kdb_printf("date       %04d-%02d-%02d %02d:%02d:%02d tz_minuteswest %d\n",
 +              1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
 +              tm.tm_hour, tm.tm_min, tm.tm_sec,
 +              sys_tz.tz_minuteswest);
 +
 +      kdb_sysinfo(&val);
 +      kdb_printf("uptime     ");
 +      if (val.uptime > (24*60*60)) {
 +              int days = val.uptime / (24*60*60);
 +              val.uptime %= (24*60*60);
 +              kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
 +      }
 +      kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
 +
 +      /* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
 +
 +#define LOAD_INT(x) ((x) >> FSHIFT)
 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
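 +      /* avenrun values are fixed point with FSHIFT fraction bits,
 +       * e.g. with FSHIFT == 11 a raw value of 1024 prints as 0.50
 +       */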
 +      kdb_printf("load avg   %ld.%02ld %ld.%02ld %ld.%02ld\n",
 +              LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
 +              LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
 +              LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
 +      kdb_printf("\n");
 +#undef LOAD_INT
 +#undef LOAD_FRAC
 +
 +      kdb_meminfo_proc_show();        /* in fs/proc/meminfo.c */
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_per_cpu
 + *
 + *    This function implements the 'per_cpu' command.
 + *
 + * Inputs:
 + *    argc    argument count
 + *    argv    argument vector
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, a kdb diagnostic if error
 + * Locking:
 + *    none.
 + * Remarks:
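 + *    per_cpu <symbol> [<bytes per word>] [<cpu>]
 + *
 + *    For example (symbol illustrative), 'per_cpu runqueues' displays
 + *    the first word of each cpu's copy of the variable; cpus whose
 + *    value is zero are suppressed and listed at the end.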
 + */
 +
 +static int
 +kdb_per_cpu(int argc, const char **argv)
 +{
 +      char buf[256], fmtstr[64];
 +      kdb_symtab_t symtab;
 +      cpumask_t suppress;
 +      int cpu, diag;
 +      unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
 +
 +      if (argc < 1 || argc > 3)
 +              return KDB_ARGCOUNT;
 +
 +      cpus_clear(suppress);
 +      snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
 +      if (!kdbgetsymval(buf, &symtab)) {
 +              kdb_printf("%s is not a per_cpu variable\n", argv[1]);
 +              return KDB_BADADDR;
 +      }
 +      if (argc >=2 && (diag = kdbgetularg(argv[2], &bytesperword)))
 +              return diag;
 +      if (!bytesperword)
 +              bytesperword = KDB_WORD_SIZE;
 +      else if (bytesperword > KDB_WORD_SIZE)
 +              return KDB_BADWIDTH;
 +      sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
 +      if (argc >= 3) {
 +              if ((diag = kdbgetularg(argv[3], &whichcpu)))
 +                      return diag;
 +              if (!cpu_online(whichcpu)) {
 +                      kdb_printf("cpu %ld is not online\n", whichcpu);
 +                      return KDB_BADCPUNUM;
 +              }
 +      }
 +
 +      /* Most architectures use __per_cpu_offset[cpu], some use
 +       * __per_cpu_offset(cpu), smp has no __per_cpu_offset.
 +       */
 +#ifdef        __per_cpu_offset
 +#define KDB_PCU(cpu) __per_cpu_offset(cpu)
 +#else
 +#ifdef        CONFIG_SMP
 +#define KDB_PCU(cpu) __per_cpu_offset[cpu]
 +#else
 +#define KDB_PCU(cpu) 0
 +#endif
 +#endif
 +
 +      for_each_online_cpu(cpu) {
 +              if (whichcpu != ~0UL && whichcpu != cpu)
 +                      continue;
 +              addr = symtab.sym_start + KDB_PCU(cpu);
 +              if ((diag = kdb_getword(&val, addr, bytesperword))) {
 +                      kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to read, diag=%d\n",
 +                              cpu, addr, diag);
 +                      continue;
 +              }
 +#ifdef        CONFIG_SMP
 +              if (!val) {
 +                      cpu_set(cpu, suppress);
 +                      continue;
 +              }
 +#endif        /* CONFIG_SMP */
 +              kdb_printf("%5d ", cpu);
 +              kdb_md_line(fmtstr, addr,
 +                      bytesperword == KDB_WORD_SIZE,
 +                      1, bytesperword, 1, 1, 0);
 +      }
 +      if (cpus_weight(suppress) == 0)
 +              return 0;
 +      kdb_printf("Zero suppressed cpu(s):");
 +      for_each_cpu_mask(cpu, suppress) {
 +              kdb_printf(" %d", cpu);
 +              if (cpu == NR_CPUS-1 || next_cpu(cpu, suppress) != cpu + 1)
 +                      continue;
 +              while (cpu < NR_CPUS && next_cpu(cpu, suppress) == cpu + 1)
 +                      ++cpu;
 +              kdb_printf("-%d", cpu);
 +      }
 +      kdb_printf("\n");
 +
 +#undef KDB_PCU
 +
 +      return 0;
 +}
 +
 +/*
 + * display help for the use of cmd | grep pattern
 + */
 +static int
 +kdb_grep_help(int argc, const char **argv)
 +{
 +      kdb_printf ("Usage of  cmd args | grep pattern:\n");
 +      kdb_printf ("  Any command's output may be filtered through an ");
 +      kdb_printf ("emulated 'pipe'.\n");
 +      kdb_printf ("  'grep' is just a key word.\n");
 +      kdb_printf
 +      ("  The pattern may include a very limited set of metacharacters:\n");
 +      kdb_printf ("   pattern or ^pattern or pattern$ or ^pattern$\n");
 +      kdb_printf
 +      ("  And if there are spaces in the pattern, you may quote it:\n");
 +      kdb_printf
 +      ("   \"pat tern\" or \"^pat tern\" or \"pat tern$\" or \"^pat tern$\"\n");
 +      return 0;
 +}
 +
 +/*
 + * kdb_register_repeat
 + *
 + *    This function is used to register a kernel debugger command.
 + *
 + * Inputs:
 + *    cmd     Command name
 + *    func    Function to execute the command
 + *    usage   A simple usage string showing arguments
 + *    help    A simple help string describing command
 + *    minlen  Minimum number of characters required to match the command
 + *    repeat  Does the command auto repeat on enter?
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, one if a duplicate command.
 + * Locking:
 + *    none.
 + * Remarks:
 + *
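 + *    A module that provides kdb commands would typically call this
 + *    from its init routine, e.g. (names illustrative):
 + *        kdb_register_repeat("mycmd", mycmd_func, "<vaddr>",
 + *                            "My command", 0, KDB_REPEAT_NONE);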
 + */
 +
 +#define kdb_command_extend 50 /* arbitrary */
 +int
 +kdb_register_repeat(char *cmd,
 +                  kdb_func_t func,
 +                  char *usage,
 +                  char *help,
 +                  short minlen,
 +                  kdb_repeat_t repeat)
 +{
 +      int i;
 +      kdbtab_t *kp;
 +
 +      /*
 +       *  Brute force method to determine duplicates
 +       */
 +      for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
 +              if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
 +                      kdb_printf("Duplicate kdb command registered: "
 +                              "%s, func %p help %s\n", cmd, func, help);
 +                      return 1;
 +              }
 +      }
 +
 +      /*
 +       * Insert command into first available location in table
 +       */
 +      for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
 +              if (kp->cmd_name == NULL) {
 +                      break;
 +              }
 +      }
 +
 +      if (i >= kdb_max_commands) {
 +              kdbtab_t *new = kmalloc((kdb_max_commands + kdb_command_extend) * sizeof(*new), GFP_KDB);
 +              if (!new) {
 +                      kdb_printf("Could not allocate new kdb_command table\n");
 +                      return 1;
 +              }
 +              if (kdb_commands) {
 +                      memcpy(new, kdb_commands, kdb_max_commands * sizeof(*new));
 +                      kfree(kdb_commands);
 +              }
 +              memset(new + kdb_max_commands, 0, kdb_command_extend * sizeof(*new));
 +              kdb_commands = new;
 +              kp = kdb_commands + kdb_max_commands;
 +              kdb_max_commands += kdb_command_extend;
 +      }
 +
 +      kp->cmd_name   = cmd;
 +      kp->cmd_func   = func;
 +      kp->cmd_usage  = usage;
 +      kp->cmd_help   = help;
 +      kp->cmd_flags  = 0;
 +      kp->cmd_minlen = minlen;
 +      kp->cmd_repeat = repeat;
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_register
 + *
 + *    Compatibility register function for commands that do not need to
 + *    specify a repeat state.  Equivalent to kdb_register_repeat with
 + *    KDB_REPEAT_NONE.
 + *
 + * Inputs:
 + *    cmd     Command name
 + *    func    Function to execute the command
 + *    usage   A simple usage string showing arguments
 + *    help    A simple help string describing command
 + *    minlen  Minimum number of characters required to match the command
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, one if a duplicate command.
 + * Locking:
 + *    none.
 + * Remarks:
 + *
 + */
 +
 +int
 +kdb_register(char *cmd,
 +           kdb_func_t func,
 +           char *usage,
 +           char *help,
 +           short minlen)
 +{
 +      return kdb_register_repeat(cmd, func, usage, help, minlen, KDB_REPEAT_NONE);
 +}
 +
 +/*
 + * kdb_unregister
 + *
 + *    This function is used to unregister a kernel debugger command.
 + *    It is generally called when a module which implements kdb
 + *    commands is unloaded.
 + *
 + * Inputs:
 + *    cmd     Command name
 + * Outputs:
 + *    None.
 + * Returns:
 + *    zero for success, one command not registered.
 + * Locking:
 + *    none.
 + * Remarks:
 + *
 + */
 +
 +int
 +kdb_unregister(char *cmd)
 +{
 +      int i;
 +      kdbtab_t *kp;
 +
 +      /*
 +       *  find the command.
 +       */
 +      for (i=0, kp=kdb_commands; i<kdb_max_commands; i++, kp++) {
 +              if (kp->cmd_name && (strcmp(kp->cmd_name, cmd)==0)) {
 +                      kp->cmd_name = NULL;
 +                      return 0;
 +              }
 +      }
 +
 +      /*
 +       * Couldn't find it.
 +       */
 +      return 1;
 +}
 +
 +/*
 + * kdb_inittab
 + *
 + *    This function is called by the kdb_init function to initialize
 + *    the kdb command table.   It must be called prior to any other
 + *    call to kdb_register_repeat.
 + *
 + * Inputs:
 + *    None.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    None.
 + * Locking:
 + *    None.
 + * Remarks:
 + *
 + */
 +
 +static void __init
 +kdb_inittab(void)
 +{
 +      int i;
 +      kdbtab_t *kp;
 +
 +      for(i=0, kp=kdb_commands; i < kdb_max_commands; i++,kp++) {
 +              kp->cmd_name = NULL;
 +      }
 +
 +      kdb_register_repeat("md", kdb_md, "<vaddr>",   "Display Memory Contents, also mdWcN, e.g. md8c1", 1, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>",   "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>",   "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("mds", kdb_md, "<vaddr>",   "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>",   "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("id", kdb_id, "<vaddr>",   "Display Instructions", 1, KDB_REPEAT_NO_ARGS);
 +      kdb_register_repeat("go", kdb_go, "[<vaddr>]", "Continue Execution", 1, KDB_REPEAT_NONE);
 +      kdb_register_repeat("rd", kdb_rd, "",           "Display Registers", 1, KDB_REPEAT_NONE);
 +      kdb_register_repeat("rm", kdb_rm, "<reg> <contents>", "Modify Registers", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("ef", kdb_ef, "<vaddr>",   "Display exception frame", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("bt", kdb_bt, "[<vaddr>]", "Stack traceback", 1, KDB_REPEAT_NONE);
 +      kdb_register_repeat("btp", kdb_bt, "<pid>",     "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]",     "Display stack all processes", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("btc", kdb_bt, "",  "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("btt", kdb_bt, "<vaddr>",   "Backtrace process given its struct task address", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>", "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("env", kdb_env, "",         "Show environment variables", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("set", kdb_set, "",         "Set environment variables", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("help", kdb_help, "",       "Display Help Message", 1, KDB_REPEAT_NONE);
 +      kdb_register_repeat("?", kdb_help, "",         "Display Help Message", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("cpu", kdb_cpu, "<cpunum>","Switch to new cpu", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("ps", kdb_ps, "[<flags>|A]", "Display active task list", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("pid", kdb_pid, "<pidnum>", "Switch to another task", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("reboot", kdb_reboot, "",  "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
 +#if defined(CONFIG_KDB_KDUMP)
 +      kdb_register_repeat("kdump", kdb_kdump, "",    "Calls kdump mode", 0, KDB_REPEAT_NONE);
 +#endif
 +#if defined(CONFIG_MODULES)
 +      kdb_register_repeat("lsmod", kdb_lsmod, "",     "List loaded kernel modules", 0, KDB_REPEAT_NONE);
 +#endif
 +#if defined(CONFIG_MAGIC_SYSRQ)
 +      kdb_register_repeat("sr", kdb_sr, "<key>",      "Magic SysRq key", 0, KDB_REPEAT_NONE);
 +#endif
 +      kdb_register_repeat("dmesg", kdb_dmesg, "[lines]",      "Display syslog buffer", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>", "Send a signal to a process", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("summary", kdb_summary, "", "Summarize the system", 4, KDB_REPEAT_NONE);
 +      kdb_register_repeat("per_cpu", kdb_per_cpu, "", "Display per_cpu variables", 3, KDB_REPEAT_NONE);
 +      kdb_register_repeat("grephelp", kdb_grep_help, "",
 +              "Display help on | grep", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("print", kdb_debuginfo_print, "<expression>",
 +              "Type casting, as in lcrash",  0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("px", kdb_debuginfo_print, "<expression>",
 +         "Print in hex (type casting) (see 'pxhelp')",  0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("pxhelp", kdb_pxhelp, "",
 +              "Display help for the px command", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("pd", kdb_debuginfo_print, "<expression>",
 +              "Print in decimal (type casting)", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("whatis", kdb_debuginfo_print,"<type or symbol>",
 +      "Display the type, or the address for a symbol", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("sizeof", kdb_debuginfo_print, "<type>",
 +      "Display the size of a structure, typedef, etc.", 0, KDB_REPEAT_NONE);
 +        kdb_register_repeat("walk", kdb_walk, "",
 +              "Walk a linked list (see 'walkhelp')", 0, KDB_REPEAT_NONE);
 +      kdb_register_repeat("walkhelp", kdb_walkhelp, "",
 +              "Display help for the walk command", 0, KDB_REPEAT_NONE);
 +}
 +
 +/*
 + * The user has written to our "file"
 + * file: the /proc file
 + * buffer: user address of the data being written
 + * count:  number of bytes in the user's buffer
 + */
 +static int
 +kdb_write_proc_filename(struct file *file, const char __user *buffer,
 +              unsigned long count, void *data)
 +{
 +      int     ret_count;
 +
 +      /* our buffer is kdb_debug_info_filename[256]; leave room for the NUL */
 +      if (count >= 256) {
 +              return 0;
 +      }
 +      if (copy_from_user(kdb_debug_info_filename, buffer, count)) {
 +              return 0;
 +      }
 +      ret_count = count; /* actual count */
 +      /* remove any newline from the end of the file name */
 +      if (count && kdb_debug_info_filename[count-1] == '\n')
 +              count--;
 +      kdb_debug_info_filename[count] = '\0';
 +
 +      return ret_count;
 +}
 +
 +/*
 + * The user is reading from our "file"
 + * page:  the beginning of the user's buffer
 + * start: pointer to the user's pointer (tells the caller where we put the data)
 + * off:   offset into the resource to be read
 + * count: length of the read
 + */
 +static int
 +kdb_read_proc_filename(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
 +{
 +      /* return the contents of kdb_debug_info_filename[] */
 +      return snprintf(page, count, "%s\n", kdb_debug_info_filename);
 +}
 +
 +/*
 + * kdb_proc_filename
 + *
 + * create /proc/kdb/debug_info_name
 + */
 +static void
 +kdb_proc_filename(void)
 +{
 +      struct proc_dir_entry *kdb_dir_entry, *kdb_file_entry;
 +
 +      /* create /proc/kdb */
 +      kdb_dir_entry = proc_mkdir("kdb", NULL);
 +      if (!kdb_dir_entry) {
 +              printk ("kdb could not create /proc/kdb\n");
 +              return;
 +      }
 +
 +      /* read/write by owner (root) only */
 +      kdb_file_entry = create_proc_entry("debug_info_name",
 +                                      S_IRUSR | S_IWUSR, kdb_dir_entry);
 +      if (!kdb_file_entry) {
 +              printk ("kdb could not create /proc/kdb/kdb_dir_entry\n");
 +              return;
 +      }
 +      kdb_file_entry->nlink = 1;
 +      kdb_file_entry->data = (void *)NULL;
 +      kdb_file_entry->read_proc = kdb_read_proc_filename;
 +      kdb_file_entry->write_proc = kdb_write_proc_filename;
 +      return;
 +}
 +
 +/*
 + * kdb_cmd_init
 + *
 + *    This function is called by the kdb_init function to execute any
 + *    commands defined in kdb_cmds.
 + *
 + * Inputs:
 + *    Commands in *kdb_cmds[];
 + * Outputs:
 + *    None.
 + * Returns:
 + *    None.
 + * Locking:
 + *    None.
 + * Remarks:
 + *
 + */
 +
 +static void __init
 +kdb_cmd_init(void)
 +{
 +      int i, diag;
 +      for (i = 0; kdb_cmds[i]; ++i) {
 +              if (!defcmd_in_progress)
 +                      if (console_loglevel >= 6 /* KERN_INFO */)
 +                              kdb_printf("kdb_cmd[%d]: %s", i, kdb_cmds[i]);
 +              diag = kdb_parse(kdb_cmds[i]);
 +              if (diag)
 +                      kdb_printf("kdb command %s failed, kdb diag %d\n",
 +                              kdb_cmds[i], diag);
 +      }
 +      if (defcmd_in_progress) {
 +              kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
 +              kdb_parse("endefcmd");
 +      }
 +}
 +
 +/*
 + * kdb_panic
 + *
 + *    Invoked via the panic_notifier_list.
 + *
 + * Inputs:
 + *    None.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    Zero.
 + * Locking:
 + *    None.
 + * Remarks:
 + *    When this function is called from panic(), the other cpus have already
 + *    been stopped.
 + *
 + */
 +
 +static int
 +kdb_panic(struct notifier_block *self, unsigned long command, void *ptr)
 +{
 +      KDB_FLAG_SET(CATASTROPHIC);     /* kernel state is dubious now */
 +      KDB_ENTER();
 +      return 0;
 +}
 +
 +static struct notifier_block kdb_block = { kdb_panic, NULL, 0 };
 +
 +#ifdef        CONFIG_SYSCTL
 +static int proc_do_kdb(ctl_table *table, int write, void __user *buffer,
 +              size_t *lenp, loff_t *ppos)
 +{
 +      if (KDB_FLAG(NO_CONSOLE) && write) {
 +              printk(KERN_ERR "kdb has no working console and has switched itself off\n");
 +              return -EINVAL;
 +      }
 +      return proc_dointvec(table, write, buffer, lenp, ppos);
 +}
 +
 +static ctl_table kdb_kern_table[] = {
 +      {
 +              .procname       = "kdb",
 +              .data           = &kdb_on,
 +              .maxlen         = sizeof(int),
 +              .mode           = 0644,
 +              .proc_handler   = proc_do_kdb,
 +      },
 +      {}
 +};
 +
 +static ctl_table kdb_root_table[] = {
 +      {
 +              .procname       = "kernel",
 +              .mode           = 0555,
 +              .child          = kdb_kern_table,
 +      },
 +      {}
 +};
 +#endif        /* CONFIG_SYSCTL */
 +
 +static int
 +kdb_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 +{
 +      if (action == CPU_ONLINE) {
 +              int cpu =(unsigned long)hcpu;
 +              cpumask_t save_cpus_allowed = current->cpus_allowed;
 +              set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 +              kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on this cpu */
 +              set_cpus_allowed_ptr(current, &save_cpus_allowed);
 +      }
 +      return NOTIFY_OK;
 +}
 +
 +static struct notifier_block kdb_cpu_nfb = {
 +      .notifier_call = kdb_cpu_callback
 +};
 +
 +/*
 + * kdb_init
 + *
 + *    Initialize the kernel debugger environment.
 + *
 + * Parameters:
 + *    None.
 + * Returns:
 + *    None.
 + * Locking:
 + *    None.
 + * Remarks:
 + *    None.
 + */
 +
 +void __init
 +kdb_init(void)
 +{
 +      kdb_initial_cpu = smp_processor_id();
 +      /*
 +       * This must be called before any calls to kdb_printf.
 +       */
 +      kdb_io_init();
 +
 +      kdb_inittab();          /* Initialize Command Table */
 +      kdb_initbptab();        /* Initialize Breakpoint Table */
 +      kdb_id_init();          /* Initialize Disassembler */
 +      kdba_init();            /* Architecture Dependent Initialization */
 +
 +      /*
 +       * Use printk() to get message in log_buf[];
 +       */
 +      printk("kdb version %d.%d%s by Keith Owens, Scott Lurndal. "
 +             "Copyright SGI, All Rights Reserved\n",
 +              KDB_MAJOR_VERSION, KDB_MINOR_VERSION, KDB_TEST_VERSION);
 +
 +      kdb_cmd_init();         /* Preset commands from kdb_cmds */
 +      kdb_initial_cpu = -1;   /* Avoid recursion problems */
 +      kdb(KDB_REASON_CPU_UP, 0, NULL);        /* do kdb setup on boot cpu */
 +      kdb_initial_cpu = smp_processor_id();
 +      atomic_notifier_chain_register(&panic_notifier_list, &kdb_block);
 +      register_cpu_notifier(&kdb_cpu_nfb);
 +
 +#ifdef kdba_setjmp
 +      kdbjmpbuf = vmalloc(NR_CPUS * sizeof(*kdbjmpbuf));
 +      if (!kdbjmpbuf)
 +              printk(KERN_ERR "Cannot allocate kdbjmpbuf, no kdb recovery will be possible\n");
 +#endif        /* kdba_setjmp */
 +
 +      kdb_initial_cpu = -1;
 +      kdb_wait_for_cpus_secs = 2*num_online_cpus();
 +      kdb_wait_for_cpus_secs = max(kdb_wait_for_cpus_secs, 10);
 +}
 +
 +#ifdef        CONFIG_SYSCTL
 +static int __init
 +kdb_late_init(void)
 +{
 +      register_sysctl_table(kdb_root_table);
 +      /* kmalloc does not seem to be usable until now, so allocate here */
 +      kdb_proc_filename();
 +      return 0;
 +}
 +
 +__initcall(kdb_late_init);
 +#endif
 +
 +EXPORT_SYMBOL(kdb_register);
 +EXPORT_SYMBOL(kdb_register_repeat);
 +EXPORT_SYMBOL(kdb_unregister);
 +EXPORT_SYMBOL(kdb_getarea_size);
 +EXPORT_SYMBOL(kdb_putarea_size);
 +EXPORT_SYMBOL(kdb_getuserarea_size);
 +EXPORT_SYMBOL(kdb_putuserarea_size);
 +EXPORT_SYMBOL(kdbgetularg);
 +EXPORT_SYMBOL(kdbgetenv);
 +EXPORT_SYMBOL(kdbgetintenv);
 +EXPORT_SYMBOL(kdbgetaddrarg);
 +EXPORT_SYMBOL(kdb);
 +EXPORT_SYMBOL(kdb_on);
 +EXPORT_SYMBOL(kdb_seqno);
 +EXPORT_SYMBOL(kdb_initial_cpu);
 +EXPORT_SYMBOL(kdbnearsym);
 +EXPORT_SYMBOL(kdb_printf);
 +EXPORT_SYMBOL(kdb_symbol_print);
 +EXPORT_SYMBOL(kdb_running_process);
index 0cdceb5,0000000..14fcfc2
mode 100644,000000..100644
--- /dev/null
@@@ -1,1154 -1,0 +1,1155 @@@
 +/*
 + * Kernel Debugger Architecture Independent Support Functions
 + *
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
 + * 03/02/13    added new 2.5 kallsyms <xavier.bru@bull.net>
 + */
 +
 +#include <stdarg.h>
 +#include <linux/types.h>
 +#include <linux/sched.h>
 +#include <linux/mm.h>
 +#include <linux/kallsyms.h>
 +#include <linux/stddef.h>
 +#include <linux/vmalloc.h>
 +#include <linux/ptrace.h>
 +#include <linux/module.h>
 +#include <linux/highmem.h>
 +#include <linux/hardirq.h>
 +#include <linux/delay.h>
++#include <linux/slab.h>
 +
 +#include <asm/uaccess.h>
 +
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +
 +/*
 + * Symbol table functions.
 + */
 +
 +/*
 + * kdbgetsymval
 + *
 + *    Return the address of the given symbol.
 + *
 + * Parameters:
 + *    symname Character string containing symbol name
 + *      symtab  Structure to receive results
 + * Outputs:
 + * Returns:
 + *    0       Symbol not found, symtab zero filled
 + *    1       Symbol mapped to module/symbol/section, data in symtab
 + * Locking:
 + *    None.
 + * Remarks:
 + */
 +
 +int
 +kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
 +{
 +      if (KDB_DEBUG(AR))
 +              kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, symtab);
 +      memset(symtab, 0, sizeof(*symtab));
 +
 +      if ((symtab->sym_start = kallsyms_lookup_name(symname))) {
 +              if (KDB_DEBUG(AR))
 +                      kdb_printf("kdbgetsymval: returns 1, symtab->sym_start=0x%lx\n", symtab->sym_start);
 +              return 1;
 +      }
 +      if (KDB_DEBUG(AR))
 +              kdb_printf("kdbgetsymval: returns 0\n");
 +      return 0;
 +}
 +EXPORT_SYMBOL(kdbgetsymval);
 +
 +/*
 + * kdbnearsym
 + *
 + *    Return the name of the symbol with the nearest address
 + *    less than 'addr'.
 + *
 + * Parameters:
 + *    addr    Address to check for symbol near
 + *    symtab  Structure to receive results
 + * Outputs:
 + * Returns:
 + *    0       No sections contain this address, symtab zero filled
 + *    1       Address mapped to module/symbol/section, data in symtab
 + * Locking:
 + *    None.
 + * Remarks:
 + *    2.6 kallsyms has a "feature" where it unpacks the name into a string.
 + *    If that string is reused before the caller expects it then the caller
 + *    sees its string change without warning.  To avoid cluttering up the
 + *    main kdb code with lots of kdb_strdup, tests and kfree calls, kdbnearsym
 + *    maintains an LRU list of the last few unique strings.  The list is sized
 + *    large enough to hold all active strings; no kdb caller of kdbnearsym
 + *    makes more than ~20 subsequent calls before using a saved value.
 + */
 +
 +static char *kdb_name_table[100];     /* arbitrary size */
 +
 +int
 +kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 +{
 +      int ret = 0;
 +      unsigned long symbolsize;
 +      unsigned long offset;
 +#define knt1_size 128         /* must be >= kallsyms table size */
 +      char *knt1 = NULL;
 +
 +      if (KDB_DEBUG(AR))
 +              kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
 +      memset(symtab, 0, sizeof(*symtab));
 +
 +      if (addr < 4096)
 +              goto out;
 +      knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
 +      if (!knt1) {
 +              kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n", addr);
 +              goto out;
 +      }
 +      symtab->sym_name = kallsyms_lookup(addr, &symbolsize, &offset, (char **)(&symtab->mod_name), knt1);
 +      if (offset > 8*1024*1024) {
 +              symtab->sym_name = NULL;
 +              addr = offset = symbolsize = 0;
 +      }
 +      symtab->sym_start = addr - offset;
 +      symtab->sym_end = symtab->sym_start + symbolsize;
 +      ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
 +
 +      if (ret) {
 +              int i;
 +              /* Another 2.6 kallsyms "feature".  Sometimes the sym_name is
 +               * set but the buffer passed into kallsyms_lookup is not used,
 +               * so it contains garbage.  The caller has to work out which
 +               * buffer needs to be saved.
 +               *
 +               * What was Rusty smoking when he wrote that code?
 +               */
 +              if (symtab->sym_name != knt1) {
 +                      strncpy(knt1, symtab->sym_name, knt1_size);
 +                      knt1[knt1_size-1] = '\0';
 +              }
 +              for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
 +                      if (kdb_name_table[i] && strcmp(kdb_name_table[i], knt1) == 0)
 +                              break;
 +              }
 +              if (i >= ARRAY_SIZE(kdb_name_table)) {
 +                      debug_kfree(kdb_name_table[0]);
 +                      memcpy(kdb_name_table, kdb_name_table+1,
 +                             sizeof(kdb_name_table[0])*(ARRAY_SIZE(kdb_name_table)-1));
 +              } else {
 +                      debug_kfree(knt1);
 +                      knt1 = kdb_name_table[i];
 +                      memcpy(kdb_name_table+i, kdb_name_table+i+1,
 +                             sizeof(kdb_name_table[0])*(ARRAY_SIZE(kdb_name_table)-i-1));
 +              }
 +              i = ARRAY_SIZE(kdb_name_table) - 1;
 +              kdb_name_table[i] = knt1;
 +              symtab->sym_name = kdb_name_table[i];
 +              knt1 = NULL;
 +      }
 +
 +      if (symtab->mod_name == NULL)
 +              symtab->mod_name = "kernel";
 +      if (KDB_DEBUG(AR))
 +              kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name);
 +
 +out:
 +      debug_kfree(knt1);
 +      return ret;
 +}
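 +
 +/* Illustrative sketch (not part of the original patch): because kdbnearsym
 + * returns sym_name as a pointer into its private LRU string table, a caller
 + * that must keep the name across many subsequent lookups should take its own
 + * copy, e.g. with kdb_strdup (defined later in this file):
 + *
 + *      kdb_symtab_t symtab;
 + *      if (kdbnearsym(addr, &symtab)) {
 + *              char *name = kdb_strdup(symtab.sym_name, GFP_ATOMIC);
 + *              ... use name, then kfree(name) ...
 + *      }
 + */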
 +
 +void
 +kdbnearsym_cleanup(void)
 +{
 +      int i;
 +      for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
 +              if (kdb_name_table[i]) {
 +                      debug_kfree(kdb_name_table[i]);
 +                      kdb_name_table[i] = NULL;
 +              }
 +      }
 +}
 +
 +/*
 + * kallsyms_symbol_complete
 + *
 + * Parameters:
 + *    prefix_name     prefix of a symbol name to lookup
 + *    max_len         maximum length that can be returned
 + * Returns:
 + *    Number of symbols which match the given prefix.
 + * Notes:
 + *    prefix_name is changed to contain the longest unique prefix that
 + *    starts with this prefix (tab completion).
 + */
 +
 +static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
 +
 +int kallsyms_symbol_complete(char *prefix_name, int max_len)
 +{
 +      loff_t pos = 0;
 +      int prefix_len = strlen(prefix_name), prev_len = 0;
 +      int i, number = 0;
 +      const char *name;
 +
 +      while ((name = kdb_walk_kallsyms(&pos))) {
 +              if (strncmp(name, prefix_name, prefix_len) == 0) {
 +                      strcpy(ks_namebuf, name);
 +                      /* Work out the longest name that matches the prefix */
 +                      if (++number == 1) {
 +                              prev_len = min_t(int, max_len-1, strlen(ks_namebuf));
 +                              memcpy(ks_namebuf_prev, ks_namebuf, prev_len);
 +                              ks_namebuf_prev[prev_len] = '\0';
 +                      } else for (i = 0; i < prev_len; ++i) {
 +                              if (ks_namebuf[i] != ks_namebuf_prev[i]) {
 +                                      prev_len = i;
 +                                      ks_namebuf_prev[i] = '\0';
 +                                      break;
 +                              }
 +                      }
 +              }
 +      }
 +      if (prev_len > prefix_len)
 +              memcpy(prefix_name, ks_namebuf_prev, prev_len+1);
 +      return number;
 +}
 +
 +/*
 + * kallsyms_symbol_next
 + *
 + * Parameters:
 + *    prefix_name     prefix of a symbol name to lookup
 + *    flag    0 means search from the head, 1 means continue search.
 + * Returns:
 + *    1 if a symbol matches the given prefix.
 + *    0 if no string found
 + */
 +
 +int kallsyms_symbol_next(char *prefix_name, int flag)
 +{
 +      int prefix_len = strlen(prefix_name);
 +      static loff_t pos;
 +      const char *name;
 +
 +      if (!flag)
 +              pos = 0;
 +
 +      while ((name = kdb_walk_kallsyms(&pos))) {
 +              if (strncmp(name, prefix_name, prefix_len) == 0) {
 +                      strncpy(prefix_name, name, strlen(name)+1);
 +                      return 1;
 +              }
 +      }
 +      return 0;
 +}
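 +
 +/* Usage sketch (illustrative only): since kallsyms_symbol_next overwrites
 + * prefix_name with the matched symbol, a caller listing every match must
 + * restore the prefix before each continuation call; the static walk position
 + * persists between calls while flag is 1:
 + *
 + *      char buf[KSYM_NAME_LEN+1];
 + *      int flag = 0;
 + *      for (;;) {
 + *              strcpy(buf, "sys_");
 + *              if (!kallsyms_symbol_next(buf, flag))
 + *                      break;
 + *              kdb_printf("%s\n", buf);
 + *              flag = 1;
 + *      }
 + */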
 +
 +#if defined(CONFIG_SMP)
 +/*
 + * kdb_ipi
 + *
 + *    This function is called from the non-maskable interrupt
 + *    handler to handle a kdb IPI instruction.
 + *
 + * Inputs:
 + *    regs    = Exception frame pointer
 + * Outputs:
 + *    None.
 + * Returns:
 + *    0       - Did not handle NMI
 + *    1       - Handled NMI
 + * Locking:
 + *    None.
 + * Remarks:
 + *    Initially one processor is invoked in the kdb() code.  That
 + *    processor sends an ipi which drives this routine on the other
 + *    processors.  All this does is call kdb() with reason SWITCH.
 + *    This puts all processors into the kdb() routine and all the
 + *    code for breakpoints etc. is in one place.
 + *    One problem with the way the kdb NMI is sent is that the NMI has no
 + *    identification that says it came from kdb.  If the cpu's kdb state is
 + *    marked as "waiting for kdb_ipi" then the NMI is treated as coming from
 + *    kdb, otherwise it is assumed to be for another reason and is ignored.
 + */
 +
 +int
 +kdb_ipi(struct pt_regs *regs, void (*ack_interrupt)(void))
 +{
 +      /* Do not print before checking and clearing WAIT_IPI, IPIs are
 +       * going all the time.
 +       */
 +      if (KDB_STATE(WAIT_IPI)) {
 +              /*
 +               * Stopping other processors via smp_kdb_stop().
 +               */
 +              if (ack_interrupt)
 +                      (*ack_interrupt)();     /* Acknowledge the interrupt */
 +              KDB_STATE_CLEAR(WAIT_IPI);
 +              KDB_DEBUG_STATE("kdb_ipi 1", 0);
 +              kdb(KDB_REASON_SWITCH, 0, regs);        /* Spin in kdb() */
 +              KDB_DEBUG_STATE("kdb_ipi 2", 0);
 +              return 1;
 +      }
 +      return 0;
 +}
 +#endif        /* CONFIG_SMP */
 +
 +/*
 + * kdb_symbol_print
 + *
 + *    Standard method for printing a symbol name and offset.
 + * Inputs:
 + *    addr    Address to be printed.
 + *    symtab  Address of symbol data, if NULL this routine does its
 + *            own lookup.
 + *    punc    Punctuation for string, bit field.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    Always 0.
 + * Locking:
 + *    none.
 + * Remarks:
 + *    The string and its punctuation are only printed if the address
 + *    is inside the kernel, except that the value is always printed
 + *    when requested.
 + */
 +
 +void
 +kdb_symbol_print(kdb_machreg_t addr, const kdb_symtab_t *symtab_p, unsigned int punc)
 +{
 +      kdb_symtab_t symtab, *symtab_p2;
 +      if (symtab_p) {
 +              symtab_p2 = (kdb_symtab_t *)symtab_p;
 +      }
 +      else {
 +              symtab_p2 = &symtab;
 +              kdbnearsym(addr, symtab_p2);
 +      }
 +      if (!symtab_p2->sym_name && !(punc & KDB_SP_VALUE))
 +              return;
 +      if (punc & KDB_SP_SPACEB) {
 +              kdb_printf(" ");
 +      }
 +      if (punc & KDB_SP_VALUE) {
 +              kdb_printf(kdb_machreg_fmt0, addr);
 +      }
 +      if (symtab_p2->sym_name) {
 +              if (punc & KDB_SP_VALUE) {
 +                      kdb_printf(" ");
 +              }
 +              if (punc & KDB_SP_PAREN) {
 +                      kdb_printf("(");
 +              }
 +              if (strcmp(symtab_p2->mod_name, "kernel")) {
 +                      kdb_printf("[%s]", symtab_p2->mod_name);
 +              }
 +              kdb_printf("%s", symtab_p2->sym_name);
 +              if (addr != symtab_p2->sym_start) {
 +                      kdb_printf("+0x%lx", addr - symtab_p2->sym_start);
 +              }
 +              if (punc & KDB_SP_SYMSIZE) {
 +                      kdb_printf("/0x%lx", symtab_p2->sym_end - symtab_p2->sym_start);
 +              }
 +              if (punc & KDB_SP_PAREN) {
 +                      kdb_printf(")");
 +              }
 +      }
 +      if (punc & KDB_SP_SPACEA) {
 +              kdb_printf(" ");
 +      }
 +      if (punc & KDB_SP_NEWLINE) {
 +              kdb_printf("\n");
 +      }
 +}
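 +
 +/* For example (output is illustrative), a call such as
 + *
 + *      kdb_symbol_print(addr, NULL, KDB_SP_VALUE|KDB_SP_PAREN|KDB_SP_NEWLINE);
 + *
 + * would print something like "0xc0123456 (sys_read+0x16)\n", with a
 + * "[module]" prefix added when the symbol is not in the base kernel.
 + */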
 +
 +/*
 + * kdb_strdup
 + *
 + *    kdb equivalent of strdup, for disasm code.
 + * Inputs:
 + *    str     The string to duplicate.
 + *    type    Flags to kmalloc for the new string.
 + * Outputs:
 + *    None.
 + * Returns:
 + *    Address of the new string, NULL if storage could not be allocated.
 + * Locking:
 + *    none.
 + * Remarks:
 + *    This is not in lib/string.c because it uses kmalloc which is not
 + *    available when string.o is used in boot loaders.
 + */
 +
 +char *kdb_strdup(const char *str, gfp_t type)
 +{
 +      int n = strlen(str)+1;
 +      char *s = kmalloc(n, type);
 +      if (!s)
 +              return NULL;
 +      return strcpy(s, str);
 +}
 +
 +/*
 + * kdb_getarea_size
 + *
 + *    Read an area of data.  The kdb equivalent of copy_from_user, with
 + *    kdb messages for invalid addresses.
 + * Inputs:
 + *    res     Pointer to the area to receive the result.
 + *    addr    Address of the area to copy.
 + *    size    Size of the area.
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +
 +int kdb_getarea_size(void *res, unsigned long addr, size_t size)
 +{
 +      int ret = kdba_getarea_size(res, addr, size);
 +      if (ret) {
 +              if (!KDB_STATE(SUPPRESS)) {
 +                      kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
 +                      KDB_STATE_SET(SUPPRESS);
 +              }
 +              ret = KDB_BADADDR;
 +      }
 +      else {
 +              KDB_STATE_CLEAR(SUPPRESS);
 +      }
 +      return(ret);
 +}
 +
 +/*
 + * kdb_putarea_size
 + *
 + *    Write an area of data.  The kdb equivalent of copy_to_user, with
 + *    kdb messages for invalid addresses.
 + * Inputs:
 + *    addr    Address of the area to write to.
 + *    res     Pointer to the area holding the data.
 + *    size    Size of the area.
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +
 +int kdb_putarea_size(unsigned long addr, void *res, size_t size)
 +{
 +      int ret = kdba_putarea_size(addr, res, size);
 +      if (ret) {
 +              if (!KDB_STATE(SUPPRESS)) {
 +                      kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
 +                      KDB_STATE_SET(SUPPRESS);
 +              }
 +              ret = KDB_BADADDR;
 +      }
 +      else {
 +              KDB_STATE_CLEAR(SUPPRESS);
 +      }
 +      return(ret);
 +}
 +
 +/*
 + * kdb_getphys
 + *
 + * Read data from a physical address. Validate the address is in range,
 + * use kmap_atomic() to get data
 + *
 + * Similar to kdb_getarea() - but for phys addresses
 + *
 + * Inputs:
 + *    res     Pointer to the word to receive the result
 + *    addr    Physical address of the area to copy
 + *    size    Size of the area
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +static int kdb_getphys(void *res, unsigned long addr, size_t size)
 +{
 +      unsigned long pfn;
 +      void *vaddr;
 +      struct page *page;
 +
 +      pfn = (addr >> PAGE_SHIFT);
 +      if (!pfn_valid(pfn))
 +              return 1;
 +      page = pfn_to_page(pfn);
 +      vaddr = kmap_atomic(page, KM_KDB);
 +      memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size);
 +      kunmap_atomic(vaddr, KM_KDB);
 +
 +      return 0;
 +}
 +
 +/*
 + * kdb_getphysword
 + *
 + * Inputs:
 + *    word    Pointer to the word to receive the result.
 + *    addr    Address of the area to copy.
 + *    size    Size of the area.
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
 +{
 +      int diag;
 +      __u8  w1;
 +      __u16 w2;
 +      __u32 w4;
 +      __u64 w8;
 +      *word = 0;      /* Default value if addr or size is invalid */
 +
 +      switch (size) {
 +      case 1:
 +              if (!(diag = kdb_getphys(&w1, addr, sizeof(w1))))
 +                      *word = w1;
 +              break;
 +      case 2:
 +              if (!(diag = kdb_getphys(&w2, addr, sizeof(w2))))
 +                      *word = w2;
 +              break;
 +      case 4:
 +              if (!(diag = kdb_getphys(&w4, addr, sizeof(w4))))
 +                      *word = w4;
 +              break;
 +      case 8:
 +              if (size <= sizeof(*word)) {
 +                      if (!(diag = kdb_getphys(&w8, addr, sizeof(w8))))
 +                              *word = w8;
 +                      break;
 +              }
 +              /* drop through */
 +      default:
 +              diag = KDB_BADWIDTH;
 +              kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
 +      }
 +      return(diag);
 +}
 +
 +/*
 + * kdb_getword
 + *
 + *    Read a binary value.  Unlike kdb_getarea, this treats data as numbers.
 + * Inputs:
 + *    word    Pointer to the word to receive the result.
 + *    addr    Address of the area to copy.
 + *    size    Size of the area.
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +
 +int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
 +{
 +      int diag;
 +      __u8  w1;
 +      __u16 w2;
 +      __u32 w4;
 +      __u64 w8;
 +      *word = 0;      /* Default value if addr or size is invalid */
 +      switch (size) {
 +      case 1:
 +              if (!(diag = kdb_getarea(w1, addr)))
 +                      *word = w1;
 +              break;
 +      case 2:
 +              if (!(diag = kdb_getarea(w2, addr)))
 +                      *word = w2;
 +              break;
 +      case 4:
 +              if (!(diag = kdb_getarea(w4, addr)))
 +                      *word = w4;
 +              break;
 +      case 8:
 +              if (size <= sizeof(*word)) {
 +                      if (!(diag = kdb_getarea(w8, addr)))
 +                              *word = w8;
 +                      break;
 +              }
 +              /* drop through */
 +      default:
 +              diag = KDB_BADWIDTH;
 +              kdb_printf("kdb_getword: bad width %ld\n", (long) size);
 +      }
 +      return(diag);
 +}
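 +
 +/* A minimal usage sketch: fetch a 32-bit quantity from a kernel address
 + * (addr is assumed valid here; on failure *word is simply left as 0):
 + *
 + *      unsigned long w;
 + *      if (kdb_getword(&w, addr, 4) == 0)
 + *              kdb_printf("0x%08lx\n", w);
 + */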
 +
 +/*
 + * kdb_putword
 + *
 + *    Write a binary value.  Unlike kdb_putarea, this treats data as numbers.
 + * Inputs:
 + *    addr    Address of the area to write to.
 + *    word    The value to set.
 + *    size    Size of the area.
 + * Outputs:
 + *    none.
 + * Returns:
 + *    0 for success, < 0 for error.
 + * Locking:
 + *    none.
 + */
 +
 +int kdb_putword(unsigned long addr, unsigned long word, size_t size)
 +{
 +      int diag;
 +      __u8  w1;
 +      __u16 w2;
 +      __u32 w4;
 +      __u64 w8;
 +      switch (size) {
 +      case 1:
 +              w1 = word;
 +              diag = kdb_putarea(addr, w1);
 +              break;
 +      case 2:
 +              w2 = word;
 +              diag = kdb_putarea(addr, w2);
 +              break;
 +      case 4:
 +              w4 = word;
 +              diag = kdb_putarea(addr, w4);
 +              break;
 +      case 8:
 +              if (size <= sizeof(word)) {
 +                      w8 = word;
 +                      diag = kdb_putarea(addr, w8);
 +                      break;
 +              }
 +              /* drop through */
 +      default:
 +              diag = KDB_BADWIDTH;
 +              kdb_printf("kdb_putword: bad width %ld\n", (long) size);
 +      }
 +      return(diag);
 +}
 +
 +/*
 + * kdb_task_state_string
 + *
 + *    Convert a string containing any of the letters DRSTCZEUIMA to a mask
 + *    for the process state field and return the value.  If no argument is
 + *    supplied, return the mask that corresponds to environment variable PS,
 + *    DRSTCZEU by default.
 + * Inputs:
 + *    s       String to convert
 + * Outputs:
 + *    none.
 + * Returns:
 + *    Mask for process state.
 + * Locking:
 + *    none.
 + * Notes:
 + *    The mask folds data from several sources into a single long value, so
 + *    be careful not to overlap the bits.  TASK_* bits are in the LSB,
 + *    special cases like UNRUNNABLE are in the MSB.  As of 2.6.10-rc1 there
 + *    is no overlap between TASK_* and EXIT_* but that may not always be
 + *    true, so EXIT_* bits are shifted left 16 bits before being stored in
 + *    the mask.
 + */
 +
 +#define UNRUNNABLE    (1UL << (8*sizeof(unsigned long) - 1))  /* unrunnable is < 0 */
 +#define RUNNING               (1UL << (8*sizeof(unsigned long) - 2))
 +#define IDLE          (1UL << (8*sizeof(unsigned long) - 3))
 +#define DAEMON                (1UL << (8*sizeof(unsigned long) - 4))
 +
 +unsigned long
 +kdb_task_state_string(const char *s)
 +{
 +      long res = 0;
 +      if (!s && !(s = kdbgetenv("PS"))) {
 +              s = "DRSTCZEU"; /* default value for ps */
 +      }
 +      while (*s) {
 +              switch (*s) {
 +              case 'D': res |= TASK_UNINTERRUPTIBLE; break;
 +              case 'R': res |= RUNNING; break;
 +              case 'S': res |= TASK_INTERRUPTIBLE; break;
 +              case 'T': res |= TASK_STOPPED; break;
 +              case 'C': res |= TASK_TRACED; break;
 +              case 'Z': res |= EXIT_ZOMBIE << 16; break;
 +              case 'E': res |= EXIT_DEAD << 16; break;
 +              case 'U': res |= UNRUNNABLE; break;
 +              case 'I': res |= IDLE; break;
 +              case 'M': res |= DAEMON; break;
 +              case 'A': res = ~0UL; break;
 +              default:
 +                      kdb_printf("%s: unknown flag '%c' ignored\n", __FUNCTION__, *s);
 +                      break;
 +              }
 +              ++s;
 +      }
 +      return res;
 +}
 +
 +/*
 + * kdb_task_state_char
 + *
 + *    Return the character that represents the task state.
 + * Inputs:
 + *    p       struct task for the process
 + * Outputs:
 + *    none.
 + * Returns:
 + *    One character to represent the task state.
 + * Locking:
 + *    none.
 + */
 +
 +char
 +kdb_task_state_char (const struct task_struct *p)
 +{
 +      int cpu = kdb_process_cpu(p);
 +      struct kdb_running_process *krp = kdb_running_process + cpu;
 +      char state = (p->state == 0) ? 'R' :
 +                   (p->state < 0) ? 'U' :
 +                   (p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
 +                   (p->state & TASK_STOPPED) ? 'T' :
 +                   (p->state & TASK_TRACED) ? 'C' :
 +                   (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
 +                   (p->exit_state & EXIT_DEAD) ? 'E' :
 +                   (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
 +      if (p->pid == 0) {
 +              /* Idle task.  Is it really idle, apart from the kdb interrupt? */
 +              if (!kdb_task_has_cpu(p) || krp->irq_depth == 1) {
 +                      /* There is a corner case when the idle task takes an
 +                       * interrupt and dies in the interrupt code.  It has an
 +                       * interrupt count of 1 but that did not come from kdb.
 +                       * This corner case can only occur on the initial cpu,
 +                       * all the others were entered via the kdb IPI.
 +                       */
 +                      if (cpu != kdb_initial_cpu || KDB_STATE_CPU(KEYBOARD, cpu))
 +                              state = 'I';    /* idle task */
 +              }
 +      }
 +      else if (!p->mm && state == 'S') {
 +              state = 'M';    /* sleeping system daemon */
 +      }
 +      return state;
 +}
 +
 +/*
 + * kdb_task_state
 + *
 + *    Return true if a process has the desired state given by the mask.
 + * Inputs:
 + *    p       struct task for the process
 + *    mask    mask from kdb_task_state_string to select processes
 + * Outputs:
 + *    none.
 + * Returns:
 + *    True if the process matches at least one criterion defined by the mask.
 + * Locking:
 + *    none.
 + */
 +
 +unsigned long
 +kdb_task_state(const struct task_struct *p, unsigned long mask)
 +{
 +      char state[] = { kdb_task_state_char(p), '\0' };
 +      return (mask & kdb_task_state_string(state)) != 0;
 +}
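 +
 +/* Putting the three task-state helpers together: a ps-style consumer might
 + * look like this (sketch only, assuming the kdb_do_each_thread iterator
 + * from kdbprivate.h):
 + *
 + *      unsigned long mask = kdb_task_state_string("DRS");
 + *      kdb_do_each_thread(g, p) {
 + *              if (kdb_task_state(p, mask))
 + *                      kdb_printf("%d %c %s\n", p->pid,
 + *                                 kdb_task_state_char(p), p->comm);
 + *      } kdb_while_each_thread(g, p);
 + */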
 +
 +struct kdb_running_process kdb_running_process[NR_CPUS];
 +
 +/* Save the state of a running process and invoke kdb_main_loop.  This is
 + * invoked on the current process on each cpu (assuming the cpu is responding).
 + */
 +
 +int
 +kdb_save_running(struct pt_regs *regs, kdb_reason_t reason,
 +               kdb_reason_t reason2, int error, kdb_dbtrap_t db_result)
 +{
 +      struct kdb_running_process *krp = kdb_running_process + smp_processor_id();
 +      krp->p = current;
 +      krp->regs = regs;
 +      krp->seqno = kdb_seqno;
 +      krp->irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
 +      kdba_save_running(&(krp->arch), regs);
 +      return kdb_main_loop(reason, reason2, error, db_result, regs);
 +}
 +
 +/*
 + * kdb_unsave_running
 + *
 + *    Reverse the effect of kdb_save_running.
 + * Inputs:
 + *    regs    struct pt_regs for the process
 + * Outputs:
 + *    Updates kdb_running_process[] for this cpu.
 + * Returns:
 + *    none.
 + * Locking:
 + *    none.
 + */
 +
 +void
 +kdb_unsave_running(struct pt_regs *regs)
 +{
 +      struct kdb_running_process *krp = kdb_running_process + smp_processor_id();
 +      kdba_unsave_running(&(krp->arch), regs);
 +      krp->seqno = 0;
 +}
 +
 +
 +/*
 + * kdb_print_nameval
 + *
 + *    Print a name and its value, converting the value to a symbol lookup
 + *    if possible.
 + * Inputs:
 + *    name    field name to print
 + *    val     value of field
 + * Outputs:
 + *    none.
 + * Returns:
 + *    none.
 + * Locking:
 + *    none.
 + */
 +
 +void
 +kdb_print_nameval(const char *name, unsigned long val)
 +{
 +      kdb_symtab_t symtab;
 +      kdb_printf("  %-11.11s ", name);
 +      if (kdbnearsym(val, &symtab))
 +              kdb_symbol_print(val, &symtab, KDB_SP_VALUE|KDB_SP_SYMSIZE|KDB_SP_NEWLINE);
 +      else
 +              kdb_printf("0x%lx\n", val);
 +}
 +
 +static struct page * kdb_get_one_user_page(const struct task_struct *tsk, unsigned long start,
 +              int len, int write)
 +{
 +      struct mm_struct *mm = tsk->mm;
 +      unsigned int flags;
 +      struct vm_area_struct * vma;
 +
 +      /* shouldn't cross a page boundary. */
 +      if ((start & PAGE_MASK) != ((start+len) & PAGE_MASK))
 +              return NULL;
 +
 +      /* we need to align the start address to the current page boundary;
 +       * PAGE_ALIGN aligns to the next page boundary.
 +       * FIXME: What about hugetlb?
 +       */
 +      start = start & PAGE_MASK;
 +      flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
 +
 +      vma = find_extend_vma(mm, start);
 +
 +      /* maybe we can allow access to VM_IO pages inside KDB? */
 +      if (!vma || (vma->vm_flags & VM_IO) || !(flags & vma->vm_flags))
 +              return NULL;
 +
 +      return follow_page(vma, start, write ? FOLL_WRITE : 0);
 +}
 +
 +int kdb_getuserarea_size(void *to, unsigned long from, size_t size)
 +{
 +      struct page *page;
 +      void *vaddr;
 +
 +      page = kdb_get_one_user_page(kdb_current_task, from, size, 0);
 +      if (!page)
 +              return size;
 +
 +      vaddr = kmap_atomic(page, KM_KDB);
 +      memcpy(to, vaddr + (from & (PAGE_SIZE - 1)), size);
 +      kunmap_atomic(vaddr, KM_KDB);
 +
 +      return 0;
 +}
 +
 +int kdb_putuserarea_size(unsigned long to, void *from, size_t size)
 +{
 +      struct page *page;
 +      void *vaddr;
 +
 +      page = kdb_get_one_user_page(kdb_current_task, to, size, 1);
 +      if (!page)
 +              return size;
 +
 +      vaddr = kmap_atomic(page, KM_KDB);
 +      memcpy(vaddr + (to & (PAGE_SIZE - 1)), from, size);
 +      kunmap_atomic(vaddr, KM_KDB);
 +
 +      return 0;
 +}
 +
 +/* Last ditch allocator for debugging, so we can still debug even when the
 + * GFP_ATOMIC pool has been exhausted.  The algorithms are tuned for space
 + * usage, not for speed.  There is one smallish memory pool; the free chain is
 + * kept in ascending address order to allow coalescing, and allocations are
 + * done by brute-force best fit.
 + */
 +
 +struct debug_alloc_header {
 +      u32 next;       /* offset of next header from start of pool */
 +      u32 size;
 +      void *caller;
 +};
 +
 +/* The memory returned by this allocator must be aligned, which means so must
 + * the header size.  Do not assume that sizeof(struct debug_alloc_header) is a
 + * multiple of the alignment, explicitly calculate the overhead of this header,
 + * including the alignment.  The rest of this code must not use sizeof() on any
 + * header or pointer to a header.
 + */
 +#define dah_align 8
 +#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
 +
 +static u64 debug_alloc_pool_aligned[256*1024/dah_align];      /* 256K pool */
 +static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
 +static u32 dah_first, dah_first_call = 1, dah_used = 0, dah_used_max = 0;
 +
 +/* Locking is awkward.  The debug code is called from all contexts, including
 + * non maskable interrupts.  A normal spinlock is not safe in NMI context, so
 + * try to get the debug allocator lock; if it cannot be obtained within a
 + * second, give up.  If the lock could not previously be obtained on this cpu,
 + * only try once.
 + *
 + * sparse has no annotation for "this function _sometimes_ acquires a lock", so
 + * fudge the acquire/release notation.
 + */
 +static DEFINE_SPINLOCK(dap_lock);
 +static int
 +get_dap_lock(void)
 +      __acquires(dap_lock)
 +{
 +      static int dap_locked = -1;
 +      int count;
 +      if (dap_locked == smp_processor_id())
 +              count = 1;
 +      else
 +              count = 1000;
 +      while (1) {
 +              if (spin_trylock(&dap_lock)) {
 +                      dap_locked = -1;
 +                      return 1;
 +              }
 +              if (!count--)
 +                      break;
 +              udelay(1000);
 +      }
 +      dap_locked = smp_processor_id();
 +      __acquire(dap_lock);
 +      return 0;
 +}
 +
 +void
 +*debug_kmalloc(size_t size, gfp_t flags)
 +{
 +      unsigned int rem, h_offset;
 +      struct debug_alloc_header *best, *bestprev, *prev, *h;
 +      void *p = NULL;
 +      if (!get_dap_lock()) {
 +              __release(dap_lock);    /* we never actually got it */
 +              return NULL;
 +      }
 +      h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
 +      if (dah_first_call) {
 +              h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
 +              dah_first_call = 0;
 +      }
 +      size = ALIGN(size, dah_align);
 +      prev = best = bestprev = NULL;
 +      while (1) {
 +              if (h->size >= size && (!best || h->size < best->size)) {
 +                      best = h;
 +                      bestprev = prev;
 +                      if (h->size == size)
 +                              break;
 +              }
 +              if (!h->next)
 +                      break;
 +              prev = h;
 +              h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
 +      }
 +      if (!best)
 +              goto out;
 +      rem = best->size - size;
 +      /* The pool must always contain at least one header */
 +      if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
 +              goto out;
 +      if (rem >= dah_overhead) {
 +              best->size = size;
 +              h_offset = ((char *)best - debug_alloc_pool) +
 +                         dah_overhead + best->size;
 +              h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
 +              h->size = rem - dah_overhead;
 +              h->next = best->next;
 +      } else
 +              h_offset = best->next;
 +      best->caller = __builtin_return_address(0);
 +      dah_used += best->size;
 +      dah_used_max = max(dah_used, dah_used_max);
 +      if (bestprev)
 +              bestprev->next = h_offset;
 +      else
 +              dah_first = h_offset;
 +      p = (char *)best + dah_overhead;
 +      memset(p, POISON_INUSE, best->size - 1);
 +      *((char *)p + best->size - 1) = POISON_END;
 +out:
 +      spin_unlock(&dap_lock);
 +      return p;
 +}
 +
 +void
 +debug_kfree(void *p)
 +{
 +      struct debug_alloc_header *h;
 +      unsigned int h_offset;
 +      if (!p)
 +              return;
 +      if ((char *)p < debug_alloc_pool ||
 +          (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
 +              kfree(p);
 +              return;
 +      }
 +      if (!get_dap_lock()) {
 +              __release(dap_lock);    /* we never actually got it */
 +              return;         /* memory leak, cannot be helped */
 +      }
 +      h = (struct debug_alloc_header *)((char *)p - dah_overhead);
 +      memset(p, POISON_FREE, h->size - 1);
 +      *((char *)p + h->size - 1) = POISON_END;
 +      h->caller = NULL;
 +      dah_used -= h->size;
 +      h_offset = (char *)h - debug_alloc_pool;
 +      if (h_offset < dah_first) {
 +              h->next = dah_first;
 +              dah_first = h_offset;
 +      } else {
 +              struct debug_alloc_header *prev;
 +              unsigned int prev_offset;
 +              prev = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
 +              while (1) {
 +                      if (!prev->next || prev->next > h_offset)
 +                              break;
 +                      prev = (struct debug_alloc_header *)
 +                              (debug_alloc_pool + prev->next);
 +              }
 +              prev_offset = (char *)prev - debug_alloc_pool;
 +              if (prev_offset + dah_overhead + prev->size == h_offset) {
 +                      prev->size += dah_overhead + h->size;
 +                      memset(h, POISON_FREE, dah_overhead - 1);
 +                      *((char *)h + dah_overhead - 1) = POISON_END;
 +                      h = prev;
 +                      h_offset = prev_offset;
 +              } else {
 +                      h->next = prev->next;
 +                      prev->next = h_offset;
 +              }
 +      }
 +      if (h_offset + dah_overhead + h->size == h->next) {
 +              struct debug_alloc_header *next;
 +              next = (struct debug_alloc_header *)
 +                      (debug_alloc_pool + h->next);
 +              h->size += dah_overhead + next->size;
 +              h->next = next->next;
 +              memset(next, POISON_FREE, dah_overhead - 1);
 +              *((char *)next + dah_overhead - 1) = POISON_END;
 +      }
 +      spin_unlock(&dap_lock);
 +}
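 +
 +/* Sketch of the intended round trip (illustrative): debug_kmalloc can return
 + * NULL either because the pool is exhausted or because the pool lock could
 + * not be taken from this context, so callers must tolerate failure; pointers
 + * that came from kmalloc may also be passed to debug_kfree, which routes
 + * them to kfree:
 + *
 + *      char *p = debug_kmalloc(64, GFP_ATOMIC);
 + *      if (p) {
 + *              ... use the buffer ...
 + *              debug_kfree(p);
 + *      }
 + */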
 +
 +void
 +debug_kusage(void)
 +{
 +      struct debug_alloc_header *h_free, *h_used;
 +#ifdef        CONFIG_IA64
 +      /* FIXME: using dah for ia64 unwind always results in a memory leak.
 +       * Fix that memory leak first, then set debug_kusage_one_time = 1 for
 +       * all architectures.
 +       */
 +      static int debug_kusage_one_time = 0;
 +#else
 +      static int debug_kusage_one_time = 1;
 +#endif
 +      if (!get_dap_lock()) {
 +              __release(dap_lock);    /* we never actually got it */
 +              return;
 +      }
 +      h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
 +      if (dah_first == 0 &&
 +          (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
 +           dah_first_call))
 +              goto out;
 +      if (!debug_kusage_one_time)
 +              goto out;
 +      debug_kusage_one_time = 0;
 +      kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n",
 +                 __FUNCTION__, dah_first);
 +      if (dah_first) {
 +              h_used = (struct debug_alloc_header *)debug_alloc_pool;
 +              kdb_printf("%s: h_used %p size %d\n", __FUNCTION__, h_used, h_used->size);
 +      }
 +      do {
 +              h_used = (struct debug_alloc_header *)
 +                        ((char *)h_free + dah_overhead + h_free->size);
 +              kdb_printf("%s: h_used %p size %d caller %p\n",
 +                         __FUNCTION__, h_used, h_used->size, h_used->caller);
 +              h_free = (struct debug_alloc_header *)
 +                        (debug_alloc_pool + h_free->next);
 +      } while (h_free->next);
 +      h_used = (struct debug_alloc_header *)
 +                ((char *)h_free + dah_overhead + h_free->size);
 +      if ((char *)h_used - debug_alloc_pool !=
 +          sizeof(debug_alloc_pool_aligned))
 +              kdb_printf("%s: h_used %p size %d caller %p\n",
 +                         __FUNCTION__, h_used, h_used->size, h_used->caller);
 +out:
 +      spin_unlock(&dap_lock);
 +}
 +
 +/* Maintain a small stack of kdb_flags to allow recursion without disturbing
 + * the global kdb state.
 + */
 +
 +static int kdb_flags_stack[4], kdb_flags_index;
 +
 +void
 +kdb_save_flags(void)
 +{
 +      BUG_ON(kdb_flags_index >= ARRAY_SIZE(kdb_flags_stack));
 +      kdb_flags_stack[kdb_flags_index++] = kdb_flags;
 +}
 +
 +void
 +kdb_restore_flags(void)
 +{
 +      BUG_ON(kdb_flags_index <= 0);
 +      kdb_flags = kdb_flags_stack[--kdb_flags_index];
 +}
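 +
 +/* Hypothetical pairing around a recursive kdb entry (sketch only; the real
 + * callers live elsewhere in the kdb core):
 + *
 + *      kdb_save_flags();
 + *      ... adjust kdb_flags and recurse into kdb ...
 + *      kdb_restore_flags();
 + */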
index 15501ae,0000000..308f0f9
mode 100644,000000..100644
--- /dev/null
@@@ -1,683 -1,0 +1,684 @@@
 +/*
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
 + */
 +
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/mm.h>
 +#include <linux/pagemap.h>
 +#include <linux/fs.h>
 +#include <linux/bio.h>
 +#include <linux/buffer_head.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/blkdev.h>
 +#include <linux/ctype.h>
++#include <linux/slab.h>
 +
 +MODULE_AUTHOR("SGI");
 +MODULE_DESCRIPTION("Debug page information");
 +MODULE_LICENSE("GPL");
 +
 +/* Standard Linux page stuff */
 +
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +/* From include/linux/page-flags.h */
 +static char *pg_flag_vals[] = {
 +      "PG_locked", "PG_error", "PG_referenced", "PG_uptodate",
 +      "PG_dirty", "PG_lru", "PG_active", "PG_slab",
 +      "PG_owner_priv_1", "PG_arch_1", "PG_reserved", "PG_private",
 +      "PG_writeback",
 +#ifdef CONFIG_PAGEFLAGS_EXTENDED
 +      "PG_head", "PG_tail",
 +#else
 +      "PG_compound",
 +#endif
 +      "PG_swapcache", "PG_mappedtodisk", "PG_reclaim", "PG_buddy",
 +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 +      "PG_uncached",
 +#endif
 +      NULL };
 +#endif
 +
 +/* From include/linux/buffer_head.h */
 +static char *bh_state_vals[] = {
 +      "Uptodate", "Dirty", "Lock", "Req",
 +      "Uptodate_Lock", "Mapped", "New", "Async_read",
 +      "Async_write", "Delay", "Boundary", "Write_EIO",
 +      "Ordered", "Eopnotsupp", "Unwritten", "PrivateStart",
 +      NULL };
 +
 +/* From include/linux/bio.h */
 +static char *bio_flag_vals[] = {
 +      "Uptodate", "RW_block", "EOF", "Seg_valid",
 +      "Cloned", "Bounced", "User_mapped", "Eopnotsupp",
 +      NULL };
 +
 +/* From include/linux/fs.h */
 +static char *inode_flag_vals[] = {
 +      "I_DIRTY_SYNC", "I_DIRTY_DATASYNC", "I_DIRTY_PAGES", "I_NEW",
 +      "I_WILL_FREE", "I_FREEING", "I_CLEAR", "I_LOCK",
 +      "I_SYNC", NULL };
 +
 +static char *map_flags(unsigned long flags, char *mapping[])
 +{
 +      static char buffer[256];
 +      int index;
 +      int offset = 12;
 +
 +      buffer[0] = '\0';
 +
 +      for (index = 0; flags && mapping[index]; flags >>= 1, index++) {
 +              if (flags & 1) {
 +                      if ((offset + strlen(mapping[index]) + 1) >= 80) {
 +                              strcat(buffer, "\n            ");
 +                              offset = 12;
 +                      } else if (offset > 12) {
 +                              strcat(buffer, " ");
 +                              offset++;
 +                      }
 +                      strcat(buffer, mapping[index]);
 +                      offset += strlen(mapping[index]);
 +              }
 +      }
 +
 +      return (buffer);
 +}
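 +
 +/* For example, map_flags(0x5, bh_state_vals) returns "Uptodate Lock" (bits 0
 + * and 2).  Note the result lives in a static buffer, so it must be consumed
 + * before the next call and the function is not reentrant.
 + */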
 +
 +static int
 +kdbm_buffers(int argc, const char **argv)
 +{
 +      struct buffer_head bh;
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(bh, addr)))
 +              return(diag);
 +
 +      kdb_printf("buffer_head at 0x%lx\n", addr);
 +      kdb_printf("  bno %llu size %llu dev 0x%x\n",
 +              (unsigned long long)bh.b_blocknr,
 +              (unsigned long long)bh.b_size,
 +              bh.b_bdev ? bh.b_bdev->bd_dev : 0);
 +      kdb_printf("  count %d state 0x%lx [%s]\n",
 +              bh.b_count.counter, bh.b_state,
 +              map_flags(bh.b_state, bh_state_vals));
 +      kdb_printf("  b_data 0x%p\n",
 +              bh.b_data);
 +      kdb_printf("  b_page 0x%p b_this_page 0x%p b_private 0x%p\n",
 +              bh.b_page, bh.b_this_page, bh.b_private);
 +      kdb_printf("  b_end_io ");
 +      if (bh.b_end_io)
 +              kdb_symbol_print(kdba_funcptr_value(bh.b_end_io), NULL, KDB_SP_VALUE);
 +      else
 +              kdb_printf("(NULL)");
 +      kdb_printf("\n");
 +
 +      return 0;
 +}
 +
 +static int
 +print_biovec(struct bio_vec *vec, int vcount)
 +{
 +      struct bio_vec bvec;
 +      unsigned long addr;
 +      int diag;
 +      int i;
 +
 +      if (vcount < 1 || vcount > BIO_MAX_PAGES) {
 +              kdb_printf("  [skipped iovecs, vcnt is %d]\n", vcount);
 +              return 0;
 +      }
 +
 +      addr = (unsigned long)vec;
 +      for (i = 0; i < vcount; i++) {
 +              if ((diag = kdb_getarea(bvec, addr)))
 +                      return(diag);
 +              addr += sizeof(bvec);
 +              kdb_printf("  [%d] page 0x%p length=%u offset=%u\n",
 +                      i, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 +      }
 +      return 0;
 +}
 +
 +static int
 +kdbm_bio(int argc, const char **argv)
 +{
 +      struct bio bio;
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(bio, addr)))
 +              return(diag);
 +
 +      kdb_printf("bio at 0x%lx\n", addr);
 +      kdb_printf("  bno %llu  next 0x%p  dev 0x%x\n",
 +              (unsigned long long)bio.bi_sector,
 +              bio.bi_next, bio.bi_bdev ? bio.bi_bdev->bd_dev : 0);
 +      kdb_printf("  vcnt %u vec 0x%p  rw 0x%lx flags 0x%lx [%s]\n",
 +              bio.bi_vcnt, bio.bi_io_vec, bio.bi_rw, bio.bi_flags,
 +              map_flags(bio.bi_flags, bio_flag_vals));
 +      print_biovec(bio.bi_io_vec, bio.bi_vcnt);
 +      kdb_printf("  count %d  private 0x%p\n",
 +              atomic_read(&bio.bi_cnt), bio.bi_private);
 +      kdb_printf("  bi_end_io ");
 +      if (bio.bi_end_io)
 +              kdb_symbol_print(kdba_funcptr_value(bio.bi_end_io), NULL, KDB_SP_VALUE);
 +      else
 +              kdb_printf("(NULL)");
 +      kdb_printf("\n");
 +
 +      return 0;
 +}
 +
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +static char *page_flags(unsigned long flags)
 +{
 +      return(map_flags(flags, pg_flag_vals));
 +}
 +
 +static int
 +kdbm_page(int argc, const char **argv)
 +{
 +      struct page page;
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +      if (diag)
 +              return diag;
 +
 +#ifdef        __ia64__
 +      if (rgn_index(addr) == 0)
 +              addr = (unsigned long) &mem_map[addr];  /* assume region 0 is a page index, not an address */
 +#else
 +      if (addr < PAGE_OFFSET)
 +              addr = (unsigned long) &mem_map[addr];
 +#endif
 +
 +      if ((diag = kdb_getarea(page, addr)))
 +              return(diag);
 +
 +      kdb_printf("struct page at 0x%lx\n", addr);
 +      kdb_printf("  addr space 0x%p index %lu (offset 0x%llx)\n",
 +                 page.mapping, page.index,
 +                 (unsigned long long)page.index << PAGE_CACHE_SHIFT);
 +      kdb_printf("  count %d flags %s\n",
 +                 page._count.counter, page_flags(page.flags));
 +      kdb_printf("  virtual 0x%p\n", page_address((struct page *)addr));
 +      if (page_has_buffers(&page))
 +              kdb_printf("  buffers 0x%p\n", page_buffers(&page));
 +      else
 +              kdb_printf("  private 0x%lx\n", page_private(&page));
 +
 +      return 0;
 +}
 +#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
 +
 +static unsigned long
 +print_request(unsigned long addr)
 +{
 +      struct request rq;
 +
 +      if (kdb_getarea(rq, addr))
 +              return(0);
 +
 +      kdb_printf("struct request at 0x%lx\n", addr);
 +      kdb_printf("  errors %d sector %llu nr_sectors %llu\n",
 +                      rq.errors,
 +                      (unsigned long long)blk_rq_pos(&rq),
 +                      (unsigned long long)blk_rq_sectors(&rq));
 +
 +      return (unsigned long) rq.queuelist.next;
 +}
 +
 +static int
 +kdbm_request(int argc, const char **argv)
 +{
 +      long offset = 0;
 +      unsigned long addr;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +      if (diag)
 +              return diag;
 +
 +      print_request(addr);
 +      return 0;
 +}
 +
 +
 +static int
 +kdbm_rqueue(int argc, const char **argv)
 +{
 +      struct request_queue rq;
 +      unsigned long addr, head_addr, next;
 +      long offset = 0;
 +      int nextarg;
 +      int i, diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(rq, addr)))
 +              return(diag);
 +
 +      kdb_printf("struct request_queue at 0x%lx\n", addr);
 +      i = 0;
 +      next = (unsigned long)rq.queue_head.next;
 +      head_addr = addr + offsetof(struct request_queue, queue_head);
 +      kdb_printf(" request queue: %s\n", next == head_addr ?
 +              "empty" : "");
 +      while (next != head_addr) {
 +              i++;
 +              next = print_request(next);
 +      }
 +
 +      if (i)
 +              kdb_printf("%d requests found\n", i);
 +
 +      return 0;
 +}
 +
 +
 +static void
 +do_buffer(unsigned long addr)
 +{
 +      struct buffer_head bh;
 +
 +      if (kdb_getarea(bh, addr))
 +              return;
 +
 +      kdb_printf("\tbh 0x%lx bno %8llu [%s]\n", addr,
 +               (unsigned long long)bh.b_blocknr,
 +               map_flags(bh.b_state, bh_state_vals));
 +}
 +
 +static void
 +kdbm_show_page(struct page *page, int first)
 +{
 +      if (first)
 +              kdb_printf("page_struct       index   cnt zone nid flags\n");
 +      kdb_printf("%p%s %6lu %5d %3d %3d 0x%lx",
 +              page_address(page), sizeof(void *) == 4 ? "        " : "",
 +              page->index, atomic_read(&(page->_count)),
 +              page_zonenum(page), page_to_nid(page),
 +              page->flags & (~0UL >> ZONES_SHIFT));
 +#define kdb_page_flags(page, type) if (Page ## type(page)) kdb_printf(" " #type);
 +      kdb_page_flags(page, Locked);
 +      kdb_page_flags(page, Error);
 +      kdb_page_flags(page, Referenced);
 +      kdb_page_flags(page, Uptodate);
 +      kdb_page_flags(page, Dirty);
 +      kdb_page_flags(page, LRU);
 +      kdb_page_flags(page, Active);
 +      kdb_page_flags(page, Slab);
 +      kdb_page_flags(page, Checked);
 +      if (page->flags & (1UL << PG_arch_1))
 +              kdb_printf(" arch_1");
 +      kdb_page_flags(page, Reserved);
 +      kdb_page_flags(page, Private);
 +      kdb_page_flags(page, Writeback);
 +      kdb_page_flags(page, Compound);
 +      kdb_page_flags(page, SwapCache);
 +      kdb_page_flags(page, MappedToDisk);
 +      kdb_page_flags(page, Reclaim);
 +      kdb_page_flags(page, Buddy);
 +
 +      /* PageHighMem is not a flag any more, but treat it as one */
 +      kdb_page_flags(page, HighMem);
 +
 +      if (page_has_buffers(page)) {
 +              struct buffer_head *head, *bh;
 +              kdb_printf("\n");
 +              head = bh = page_buffers(page);
 +              do {
 +                      do_buffer((unsigned long) bh);
 +              } while ((bh = bh->b_this_page) != head);
 +      } else if (page_private(page)) {
 +              kdb_printf(" private= 0x%lx", page_private(page));
 +      }
 +      /* Cannot use page_mapping(page) here; it needs swapper_space, which is
 +       * not exported.
 +       */
 +      if (page->mapping)
 +              kdb_printf(" mapping= %p", page->mapping);
 +      kdb_printf("\n");
 +#undef kdb_page_flags
 +}
 +
 +static int
 +kdbm_inode_pages(int argc, const char **argv)
 +{
 +      struct inode *inode = NULL;
 +      struct address_space *ap = NULL;
 +      unsigned long addr, addr1 = 0;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +      pgoff_t next = 0;
 +      struct page *page;
 +      int first;
 +
 +      nextarg = 1;
 +      diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
 +      if (diag)
 +              goto out;
 +
 +      if (argc == 2) {
 +              nextarg = 2;
 +              diag = kdbgetaddrarg(argc, argv, &nextarg, &addr1,
 +                                      &offset, NULL);
 +              if (diag)
 +                      goto out;
 +              kdb_printf("Looking for page index 0x%lx ...\n", addr1);
 +              next = addr1;
 +      }
 +
 +      if (!(inode = kmalloc(sizeof(*inode), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_inode_pages: cannot kmalloc inode\n");
 +              goto out;
 +      }
 +      if (!(ap = kmalloc(sizeof(*ap), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_inode_pages: cannot kmalloc ap\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*inode, addr)))
 +              goto out;
 +      if (!inode->i_mapping) {
 +              kdb_printf("inode has no mapping\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*ap, (unsigned long) inode->i_mapping)))
 +              goto out;
 +
 +      /* Run the pages in the radix tree, printing the state of each page */
 +      first = 1;
 +      while (radix_tree_gang_lookup(&ap->page_tree, (void **)&page, next, 1)) {
 +              kdbm_show_page(page, first);
 +              if (addr1)
 +                      break;
 +              first = 0;
 +              next = page->index + 1;
 +      }
 +
 +out:
 +      kfree(inode);   /* kfree(NULL) is a no-op */
 +      kfree(ap);
 +      return diag;
 +}
 +
 +static int
 +kdbm_inode(int argc, const char **argv)
 +{
 +      struct inode *inode = NULL;
 +      unsigned long addr;
 +      unsigned char *iaddr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              goto out;
 +      if (!(inode = kmalloc(sizeof(*inode), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_inode: cannot kmalloc inode\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*inode, addr)))
 +              goto out;
 +
 +      kdb_printf("struct inode at  0x%lx\n", addr);
 +
 +      kdb_printf(" i_ino = %lu i_count = %u i_size %Ld\n",
 +                                      inode->i_ino, atomic_read(&inode->i_count),
 +                                      inode->i_size);
 +
 +      kdb_printf(" i_mode = 0%o  i_nlink = %d  i_rdev = 0x%x\n",
 +                                      inode->i_mode, inode->i_nlink,
 +                                      inode->i_rdev);
 +
 +      kdb_printf(" i_hash.nxt = 0x%p i_hash.pprev = 0x%p\n",
 +              inode->i_hash.next,
 +              inode->i_hash.pprev);
 +
 +      kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
 +              list_entry(inode->i_list.next, struct inode, i_list),
 +              list_entry(inode->i_list.prev, struct inode, i_list));
 +
 +      kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
 +              list_entry(inode->i_dentry.next, struct dentry, d_alias),
 +              list_entry(inode->i_dentry.prev, struct dentry, d_alias));
 +
 +      kdb_printf(" i_sb = 0x%p i_op = 0x%p i_data = 0x%lx nrpages = %lu\n",
 +                                      inode->i_sb, inode->i_op,
 +                                      addr + offsetof(struct inode, i_data),
 +                                      inode->i_data.nrpages);
 +      kdb_printf(" i_fop= 0x%p i_flock = 0x%p i_mapping = 0x%p\n",
 +                         inode->i_fop, inode->i_flock, inode->i_mapping);
 +
 +      kdb_printf(" i_flags 0x%x i_state 0x%lx [%s]",
 +                         inode->i_flags, inode->i_state,
 +                         map_flags(inode->i_state, inode_flag_vals));
 +
 +      iaddr  = (char *)addr;
 +      iaddr += offsetof(struct inode, i_private);
 +
 +      kdb_printf("  fs specific info @ 0x%p\n", iaddr);
 +out:
 +      kfree(inode);   /* kfree(NULL) is a no-op */
 +      return diag;
 +}
 +
 +static int
 +kdbm_sb(int argc, const char **argv)
 +{
 +      struct super_block *sb = NULL;
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              goto out;
 +      if (!(sb = kmalloc(sizeof(*sb), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_sb: cannot kmalloc sb\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*sb, addr)))
 +              goto out;
 +
 +      kdb_printf("struct super_block at  0x%lx\n", addr);
 +      kdb_printf(" s_dev 0x%x blocksize 0x%lx\n", sb->s_dev, sb->s_blocksize);
 +      kdb_printf(" s_flags 0x%lx s_root 0x%p\n", sb->s_flags, sb->s_root);
 +      kdb_printf(" s_frozen %d s_id [%s]\n", sb->s_frozen, sb->s_id);
 +out:
 +      if (sb)
 +              kfree(sb);
 +      return diag;
 +}
 +
 +
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +/* According to Steve Lord, this code is ix86 specific.  Patches to extend it to
 + * other architectures will be gratefully accepted.
 + */
 +static int
 +kdbm_memmap(int argc, const char **argv)
 +{
 +      struct page page;
 +      int i, page_count;
 +      int slab_count = 0;
 +      int dirty_count = 0;
 +      int locked_count = 0;
 +      int page_counts[10];    /* [8] = large counts, [9] = -1 counts */
 +      int buffered_count = 0;
 +#ifdef buffer_delay
 +      int delay_count = 0;
 +#endif
 +      int diag;
 +      unsigned long addr;
 +#ifdef CONFIG_DISCONTIGMEM
 +      int node_id = -1, found_node = 0;
 +      int tot_page_count = 0;
 +      unsigned long unode_id;
 +      pg_data_t *pgdat;
 +
 +      if (argc == 1) {                /* node_id was specified */
 +          diag = kdbgetularg(argv[argc], &unode_id);
 +          if (diag)
 +              return diag;
 +          node_id = (int)unode_id;
 +      }
 +      else if (argc)
 +          return KDB_ARGCOUNT;
 +
 +      tot_page_count = 0;
 +      memset(page_counts, 0, sizeof(page_counts));
 +
 +      for_each_online_pgdat(pgdat) {
 +          if ((node_id != -1) && (pgdat->node_id != node_id))
 +              continue;
 +          found_node = 1;
 +          addr = (unsigned long)pgdat->node_mem_map;
 +          page_count = pgdat->node_spanned_pages;
 +          tot_page_count += page_count;
 +#else
 +      addr = (unsigned long)mem_map;
 +      page_count = max_mapnr;
 +      memset(page_counts, 0, sizeof(page_counts));
 +#endif
 +      for (i = 0; i < page_count; i++) {
 +              if ((diag = kdb_getarea(page, addr)))
 +                      return(diag);
 +              addr += sizeof(page);
 +
 +              if (PageSlab(&page))
 +                      slab_count++;
 +              if (PageDirty(&page))
 +                      dirty_count++;
 +              if (PageLocked(&page))
 +                      locked_count++;
 +              if (page._count.counter == -1)
 +                        page_counts[9]++;
 +              else if (page._count.counter < 8)
 +                      page_counts[page._count.counter]++;
 +              else
 +                      page_counts[8]++;
 +              if (page_has_buffers(&page)) {
 +                      buffered_count++;
 +#ifdef buffer_delay
 +                      if (buffer_delay(page.buffers))
 +                              delay_count++;
 +#endif
 +              }
 +      }
 +#ifdef CONFIG_DISCONTIGMEM
 +      }
 +      page_count = tot_page_count;
 +      if (node_id != -1) {
 +          if (!found_node) {
 +              kdb_printf("Node %d does not exist.\n", node_id);
 +              return 0;
 +          }
 +          kdb_printf("Node %d pages:\n", node_id);
 +      }
 +#endif
 +      kdb_printf("  Total pages:      %6d\n", page_count);
 +      kdb_printf("  Slab pages:       %6d\n", slab_count);
 +      kdb_printf("  Dirty pages:      %6d\n", dirty_count);
 +      kdb_printf("  Locked pages:     %6d\n", locked_count);
 +      kdb_printf("  Buffer pages:     %6d\n", buffered_count);
 +#ifdef buffer_delay
 +      kdb_printf("  Delalloc pages:   %6d\n", delay_count);
 +#endif
 +      kdb_printf(" -1 page count:     %6d\n", page_counts[9]);
 +      for (i = 0; i < 8; i++) {
 +              kdb_printf("  %d page count:     %6d\n",
 +                      i, page_counts[i]);
 +      }
 +      kdb_printf("  high page count:  %6d\n", page_counts[8]);
 +      return 0;
 +}
 +#endif /* !CONFIG_DISCONTIGMEM && !NUMA */
 +
 +static int __init kdbm_pg_init(void)
 +{
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +      kdb_register("page", kdbm_page, "<vaddr>", "Display page", 0);
 +#endif
 +      kdb_register("inode", kdbm_inode, "<vaddr>", "Display inode", 0);
 +      kdb_register("sb", kdbm_sb, "<vaddr>", "Display super_block", 0);
 +      kdb_register("bh", kdbm_buffers, "<buffer head address>", "Display buffer", 0);
 +      kdb_register("bio", kdbm_bio, "<bio address>", "Display bio", 0);
 +      kdb_register("inode_pages", kdbm_inode_pages, "<inode *>", "Display pages in an inode", 0);
 +      kdb_register("req", kdbm_request, "<vaddr>", "dump request struct", 0);
 +      kdb_register("rqueue", kdbm_rqueue, "<vaddr>", "dump request queue", 0);
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +      kdb_register("memmap", kdbm_memmap, "", "page table summary", 0);
 +#endif
 +
 +      return 0;
 +}
 +
 +
 +static void __exit kdbm_pg_exit(void)
 +{
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +      kdb_unregister("page");
 +#endif
 +      kdb_unregister("inode");
 +      kdb_unregister("sb");
 +      kdb_unregister("bh");
 +      kdb_unregister("bio");
 +      kdb_unregister("inode_pages");
 +      kdb_unregister("req");
 +      kdb_unregister("rqueue");
 +#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_NUMA)
 +      kdb_unregister("memmap");
 +#endif
 +}
 +
 +module_init(kdbm_pg_init)
 +module_exit(kdbm_pg_exit)
index 4150a6e,0000000..e8435c5
mode 100644,000000..100644
--- /dev/null
@@@ -1,195 -1,0 +1,196 @@@
 +/*
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (c) 1999-2006 Silicon Graphics, Inc.  All Rights Reserved.
 + */
 +
 +#include <linux/blkdev.h>
 +#include <linux/types.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/mm.h>
 +#include <linux/sched.h>
++#include <linux/slab.h>
 +#include <asm/signal.h>
 +
 +MODULE_AUTHOR("SGI");
 +MODULE_DESCRIPTION("Debug struct task and sigset information");
 +MODULE_LICENSE("GPL");
 +
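 +/* Format tp->cpus_allowed as ALL, NONE, ONLY(cpu) or a cpu list.  The
 + * returned buffer is static; kdb is single threaded, so that is safe.
 + */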
 +static char *
 +kdb_cpus_allowed_string(struct task_struct *tp)
 +{
 +      static char maskbuf[NR_CPUS * 8];
 +      if (cpus_equal(tp->cpus_allowed, cpu_online_map))
 +              strcpy(maskbuf, "ALL");
 +      else if (cpus_empty(tp->cpus_allowed))
 +              strcpy(maskbuf, "NONE");
 +      else if (cpus_weight(tp->cpus_allowed) == 1)
 +              snprintf(maskbuf, sizeof(maskbuf), "ONLY(%d)", first_cpu(tp->cpus_allowed));
 +      else
 +              cpulist_scnprintf(maskbuf, sizeof(maskbuf), &tp->cpus_allowed);
 +      return maskbuf;
 +}
 +
 +static int
 +kdbm_task(int argc, const char **argv)
 +{
 +      unsigned long addr;
 +      long offset=0;
 +      int nextarg;
 +      int e = 0;
 +      struct task_struct *tp = NULL, *tp1;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
 +              return(e);
 +
 +      if (!(tp = kmalloc(sizeof(*tp), GFP_ATOMIC))) {
 +          kdb_printf("%s: cannot kmalloc tp\n", __FUNCTION__);
 +          goto out;
 +      }
 +      if ((e = kdb_getarea(*tp, addr))) {
 +          kdb_printf("%s: invalid task address\n", __FUNCTION__);
 +          goto out;
 +      }
 +
 +      tp1 = (struct task_struct *)addr;
 +      kdb_printf(
 +          "struct task at 0x%lx, pid=%d flags=0x%x state=%ld comm=\"%s\"\n",
 +          addr, tp->pid, tp->flags, tp->state, tp->comm);
 +
 +      kdb_printf("  cpu=%d policy=%u ", kdb_process_cpu(tp), tp->policy);
 +      kdb_printf(
 +          "prio=%d static_prio=%d cpus_allowed=",
 +          tp->prio, tp->static_prio);
 +      {
 +              /* The cpus allowed string may be longer than kdb_printf() can
 +               * handle.  Print it in chunks.
 +               */
 +              char c, *p;
 +              p = kdb_cpus_allowed_string(tp);
 +              while (1) {
 +                      if (strlen(p) < 100) {
 +                              kdb_printf("%s", p);
 +                              break;
 +                      }
 +                      c = p[100];
 +                      p[100] = '\0';
 +                      kdb_printf("%s", p);
 +                      p[100] = c;
 +                      p += 100;
 +              }
 +      }
 +      kdb_printf(" &thread=0x%p\n", &tp1->thread);
 +
 +      kdb_printf("  need_resched=%d ",
 +              test_tsk_thread_flag(tp, TIF_NEED_RESCHED));
 +      kdb_printf(
 +          "time_slice=%u",
 +          tp->rt.time_slice);
 +      kdb_printf(" lock_depth=%d\n", tp->lock_depth);
 +
 +      kdb_printf(
 +          "  fs=0x%p files=0x%p mm=0x%p\n",
 +          tp->fs, tp->files, tp->mm);
 +
 +      if (tp->sysvsem.undo_list)
 +              kdb_printf(
 +                  "  sysvsem.sem_undo refcnt %d list_proc=0x%p\n",
 +                  atomic_read(&tp->sysvsem.undo_list->refcnt),
 +                  &tp->sysvsem.undo_list->list_proc);
 +
 +      kdb_printf(
 +          "  signal=0x%p &blocked=0x%p &pending=0x%p\n",
 +          tp->signal, &tp1->blocked, &tp1->pending);
 +
 +      kdb_printf(
 +          "  utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
 +          tp->utime, tp->stime,
 +          tp->signal ? tp->signal->cutime : 0L,
 +          tp->signal ? tp->signal->cstime : 0L);
 +
 +      kdb_printf("  thread_info=0x%p\n", task_thread_info(tp));
 +      kdb_printf("  ti flags=0x%lx\n", (unsigned long)task_thread_info(tp)->flags);
 +
 +#ifdef CONFIG_NUMA
 +      kdb_printf(
 +          "  mempolicy=0x%p il_next=%d\n",
 +          tp->mempolicy, tp->il_next);
 +#endif
 +
 +out:
 +      if (tp)
 +          kfree(tp);
 +      return e;
 +}
 +
 +static int
 +kdbm_sigset(int argc, const char **argv)
 +{
 +      sigset_t *sp = NULL;
 +      unsigned long addr;
 +      long offset=0;
 +      int nextarg;
 +      int e = 0;
 +      int i;
 +      char fmt[32];
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +#ifndef _NSIG_WORDS
 +      kdb_printf("unavailable on this platform, _NSIG_WORDS not defined.\n");
 +#else
 +      nextarg = 1;
 +      if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
 +              return(e);
 +
 +      if (!(sp = kmalloc(sizeof(*sp), GFP_ATOMIC))) {
 +          kdb_printf("%s: cannot kmalloc sp\n", __FUNCTION__);
 +          goto out;
 +      }
 +      if ((e = kdb_getarea(*sp, addr))) {
 +          kdb_printf("%s: invalid sigset address\n", __FUNCTION__);
 +          goto out;
 +      }
 +
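 +      /* Build the per-word format at run time so the zero padding
 +       * matches the width of a sigset word on this architecture,
 +       * e.g. "[%d]=0x%016lx " on 64-bit.
 +       */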
 +      sprintf(fmt, "[%%d]=0x%%0%dlx ", (int)sizeof(sp->sig[0])*2);
 +      kdb_printf("sigset at 0x%p : ", sp);
 +      for (i=_NSIG_WORDS-1; i >= 0; i--) {
 +          if (i == 0 || sp->sig[i]) {
 +              kdb_printf(fmt, i, sp->sig[i]);
 +          }
 +      }
 +      kdb_printf("\n");
 +#endif /* _NSIG_WORDS */
 +
 +out:
 +      if (sp)
 +          kfree(sp);
 +      return e;
 +}
 +
 +static int __init kdbm_task_init(void)
 +{
 +      kdb_register("task", kdbm_task, "<vaddr>", "Display task_struct", 0);
 +      kdb_register("sigset", kdbm_sigset, "<vaddr>", "Display sigset_t", 0);
 +
 +      return 0;
 +}
 +
 +static void __exit kdbm_task_exit(void)
 +{
 +      kdb_unregister("task");
 +      kdb_unregister("sigset");
 +}
 +
 +module_init(kdbm_task_init)
 +module_exit(kdbm_task_exit)
index 64e9a17,0000000..482152d
mode 100644,000000..100644
--- /dev/null
@@@ -1,1043 -1,0 +1,1044 @@@
 +/*
 + * This file is subject to the terms and conditions of the GNU General Public
 + * License.  See the file "COPYING" in the main directory of this archive
 + * for more details.
 + *
 + * Copyright (c) 1999-2006 Silicon Graphics, Inc.  All Rights Reserved.
 + */
 +
 +#include <linux/blkdev.h>
 +#include <linux/types.h>
 +#include <linux/kdb.h>
 +#include <linux/kdbprivate.h>
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/mm.h>
 +#include <linux/swap.h>
 +#include <linux/swapops.h>
++#include <linux/slab.h>
 +
 +#include <scsi/scsi.h>
 +#include <scsi/scsi_cmnd.h>
 +#include <scsi/scsi_device.h>
 +#include <scsi/scsi_host.h>
 +#include <asm/pgtable.h>
 +
 +MODULE_AUTHOR("SGI");
 +MODULE_DESCRIPTION("Debug VM information");
 +MODULE_LICENSE("GPL");
 +
 +struct __vmflags {
 +      unsigned long mask;
 +      char *name;
 +};
 +
 +static struct __vmflags vmflags[] = {
 +      { VM_READ, "VM_READ " },
 +      { VM_WRITE, "VM_WRITE " },
 +      { VM_EXEC, "VM_EXEC " },
 +      { VM_SHARED, "VM_SHARED " },
 +      { VM_MAYREAD, "VM_MAYREAD " },
 +      { VM_MAYWRITE, "VM_MAYWRITE " },
 +      { VM_MAYEXEC, "VM_MAYEXEC " },
 +      { VM_MAYSHARE, "VM_MAYSHARE " },
 +      { VM_GROWSDOWN, "VM_GROWSDOWN " },
 +      { VM_GROWSUP, "VM_GROWSUP " },
 +      { VM_PFNMAP, "VM_PFNMAP " },
 +      { VM_DENYWRITE, "VM_DENYWRITE " },
 +      { VM_EXECUTABLE, "VM_EXECUTABLE " },
 +      { VM_LOCKED, "VM_LOCKED " },
 +      { VM_IO, "VM_IO " },
 +      { VM_SEQ_READ, "VM_SEQ_READ " },
 +      { VM_RAND_READ, "VM_RAND_READ " },
 +      { VM_DONTCOPY, "VM_DONTCOPY " },
 +      { VM_DONTEXPAND, "VM_DONTEXPAND " },
 +      { VM_RESERVED, "VM_RESERVED " },
 +      { VM_ACCOUNT, "VM_ACCOUNT " },
 +      { VM_HUGETLB, "VM_HUGETLB " },
 +      { VM_NONLINEAR, "VM_NONLINEAR " },
 +      { VM_MAPPED_COPY, "VM_MAPPED_COPY " },
 +      { VM_INSERTPAGE, "VM_INSERTPAGE " },
 +      { 0, "" }
 +};
 +
 +static int
 +kdbm_print_vm(struct vm_area_struct *vp, unsigned long addr, int verbose_flg)
 +{
 +      struct __vmflags *tp;
 +
 +      kdb_printf("struct vm_area_struct at 0x%lx for %d bytes\n",
 +                 addr, (int) sizeof (struct vm_area_struct));
 +
 +      kdb_printf("vm_start = 0x%p   vm_end = 0x%p\n", (void *) vp->vm_start,
 +                 (void *) vp->vm_end);
 +      kdb_printf("vm_page_prot = 0x%llx\n",
 +              (unsigned long long)pgprot_val(vp->vm_page_prot));
 +
 +      kdb_printf("vm_flags: ");
 +      for (tp = vmflags; tp->mask; tp++) {
 +              if (vp->vm_flags & tp->mask) {
 +                      kdb_printf(" %s", tp->name);
 +              }
 +      }
 +      kdb_printf("\n");
 +
 +      if (!verbose_flg)
 +              return 0;
 +
 +      kdb_printf("vm_mm = 0x%p\n", (void *) vp->vm_mm);
 +      kdb_printf("vm_next = 0x%p\n", (void *) vp->vm_next);
 +      kdb_printf("shared.vm_set.list.next = 0x%p\n", (void *) vp->shared.vm_set.list.next);
 +      kdb_printf("shared.vm_set.list.prev = 0x%p\n", (void *) vp->shared.vm_set.list.prev);
 +      kdb_printf("shared.vm_set.parent = 0x%p\n", (void *) vp->shared.vm_set.parent);
 +      kdb_printf("shared.vm_set.head = 0x%p\n", (void *) vp->shared.vm_set.head);
 +      kdb_printf("anon_vma_chain.next = 0x%p\n", (void *) vp->anon_vma_chain.next);
 +      kdb_printf("anon_vma_chain.prev = 0x%p\n", (void *) vp->anon_vma_chain.prev);
 +      kdb_printf("vm_ops = 0x%p\n", (void *) vp->vm_ops);
 +      if (vp->vm_ops != NULL) {
 +              kdb_printf("vm_ops->open = 0x%p\n", vp->vm_ops->open);
 +              kdb_printf("vm_ops->close = 0x%p\n", vp->vm_ops->close);
 +              kdb_printf("vm_ops->fault = 0x%p\n", vp->vm_ops->fault);
 +#ifdef HAVE_VMOP_MPROTECT
 +              kdb_printf("vm_ops->mprotect = 0x%p\n", vp->vm_ops->mprotect);
 +#endif
 +#ifdef CONFIG_NUMA
 +              kdb_printf("vm_ops->set_policy = 0x%p\n", vp->vm_ops->set_policy);
 +              kdb_printf("vm_ops->get_policy = 0x%p\n", vp->vm_ops->get_policy);
 +#endif
 +      }
 +      kdb_printf("vm_pgoff = 0x%lx\n", vp->vm_pgoff);
 +      kdb_printf("vm_file = 0x%p\n", (void *) vp->vm_file);
 +      kdb_printf("vm_private_data = 0x%p\n", vp->vm_private_data);
 +#ifdef CONFIG_NUMA
 +      kdb_printf("vm_policy = 0x%p\n", vp->vm_policy);
 +#endif
 +
 +      return 0;
 +}
 +
 +static int
 +kdbm_print_vmp(struct vm_area_struct *vp, int verbose_flg)
 +{
 +      struct __vmflags *tp;
 +
 +      if (verbose_flg) {
 +              kdb_printf("0x%lx:  ", (unsigned long) vp);
 +      }
 +
 +      kdb_printf("0x%p  0x%p ", (void *) vp->vm_start, (void *) vp->vm_end);
 +
 +      for (tp = vmflags; tp->mask; tp++) {
 +              if (vp->vm_flags & tp->mask) {
 +                      kdb_printf(" %s", tp->name);
 +              }
 +      }
 +      kdb_printf("\n");
 +
 +      return 0;
 +}
 +
 +
 +#ifdef CONFIG_NUMA
 +#include <linux/mempolicy.h>
 +
 +/*
 + * kdbm_mpol
 + *
 + *    This function implements the 'mempolicy' command.
 + *    Print a struct mempolicy.
 + *
 + *    mempolicy <address>     Print struct mempolicy at <address>
 + */
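 +/*
 + * Example (the address is illustrative):
 + *    kdb> mempolicy 0xe00000b418a3c300
 + */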
 +static int
 +kdbm_mpol(int argc, const char **argv)
 +{
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int err = 0;
 +      struct mempolicy *mp = NULL;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((err = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
 +                              NULL)) != 0)
 +              return(err);
 +
 +      if (!(mp = kmalloc(sizeof(*mp), GFP_ATOMIC))) {
 +              kdb_printf("%s: cannot kmalloc mp\n", __FUNCTION__);
 +              goto out;
 +      }
 +
 +      if ((err = kdb_getarea(*mp, addr))) {
 +              kdb_printf("%s: invalid mempolicy address\n", __FUNCTION__);
 +              goto out;
 +      }
 +
 +      kdb_printf("struct mempolicy at 0x%p\n", (struct mempolicy *)addr);
 +      kdb_printf("  refcnt %d\n", atomic_read(&mp->refcnt));
 +
 +      switch (mp->mode) {
 +        case MPOL_DEFAULT:
 +              kdb_printf("  mode %d (MPOL_DEFAULT)\n", mp->mode);
 +              break;
 +
 +        case MPOL_PREFERRED:
 +              kdb_printf("  mode %d (MPOL_PREFERRED)\n", mp->mode);
 +              if (mp->flags & MPOL_F_LOCAL)
 +                      kdb_printf("  preferred_node local\n");
 +              else
 +                      kdb_printf("  preferred_node %d\n", mp->v.preferred_node);
 +              break;
 +
 +        case MPOL_BIND:
 +        case MPOL_INTERLEAVE:
 +        {
 +              int i, nlongs;
 +              unsigned long *longp;
 +
 +              kdb_printf("  mode %d (%s)\n", mp->mode,
 +                      mp->mode == MPOL_INTERLEAVE
 +                              ? "MPOL_INTERLEAVE"
 +                              : "MPOL_BIND");
 +              nlongs = (int)BITS_TO_LONGS(MAX_NUMNODES);
 +              kdb_printf("  nodes:");
 +              longp = mp->v.nodes.bits;
 +              for (i = 0; i < nlongs; i++, longp++)
 +                      kdb_printf("  0x%lx ", *longp);
 +              kdb_printf("\n");
 +              break;
 +        }
 +
 +        default:
 +              kdb_printf("  mode %d (unknown)\n", mp->mode);
 +              break;
 +      }
 +out:
 +      if (mp)
 +              kfree(mp);
 +      return err;
 +}
 +
 +#endif /* CONFIG_NUMA */
 +
 +/*
 + * kdbm_pgdat
 + *
 + *    This function implements the 'pgdat' command.
 + *    Print a struct pglist_data (pg_dat_t).
 + *
 + *    pgdat <node_id>         Print struct pglist_data for node <node_id>.
 + *
 + *    Print pglist_data for node 0 if node_id not specified,
 + *    or print the one pglist_data structure if !CONFIG_NUMA.
 + */
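 +/*
 + * Example:
 + *    kdb> pgdat 0
 + * dumps node 0's zonelists, cross-referencing each zoneref back to the
 + * owning node's node_zones[] slot, followed by the node's page counts.
 + */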
 +static int
 +kdbm_pgdat(int argc, const char **argv)
 +{
 +      int err = 0, node_id = 0, i;
 +      pg_data_t *pgdatp = NULL;
 +
 +#ifdef CONFIG_NUMA
 +      if (argc > 1)
 +              return KDB_ARGCOUNT;
 +      if (argc == 1) {
 +              int nextarg;
 +              long offset = 0;
 +              unsigned long node_id_ul;
 +
 +              nextarg = 1;
 +              if ((err = kdbgetaddrarg(argc, argv, &nextarg, &node_id_ul,
 +                                       &offset, NULL)) != 0) {
 +                      return(err);
 +              }
 +              node_id = (int)node_id_ul;
 +      }
 +#endif
 +      for_each_online_pgdat(pgdatp) {
 +              if (pgdatp->node_id == node_id)
 +                      break;
 +      }
 +      if (!pgdatp) {
 +              kdb_printf("%s: specified node not found\n", __FUNCTION__);
 +              return 0;
 +      }
 +      kdb_printf("struct pglist_data at 0x%p  node_id = %d\n",
 +                 pgdatp, pgdatp->node_id);
 +
 +      for (i = 0; i < MAX_ZONELISTS; i++) {
 +              int zr;
 +              struct zoneref *zonerefp;
 +              struct zone *zonep;
 +
 +              zonerefp = pgdatp->node_zonelists[i]._zonerefs;
 +              kdb_printf("  _zonerefs[%d] at 0x%p\n", i, zonerefp);
 +
 +              for (zr = 0; zr <= MAX_ZONES_PER_ZONELIST; zr++, zonerefp++) {
 +                      int z;
 +                      pg_data_t *tmp_pgdatp;
 +
 +                      zonep = zonelist_zone(zonerefp);
 +                      if (!zonep)
 +                              break;
 +
 +                      kdb_printf("    0x%p", zonep);
 +
 +                      for_each_online_pgdat(tmp_pgdatp) {
 +                              for (z = 0; z < MAX_NR_ZONES; z++) {
 +                                      if (zonep == &tmp_pgdatp->node_zones[z]) {
 +                                              kdb_printf ("  (node %d node_zones[%d])",
 +                                                   tmp_pgdatp->node_id, z);
 +                                              break;
 +                                      }
 +                              }
 +                              if (z != MAX_NR_ZONES)
 +                                      break;  /* found it */
 +                      }
 +                      kdb_printf("\n");
 +              }
 +      }
 +
 +      kdb_printf("  nr_zones = %d", pgdatp->nr_zones);
 +#ifdef CONFIG_FLAT_NODE_MEM_MAP
 +      kdb_printf("  node_mem_map = 0x%p\n", pgdatp->node_mem_map);
 +#endif
 +#ifndef CONFIG_NO_BOOTMEM
 +      kdb_printf("  bdata = 0x%p", pgdatp->bdata);
 +#endif
 +      kdb_printf("  node_start_pfn = 0x%lx\n", pgdatp->node_start_pfn);
 +      kdb_printf("  node_present_pages = %ld (0x%lx)\n",
 +                 pgdatp->node_present_pages, pgdatp->node_present_pages);
 +      kdb_printf("  node_spanned_pages = %ld (0x%lx)\n",
 +                 pgdatp->node_spanned_pages, pgdatp->node_spanned_pages);
 +      kdb_printf("  kswapd = 0x%p\n", pgdatp->kswapd);
 +
 +      return err;
 +}
 +
 +/*
 + * kdbm_vm
 + *
 + *     This function implements the 'vm' command.  Print a vm_area_struct.
 + *
 + *     vm [-v] <address>      Print vm_area_struct at <address>
 + *     vmp [-v] <pid>         Print all vm_area_structs for <pid>
 + */
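 +/*
 + * Example session (address and pid are illustrative):
 + *    kdb> vm -v 0xe000003014f80b60
 + *    kdb> vmp 964
 + */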
 +
 +static int
 +kdbm_vm(int argc, const char **argv)
 +{
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      int diag;
 +      int verbose_flg = 0;
 +
 +      if (argc == 2) {
 +              if (strcmp(argv[1], "-v") != 0) {
 +                      return KDB_ARGCOUNT;
 +              }
 +              verbose_flg = 1;
 +      } else if (argc != 1) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if (strcmp(argv[0], "vmp") == 0) {
 +              struct task_struct *g, *tp;
 +              struct vm_area_struct *vp;
 +              pid_t pid;
 +              unsigned long upid;
 +
 +              if ((diag = kdbgetularg(argv[argc], &upid)))
 +                      return diag;
 +              pid = (pid_t)upid;
 +
 +              kdb_do_each_thread(g, tp) {
 +                      if (tp->pid == pid) {
 +                              if (tp->mm != NULL) {
 +                                      if (verbose_flg)
 +                                              kdb_printf
 +                                                  ("vm_area_struct       ");
 +                                      kdb_printf
 +                                          ("vm_start            vm_end              vm_flags\n");
 +                                      vp = tp->mm->mmap;
 +                                      while (vp != NULL) {
 +                                              kdbm_print_vmp(vp, verbose_flg);
 +                                              vp = vp->vm_next;
 +                                      }
 +                              }
 +                              return 0;
 +                      }
 +              } kdb_while_each_thread(g, tp);
 +
 +              kdb_printf("No process with pid == %d found\n", pid);
 +
 +      } else {
 +              struct vm_area_struct v;
 +
 +              nextarg = argc;
 +              if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
 +                                        NULL))
 +                  || (diag = kdb_getarea(v, addr)))
 +                      return (diag);
 +
 +              kdbm_print_vm(&v, addr, verbose_flg);
 +      }
 +
 +      return 0;
 +}
 +
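 +/* Print one pte as "0x<val> (<flags>)".  For present ptes the flag
 + * letters mean X (executable), W (writable), R (readable), A (accessed/
 + * young) and D (dirty); for swapped-out ptes the swap OFFSET and TYPE
 + * are printed instead.
 + */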
 +static int
 +kdbm_print_pte(pte_t * pte)
 +{
 +      kdb_printf("0x%lx (", (unsigned long) pte_val(*pte));
 +
 +      if (pte_present(*pte)) {
 +#ifdef        pte_exec
 +              if (pte_exec(*pte))
 +                      kdb_printf("X");
 +#endif
 +              if (pte_write(*pte))
 +                      kdb_printf("W");
 +#ifdef        pte_read
 +              if (pte_read(*pte))
 +                      kdb_printf("R");
 +#endif
 +              if (pte_young(*pte))
 +                      kdb_printf("A");
 +              if (pte_dirty(*pte))
 +                      kdb_printf("D");
 +
 +      } else {
 +              kdb_printf("OFFSET=0x%lx ", swp_offset(pte_to_swp_entry(*pte)));
 +              kdb_printf("TYPE=0x%ulx", swp_type(pte_to_swp_entry(*pte)));
 +      }
 +
 +      kdb_printf(")");
 +
 +      /* final newline is output by caller of kdbm_print_pte() */
 +
 +      return 0;
 +}
 +
 +/*
 + * kdbm_pte
 + *
 + *     This function implements the 'pte' command.  Print all pte_t structures
 + *     that map to the given virtual address range (<address> through <address>
 + *     plus <nbytes>) for the given process. The default value for nbytes is
 + *     one.
 + *
 + *     pte -m <mm> <address> [<nbytes>]    Print all pte_t structures for
 + *                                       virtual <address> in address space
 + *                                       of <mm> which is a pointer to a
 + *                                       mm_struct
 + *     pte -p <pid> <address> [<nbytes>]   Print all pte_t structures for
 + *                                       virtual <address> in address space
 + *                                       of <pid>
 + */
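 +/*
 + * Example (pid and address are illustrative):
 + *    kdb> pte -p 964 0x2000000000000000 16384
 + * prints one line for each page covered by the 16KB range.
 + */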
 +
 +static int
 +kdbm_pte(int argc, const char **argv)
 +{
 +      unsigned long addr;
 +      long offset = 0;
 +      int nextarg;
 +      unsigned long nbytes = 1;
 +      long npgs;
 +      int diag;
 +      int found;
 +      pid_t pid;
 +      struct task_struct *tp;
 +      struct mm_struct *mm, copy_of_mm;
 +      pgd_t *pgd;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +
 +      if (argc < 3 || argc > 4) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if (strcmp(argv[1], "-p") == 0) {
 +              unsigned long upid;
 +
 +              if ((diag = kdbgetularg(argv[2], &upid))) {
 +                      return diag;
 +              }
 +              pid = (pid_t)upid;
 +
 +              found = 0;
 +              for_each_process(tp) {
 +                      if (tp->pid == pid) {
 +                              if (tp->mm != NULL) {
 +                                      found = 1;
 +                                      break;
 +                              }
 +                              kdb_printf("task structure's mm field is NULL\n");
 +                              return 0;
 +                      }
 +              }
 +
 +              if (!found) {
 +                      kdb_printf("No process with pid == %d found\n", pid);
 +                      return 0;
 +              }
 +              mm = tp->mm;
 +      } else if (strcmp(argv[1], "-m") == 0) {
 +
 +              nextarg = 2;
 +              if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
 +                                        NULL))
 +                  || (diag = kdb_getarea(copy_of_mm, addr)))
 +                      return (diag);
 +              mm = &copy_of_mm;
 +      } else {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if ((diag = kdbgetularg(argv[3], &addr))) {
 +              return diag;
 +      }
 +
 +      if (argc == 4) {
 +              if ((diag = kdbgetularg(argv[4], &nbytes))) {
 +                      return diag;
 +              }
 +      }
 +
 +      kdb_printf("vaddr              pte\n");
 +
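 +      /* Round the request up to whole pages: the offset of addr within
 +       * its page plus nbytes, rounded up by PAGE_SIZE - 1, gives the
 +       * number of pages to walk.
 +       */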
 +      npgs = ((((addr & ~PAGE_MASK) + nbytes) + ~PAGE_MASK) >> PAGE_SHIFT);
 +      while (npgs-- > 0) {
 +
 +              kdb_printf("0x%p ", (void *) (addr & PAGE_MASK));
 +
 +              pgd = pgd_offset(mm, addr);
 +              if (pgd_present(*pgd)) {
 +                      pud = pud_offset(pgd, addr);
 +                      if (pud_present(*pud)) {
 +                              pmd = pmd_offset(pud, addr);
 +                              if (pmd_present(*pmd)) {
 +                                      pte = pte_offset_map(pmd, addr);
 +                                      if (pte_present(*pte)) {
 +                                              kdbm_print_pte(pte);
 +                                      }
 +                              }
 +                      }
 +              }
 +
 +              kdb_printf("\n");
 +              addr += PAGE_SIZE;
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * kdbm_rpte
 + *
 + *     This function implements the 'rpte' command.  Print all pte_t structures
 + *     that contain the given physical page range (<pfn> through <pfn>
 + *     plus <npages>) for the given process. The default value for npages is
 + *     one.
 + *
 + *     rpte -m <mm> <pfn> [<npages>]     Print all pte_t structures for
 + *                                       physical page <pfn> in address space
 + *                                       of <mm> which is a pointer to a
 + *                                       mm_struct
 + *     rpte -p <pid> <pfn> [<npages>]    Print all pte_t structures for
 + *                                       physical page <pfn> in address space
 + *                                       of <pid>
 + */
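 +/*
 + * Example (pid and pfn are illustrative):
 + *    kdb> rpte -p 964 0x40ab2
 + * walks the entire page table of pid 964 looking for ptes that map
 + * that pfn.
 + */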
 +
 +static int
 +kdbm_rpte(int argc, const char **argv)
 +{
 +      unsigned long addr;
 +      unsigned long pfn;
 +      long offset = 0;
 +      int nextarg;
 +      unsigned long npages = 1;
 +      int diag;
 +      int found;
 +      pid_t pid;
 +      struct task_struct *tp;
 +      struct mm_struct *mm, copy_of_mm;
 +      pgd_t *pgd;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +      unsigned long g, u, m, t;
 +
 +      if (argc < 3 || argc > 4) {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if (strcmp(argv[1], "-p") == 0) {
 +              unsigned long upid;
 +
 +              if ((diag = kdbgetularg(argv[2], &upid))) {
 +                      return diag;
 +              }
 +              pid = (pid_t)upid;
 +
 +              found = 0;
 +              for_each_process(tp) {
 +                      if (tp->pid == pid) {
 +                              if (tp->mm != NULL) {
 +                                      found = 1;
 +                                      break;
 +                              }
 +                              kdb_printf("task structure's mm field is NULL\n");
 +                              return 0;
 +                      }
 +              }
 +
 +              if (!found) {
 +                      kdb_printf("No process with pid == %d found\n", pid);
 +                      return 0;
 +              }
 +              mm = tp->mm;
 +      } else if (strcmp(argv[1], "-m") == 0) {
 +
 +              nextarg = 2;
 +              if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset,
 +                                        NULL))
 +                  || (diag = kdb_getarea(copy_of_mm, addr)))
 +                      return (diag);
 +              mm = &copy_of_mm;
 +      } else {
 +              return KDB_ARGCOUNT;
 +      }
 +
 +      if ((diag = kdbgetularg(argv[3], &pfn))) {
 +              return diag;
 +      }
 +
 +      if (argc == 4) {
 +              if ((diag = kdbgetularg(argv[4], &npages))) {
 +                      return diag;
 +              }
 +      }
 +
 +      /* spaces after vaddr depends on sizeof(unsigned long) */
 +      kdb_printf("pfn              vaddr%*s pte\n",
 +                 (int)(2*sizeof(unsigned long) + 2 - 5), " ");
 +
 +      for (g = 0, pgd = pgd_offset(mm, 0UL); g < PTRS_PER_PGD; ++g, ++pgd) {
 +              if (pgd_none(*pgd) || pgd_bad(*pgd))
 +                      continue;
 +              for (u = 0, pud = pud_offset(pgd, 0UL); u < PTRS_PER_PUD; ++u, ++pud) {
 +                      if (pud_none(*pud) || pud_bad(*pud))
 +                              continue;
 +                      for (m = 0, pmd = pmd_offset(pud, 0UL); m < PTRS_PER_PMD; ++m, ++pmd) {
 +                              if (pmd_none(*pmd) || pmd_bad(*pmd))
 +                                      continue;
 +                              for (t = 0, pte = pte_offset_map(pmd, 0UL); t < PTRS_PER_PTE; ++t, ++pte) {
 +                                      if (pte_none(*pte))
 +                                              continue;
 +                                      if (pte_pfn(*pte) < pfn || pte_pfn(*pte) >= (pfn + npages))
 +                                              continue;
 +                                      addr = g << PGDIR_SHIFT;
 +#ifdef __ia64__
 +                                      /* IA64 plays tricks with the pgd mapping to save space.
 +                                       * This reverses pgd_index().
 +                                       */
 +                                      {
 +                                              unsigned long region = g >> (PAGE_SHIFT - 6);
 +                                              unsigned long l1index = g - (region << (PAGE_SHIFT - 6));
 +                                              addr = (region << 61) + (l1index << PGDIR_SHIFT);
 +                                      }
 +#endif
 +                                      addr += (m << PMD_SHIFT) + (t << PAGE_SHIFT);
 +                                      kdb_printf("0x%-14lx " kdb_bfd_vma_fmt0 " ",
 +                                                 pte_pfn(*pte), addr);
 +                                      kdbm_print_pte(pte);
 +                                      kdb_printf("\n");
 +                              }
 +                      }
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +static int
 +kdbm_print_dentry(unsigned long daddr)
 +{
 +      struct dentry d;
 +      int diag;
 +      char buf[256];
 +
 +      kdb_printf("Dentry at 0x%lx\n", daddr);
 +      if ((diag = kdb_getarea(d, (unsigned long)daddr)))
 +              return diag;
 +
 +      if ((d.d_name.len > sizeof(buf)) || (diag = kdb_getarea_size(buf, (unsigned long)(d.d_name.name), d.d_name.len)))
 +              kdb_printf(" d_name.len = %d d_name.name = 0x%p\n",
 +                                      d.d_name.len, d.d_name.name);
 +      else
 +              kdb_printf(" d_name.len = %d d_name.name = 0x%p <%.*s>\n",
 +                                      d.d_name.len, d.d_name.name,
 +                                      (int)(d.d_name.len), d.d_name.name);
 +
 +      kdb_printf(" d_count = %d d_flags = 0x%x d_inode = 0x%p\n",
 +                                      atomic_read(&d.d_count), d.d_flags, d.d_inode);
 +
 +      kdb_printf(" d_parent = 0x%p\n", d.d_parent);
 +
 +      kdb_printf(" d_hash.nxt = 0x%p d_hash.prv = 0x%p\n",
 +                                      d.d_hash.next, d.d_hash.pprev);
 +
 +      kdb_printf(" d_lru.nxt = 0x%p d_lru.prv = 0x%p\n",
 +                                      d.d_lru.next, d.d_lru.prev);
 +
 +      kdb_printf(" d_child.nxt = 0x%p d_child.prv = 0x%p\n",
 +                                      d.d_u.d_child.next, d.d_u.d_child.prev);
 +
 +      kdb_printf(" d_subdirs.nxt = 0x%p d_subdirs.prv = 0x%p\n",
 +                                      d.d_subdirs.next, d.d_subdirs.prev);
 +
 +      kdb_printf(" d_alias.nxt = 0x%p d_alias.prv = 0x%p\n",
 +                                      d.d_alias.next, d.d_alias.prev);
 +
 +      kdb_printf(" d_op = 0x%p d_sb = 0x%p d_fsdata = 0x%p\n",
 +                                      d.d_op, d.d_sb, d.d_fsdata);
 +
 +      kdb_printf(" d_iname = %s\n",
 +                                      d.d_iname);
 +
 +      if (d.d_inode) {
 +              struct inode i;
 +              kdb_printf("\nInode Entry at 0x%p\n", d.d_inode);
 +              if ((diag = kdb_getarea(i, (unsigned long)d.d_inode)))
 +                      return diag;
 +              kdb_printf(" i_mode = 0%o  i_nlink = %d  i_rdev = 0x%x\n",
 +                                              i.i_mode, i.i_nlink, i.i_rdev);
 +
 +              kdb_printf(" i_ino = %ld i_count = %d\n",
 +                                              i.i_ino, atomic_read(&i.i_count));
 +
 +              kdb_printf(" i_hash.nxt = 0x%p i_hash.prv = 0x%p\n",
 +                                              i.i_hash.next, i.i_hash.pprev);
 +
 +              kdb_printf(" i_list.nxt = 0x%p i_list.prv = 0x%p\n",
 +                                              i.i_list.next, i.i_list.prev);
 +
 +              kdb_printf(" i_dentry.nxt = 0x%p i_dentry.prv = 0x%p\n",
 +                                              i.i_dentry.next, i.i_dentry.prev);
 +
 +      }
 +      kdb_printf("\n");
 +      return 0;
 +}
 +
 +static int
 +kdbm_filp(int argc, const char **argv)
 +{
 +      struct file   f;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(f, addr)))
 +              return diag;
 +
 +      kdb_printf("File Pointer at 0x%lx\n", addr);
 +
 +      kdb_printf(" fu_list.nxt = 0x%p fu_list.prv = 0x%p\n",
 +                      f.f_u.fu_list.next, f.f_u.fu_list.prev);
 +
 +      kdb_printf(" f_dentry = 0x%p f_vfsmnt = 0x%p f_op = 0x%p\n",
 +                      f.f_dentry, f.f_vfsmnt, f.f_op);
 +
 +      kdb_printf(" f_count = %ld f_flags = 0x%x f_mode = 0x%x\n",
 +                      atomic_long_read(&f.f_count), f.f_flags, f.f_mode);
 +
 +      kdb_printf(" f_pos = %Ld\n", f.f_pos);
 +#ifdef        CONFIG_SECURITY
 +      kdb_printf(" security = 0x%p\n", f.f_security);
 +#endif
 +
 +      kdb_printf(" private_data = 0x%p f_mapping = 0x%p\n\n",
 +                                      f.private_data, f.f_mapping);
 +
 +      return kdbm_print_dentry((unsigned long)f.f_dentry);
 +}
 +
 +static int
 +kdbm_fl(int argc, const char **argv)
 +{
 +      struct file_lock fl;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +              (diag = kdb_getarea(fl, addr)))
 +                      return diag;
 +
 +      kdb_printf("File_lock at 0x%lx\n", addr);
 +
 +      kdb_printf(" fl_next = 0x%p fl_link.nxt = 0x%p fl_link.prv = 0x%p\n",
 +                      fl.fl_next, fl.fl_link.next, fl.fl_link.prev);
 +      kdb_printf(" fl_block.nxt = 0x%p fl_block.prv = 0x%p\n",
 +                      fl.fl_block.next, fl.fl_block.prev);
 +      kdb_printf(" fl_owner = 0x%p fl_pid = %d fl_wait = 0x%p\n",
 +                      fl.fl_owner, fl.fl_pid, &fl.fl_wait);
 +      kdb_printf(" fl_file = 0x%p fl_flags = 0x%x\n",
 +                      fl.fl_file, fl.fl_flags);
 +      kdb_printf(" fl_type = %d fl_start = 0x%llx fl_end = 0x%llx\n",
 +                      fl.fl_type, fl.fl_start, fl.fl_end);
 +
 +      kdb_printf(" file_lock_operations");
 +      if (fl.fl_ops)
 +              kdb_printf("\n   fl_copy_lock = 0x%p fl_release_private = 0x%p\n",
 +                      fl.fl_ops->fl_copy_lock, fl.fl_ops->fl_release_private);
 +      else
 +              kdb_printf("   empty\n");
 +
 +      kdb_printf(" lock_manager_operations");
 +      if (fl.fl_lmops)
 +              kdb_printf("\n   fl_compare_owner = 0x%p fl_notify = 0x%p\n",
 +                      fl.fl_lmops->fl_compare_owner, fl.fl_lmops->fl_notify);
 +      else
 +              kdb_printf("   empty\n");
 +
 +      kdb_printf(" fl_fasync = 0x%p fl_break 0x%lx\n",
 +                      fl.fl_fasync, fl.fl_break_time);
 +
 +      return 0;
 +}
 +
 +
 +static int
 +kdbm_dentry(int argc, const char **argv)
 +{
 +      int nextarg;
 +      unsigned long addr;
 +      long offset;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              return diag;
 +
 +      return kdbm_print_dentry(addr);
 +}
 +
 +static int
 +kdbm_kobject(int argc, const char **argv)
 +{
 +      struct kobject k;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset;
 +      int diag;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(k, addr)))
 +              return diag;
 +
 +      kdb_printf("kobject at 0x%lx\n", addr);
 +
 +      if (k.name) {
 +              char c;
 +              kdb_printf(" name 0x%p", k.name);
 +              if (kdb_getarea(c, (unsigned long)k.name) == 0)
 +                      kdb_printf(" '%s'", k.name);
 +              kdb_printf("\n");
 +      }
 +
 +      if (k.name != kobject_name((struct kobject *)addr))
 +              kdb_printf(" name '%.20s'\n", k.name);
 +
 +      kdb_printf(" kref.refcount %d'\n", atomic_read(&k.kref.refcount));
 +
 +      kdb_printf(" entry.next = 0x%p entry.prev = 0x%p\n",
 +                                      k.entry.next, k.entry.prev);
 +
 +      kdb_printf(" parent = 0x%p kset = 0x%p ktype = 0x%p sd = 0x%p\n",
 +                                      k.parent, k.kset, k.ktype, k.sd);
 +
 +      return 0;
 +}
 +
 +static int
 +kdbm_sh(int argc, const char **argv)
 +{
 +      int diag;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset = 0L;
 +      struct Scsi_Host sh;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) ||
 +          (diag = kdb_getarea(sh, addr)))
 +              return diag;
 +
 +      kdb_printf("Scsi_Host at 0x%lx\n", addr);
 +      kdb_printf("host_queue = 0x%p\n", sh.__devices.next);
 +      kdb_printf("ehandler = 0x%p eh_action = 0x%p\n",
 +                 sh.ehandler, sh.eh_action);
 +      kdb_printf("host_wait = 0x%p hostt = 0x%p\n",
 +                 &sh.host_wait, sh.hostt);
 +      kdb_printf("host_failed = %d  host_no = %d resetting = %d\n",
 +                 sh.host_failed, sh.host_no, sh.resetting);
 +      kdb_printf("max id/lun/channel = [%d/%d/%d]  this_id = %d\n",
 +                 sh.max_id, sh.max_lun, sh.max_channel, sh.this_id);
 +      kdb_printf("can_queue = %d cmd_per_lun = %d  sg_tablesize = %d u_isa_dma = %d\n",
 +                 sh.can_queue, sh.cmd_per_lun, sh.sg_tablesize, sh.unchecked_isa_dma);
 +      kdb_printf("host_blocked = %d  reverse_ordering = %d \n",
 +                 sh.host_blocked, sh.reverse_ordering);
 +
 +      return 0;
 +}
 +
 +static int
 +kdbm_sd(int argc, const char **argv)
 +{
 +      int diag;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset = 0L;
 +      struct scsi_device *sd = NULL;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              goto out;
 +      if (!(sd = kmalloc(sizeof(*sd), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_sd: cannot kmalloc sd\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*sd, addr)))
 +              goto out;
 +
 +      kdb_printf("scsi_device at 0x%lx\n", addr);
 +      kdb_printf("next = 0x%p   prev = 0x%p  host = 0x%p\n",
 +                 sd->siblings.next, sd->siblings.prev, sd->host);
 +      kdb_printf("device_busy = %d   current_cmnd 0x%p\n",
 +                 sd->device_busy, sd->current_cmnd);
 +      kdb_printf("id/lun/chan = [%d/%d/%d]  single_lun = %d  device_blocked = %d\n",
 +                 sd->id, sd->lun, sd->channel, sd->sdev_target->single_lun, sd->device_blocked);
 +      kdb_printf("queue_depth = %d current_tag = %d  scsi_level = %d\n",
 +                 sd->queue_depth, sd->current_tag, sd->scsi_level);
 +      kdb_printf("%8.8s %16.16s %4.4s\n", sd->vendor, sd->model, sd->rev);
 +out:
 +      if (sd)
 +              kfree(sd);
 +      return diag;
 +}
 +
 +static int
 +kdbm_sc(int argc, const char **argv)
 +{
 +      int diag;
 +      int nextarg;
 +      unsigned long addr;
 +      long offset = 0L;
 +      struct scsi_cmnd *sc = NULL;
 +
 +      if (argc != 1)
 +              return KDB_ARGCOUNT;
 +
 +      nextarg = 1;
 +      if ((diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)))
 +              goto out;
 +      if (!(sc = kmalloc(sizeof(*sc), GFP_ATOMIC))) {
 +              kdb_printf("kdbm_sc: cannot kmalloc sc\n");
 +              goto out;
 +      }
 +      if ((diag = kdb_getarea(*sc, addr)))
 +              goto out;
 +
 +      kdb_printf("scsi_cmnd at 0x%lx\n", addr);
 +      kdb_printf("device = 0x%p  next = 0x%p\n",
 +                 sc->device, sc->list.next);
 +      kdb_printf("serial_number = %ld  retries = %d\n",
 +                 sc->serial_number, sc->retries);
 +      kdb_printf("cmd_len = %d\n", sc->cmd_len);
 +      kdb_printf("cmnd = [%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x/%2.2x]\n",
 +                 sc->cmnd[0], sc->cmnd[1], sc->cmnd[2], sc->cmnd[3], sc->cmnd[4],
 +                 sc->cmnd[5], sc->cmnd[6], sc->cmnd[7], sc->cmnd[8], sc->cmnd[9],
 +                 sc->cmnd[10], sc->cmnd[11]);
 +      kdb_printf("request_buffer = 0x%p  request_bufflen = %d\n",
 +                 scsi_sglist(sc), scsi_bufflen(sc));
 +      kdb_printf("use_sg = %d\n", scsi_sg_count(sc));
 +      kdb_printf("underflow = %d transfersize = %d\n",
 +                 sc->underflow, sc->transfersize);
 +      kdb_printf("tag = %d\n", sc->tag);
 +
 +out:
 +      if (sc)
 +              kfree(sc);
 +      return diag;
 +}
 +
 +static int __init kdbm_vm_init(void)
 +{
 +      kdb_register("vm", kdbm_vm, "[-v] <vaddr>", "Display vm_area_struct", 0);
 +      kdb_register("vmp", kdbm_vm, "[-v] <pid>", "Display all vm_area_struct for <pid>", 0);
 +#ifdef CONFIG_NUMA
 +      kdb_register("mempolicy", kdbm_mpol, "<vaddr>", "Display mempolicy structure", 0);
 +      kdb_register("pgdat", kdbm_pgdat, "<node_id>", "Display pglist_data node structure", 0);
 +#else
 +      kdb_register("pgdat", kdbm_pgdat, "", "Display pglist_data node structure", 0);
 +#endif
 +      kdb_register("pte", kdbm_pte, "( -m <mm> | -p <pid> ) <vaddr> [<nbytes>]", "Display pte_t for mm_struct or pid", 0);
 +      kdb_register("rpte", kdbm_rpte, "( -m <mm> | -p <pid> ) <pfn> [<npages>]", "Find pte_t containing pfn for mm_struct or pid", 0);
 +      kdb_register("dentry", kdbm_dentry, "<dentry>", "Display interesting dentry stuff", 0);
 +      kdb_register("kobject", kdbm_kobject, "<kobject>", "Display interesting kobject stuff", 0);
 +      kdb_register("filp", kdbm_filp, "<filp>", "Display interesting filp stuff", 0);
 +      kdb_register("fl", kdbm_fl, "<fl>", "Display interesting file_lock stuff", 0);
 +      kdb_register("sh", kdbm_sh, "<vaddr>", "Show scsi_host", 0);
 +      kdb_register("sd", kdbm_sd, "<vaddr>", "Show scsi_device", 0);
 +      kdb_register("sc", kdbm_sc, "<vaddr>", "Show scsi_cmnd", 0);
 +
 +      return 0;
 +}
 +
 +static void __exit kdbm_vm_exit(void)
 +{
 +      kdb_unregister("vm");
 +      kdb_unregister("vmp");
 +#ifdef CONFIG_NUMA
 +      kdb_unregister("mempolicy");
 +#endif
 +      kdb_unregister("pgdat");
 +      kdb_unregister("pte");
 +      kdb_unregister("rpte");
 +      kdb_unregister("dentry");
 +      kdb_unregister("kobject");
 +      kdb_unregister("filp");
 +      kdb_unregister("fl");
 +      kdb_unregister("sh");
 +      kdb_unregister("sd");
 +      kdb_unregister("sc");
 +}
 +
 +module_init(kdbm_vm_init)
 +module_exit(kdbm_vm_exit)
diff --cc kernel/async.c
Simple merge
diff --cc kernel/audit.c
Simple merge
diff --cc kernel/exit.c
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/module.c
@@@ -2001,10 -2018,9 +2067,10 @@@ static noinline struct module *load_mod
        unsigned int symindex = 0;
        unsigned int strindex = 0;
        unsigned int modindex, versindex, infoindex, pcpuindex;
 +      unsigned int unwindex = 0;
        struct module *mod;
        long err = 0;
-       void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+       void *ptr = NULL; /* Stops spurious gcc warning */
        unsigned long symoffs, stroffs, *strmap;
  
        mm_segment_t old_fs;
diff --cc kernel/sched.c
Simple merge
diff --cc kernel/sys.c
Simple merge
Simple merge
Simple merge
diff --cc kernel/unwind.c
index 95bcddc,0000000..bc5e170
mode 100644,000000..100644
--- /dev/null
@@@ -1,1302 -1,0 +1,1303 @@@
 +/*
 + * Copyright (C) 2002-2006 Novell, Inc.
 + *    Jan Beulich <jbeulich@novell.com>
 + * This code is released under version 2 of the GNU GPL.
 + *
 + * A simple API for unwinding kernel stacks.  This is used for
 + * debugging and error reporting purposes.  The kernel doesn't need
 + * full-blown stack unwinding with all the bells and whistles, so there
 + * is not much point in implementing the full Dwarf2 unwind API.
 + */
 +
 +#include <linux/unwind.h>
 +#include <linux/module.h>
 +#include <linux/bootmem.h>
 +#include <linux/sort.h>
 +#include <linux/stop_machine.h>
 +#include <linux/uaccess.h>
 +#include <asm/sections.h>
 +#include <asm/unaligned.h>
++#include <linux/slab.h>
 +
 +extern const char __start_unwind[], __end_unwind[];
 +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
 +
 +#define MAX_STACK_DEPTH 8
 +
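 +/* EXTRA_INFO(f) produces an { offs, width } initializer for reg_info[]:
 + * offs is the offset of field f within struct unwind_frame_info measured
 + * in multiples of the field's own size, and the BUILD_BUG_ON_ZERO() term
 + * (always 0) breaks the build if that offset is not so aligned.
 + */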
 +#define EXTRA_INFO(f) { \
 +              BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
 +                                % FIELD_SIZEOF(struct unwind_frame_info, f)) \
 +              + offsetof(struct unwind_frame_info, f) \
 +                / FIELD_SIZEOF(struct unwind_frame_info, f), \
 +              FIELD_SIZEOF(struct unwind_frame_info, f) \
 +      }
 +#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
 +
 +static const struct {
 +      unsigned offs:BITS_PER_LONG / 2;
 +      unsigned width:BITS_PER_LONG / 2;
 +} reg_info[] = {
 +      UNW_REGISTER_INFO
 +};
 +
 +#undef PTREGS_INFO
 +#undef EXTRA_INFO
 +
 +#ifndef REG_INVALID
 +#define REG_INVALID(r) (reg_info[r].width == 0)
 +#endif
 +
 +#define DW_CFA_nop                          0x00
 +#define DW_CFA_set_loc                      0x01
 +#define DW_CFA_advance_loc1                 0x02
 +#define DW_CFA_advance_loc2                 0x03
 +#define DW_CFA_advance_loc4                 0x04
 +#define DW_CFA_offset_extended              0x05
 +#define DW_CFA_restore_extended             0x06
 +#define DW_CFA_undefined                    0x07
 +#define DW_CFA_same_value                   0x08
 +#define DW_CFA_register                     0x09
 +#define DW_CFA_remember_state               0x0a
 +#define DW_CFA_restore_state                0x0b
 +#define DW_CFA_def_cfa                      0x0c
 +#define DW_CFA_def_cfa_register             0x0d
 +#define DW_CFA_def_cfa_offset               0x0e
 +#define DW_CFA_def_cfa_expression           0x0f
 +#define DW_CFA_expression                   0x10
 +#define DW_CFA_offset_extended_sf           0x11
 +#define DW_CFA_def_cfa_sf                   0x12
 +#define DW_CFA_def_cfa_offset_sf            0x13
 +#define DW_CFA_val_offset                   0x14
 +#define DW_CFA_val_offset_sf                0x15
 +#define DW_CFA_val_expression               0x16
 +#define DW_CFA_lo_user                      0x1c
 +#define DW_CFA_GNU_window_save              0x2d
 +#define DW_CFA_GNU_args_size                0x2e
 +#define DW_CFA_GNU_negative_offset_extended 0x2f
 +#define DW_CFA_hi_user                      0x3f
 +
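 +/* DWARF exception-header pointer encodings: bits 0-3 select the data
 + * format (native, leb128, 2/4/8-byte, optionally signed), bits 4-6 how
 + * the value is applied (absolute, pc-, text-, data- or function-
 + * relative), and bit 7 requests indirection through the stored value.
 + */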
 +#define DW_EH_PE_FORM     0x07
 +#define DW_EH_PE_native   0x00
 +#define DW_EH_PE_leb128   0x01
 +#define DW_EH_PE_data2    0x02
 +#define DW_EH_PE_data4    0x03
 +#define DW_EH_PE_data8    0x04
 +#define DW_EH_PE_signed   0x08
 +#define DW_EH_PE_ADJUST   0x70
 +#define DW_EH_PE_abs      0x00
 +#define DW_EH_PE_pcrel    0x10
 +#define DW_EH_PE_textrel  0x20
 +#define DW_EH_PE_datarel  0x30
 +#define DW_EH_PE_funcrel  0x40
 +#define DW_EH_PE_aligned  0x50
 +#define DW_EH_PE_indirect 0x80
 +#define DW_EH_PE_omit     0xff
 +
 +typedef unsigned long uleb128_t;
 +typedef   signed long sleb128_t;
 +#define sleb128abs __builtin_labs
 +
 +static struct unwind_table {
 +      struct {
 +              unsigned long pc;
 +              unsigned long range;
 +      } core, init;
 +      const void *address;
 +      unsigned long size;
 +      const unsigned char *header;
 +      unsigned long hdrsz;
 +      struct unwind_table *link;
 +      const char *name;
 +} root_table;
 +
 +struct unwind_item {
 +      enum item_location {
 +              Nowhere,
 +              Memory,
 +              Register,
 +              Value
 +      } where;
 +      uleb128_t value;
 +};
 +
 +struct unwind_state {
 +      uleb128_t loc, org;
 +      const u8 *cieStart, *cieEnd;
 +      uleb128_t codeAlign;
 +      sleb128_t dataAlign;
 +      struct cfa {
 +              uleb128_t reg, offs;
 +      } cfa;
 +      struct unwind_item regs[ARRAY_SIZE(reg_info)];
 +      unsigned stackDepth:8;
 +      unsigned version:8;
 +      const u8 *label;
 +      const u8 *stack[MAX_STACK_DEPTH];
 +};
 +
 +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 +
 +static unsigned unwind_debug;
 +static int __init unwind_debug_setup(char *s)
 +{
 +      unwind_debug = simple_strtoul(s, NULL, 0);
 +      return 1;
 +}
 +__setup("unwind_debug=", unwind_debug_setup);
 +#define dprintk(lvl, fmt, args...) \
 +      ((void)(lvl > unwind_debug \
 +       || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
 +
 +static struct unwind_table *find_table(unsigned long pc)
 +{
 +      struct unwind_table *table;
 +
 +      for (table = &root_table; table; table = table->link)
 +              if ((pc >= table->core.pc
 +                   && pc < table->core.pc + table->core.range)
 +                  || (pc >= table->init.pc
 +                      && pc < table->init.pc + table->init.range))
 +                      break;
 +
 +      return table;
 +}
 +
 +static unsigned long read_pointer(const u8 **pLoc,
 +                                  const void *end,
 +                                  signed ptrType,
 +                                  unsigned long text_base,
 +                                  unsigned long data_base);
 +
 +static void init_unwind_table(struct unwind_table *table,
 +                              const char *name,
 +                              const void *core_start,
 +                              unsigned long core_size,
 +                              const void *init_start,
 +                              unsigned long init_size,
 +                              const void *table_start,
 +                              unsigned long table_size,
 +                              const u8 *header_start,
 +                              unsigned long header_size)
 +{
 +      const u8 *ptr = header_start + 4;
 +      const u8 *end = header_start + header_size;
 +
 +      table->core.pc = (unsigned long)core_start;
 +      table->core.range = core_size;
 +      table->init.pc = (unsigned long)init_start;
 +      table->init.range = init_size;
 +      table->address = table_start;
 +      table->size = table_size;
 +      /* See if the linker provided table looks valid. */
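 +      /* The two identical-looking read_pointer(..., header_start[3], ...)
 +       * calls below are deliberate: ptr advances on each call, so they
 +       * check the first table entry's start address and then its FDE
 +       * pointer.
 +       */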
 +      if (header_size <= 4
 +          || header_start[0] != 1
 +          || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
 +             != table_start
 +          || !read_pointer(&ptr, end, header_start[2], 0, 0)
 +          || !read_pointer(&ptr, end, header_start[3], 0,
 +                           (unsigned long)header_start)
 +          || !read_pointer(&ptr, end, header_start[3], 0,
 +                           (unsigned long)header_start))
 +              header_start = NULL;
 +      table->hdrsz = header_size;
 +      smp_wmb();
 +      table->header = header_start;
 +      table->link = NULL;
 +      table->name = name;
 +}
 +
 +void __init unwind_init(void)
 +{
 +      init_unwind_table(&root_table, "kernel",
 +                        _text, _end - _text,
 +                        NULL, 0,
 +                        __start_unwind, __end_unwind - __start_unwind,
 +                        __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
 +}
 +
 +static const u32 bad_cie, not_fde;
 +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
 +static signed fde_pointer_type(const u32 *cie);
 +
 +struct eh_frame_hdr_table_entry {
 +      unsigned long start, fde;
 +};
 +
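 +/* Branchless three-way compare: yields -1, 0 or 1 without risking
 + * overflow from subtracting unsigned longs. */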
 +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
 +{
 +      const struct eh_frame_hdr_table_entry *e1 = p1;
 +      const struct eh_frame_hdr_table_entry *e2 = p2;
 +
 +      return (e1->start > e2->start) - (e1->start < e2->start);
 +}
 +
 +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
 +{
 +      struct eh_frame_hdr_table_entry *e1 = p1;
 +      struct eh_frame_hdr_table_entry *e2 = p2;
 +      unsigned long v;
 +
 +      v = e1->start;
 +      e1->start = e2->start;
 +      e2->start = v;
 +      v = e1->fde;
 +      e1->fde = e2->fde;
 +      e2->fde = v;
 +}
 +
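 +/*
 + * Build a binary search header equivalent to .eh_frame_hdr when the
 + * linker did not provide a usable one: validate every FDE, count them,
 + * then emit a sorted (start address, FDE address) lookup table.
 + */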
 +static void __init setup_unwind_table(struct unwind_table *table,
 +                                      void *(*alloc)(unsigned long))
 +{
 +      const u8 *ptr;
 +      unsigned long tableSize = table->size, hdrSize;
 +      unsigned n;
 +      const u32 *fde;
 +      struct {
 +              u8 version;
 +              u8 eh_frame_ptr_enc;
 +              u8 fde_count_enc;
 +              u8 table_enc;
 +              unsigned long eh_frame_ptr;
 +              unsigned int fde_count;
 +              struct eh_frame_hdr_table_entry table[];
 +      } __attribute__((__packed__)) *header;
 +
 +      if (table->header)
 +              return;
 +
 +      if (table->hdrsz)
 +              printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
 +                     table->name);
 +
 +      if (tableSize & (sizeof(*fde) - 1))
 +              return;
 +
 +      for (fde = table->address, n = 0;
 +           tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
 +           tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
 +              const u32 *cie = cie_for_fde(fde, table);
 +              signed ptrType;
 +
 +              if (cie == &not_fde)
 +                      continue;
 +              if (cie == NULL
 +                  || cie == &bad_cie
 +                  || (ptrType = fde_pointer_type(cie)) < 0)
 +                      return;
 +              ptr = (const u8 *)(fde + 2);
 +              if (!read_pointer(&ptr,
 +                                (const u8 *)(fde + 1) + *fde,
 +                                ptrType, 0, 0))
 +                      return;
 +              ++n;
 +      }
 +
 +      if (tableSize || !n)
 +              return;
 +
 +      hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
 +              + 2 * n * sizeof(unsigned long);
 +      dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
 +      header = alloc(hdrSize);
 +      if (!header)
 +              return;
 +      header->version          = 1;
 +      header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
 +      header->fde_count_enc    = DW_EH_PE_abs|DW_EH_PE_data4;
 +      header->table_enc        = DW_EH_PE_abs|DW_EH_PE_native;
 +      put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
 +      BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
 +                   % __alignof(typeof(header->fde_count)));
 +      header->fde_count        = n;
 +
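 +      /* Second pass: all entries were validated above, so only CIEs
 +       * (identified by fde[1] == 0) need skipping here. */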
 +      BUILD_BUG_ON(offsetof(typeof(*header), table)
 +                   % __alignof(typeof(*header->table)));
 +      for (fde = table->address, tableSize = table->size, n = 0;
 +           tableSize;
 +           tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
 +              const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
 +
 +              if (!fde[1])
 +                      continue; /* this is a CIE */
 +              ptr = (const u8 *)(fde + 2);
 +              header->table[n].start = read_pointer(&ptr,
 +                                                    (const u8 *)(fde + 1) + *fde,
 +                                                    fde_pointer_type(cie), 0, 0);
 +              header->table[n].fde = (unsigned long)fde;
 +              ++n;
 +      }
 +      WARN_ON(n != header->fde_count);
 +
 +      sort(header->table,
 +           n,
 +           sizeof(*header->table),
 +           cmp_eh_frame_hdr_table_entries,
 +           swap_eh_frame_hdr_table_entries);
 +
 +      table->hdrsz = hdrSize;
 +      smp_wmb();
 +      table->header = (const void *)header;
 +}
 +
 +static void *__init balloc(unsigned long sz)
 +{
 +      return __alloc_bootmem_nopanic(sz,
 +                                     sizeof(unsigned int),
 +                                     __pa(MAX_DMA_ADDRESS));
 +}
 +
 +void __init unwind_setup(void)
 +{
 +      setup_unwind_table(&root_table, balloc);
 +}
 +
 +#ifdef CONFIG_MODULES
 +
 +static struct unwind_table *last_table;
 +
 +/* Must be called with module_mutex held. */
 +void *unwind_add_table(struct module *module,
 +                       const void *table_start,
 +                       unsigned long table_size)
 +{
 +      struct unwind_table *table;
 +
 +      if (table_size <= 0)
 +              return NULL;
 +
 +      table = kmalloc(sizeof(*table), GFP_KERNEL);
 +      if (!table)
 +              return NULL;
 +
 +      init_unwind_table(table, module->name,
 +                        module->module_core, module->core_size,
 +                        module->module_init, module->init_size,
 +                        table_start, table_size,
 +                        NULL, 0);
 +
 +      if (last_table)
 +              last_table->link = table;
 +      else
 +              root_table.link = table;
 +      last_table = table;
 +
 +      return table;
 +}
 +
 +struct unlink_table_info
 +{
 +      struct unwind_table *table;
 +      int init_only;
 +};
 +
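 +/* Runs via stop_machine() so no other CPU can be walking the table
 + * chain while an entry is unlinked. */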
 +static int unlink_table(void *arg)
 +{
 +      struct unlink_table_info *info = arg;
 +      struct unwind_table *table = info->table, *prev;
 +
 +      for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
 +              ;
 +
 +      if (prev->link) {
 +              if (info->init_only) {
 +                      table->init.pc = 0;
 +                      table->init.range = 0;
 +                      info->table = NULL;
 +              } else {
 +                      prev->link = table->link;
 +                      if (!prev->link)
 +                              last_table = prev;
 +              }
 +      } else
 +              info->table = NULL;
 +
 +      return 0;
 +}
 +
 +/* Must be called with module_mutex held. */
 +void unwind_remove_table(void *handle, int init_only)
 +{
 +      struct unwind_table *table = handle;
 +      struct unlink_table_info info;
 +
 +      if (!table || table == &root_table)
 +              return;
 +
 +      if (init_only && table == last_table) {
 +              table->init.pc = 0;
 +              table->init.range = 0;
 +              return;
 +      }
 +
 +      info.table = table;
 +      info.init_only = init_only;
 +      stop_machine(unlink_table, &info, NULL);
 +
 +      if (info.table)
 +              kfree(table);
 +}
 +
 +#endif /* CONFIG_MODULES */
 +
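 +/*
 + * Decode an unsigned LEB128 quantity: 7 value bits per byte, least
 + * significant group first, bit 7 set on all but the last byte.  E.g.
 + * the bytes 0xe5 0x8e 0x26 decode to 0x65 | (0x0e << 7) | (0x26 << 14)
 + * = 624485 (the DWARF specification's own example).  If the value
 + * would overflow, *pcur is set past end so callers can detect it.
 + */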
 +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
 +{
 +      const u8 *cur = *pcur;
 +      uleb128_t value;
 +      unsigned shift;
 +
 +      for (shift = 0, value = 0; cur < end; shift += 7) {
 +              if (shift + 7 > 8 * sizeof(value)
 +                  && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
 +                      cur = end + 1;
 +                      break;
 +              }
 +              value |= (uleb128_t)(*cur & 0x7f) << shift;
 +              if (!(*cur++ & 0x80))
 +                      break;
 +      }
 +      *pcur = cur;
 +
 +      return value;
 +}
 +
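 +/*
 + * Decode a signed LEB128 quantity; like get_uleb128() but bit 6 of the
 + * last byte is the sign, which gets extended into the high bits.  E.g.
 + * the bytes 0x9b 0xf1 0x59 decode to -624485 (the DWARF
 + * specification's own example).
 + */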
 +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
 +{
 +      const u8 *cur = *pcur;
 +      sleb128_t value;
 +      unsigned shift;
 +
 +      for (shift = 0, value = 0; cur < end; shift += 7) {
 +              if (shift + 7 > 8 * sizeof(value)
 +                  && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
 +                      cur = end + 1;
 +                      break;
 +              }
 +              value |= (sleb128_t)(*cur & 0x7f) << shift;
 +              if (!(*cur++ & 0x80)) {
 +                      /* sign-extend from bit 6 of the final byte */
 +                      value |= -(sleb128_t)(cur[-1] & 0x40) << shift;
 +                      break;
 +              }
 +      }
 +      *pcur = cur;
 +
 +      return value;
 +}
 +
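 +/*
 + * In .eh_frame, each entry starts with a length word; the following
 + * word is zero for a CIE and, for an FDE, a self-relative back offset
 + * from that word to the FDE's parent CIE.  Validate the offsets and
 + * return the CIE, or one of the sentinels &bad_cie / &not_fde.
 + */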
 +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
 +{
 +      const u32 *cie;
 +
 +      if (!*fde || (*fde & (sizeof(*fde) - 1)))
 +              return &bad_cie;
 +      if (!fde[1])
 +              return &not_fde; /* this is a CIE */
 +      if ((fde[1] & (sizeof(*fde) - 1))
 +          || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
 +              return NULL; /* this is not a valid FDE */
 +      cie = fde + 1 - fde[1] / sizeof(*fde);
 +      if (*cie <= sizeof(*cie) + 4
 +          || *cie >= fde[1] - sizeof(*fde)
 +          || (*cie & (sizeof(*cie) - 1))
 +          || cie[1])
 +              return NULL; /* this is not a (valid) CIE */
 +      return cie;
 +}
 +
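 +/*
 + * Read one DW_EH_PE encoded pointer.  The low bits of ptrType select
 + * the storage form (native, data2/4/8, [s|u]leb128), the high bits how
 + * the value is adjusted (absolute, pc-, text- or data-relative), and
 + * DW_EH_PE_indirect requests one extra dereference.  Returns 0 and
 + * leaves *pLoc untouched on any decoding error.
 + */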
 +static unsigned long read_pointer(const u8 **pLoc,
 +                                  const void *end,
 +                                  signed ptrType,
 +                                  unsigned long text_base,
 +                                  unsigned long data_base)
 +{
 +      unsigned long value = 0;
 +      union {
 +              const u8 *p8;
 +              const u16 *p16u;
 +              const s16 *p16s;
 +              const u32 *p32u;
 +              const s32 *p32s;
 +              const unsigned long *pul;
 +      } ptr;
 +
 +      if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
 +              dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
 +              return 0;
 +      }
 +      ptr.p8 = *pLoc;
 +      switch (ptrType & DW_EH_PE_FORM) {
 +      case DW_EH_PE_data2:
 +              if (end < (const void *)(ptr.p16u + 1)) {
 +                      dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
 +                      return 0;
 +              }
 +              if (ptrType & DW_EH_PE_signed)
 +                      value = get_unaligned(ptr.p16s++);
 +              else
 +                      value = get_unaligned(ptr.p16u++);
 +              break;
 +      case DW_EH_PE_data4:
 +#ifdef CONFIG_64BIT
 +              if (end < (const void *)(ptr.p32u + 1)) {
 +                      dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
 +                      return 0;
 +              }
 +              if (ptrType & DW_EH_PE_signed)
 +                      value = get_unaligned(ptr.p32s++);
 +              else
 +                      value = get_unaligned(ptr.p32u++);
 +              break;
 +      case DW_EH_PE_data8:
 +              BUILD_BUG_ON(sizeof(u64) != sizeof(value));
 +#else
 +              BUILD_BUG_ON(sizeof(u32) != sizeof(value));
 +#endif
 +      case DW_EH_PE_native:
 +              if (end < (const void *)(ptr.pul + 1)) {
 +                      dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
 +                      return 0;
 +              }
 +              value = get_unaligned(ptr.pul++);
 +              break;
 +      case DW_EH_PE_leb128:
 +              BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
 +              value = ptrType & DW_EH_PE_signed
 +                      ? get_sleb128(&ptr.p8, end)
 +                      : get_uleb128(&ptr.p8, end);
 +              if ((const void *)ptr.p8 > end) {
 +                      dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
 +                      return 0;
 +              }
 +              break;
 +      default:
 +              dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
 +                      ptrType, ptr.p8, end);
 +              return 0;
 +      }
 +      switch (ptrType & DW_EH_PE_ADJUST) {
 +      case DW_EH_PE_abs:
 +              break;
 +      case DW_EH_PE_pcrel:
 +              value += (unsigned long)*pLoc;
 +              break;
 +      case DW_EH_PE_textrel:
 +              if (likely(text_base)) {
 +                      value += text_base;
 +                      break;
 +              }
 +              dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
 +                      ptrType, *pLoc, end);
 +              return 0;
 +      case DW_EH_PE_datarel:
 +              if (likely(data_base)) {
 +                      value += data_base;
 +                      break;
 +              }
 +              dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
 +                      ptrType, *pLoc, end);
 +              return 0;
 +      default:
 +              dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
 +                      ptrType, *pLoc, end);
 +              return 0;
 +      }
 +      if ((ptrType & DW_EH_PE_indirect)
 +          && probe_kernel_address(value, value)) {
 +              dprintk(1, "Cannot read indirect value %lx (%p,%p).",
 +                      value, *pLoc, end);
 +              return 0;
 +      }
 +      *pLoc = ptr.p8;
 +
 +      return value;
 +}
 +
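 +/*
 + * Parse a CIE just far enough to extract its 'R' augmentation, i.e. the
 + * pointer encoding used by the FDEs referring to it.  Returns the
 + * encoding (defaulting to native/absolute when there is no
 + * augmentation string) or -1 if the CIE is malformed or unsupported.
 + */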
 +static signed fde_pointer_type(const u32 *cie)
 +{
 +      const u8 *ptr = (const u8 *)(cie + 2);
 +      unsigned version = *ptr;
 +
 +      if (version != 1)
 +              return -1; /* unsupported */
 +      if (*++ptr) {
 +              const char *aug;
 +              const u8 *end = (const u8 *)(cie + 1) + *cie;
 +              uleb128_t len;
 +
 +              /* check if augmentation size is first (and thus present) */
 +              if (*ptr != 'z')
 +                      return -1;
 +              /* check if augmentation string is nul-terminated */
 +              if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
 +                      return -1;
 +              ++ptr; /* skip terminator */
 +              get_uleb128(&ptr, end); /* skip code alignment */
 +              get_sleb128(&ptr, end); /* skip data alignment */
 +              /* skip return address column */
 +              if (version <= 1)
 +                      ++ptr;
 +              else
 +                      get_uleb128(&ptr, end);
 +              len = get_uleb128(&ptr, end); /* augmentation length */
 +              if (ptr + len < ptr || ptr + len > end)
 +                      return -1;
 +              end = ptr + len;
 +              while (*++aug) {
 +                      if (ptr >= end)
 +                              return -1;
 +                      switch (*aug) {
 +                      case 'L':
 +                              ++ptr;
 +                              break;
 +                      case 'P': {
 +                                      signed ptrType = *ptr++;
 +
 +                                      if (!read_pointer(&ptr, end, ptrType, 0, 0)
 +                                          || ptr > end)
 +                                              return -1;
 +                              }
 +                              break;
 +                      case 'R':
 +                              return *ptr;
 +                      default:
 +                              return -1;
 +                      }
 +              }
 +      }
 +      return DW_EH_PE_native|DW_EH_PE_abs;
 +}
 +
 +static int advance_loc(unsigned long delta, struct unwind_state *state)
 +{
 +      state->loc += delta * state->codeAlign;
 +
 +      return delta > 0;
 +}
 +
 +static void set_rule(uleb128_t reg,
 +                     enum item_location where,
 +                     uleb128_t value,
 +                     struct unwind_state *state)
 +{
 +      if (reg < ARRAY_SIZE(state->regs)) {
 +              state->regs[reg].where = where;
 +              state->regs[reg].value = value;
 +      }
 +}
 +
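 +/*
 + * Interpret the CFI byte code of one CIE/FDE.  The top two bits of
 + * each opcode select the primary operations (1: advance location,
 + * 2: save a register at a CFA offset, 3: restore a register); opcode 0
 + * escapes to the extended operations switched on below.  State is
 + * accumulated in *state until targetLoc (if non-zero) is passed.
 + */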
 +static int processCFI(const u8 *start,
 +                      const u8 *end,
 +                      unsigned long targetLoc,
 +                      signed ptrType,
 +                      struct unwind_state *state)
 +{
 +      union {
 +              const u8 *p8;
 +              const u16 *p16;
 +              const u32 *p32;
 +      } ptr;
 +      int result = 1;
 +
 +      if (start != state->cieStart) {
 +              state->loc = state->org;
 +              result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
 +              if (targetLoc == 0 && state->label == NULL)
 +                      return result;
 +      }
 +      for (ptr.p8 = start; result && ptr.p8 < end; ) {
 +              switch (*ptr.p8 >> 6) {
 +                      uleb128_t value;
 +
 +              case 0:
 +                      switch (*ptr.p8++) {
 +                      case DW_CFA_nop:
 +                              break;
 +                      case DW_CFA_set_loc:
 +                              state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
 +                              if (state->loc == 0)
 +                                      result = 0;
 +                              break;
 +                      case DW_CFA_advance_loc1:
 +                              result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
 +                              break;
 +                      case DW_CFA_advance_loc2:
 +                              result = ptr.p8 <= end - 2
 +                                       && advance_loc(*ptr.p16++, state);
 +                              break;
 +                      case DW_CFA_advance_loc4:
 +                              result = ptr.p8 <= end - 4
 +                                       && advance_loc(*ptr.p32++, state);
 +                              break;
 +                      case DW_CFA_offset_extended:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_val_offset:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_offset_extended_sf:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_val_offset_sf:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_restore_extended:
 +                      case DW_CFA_undefined:
 +                      case DW_CFA_same_value:
 +                              set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
 +                              break;
 +                      case DW_CFA_register:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value,
 +                                       Register,
 +                                       get_uleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_remember_state:
 +                              if (ptr.p8 == state->label) {
 +                                      state->label = NULL;
 +                                      return 1;
 +                              }
 +                              if (state->stackDepth >= MAX_STACK_DEPTH) {
 +                                      dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
 +                                      return 0;
 +                              }
 +                              state->stack[state->stackDepth++] = ptr.p8;
 +                              break;
 +                      case DW_CFA_restore_state:
 +                              if (state->stackDepth) {
 +                                      const uleb128_t loc = state->loc;
 +                                      const u8 *label = state->label;
 +
 +                                      state->label = state->stack[state->stackDepth - 1];
 +                                      memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
 +                                      memset(state->regs, 0, sizeof(state->regs));
 +                                      state->stackDepth = 0;
 +                                      result = processCFI(start, end, 0, ptrType, state);
 +                                      state->loc = loc;
 +                                      state->label = label;
 +                              } else {
 +                                      dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
 +                                      return 0;
 +                              }
 +                              break;
 +                      case DW_CFA_def_cfa:
 +                              state->cfa.reg = get_uleb128(&ptr.p8, end);
 +                              /*nobreak*/
 +                      case DW_CFA_def_cfa_offset:
 +                              state->cfa.offs = get_uleb128(&ptr.p8, end);
 +                              break;
 +                      case DW_CFA_def_cfa_sf:
 +                              state->cfa.reg = get_uleb128(&ptr.p8, end);
 +                              /*nobreak*/
 +                      case DW_CFA_def_cfa_offset_sf:
 +                              state->cfa.offs = get_sleb128(&ptr.p8, end)
 +                                                * state->dataAlign;
 +                              break;
 +                      case DW_CFA_def_cfa_register:
 +                              state->cfa.reg = get_uleb128(&ptr.p8, end);
 +                              break;
 +                      /*todo case DW_CFA_def_cfa_expression: */
 +                      /*todo case DW_CFA_expression: */
 +                      /*todo case DW_CFA_val_expression: */
 +                      case DW_CFA_GNU_args_size:
 +                              get_uleb128(&ptr.p8, end);
 +                              break;
 +                      case DW_CFA_GNU_negative_offset_extended:
 +                              value = get_uleb128(&ptr.p8, end);
 +                              set_rule(value,
 +                                       Memory,
 +                                       (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
 +                              break;
 +                      case DW_CFA_GNU_window_save:
 +                      default:
 +                              dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
 +                              result = 0;
 +                              break;
 +                      }
 +                      break;
 +              case 1:
 +                      result = advance_loc(*ptr.p8++ & 0x3f, state);
 +                      break;
 +              case 2:
 +                      value = *ptr.p8++ & 0x3f;
 +                      set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
 +                      break;
 +              case 3:
 +                      set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
 +                      break;
 +              }
 +              if (ptr.p8 > end) {
 +                      dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
 +                      result = 0;
 +              }
 +              if (result && targetLoc != 0 && targetLoc < state->loc)
 +                      return 1;
 +      }
 +
 +      if (result && ptr.p8 < end)
 +              dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
 +
 +      return result
 +             && ptr.p8 == end
 +             && (targetLoc == 0
 +                 || (/*todo While in theory this should apply, gcc in practice omits
 +                       everything past the function prolog, and hence the location
 +                       never reaches the end of the function.
 +                     targetLoc < state->loc &&*/ state->label == NULL));
 +}
 +
 +/* Unwind to the previous frame.  Returns 0 if successful, negative
 + * number in case of an error. */
 +int unwind(struct unwind_frame_info *frame)
 +{
 +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
 +      const u32 *fde = NULL, *cie = NULL;
 +      const u8 *ptr = NULL, *end = NULL;
 +      unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
 +      unsigned long startLoc = 0, endLoc = 0, cfa;
 +      unsigned i;
 +      signed ptrType = -1;
 +      uleb128_t retAddrReg = 0;
 +      const struct unwind_table *table;
 +      struct unwind_state state;
 +
 +      if (UNW_PC(frame) == 0)
 +              return -EINVAL;
 +      if ((table = find_table(pc)) != NULL
 +          && !(table->size & (sizeof(*fde) - 1))) {
 +              const u8 *hdr = table->header;
 +              unsigned long tableSize;
 +
 +              smp_rmb();
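 +              /*
 +               * The header is laid out as in .eh_frame_hdr: a version
 +               * byte, three DW_EH_PE encoding bytes (frame pointer,
 +               * entry count, table), then the sorted lookup table.
 +               * Binary-search it for the last entry starting at or
 +               * below pc.
 +               */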
 +              if (hdr && hdr[0] == 1) {
 +                      switch (hdr[3] & DW_EH_PE_FORM) {
 +                      case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
 +                      case DW_EH_PE_data2: tableSize = 2; break;
 +                      case DW_EH_PE_data4: tableSize = 4; break;
 +                      case DW_EH_PE_data8: tableSize = 8; break;
 +                      default: tableSize = 0; break;
 +                      }
 +                      ptr = hdr + 4;
 +                      end = hdr + table->hdrsz;
 +                      if (tableSize
 +                          && read_pointer(&ptr, end, hdr[1], 0, 0)
 +                             == (unsigned long)table->address
 +                          && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
 +                          && i == (end - ptr) / (2 * tableSize)
 +                          && !((end - ptr) % (2 * tableSize))) {
 +                              do {
 +                                      const u8 *cur = ptr + (i / 2) * (2 * tableSize);
 +
 +                                      startLoc = read_pointer(&cur,
 +                                                              cur + tableSize,
 +                                                              hdr[3], 0,
 +                                                              (unsigned long)hdr);
 +                                      if (pc < startLoc)
 +                                              i /= 2;
 +                                      else {
 +                                              ptr = cur - tableSize;
 +                                              i = (i + 1) / 2;
 +                                      }
 +                              } while (startLoc && i > 1);
 +                              if (i == 1
 +                                  && (startLoc = read_pointer(&ptr,
 +                                                              ptr + tableSize,
 +                                                              hdr[3], 0,
 +                                                              (unsigned long)hdr)) != 0
 +                                  && pc >= startLoc)
 +                                      fde = (void *)read_pointer(&ptr,
 +                                                                 ptr + tableSize,
 +                                                                 hdr[3], 0,
 +                                                                 (unsigned long)hdr);
 +                      }
 +              }
 +              if (hdr && !fde)
 +                      dprintk(3, "Binary lookup for %lx failed.", pc);
 +
 +              if (fde != NULL) {
 +                      cie = cie_for_fde(fde, table);
 +                      ptr = (const u8 *)(fde + 2);
 +                      if (cie != NULL
 +                          && cie != &bad_cie
 +                          && cie != &not_fde
 +                          && (ptrType = fde_pointer_type(cie)) >= 0
 +                          && read_pointer(&ptr,
 +                                          (const u8 *)(fde + 1) + *fde,
 +                                          ptrType, 0, 0) == startLoc) {
 +                              if (!(ptrType & DW_EH_PE_indirect))
 +                                      ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
 +                              endLoc = startLoc
 +                                       + read_pointer(&ptr,
 +                                                      (const u8 *)(fde + 1) + *fde,
 +                                                      ptrType, 0, 0);
 +                              if (pc >= endLoc)
 +                                      fde = NULL;
 +                      } else
 +                              fde = NULL;
 +                      if (!fde)
 +                              dprintk(1, "Binary lookup result for %lx discarded.", pc);
 +              }
 +              if (fde == NULL) {
 +                      for (fde = table->address, tableSize = table->size;
 +                           cie = NULL, tableSize > sizeof(*fde)
 +                           && tableSize - sizeof(*fde) >= *fde;
 +                           tableSize -= sizeof(*fde) + *fde,
 +                           fde += 1 + *fde / sizeof(*fde)) {
 +                              cie = cie_for_fde(fde, table);
 +                              if (cie == &bad_cie) {
 +                                      cie = NULL;
 +                                      break;
 +                              }
 +                              if (cie == NULL
 +                                  || cie == &not_fde
 +                                  || (ptrType = fde_pointer_type(cie)) < 0)
 +                                      continue;
 +                              ptr = (const u8 *)(fde + 2);
 +                              startLoc = read_pointer(&ptr,
 +                                                      (const u8 *)(fde + 1) + *fde,
 +                                                      ptrType, 0, 0);
 +                              if (!startLoc)
 +                                      continue;
 +                              if (!(ptrType & DW_EH_PE_indirect))
 +                                      ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
 +                              endLoc = startLoc
 +                                       + read_pointer(&ptr,
 +                                                      (const u8 *)(fde + 1) + *fde,
 +                                                      ptrType, 0, 0);
 +                              if (pc >= startLoc && pc < endLoc)
 +                                      break;
 +                      }
 +                      if (!fde)
 +                              dprintk(3, "Linear lookup for %lx failed.", pc);
 +              }
 +      }
 +      if (cie != NULL) {
 +              memset(&state, 0, sizeof(state));
 +              state.cieEnd = ptr; /* keep here temporarily */
 +              ptr = (const u8 *)(cie + 2);
 +              end = (const u8 *)(cie + 1) + *cie;
 +              frame->call_frame = 1;
 +              if ((state.version = *ptr) != 1)
 +                      cie = NULL; /* unsupported version */
 +              else if (*++ptr) {
 +                      /* check if augmentation size is first (and thus present) */
 +                      if (*ptr == 'z') {
 +                              while (++ptr < end && *ptr) {
 +                                      switch (*ptr) {
 +                                      /* check for ignorable (or already handled)
 +                                       * nul-terminated augmentation string */
 +                                      case 'L':
 +                                      case 'P':
 +                                      case 'R':
 +                                              continue;
 +                                      case 'S':
 +                                              frame->call_frame = 0;
 +                                              continue;
 +                                      default:
 +                                              break;
 +                                      }
 +                                      break;
 +                              }
 +                      }
 +                      if (ptr >= end || *ptr)
 +                              cie = NULL;
 +              }
 +              if (!cie)
 +                      dprintk(1, "CIE unusable (%p,%p).", ptr, end);
 +              ++ptr;
 +      }
 +      if (cie != NULL) {
 +              /* get code alignment factor */
 +              state.codeAlign = get_uleb128(&ptr, end);
 +              /* get data alignment factor */
 +              state.dataAlign = get_sleb128(&ptr, end);
 +              if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
 +                      cie = NULL;
 +              else if (UNW_PC(frame) % state.codeAlign
 +                       || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
 +                      dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
 +                              UNW_PC(frame), UNW_SP(frame));
 +                      return -EPERM;
 +              } else {
 +                      retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
 +                      /* skip augmentation */
 +                      if (((const char *)(cie + 2))[1] == 'z') {
 +                              uleb128_t augSize = get_uleb128(&ptr, end);
 +
 +                              ptr += augSize;
 +                      }
 +                      if (ptr > end
 +                          || retAddrReg >= ARRAY_SIZE(reg_info)
 +                          || REG_INVALID(retAddrReg)
 +                          || reg_info[retAddrReg].width != sizeof(unsigned long))
 +                              cie = NULL;
 +              }
 +              if (!cie)
 +                      dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
 +      }
 +      if (cie != NULL) {
 +              state.cieStart = ptr;
 +              ptr = state.cieEnd;
 +              state.cieEnd = end;
 +              end = (const u8 *)(fde + 1) + *fde;
 +              /* skip augmentation */
 +              if (((const char *)(cie + 2))[1] == 'z') {
 +                      uleb128_t augSize = get_uleb128(&ptr, end);
 +
 +                      if ((ptr += augSize) > end)
 +                              fde = NULL;
 +              }
 +              if (!fde)
 +                      dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
 +      }
 +#ifdef CONFIG_FRAME_POINTER
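 +      /* No usable DWARF info: fall back to following the frame-pointer
 +       * chain, sanity checking that both the frame pointer and the
 +       * saved link stay within the task's stack. */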
 +      if (cie == NULL || fde == NULL) {
 +              unsigned long top = TSK_STACK_TOP(frame->task);
 +              unsigned long bottom = STACK_BOTTOM(frame->task);
 +              unsigned long fp = UNW_FP(frame);
 +              unsigned long sp = UNW_SP(frame);
 +              unsigned long link;
 +
 +              if ((sp | fp) & (sizeof(unsigned long) - 1))
 +                      return -EPERM;
 +
 +# if FRAME_RETADDR_OFFSET < 0
 +              if (!(sp < top && fp <= sp && bottom < fp))
 +# else
 +              if (!(sp < top && fp >= sp && bottom < fp))
 +# endif
 +                      return -ENXIO;
 +
 +              if (probe_kernel_address(fp + FRAME_LINK_OFFSET, link))
 +                      return -ENXIO;
 +
 +# if FRAME_RETADDR_OFFSET < 0
 +              if (!(link > bottom && link < fp))
 +# else
 +              if (!(link > bottom && link > fp))
 +# endif
 +                      return -ENXIO;
 +
 +              if (link & (sizeof(unsigned long) - 1))
 +                      return -ENXIO;
 +
 +              fp += FRAME_RETADDR_OFFSET;
 +              if (probe_kernel_address(fp, UNW_PC(frame)))
 +                      return -ENXIO;
 +
 +              /* Ok, we can use it */
 +# if FRAME_RETADDR_OFFSET < 0
 +              UNW_SP(frame) = fp - sizeof(UNW_PC(frame));
 +# else
 +              UNW_SP(frame) = fp + sizeof(UNW_PC(frame));
 +# endif
 +              UNW_FP(frame) = link;
 +              return 0;
 +      }
 +#endif
 +      state.org = startLoc;
 +      memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
 +      /* process instructions */
 +      if (!processCFI(ptr, end, pc, ptrType, &state)
 +          || state.loc > endLoc
 +          || state.regs[retAddrReg].where == Nowhere
 +          || state.cfa.reg >= ARRAY_SIZE(reg_info)
 +          || reg_info[state.cfa.reg].width != sizeof(unsigned long)
 +          || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
 +          || state.cfa.offs % sizeof(unsigned long)) {
 +              dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
 +              return -EIO;
 +      }
 +      /* update frame */
 +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
 +      if (frame->call_frame
 +          && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
 +              frame->call_frame = 0;
 +#endif
 +      cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
 +      startLoc = min((unsigned long)UNW_SP(frame), cfa);
 +      endLoc = max((unsigned long)UNW_SP(frame), cfa);
 +      if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
 +              startLoc = min(STACK_LIMIT(cfa), cfa);
 +              endLoc = max(STACK_LIMIT(cfa), cfa);
 +      }
 +#ifndef CONFIG_64BIT
 +# define CASES CASE(8); CASE(16); CASE(32)
 +#else
 +# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
 +#endif
 +      pc = UNW_PC(frame);
 +      sp = UNW_SP(frame);
 +      for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
 +              if (REG_INVALID(i)) {
 +                      if (state.regs[i].where == Nowhere)
 +                              continue;
 +                      dprintk(1, "Cannot restore register %u (%d).",
 +                              i, state.regs[i].where);
 +                      return -EIO;
 +              }
 +              switch (state.regs[i].where) {
 +              default:
 +                      break;
 +              case Register:
 +                      if (state.regs[i].value >= ARRAY_SIZE(reg_info)
 +                          || REG_INVALID(state.regs[i].value)
 +                          || reg_info[i].width > reg_info[state.regs[i].value].width) {
 +                              dprintk(1, "Cannot restore register %u from register %lu.",
 +                                      i, state.regs[i].value);
 +                              return -EIO;
 +                      }
 +                      switch (reg_info[state.regs[i].value].width) {
 +#define CASE(n) \
 +                      case sizeof(u##n): \
 +                              state.regs[i].value = FRAME_REG(state.regs[i].value, \
 +                                                              const u##n); \
 +                              break
 +                      CASES;
 +#undef CASE
 +                      default:
 +                              dprintk(1, "Unsupported register size %u (%lu).",
 +                                      reg_info[state.regs[i].value].width,
 +                                      state.regs[i].value);
 +                              return -EIO;
 +                      }
 +                      break;
 +              }
 +      }
 +      for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
 +              if (REG_INVALID(i))
 +                      continue;
 +              switch (state.regs[i].where) {
 +              case Nowhere:
 +                      if (reg_info[i].width != sizeof(UNW_SP(frame))
 +                          || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
 +                             != &UNW_SP(frame))
 +                              continue;
 +                      UNW_SP(frame) = cfa;
 +                      break;
 +              case Register:
 +                      switch (reg_info[i].width) {
 +#define CASE(n) case sizeof(u##n): \
 +                              FRAME_REG(i, u##n) = state.regs[i].value; \
 +                              break
 +                      CASES;
 +#undef CASE
 +                      default:
 +                              dprintk(1, "Unsupported register size %u (%u).",
 +                                      reg_info[i].width, i);
 +                              return -EIO;
 +                      }
 +                      break;
 +              case Value:
 +                      if (reg_info[i].width != sizeof(unsigned long)) {
 +                              dprintk(1, "Unsupported value size %u (%u).",
 +                                      reg_info[i].width, i);
 +                              return -EIO;
 +                      }
 +                      FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
 +                                                          * state.dataAlign;
 +                      break;
 +              case Memory: {
 +                              unsigned long addr = cfa + state.regs[i].value
 +                                                         * state.dataAlign;
 +
 +                              if ((state.regs[i].value * state.dataAlign)
 +                                  % sizeof(unsigned long)
 +                                  || addr < startLoc
 +                                  || addr + sizeof(unsigned long) < addr
 +                                  || addr + sizeof(unsigned long) > endLoc) {
 +                                      dprintk(1, "Bad memory location %lx (%lx).",
 +                                              addr, state.regs[i].value);
 +                                      return -EIO;
 +                              }
 +                              switch (reg_info[i].width) {
 +#define CASE(n)                       case sizeof(u##n): \
 +                                      if (probe_kernel_address(addr, \
 +                                                               FRAME_REG(i, u##n))) \
 +                                              return -EFAULT; \
 +                                      break
 +                              CASES;
 +#undef CASE
 +                              default:
 +                                      dprintk(1, "Unsupported memory size %u (%u).",
 +                                              reg_info[i].width, i);
 +                                      return -EIO;
 +                              }
 +                      }
 +                      break;
 +              }
 +      }
 +
 +      if (UNW_PC(frame) % state.codeAlign
 +          || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
 +              dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
 +                      UNW_PC(frame), UNW_SP(frame));
 +              return -EIO;
 +      }
 +      if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
 +              dprintk(1, "No progress (%lx,%lx).", pc, sp);
 +              return -EIO;
 +      }
 +
 +      return 0;
 +#undef CASES
 +#undef FRAME_REG
 +}
 +EXPORT_SYMBOL_GPL(unwind);
 +
 +int unwind_init_frame_info(struct unwind_frame_info *info,
 +                           struct task_struct *tsk,
 +                           /*const*/ struct pt_regs *regs)
 +{
 +      info->task = tsk;
 +      info->call_frame = 0;
 +      arch_unw_init_frame_info(info, regs);
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(unwind_init_frame_info);
 +
 +/*
 + * Prepare to unwind a blocked task.
 + */
 +int unwind_init_blocked(struct unwind_frame_info *info,
 +                        struct task_struct *tsk)
 +{
 +      info->task = tsk;
 +      info->call_frame = 0;
 +      arch_unw_init_blocked(info);
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(unwind_init_blocked);
 +
 +/*
 + * Prepare to unwind the currently running thread.
 + */
 +int unwind_init_running(struct unwind_frame_info *info,
 +                      asmlinkage unwind_callback_fn callback,
 +                      const struct stacktrace_ops *ops, void *data)
 +{
 +      info->task = current;
 +      info->call_frame = 0;
 +
 +      return arch_unwind_init_running(info, callback, ops, data);
 +}
 +EXPORT_SYMBOL_GPL(unwind_init_running);
 +
 +/*
 + * Unwind until the return pointer is in user-land (or until an error
 + * occurs).  Returns 0 if successful, negative number in case of
 + * error.
 + */
 +int unwind_to_user(struct unwind_frame_info *info)
 +{
 +      while (!arch_unw_user_mode(info)) {
 +              int err = unwind(info);
 +
 +              if (err < 0)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(unwind_to_user);
Simple merge
diff --cc mm/Makefile
Simple merge
diff --cc mm/filemap.c
Simple merge
diff --cc mm/hugetlb.c
Simple merge
diff --cc mm/memcontrol.c
Simple merge
diff --cc mm/memory.c
Simple merge
diff --cc mm/migrate.c
Simple merge
diff --cc mm/mmap.c
Simple merge
diff --cc mm/mprotect.c
Simple merge
diff --cc mm/page_io.c
Simple merge
diff --cc mm/slab.c
Simple merge
diff --cc mm/slub.c
Simple merge
diff --cc mm/swap_state.c
Simple merge
diff --cc mm/truncate.c
Simple merge
diff --cc mm/vmscan.c
Simple merge
diff --cc mm/vmstat.c
Simple merge
Simple merge
diff --cc net/core/dev.c
Simple merge
Simple merge
@@@ -19,7 -19,7 +19,8 @@@
  #include <linux/random.h>
  #include <linux/skbuff.h>
  #include <linux/rtnetlink.h>
+ #include <linux/slab.h>
 +#include <linux/reserve.h>
  
  #include <net/inet_frag.h>
  
Simple merge
Simple merge
diff --cc net/ipv4/tcp.c
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -41,7 -41,7 +41,8 @@@
  #include <linux/random.h>
  #include <linux/jhash.h>
  #include <linux/skbuff.h>
+ #include <linux/slab.h>
 +#include <linux/reserve.h>
  
  #include <net/sock.h>
  #include <net/snmp.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge