# To add a new book the only step required is to add the book to the
# list of DOCBOOKS.
- DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
+ DOCBOOKS := z8530book.xml mcabook.xml \
- kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
+ kernel-hacking.xml kernel-locking.xml deviceiobook.xml utrace.xml \
procfs-guide.xml writing_usb_driver.xml networking.xml \
kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
releasepage: no yes
direct_IO: no
launder_page: no yes
+swapon no
+swapoff no
+swap_out no yes, unlocks
+swap_in no yes, unlocks
- ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
+ ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
may be called from the request handler (/dev/loop).
->readpage() unlocks the page, either synchronously or via I/O
to assume that this machine's pmtimer latches its value
and always returns good values.
+ acpi_root_table= [X86,ACPI]
+ { rsdt }
+ rsdt: Take RSDT address for fetching
+ ACPI tables (instead of XSDT)
++ For compatibility. Use acpi=rsdt instead.
+
agp= [AGP]
{ off | try_unsupported }
off: disable AGP support
instruction doesn't work correctly and not to
use it.
- file_caps= Tells the kernel whether to honor file capabilities.
- When disabled, the only way then for a file to be
- executed with privilege is to be setuid root or executed
- by root.
- Format: {"0" | "1"}
- 0 -- ignore file capabilities.
- 1 -- honor file capabilities.
- Default value is 0.
+ no_file_caps Tells the kernel not to honor file capabilities. The
+ only way then for a file to be executed with privilege
- is to be setuid root or executed by root.
++		is to be setuid root or executed by root. File
++		capabilities default to disabled.
++
++ file_caps Tells the kernel to honor file capabilities. The
++ only way then for a file to be executed with privilege
++		is to be setuid root or executed by root. File
++		capabilities default to disabled.
nohalt [IA-64] Tells the kernel not to use the power saving
function PAL_HALT_LIGHT when idle. This increases
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
+ perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed
+			to see traces of the early startup phase.
+
pf. [PARIDE]
- See Documentation/paride.txt.
+ See Documentation/blockdev/paride.txt.
pg. [PARIDE]
- See Documentation/paride.txt.
+ See Documentation/blockdev/paride.txt.
pirq= [SMP,APIC] Manual mp-table setup
- See Documentation/i386/IO-APIC.txt.
+ See Documentation/x86/i386/IO-APIC.txt.
plip= [PPT,NET] Parallel port network link
Format: { parport<nr> | timid | 0 }
Non-zero if the kernel has been tainted. Numeric values, which
can be ORed together (a small decoding sketch follows the list):
- 1 - A module with a non-GPL license has been loaded, this
- includes modules with no license.
- Set by modutils >= 2.4.9 and module-init-tools.
- 2 - A module was force loaded by insmod -f.
- Set by modutils >= 2.4.9 and module-init-tools.
- 4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
- 64 - A module from drivers/staging was loaded.
+ 1 - A module with a non-GPL license has been loaded, this
+ includes modules with no license.
+ Set by modutils >= 2.4.9 and module-init-tools.
+ 2 - A module was force loaded by insmod -f.
+ Set by modutils >= 2.4.9 and module-init-tools.
+ 4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
+ 8 - A module was forcibly unloaded from the system by rmmod -f.
+ 16 - A hardware machine check error occurred on the system.
+ 32 - A bad page was discovered on the system.
+ 64 - The user has asked that the system be marked "tainted". This
+ could be because they are running software that directly modifies
+ the hardware, or for other reasons.
+ 128 - The system has died.
+ 256 - The ACPI DSDT has been overridden with one supplied by the user
+ instead of using the one provided by the hardware.
+ 512 - A kernel warning has occurred.
+ 1024 - A module from drivers/staging was loaded.
+ 0x40000000 - An unsupported kernel module was loaded.
+0x80000000 - A kernel module with external support was loaded.
+
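As a quick illustration, the taint state can be decoded from user
space. This is a minimal sketch; only the bit values documented above
are authoritative, the messages are illustrative:

	#include <stdio.h>

	int main(void)
	{
		unsigned long t;
		FILE *f = fopen("/proc/sys/kernel/tainted", "r");

		if (!f || fscanf(f, "%lu", &t) != 1)
			return 1;
		fclose(f);

		if (t & 1)
			printf("non-GPL (or unlicensed) module loaded\n");
		if (t & 2)
			printf("module was force loaded\n");
		if (t & 512)
			printf("a kernel warning has occurred\n");
		if (t & 0x40000000)
			printf("unsupported module loaded\n");
		return 0;
	}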
+==============================================================
+
+unsupported:
+
+Allow loading of unsupported kernel modules (a usage sketch follows the list):
+
+ 0 - refuse to load unsupported modules,
+ 1 - warn when loading unsupported modules,
+ 2 - don't warn.
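A hedged sketch of switching the mode at run time through the
corresponding procfs file (assuming the standard /proc/sys mount):

	#include <stdio.h>

	int main(void)
	{
		/* 1 = warn when loading unsupported modules */
		FILE *f = fopen("/proc/sys/kernel/unsupported", "w");

		if (!f)
			return 1;
		fputs("1\n", f);
		fclose(f);
		return 0;
	}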
M: xfs-masters@oss.sgi.com
L: xfs@oss.sgi.com
W: http://oss.sgi.com/projects/xfs
- T: git git://oss.sgi.com:8090/xfs/xfs-2.6.git
+ T: git://oss.sgi.com/xfs/xfs.git
S: Supported
+DMAPI
+P: Silicon Graphics Inc
+M: xfs-masters@oss.sgi.com
+L: xfs@oss.sgi.com
+W: http://oss.sgi.com/projects/xfs
+S: Supported
+
XILINX SYSTEMACE DRIVER
P: Grant Likely
M: grant.likely@secretlab.ca
fi;
$(Q)if [ ! -d include2 ]; then \
mkdir -p include2; \
- ln -fsn $(srctree)/include/asm-$(SRCARCH) include2/asm; \
+ if [ -d $(srctree)/arch/$(SRCARCH)/include/asm ]; then \
+ ln -fsn $(srctree)/arch/$(SRCARCH)/include/asm include2/asm; \
+ else \
+ ln -fsn $(srctree)/include/asm-$(SRCARCH) include2/asm; \
+ fi; \
fi
+ ln -fsn $(srctree) source
endif
# prepare2 creates a makefile if using a separate output directory
endef
# We create the target directory of the symlink if it does
- # not exist so the test in chack-symlink works and we have a
+ # not exist so the test in check-symlink works and we have a
	# directory for generated files as used by some architectures.
define create-symlink
- if [ ! -L include/asm ]; then \
- $(kecho) ' SYMLINK $@ -> include/asm-$(SRCARCH)'; \
- if [ ! -d include/asm-$(SRCARCH) ]; then \
- mkdir -p include/asm-$(SRCARCH); \
- fi; \
- ln -fsn asm-$(SRCARCH) $@; \
+ if [ ! -L include/asm ]; then \
+ if [ -d arch/$(SRCARCH)/include/asm ]; then \
+ echo ' SYMLINK $@ -> arch/$(SRCARCH)/include/asm'; \
+ ln -fsn ../arch/$(SRCARCH)/include/asm $@; \
+ else \
+ echo ' SYMLINK $@ -> include/asm-$(SRCARCH)'; \
+ if [ ! -d include/asm-$(SRCARCH) ]; then \
+ mkdir -p include/asm-$(SRCARCH); \
+ fi; \
+ ln -fsn asm-$(SRCARCH) $@; \
+ fi; \
fi
endef
--- /dev/null
+ #ifndef __ASM_ARCH_SPINLOCK_H
+ #define __ASM_ARCH_SPINLOCK_H
+
+ #include <linux/spinlock_types.h>
+
+ #define RW_LOCK_BIAS 0x01000000
+
+ extern void cris_spin_unlock(void *l, int val);
+ extern void cris_spin_lock(void *l);
+ extern int cris_spin_trylock(void *l);
+
+ static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+ {
+ return *(volatile signed char *)(&(x)->slock) <= 0;
+ }
+
+ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ __asm__ volatile ("move.d %1,%0" \
+ : "=m" (lock->slock) \
+ : "r" (1) \
+ : "memory");
+ }
+
+ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+ {
+ while (__raw_spin_is_locked(lock))
+ cpu_relax();
+ }
+
+ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ return cris_spin_trylock((void *)&lock->slock);
+ }
+
+ static inline void __raw_spin_lock(raw_spinlock_t *lock)
+ {
+ cris_spin_lock((void *)&lock->slock);
+ }
+
+ static inline void
+ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+ {
+ __raw_spin_lock(lock);
+ }
+
+ /*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ */
+
+ static inline int __raw_read_can_lock(raw_rwlock_t *x)
+ {
+ return (int)(x)->lock > 0;
+ }
+
+ static inline int __raw_write_can_lock(raw_rwlock_t *x)
+ {
+ return (x)->lock == RW_LOCK_BIAS;
+ }
+
+ static inline void __raw_read_lock(raw_rwlock_t *rw)
+ {
+ __raw_spin_lock(&rw->slock);
+ while (rw->lock == 0);
+ rw->lock--;
+ __raw_spin_unlock(&rw->slock);
+ }
+
+ static inline void __raw_write_lock(raw_rwlock_t *rw)
+ {
+ __raw_spin_lock(&rw->slock);
+ while (rw->lock != RW_LOCK_BIAS);
+	rw->lock = 0;
+ __raw_spin_unlock(&rw->slock);
+ }
+
+ static inline void __raw_read_unlock(raw_rwlock_t *rw)
+ {
+ __raw_spin_lock(&rw->slock);
+ rw->lock++;
+ __raw_spin_unlock(&rw->slock);
+ }
+
+ static inline void __raw_write_unlock(raw_rwlock_t *rw)
+ {
+ 	__raw_spin_lock(&rw->slock);
+ 	/* the writer holds the lock with a count of 0, so waiting for
+ 	 * RW_LOCK_BIAS here would spin forever; just restore the bias */
+ 	rw->lock = RW_LOCK_BIAS;
+ 	__raw_spin_unlock(&rw->slock);
+ }
+
+ static inline int __raw_read_trylock(raw_rwlock_t *rw)
+ {
+ int ret = 0;
+ __raw_spin_lock(&rw->slock);
+ if (rw->lock != 0) {
+ rw->lock--;
+ ret = 1;
+ }
+ __raw_spin_unlock(&rw->slock);
+ return ret;
+ }
+
+ static inline int __raw_write_trylock(raw_rwlock_t *rw)
+ {
+ int ret = 0;
+ __raw_spin_lock(&rw->slock);
+ 	if (rw->lock == RW_LOCK_BIAS) {
+ 		rw->lock = 0;
+ 		ret = 1;
+ 	}
+ 	__raw_spin_unlock(&rw->slock);
+ 	return ret;
+ }
+
++#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
++#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
+
+ #define _raw_spin_relax(lock) cpu_relax()
+ #define _raw_read_relax(lock) cpu_relax()
+ #define _raw_write_relax(lock) cpu_relax()
+
+ #endif /* __ASM_ARCH_SPINLOCK_H */
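To illustrate the reader/writer mixing described in the comment above,
here is a hedged sketch of the usual pattern with the generic rwlock
wrappers (the sample_* names are illustrative, not part of the patch):

	#include <linux/spinlock.h>
	#include <linux/interrupt.h>

	static DEFINE_RWLOCK(sample_lock);

	/* interrupt context: readers may take the plain (non-irqsafe) read lock */
	static irqreturn_t sample_irq(int irq, void *dev_id)
	{
		read_lock(&sample_lock);
		/* ... read the shared state ... */
		read_unlock(&sample_lock);
		return IRQ_HANDLED;
	}

	/* process context: any writer must take the irq-safe write lock */
	static void sample_update(void)
	{
		unsigned long flags;

		write_lock_irqsave(&sample_lock, flags);
		/* ... modify the shared state ... */
		write_unlock_irqrestore(&sample_lock, flags);
	}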
source "fs/Kconfig.binfmt"
+source "arch/ia64/perfmon/Kconfig"
+
endmenu
- menu "Power management and ACPI"
+ menu "Power management and ACPI options"
source "kernel/power/Kconfig"
core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
+ core-$(CONFIG_IA64_XEN_GUEST) += arch/ia64/dig/
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
+core-$(CONFIG_PERFMON) += arch/ia64/perfmon/
core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
core-$(CONFIG_KVM) += arch/ia64/kvm/
+ core-$(CONFIG_XEN) += arch/ia64/xen/
+drivers-$(CONFIG_KDB) += arch/$(ARCH)/kdb/
drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
CONFIG_BINFMT_MISC=m
#
+# Hardware Performance Monitoring support
+#
+CONFIG_PERFMON=y
+CONFIG_IA64_PERFMON_COMPAT=y
+CONFIG_IA64_PERFMON_GENERIC=m
+CONFIG_IA64_PERFMON_ITANIUM=y
+CONFIG_IA64_PERFMON_MCKINLEY=y
+CONFIG_IA64_PERFMON_MONTECITO=y
+
+#
- # Power management and ACPI
+ # Power management and ACPI options
#
CONFIG_PM=y
# CONFIG_PM_DEBUG is not set
--- /dev/null
+/*
+ * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file contains Itanium Processor Family specific definitions
+ * for the perfmon interface.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#ifndef _ASM_IA64_PERFMON_KERN_H_
+#define _ASM_IA64_PERFMON_KERN_H_
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PERFMON
+#include <asm/unistd.h>
+#include <asm/hw_irq.h>
+
+/*
+ * describe the content of the pfm_syst_info field
+ * layout:
+ * bits[00-15] : generic flags
+ * bits[16-31] : arch-specific flags
+ */
+#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */
+
+/*
+ * For some CPUs, the upper bits of a counter must be set in order for the
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
+ * and the upper bits are cleared. This function may be used to set them back.
+ */
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
+ unsigned int cnum)
+{}
+
+/*
+ * called from __pfm_interrupt_handler(). ctx is not NULL.
+ * ctx is locked. PMU interrupt is masked.
+ *
+ * must stop all monitoring to ensure handler has consistent view.
+ * must collect overflowed PMDs bitmask into povfls_pmds and
+ * npend_ovfls. If no interrupt detected then npend_ovfls
+ * must be set to zero.
+ */
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+ u64 tmp;
+
+ /*
+ * do not overwrite existing value, must
+ * process those first (coming from context switch replay)
+ */
+ if (set->npend_ovfls)
+ return;
+
+ ia64_srlz_d();
+
+ tmp = ia64_get_pmc(0) & ~0xf;
+
+ set->povfl_pmds[0] = tmp;
+
+ set->npend_ovfls = ia64_popcnt(tmp);
+}
+
+static inline int pfm_arch_init_pmu_config(void)
+{
+ return 0;
+}
+
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
+{
+ ia64_resend_irq(IA64_PERFMON_VECTOR);
+}
+
+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{}
+
+static inline void pfm_arch_serialize(void)
+{
+ ia64_srlz_d();
+}
+
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
+{
+ PFM_DBG_ovfl("state=%d", ctx->state);
+ ia64_set_pmc(0, 0);
+ /* no serialization */
+}
+
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
+ unsigned int cnum, u64 value)
+{
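+	/*
+	 * register index mapping (from the ranges below): cnum < 256 is a
+	 * PMC, 256-263 map to instruction breakpoint registers (ibr), and
+	 * 264 and up map to data breakpoint registers (dbr)
+	 */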
+ if (cnum < 256) {
+ ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
+ } else if (cnum < 264) {
+ ia64_set_ibr(cnum-256, value);
+ ia64_dv_serialize_instruction();
+ } else {
+ ia64_set_dbr(cnum-264, value);
+ ia64_dv_serialize_instruction();
+ }
+}
+
+/*
+ * On IA-64, for per-thread context which have the ITA_FL_INSECURE
+ * flag, it is possible to start/stop monitoring directly from user level
+ * without calling pfm_start()/pfm_stop(). This allows very lightweight
+ * control yet the kernel sometimes needs to know if monitoring is actually
+ * on or off.
+ *
+ * Tracking of this information is normally done by pfm_start/pfm_stop
+ * in flags.started. Here we need to compensate by checking actual
+ * psr bit.
+ */
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
+{
+ return ctx->flags.started
+ || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP);
+}
+
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
+ unsigned int cnum, u64 value)
+{
+ /*
+ * for a counting PMD, overflow bit must be cleared
+ */
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
+ value &= pfm_pmu_conf->ovfl_mask;
+
+ /*
+ * for counters, write to upper bits are ignored, no need to mask
+ */
+ ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
+}
+
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
+{
+ return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr);
+}
+
+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
+{
+ return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr);
+}
+
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
+ struct pfm_context *ctx)
+{
+ struct pt_regs *regs;
+
+ regs = task_pt_regs(task);
+ ia64_psr(regs)->pp = 0;
+}
+
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
+ struct pfm_context *ctx)
+{
+ struct pt_regs *regs;
+
+ if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) {
+ regs = task_pt_regs(task);
+ ia64_psr(regs)->pp = 1;
+ }
+}
+
+/*
+ * On IA-64, the PMDs are NOT saved by pfm_arch_freeze_pmu()
+ * when entering the PMU interrupt handler, thus, we need
+ * to save them in pfm_switch_sets_from_intr()
+ */
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+ pfm_save_pmds(ctx, set);
+}
+
+int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags);
+
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
+{}
+
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_ctxswin_thread(struct task_struct *task,
+ struct pfm_context *ctx);
+
+void pfm_arch_unload_context(struct pfm_context *ctx);
+int pfm_arch_load_context(struct pfm_context *ctx);
+int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags);
+
+void pfm_arch_mask_monitoring(struct pfm_context *ctx,
+ struct pfm_event_set *set);
+void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
+ struct pfm_event_set *set);
+
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
+
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
+
+int pfm_arch_init(void);
+void pfm_arch_init_percpu(void);
+char *pfm_arch_get_pmu_module_name(void);
+
+int __pfm_use_dbregs(struct task_struct *task);
+int __pfm_release_dbregs(struct task_struct *task);
+int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
+ struct pfm_event_set *set);
+
+void pfm_arch_show_session(struct seq_file *m);
+
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
+{
+ return 0;
+}
+
+static inline void pfm_arch_pmu_release(void)
+{}
+
+/* not necessary on IA-64 */
+static inline void pfm_cacheflush(void *addr, unsigned int len)
+{}
+
+/*
+ * miscellaneous architected definitions
+ */
+#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */
+
+/*
+ * private event set flags (set_priv_flags)
+ */
+#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */
+
+
+/*
+ * Itanium-specific data structures
+ */
+struct pfm_ia64_context_flags {
+ unsigned int use_dbr:1; /* use range restrictions (debug registers) */
+ unsigned int insecure:1; /* insecure monitoring for non-self session */
+ unsigned int reserved:30;/* for future use */
+};
+
+struct pfm_arch_context {
+ struct pfm_ia64_context_flags flags; /* arch specific ctx flags */
+ u64 ctx_saved_psr_up;/* storage for psr_up */
+#ifdef CONFIG_IA64_PERFMON_COMPAT
+ void *ctx_smpl_vaddr; /* vaddr of user mapping */
+#endif
+};
+
+#ifdef CONFIG_IA64_PERFMON_COMPAT
+ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
+ char __user *buf,
+ int non_block,
+ size_t size);
+int pfm_ia64_compat_init(void);
+int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
+ size_t rsize, struct file *filp);
+#else
+static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
+ char __user *buf,
+ int non_block,
+ size_t size)
+{
+ return -EINVAL;
+}
+
+static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
+ size_t rsize, struct file *filp)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
+{
- /*
- * On IA-64, we ran out of bits in the bottom 7 bits of the
-	 * threadinfo bitmask. Thus we used a 2-stage approach by piggybacking
- * on NOTIFY_RESUME and then in do_notify_resume() we demultiplex and
- * call pfm_handle_work() if needed
- */
+ set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
+}
+
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
+{
+ /*
- * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are
-	 * piggybacked onto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE
- *
- * The tsk_clear_notify_resume() checks if any of those are set before
-	 * clearing the bit
++	 * since 2.6.28 this function is no longer needed: TIF_NOTIFY_RESUME
++	 * is cleared automatically by do_notify_resume_user(), so the worst
++	 * case is a spurious call to this function
+ */
- tsk_clear_notify_resume(task);
+}
+
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
+{
+ return 0;
+}
+
+extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info;
+
+#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
+
+/*
+ * IA-64 does not need extra alignment requirements for the sampling buffer
+ */
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0
+
+
+static inline void pfm_release_dbregs(struct task_struct *task)
+{
+ if (task->thread.flags & IA64_THREAD_DBG_VALID)
+ __pfm_release_dbregs(task);
+}
+
+#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t)
+
+struct pfm_arch_pmu_info {
+	unsigned long mask_pmcs[PFM_PMC_BV]; /* PMCs to modify when masking monitoring */
+};
+
+DECLARE_PER_CPU(u32, pfm_syst_info);
+#else /* !CONFIG_PERFMON */
+/*
+ * perfmon ia64-specific hooks
+ */
+#define pfm_release_dbregs(_t) do { } while (0)
+#define pfm_use_dbregs(_t) (0)
+
+#endif /* CONFIG_PERFMON */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_IA64_PERFMON_KERN_H_ */
--- /dev/null
+/*
+ * File: cpe_migrate.c
+ * Purpose: Migrate data from physical pages with excessive correctable
+ * errors to new physical pages. Keep the old pages on a discard
+ * list.
+ *
+ * Copyright (C) 2008 SGI - Silicon Graphics Inc.
+ * Copyright (C) 2008 Russ Anderson <rja@sgi.com>
+ */
+
+#include <linux/sysdev.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/vmalloc.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/memcontrol.h>
+#include <linux/kobject.h>
+#include <linux/kthread.h>
+
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/mca.h>
+
+#define BADRAM_BASENAME "badram"
+#define CE_HISTORY_LENGTH 30
+
+struct cpe_info {
+ u64 paddr;
+ u16 node;
+};
+static struct cpe_info cpe[CE_HISTORY_LENGTH];
+
+static int cpe_polling_enabled = 1;
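+/*
+ * cpe[] is a circular list: cpe_setup_migrate() (interrupt context)
+ * adds entries at cpe_head, the migration kthread drains them at
+ * cpe_tail.
+ */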
+static int cpe_head;
+static int cpe_tail;
+static int mstat_cannot_isolate;
+static int mstat_failed_to_discard;
+static int mstat_already_marked;
+static int mstat_already_on_list;
+
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+DECLARE_WAIT_QUEUE_HEAD(cpe_activate_IRQ_wq);
+static DECLARE_COMPLETION(kthread_cpe_migrated_exited);
+int cpe_active;
+DEFINE_SPINLOCK(cpe_migrate_lock);
+
+static void
+get_physical_address(void *buffer, u64 *paddr, u16 *node)
+{
+ sal_log_record_header_t *rh;
+ sal_log_mem_dev_err_info_t *mdei;
+ ia64_err_rec_t *err_rec;
+ sal_log_platform_err_info_t *plat_err;
+ efi_guid_t guid;
+
+ err_rec = buffer;
+ rh = &err_rec->sal_elog_header;
+ *paddr = 0;
+ *node = 0;
+
+ /*
+ * Make sure it is a corrected error.
+ */
+ if (rh->severity != sal_log_severity_corrected)
+ return;
+
+ plat_err = (sal_log_platform_err_info_t *)&err_rec->proc_err;
+
+ guid = plat_err->mem_dev_err.header.guid;
+ if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
+ /*
+ * Memory cpe
+ */
+ mdei = &plat_err->mem_dev_err;
+ if (mdei->valid.oem_data) {
+ if (mdei->valid.physical_addr)
+ *paddr = mdei->physical_addr;
+
+ if (mdei->valid.node) {
+ if (ia64_platform_is("sn2"))
+ *node = nasid_to_cnodeid(mdei->node);
+ else
+ *node = mdei->node;
+ }
+ }
+ }
+}
+
+static struct page *
+alloc_migrate_page(struct page *ignored, unsigned long node, int **x)
+{
+
+ return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+}
+
+static int
+validate_paddr_page(u64 paddr)
+{
+ struct page *page;
+
+ if (!paddr)
+ return -EINVAL;
+
+ if (!ia64_phys_addr_valid(paddr))
+ return -EINVAL;
+
+ if (!pfn_valid(paddr >> PAGE_SHIFT))
+ return -EINVAL;
+
+ page = phys_to_page(paddr);
+ if (PageMemError(page))
+ mstat_already_marked++;
+ return 0;
+}
+
++extern int isolate_lru_page(struct page *page);
+static int
+ia64_mca_cpe_move_page(u64 paddr, u32 node)
+{
+ LIST_HEAD(pagelist);
+ struct page *page;
+ int ret;
+
+ ret = validate_paddr_page(paddr);
+ if (ret < 0)
+ return ret;
+
+ /*
+	 * convert the physical address to its struct page
+ */
+ page = phys_to_page(paddr);
+
+ migrate_prep();
- ret = isolate_lru_page(page, &pagelist);
++ ret = isolate_lru_page(page);
+ if (ret) {
+ mstat_cannot_isolate++;
+ return ret;
+ }
+
- SetPageMemError(page); /* Mark the page as bad */
++ list_add(&page->lru, &pagelist);
+ ret = migrate_pages(&pagelist, alloc_migrate_page, node);
+ if (ret == 0) {
+ total_badpages++;
+ list_add_tail(&page->lru, &badpagelist);
+ } else {
+ mstat_failed_to_discard++;
+ /*
+ * The page failed to migrate and is not on the bad page list.
+ * Clearing the error bit will allow another attempt to migrate
+ * if it gets another correctable error.
+ */
+ ClearPageMemError(page);
+ }
+
+ return 0;
+}
+
+/*
+ * cpe_process_queue
+ * Pulls the physical address off the list and calls the migration code.
+ * Will process all the addresses on the list.
+ */
+void
+cpe_process_queue(void)
+{
+ int ret;
+ u64 paddr;
+ u16 node;
+
+ do {
+ paddr = cpe[cpe_tail].paddr;
+ if (paddr) {
+ /*
+ * There is a valid entry that needs processing.
+ */
+ node = cpe[cpe_tail].node;
+
+ ret = ia64_mca_cpe_move_page(paddr, node);
+ if (ret <= 0)
+ /*
+ * Even though the return status is negative,
+ * clear the entry. If the same address has
+ * another CPE it will be re-added to the list.
+ */
+ cpe[cpe_tail].paddr = 0;
+
+ }
+ if (++cpe_tail >= CE_HISTORY_LENGTH)
+ cpe_tail = 0;
+
+ } while (cpe_tail != cpe_head);
+ return;
+}
+
+static inline int
+cpe_list_empty(void)
+{
+ return (cpe_head == cpe_tail) && (!cpe[cpe_head].paddr);
+}
+
+/*
+ * kthread_cpe_migrate
+ * kthread_cpe_migrate is created at module load time and lives
+ * until the module is removed. When not active, it will sleep.
+ */
+static int
+kthread_cpe_migrate(void *ignore)
+{
+ while (cpe_active) {
+ /*
+ * wait for work
+ */
+ (void)wait_event_interruptible(cpe_activate_IRQ_wq,
+ (!cpe_list_empty() ||
+ !cpe_active));
+ cpe_process_queue(); /* process work */
+ }
+ complete(&kthread_cpe_migrated_exited);
+ return 0;
+}
+
+DEFINE_SPINLOCK(cpe_list_lock);
+
+/*
+ * cpe_setup_migrate
+ * Get the physical address out of the CPE record, add it
+ * to the list of addresses to migrate (if not already on),
+ * and schedule the back end worker task. This is called
+ * in interrupt context so cannot directly call the migration
+ * code.
+ *
+ * Inputs
+ * rec The CPE record
+ * Outputs
+ *	1 on success, -EINVAL on failure
+ */
+static int
+cpe_setup_migrate(void *rec)
+{
+ u64 paddr;
+ u16 node;
+ int i, ret;
+
+ if (!rec)
+ return -EINVAL;
+
+ get_physical_address(rec, &paddr, &node);
+ ret = validate_paddr_page(paddr);
+ if (ret < 0)
+ return -EINVAL;
+
+ if (!cpe_list_empty())
+ for (i = 0; i < CE_HISTORY_LENGTH; i++) {
+ if (PAGE_ALIGN(cpe[i].paddr) == PAGE_ALIGN(paddr)) {
+ mstat_already_on_list++;
+ return 1; /* already on the list */
+ }
+ }
+
+ if (!spin_trylock(&cpe_list_lock)) {
+ /*
+ * Someone else has the lock. To avoid spinning in interrupt
+ * handler context, bail.
+ */
+ return 1;
+ }
+
+ if (cpe[cpe_head].paddr == 0) {
+ cpe[cpe_head].node = node;
+ cpe[cpe_head].paddr = paddr;
+
+ if (++cpe_head >= CE_HISTORY_LENGTH)
+ cpe_head = 0;
+ }
+ spin_unlock(&cpe_list_lock);
+
+ wake_up_interruptible(&cpe_activate_IRQ_wq);
+
+ return 1;
+}
+
+/*
+ * =============================================================================
+ */
+
+/*
+ * free_one_bad_page
+ * Free one page from the list of bad pages.
+ */
+static int
+free_one_bad_page(unsigned long paddr)
+{
+ LIST_HEAD(pagelist);
+ struct page *page, *page2, *target;
+
+ /*
+ * Verify page address
+ */
+ target = phys_to_page(paddr);
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ if (page != target)
+ continue;
+
+ ClearPageMemError(page); /* Mark the page as good */
+ total_badpages--;
+ list_move_tail(&page->lru, &pagelist);
+ putback_lru_pages(&pagelist);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * free_all_bad_pages
+ * Free all of the pages on the bad pages list.
+ */
+static int
+free_all_bad_pages(void)
+{
+ struct page *page, *page2;
+
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ ClearPageMemError(page); /* Mark the page as good */
+ total_badpages--;
+ }
+ putback_lru_pages(&badpagelist);
+ return 0;
+}
+
+#define OPT_LEN 16
+
+static ssize_t
+badpage_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ char optstr[OPT_LEN];
+ unsigned long opt;
+ int len = OPT_LEN;
+ int err;
+
+ if (count < len)
+ len = count;
+
+ strlcpy(optstr, buf, len);
+
+ err = strict_strtoul(optstr, 16, &opt);
+ if (err)
+ return err;
+
+ if (opt == 0)
+ free_all_bad_pages();
+ else
+ free_one_bad_page(opt);
+
+ return count;
+}
+
+/*
+ * badpage_show
+ * Display the number, size, and addresses of all the pages on the
+ * bad page list.
+ *
+ * Note that sysfs provides buf of PAGE_SIZE length. bufend tracks
+ * the remaining space in buf to avoid overflowing.
+ */
+static ssize_t
+badpage_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+
+{
+ struct page *page, *page2;
+ int i = 0, cnt = 0;
+ char *bufend = buf + PAGE_SIZE;
+
+ cnt = snprintf(buf, bufend - (buf + cnt),
+ "Memory marked bad: %d kB\n"
+ "Pages marked bad: %d\n"
+ "Unable to isolate on LRU: %d\n"
+ "Unable to migrate: %d\n"
+ "Already marked bad: %d\n"
+ "Already on list: %d\n"
+ "List of bad physical pages\n",
+ total_badpages << (PAGE_SHIFT - 10), total_badpages,
+ mstat_cannot_isolate, mstat_failed_to_discard,
+ mstat_already_marked, mstat_already_on_list
+ );
+
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ if (bufend - (buf + cnt) < 20)
+ break; /* Avoid overflowing the buffer */
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt),
+ " 0x%011lx", page_to_phys(page));
+ if (!(++i % 5))
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
+ }
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
+
+ return cnt;
+}
+
+static struct kobj_attribute badram_attr = {
+ .attr = {
+ .name = "badram",
+ .mode = S_IWUSR | S_IRUGO,
+ },
+ .show = badpage_show,
+ .store = badpage_store,
+};
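For reference, the attribute above appears as /sys/kernel/badram (it is
created on kernel_kobj with the name "badram"). A hedged user-space
sketch of the store interface:

	#include <stdio.h>

	int main(void)
	{
		/* writing "0" frees every page on the bad page list; a
		 * non-zero hex value frees the page at that physical
		 * address */
		FILE *f = fopen("/sys/kernel/badram", "w");

		if (!f)
			return 1;
		fputs("0\n", f);
		fclose(f);
		return 0;
	}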
+
+static int __init
+cpe_migrate_external_handler_init(void)
+{
+ int error;
+ struct task_struct *kthread;
+
+ error = sysfs_create_file(kernel_kobj, &badram_attr.attr);
+ if (error)
+ return -EINVAL;
+
+ /*
+ * set up the kthread
+ */
+ cpe_active = 1;
+ kthread = kthread_run(kthread_cpe_migrate, NULL, "cpe_migrate");
+ if (IS_ERR(kthread)) {
+ complete(&kthread_cpe_migrated_exited);
+ return -EFAULT;
+ }
+
+ /*
+ * register external ce handler
+ */
+ if (ia64_reg_CE_extension(cpe_setup_migrate)) {
+ printk(KERN_ERR "ia64_reg_CE_extension failed.\n");
+ return -EFAULT;
+ }
+ cpe_poll_enabled = cpe_polling_enabled;
+
+ printk(KERN_INFO "Registered badram Driver\n");
+ return 0;
+}
+
+static void __exit
+cpe_migrate_external_handler_exit(void)
+{
+ /* unregister external mca handlers */
+ ia64_unreg_CE_extension();
+
+ /* Stop kthread */
+ cpe_active = 0; /* tell kthread_cpe_migrate to exit */
+ wake_up_interruptible(&cpe_activate_IRQ_wq);
+ wait_for_completion(&kthread_cpe_migrated_exited);
+
+ sysfs_remove_file(kernel_kobj, &badram_attr.attr);
+}
+
+module_init(cpe_migrate_external_handler_init);
+module_exit(cpe_migrate_external_handler_exit);
+
+module_param(cpe_polling_enabled, int, 0644);
+MODULE_PARM_DESC(cpe_polling_enabled,
+ "Enable polling with migration");
+
+MODULE_AUTHOR("Russ Anderson <rja@sgi.com>");
+MODULE_DESCRIPTION("ia64 Corrected Error page migration driver");
- MODULE_LICENSE("GPL");
/*
* Switch into virtual mode:
*/
+#ifdef CONFIG_KDB_HARDWARE_BREAKPOINTS
+#define IA64_PSR_KDB_FLAGS IA64_PSR_DB
+#else
+#define IA64_PSR_KDB_FLAGS 0
+#endif
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI|IA64_PSR_KDB_FLAGS)
- |IA64_PSR_DI|IA64_PSR_AC)
++ |IA64_PSR_DI|IA64_PSR_AC|IA64_PSR_KDB_FLAGS)
;;
mov cr.ipsr=r16
movl r17=1f
#include <linux/delay.h>
#include <linux/kdebug.h>
#include <linux/utsname.h>
+ #include <linux/tracehook.h>
+#include <linux/perfmon_kern.h>
#include <asm/cpu.h>
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
#include <asm/user.h>
++#include <asm/perfmon_kern.h>
#include "entry.h"
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
+#include <linux/perfmon_kern.h>
#include <linux/regset.h>
#include <linux/elf.h>
+ #include <linux/tracehook.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
-#ifdef CONFIG_PERFMON
-#include <asm/perfmon.h>
-#endif
++#include <asm/perfmon_kern.h>
#include "entry.h"
#include <linux/init.h>
#include <linux/errno.h>
- extern int op_perfmon_init(struct oprofile_operations * ops);
-extern int perfmon_init(struct oprofile_operations *ops);
-extern void perfmon_exit(void);
++extern int op_perfmon_init(struct oprofile_operations *ops);
+extern void op_perfmon_exit(void);
extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
- int __init oprofile_arch_init(struct oprofile_operations * ops)
+ int __init oprofile_arch_init(struct oprofile_operations *ops)
{
int ret = -ENODEV;
allow_ints = 0;
}
-
-#define OPROFILE_FMT_UUID { \
- 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
-
-static pfm_buffer_fmt_t oprofile_fmt = {
- .fmt_name = "oprofile_format",
- .fmt_uuid = OPROFILE_FMT_UUID,
- .fmt_handler = perfmon_handler,
+static struct pfm_smpl_fmt oprofile_fmt = {
+ .fmt_name = "OProfile",
+ .fmt_handler = perfmon_handler,
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
+ .owner = THIS_MODULE
};
- static char * get_cpu_type(void)
-
+ static char *get_cpu_type(void)
{
__u8 family = local_cpu_data->family;
static int using_perfmon;
- int __init op_perfmon_init(struct oprofile_operations * ops)
-int perfmon_init(struct oprofile_operations *ops)
++int __init op_perfmon_init(struct oprofile_operations *ops)
{
- int ret = pfm_register_buffer_fmt(&oprofile_fmt);
+ int ret = pfm_fmt_register(&oprofile_fmt);
if (ret)
return -ENODEV;
}
- void op_perfmon_exit(void)
-void perfmon_exit(void)
++void __exit op_perfmon_exit(void)
{
if (!using_perfmon)
return;
--- /dev/null
+ #ifndef __ARCH_M68K_IOCTLS_H__
+ #define __ARCH_M68K_IOCTLS_H__
+
+ #include <asm/ioctl.h>
+
+ /* 0x54 is just a magic number to make these relatively unique ('T') */
+
+ #define TCGETS 0x5401
+ #define TCSETS 0x5402
+ #define TCSETSW 0x5403
+ #define TCSETSF 0x5404
+ #define TCGETA 0x5405
+ #define TCSETA 0x5406
+ #define TCSETAW 0x5407
+ #define TCSETAF 0x5408
+ #define TCSBRK 0x5409
+ #define TCXONC 0x540A
+ #define TCFLSH 0x540B
+ #define TIOCEXCL 0x540C
+ #define TIOCNXCL 0x540D
+ #define TIOCSCTTY 0x540E
+ #define TIOCGPGRP 0x540F
+ #define TIOCSPGRP 0x5410
+ #define TIOCOUTQ 0x5411
+ #define TIOCSTI 0x5412
+ #define TIOCGWINSZ 0x5413
+ #define TIOCSWINSZ 0x5414
+ #define TIOCMGET 0x5415
+ #define TIOCMBIS 0x5416
+ #define TIOCMBIC 0x5417
+ #define TIOCMSET 0x5418
+ #define TIOCGSOFTCAR 0x5419
+ #define TIOCSSOFTCAR 0x541A
+ #define FIONREAD 0x541B
+ #define TIOCINQ FIONREAD
+ #define TIOCLINUX 0x541C
+ #define TIOCCONS 0x541D
+ #define TIOCGSERIAL 0x541E
+ #define TIOCSSERIAL 0x541F
+ #define TIOCPKT 0x5420
+ #define FIONBIO 0x5421
+ #define TIOCNOTTY 0x5422
+ #define TIOCSETD 0x5423
+ #define TIOCGETD 0x5424
+ #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
+ #define TIOCSBRK 0x5427 /* BSD compatibility */
+ #define TIOCCBRK 0x5428 /* BSD compatibility */
+ #define TIOCGSID 0x5429 /* Return the session ID of FD */
+ #define TCGETS2 _IOR('T',0x2A, struct termios2)
+ #define TCSETS2 _IOW('T',0x2B, struct termios2)
+ #define TCSETSW2 _IOW('T',0x2C, struct termios2)
+ #define TCSETSF2 _IOW('T',0x2D, struct termios2)
+ #define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+ #define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
++#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get real dev no below /dev/console */
+
+ #define FIONCLEX 0x5450 /* these numbers need to be adjusted. */
+ #define FIOCLEX 0x5451
+ #define FIOASYNC 0x5452
+ #define TIOCSERCONFIG 0x5453
+ #define TIOCSERGWILD 0x5454
+ #define TIOCSERSWILD 0x5455
+ #define TIOCGLCKTRMIOS 0x5456
+ #define TIOCSLCKTRMIOS 0x5457
+ #define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+ #define TIOCSERGETLSR 0x5459 /* Get line status register */
+ #define TIOCSERGETMULTI 0x545A /* Get multiport config */
+ #define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+ #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
+ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
+ #define FIOQSIZE 0x545E
+
+ /* Used for packet mode */
+ #define TIOCPKT_DATA 0
+ #define TIOCPKT_FLUSHREAD 1
+ #define TIOCPKT_FLUSHWRITE 2
+ #define TIOCPKT_STOP 4
+ #define TIOCPKT_START 8
+ #define TIOCPKT_NOSTOP 16
+ #define TIOCPKT_DOSTOP 32
+
+ #define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
+
+ #endif /* __ARCH_M68K_IOCTLS_H__ */
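A hedged user-space sketch of the TIOCGDEV ioctl added above, assuming
the definition is visible to the program; it reports the device number
really backing a console-style tty:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	int main(void)
	{
		unsigned int dev;
		int fd = open("/dev/console", O_RDONLY);

		if (fd < 0)
			return 1;
		if (ioctl(fd, TIOCGDEV, &dev) == 0)
			printf("underlying device number: %#x\n", dev);
		close(fd);
		return 0;
	}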
--- /dev/null
+ include include/asm-generic/Kbuild.asm
+
+ header-y += cachectl.h sgidefs.h sysmips.h
++header-y += perfmon.h
--- /dev/null
+ /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995, 1996, 2001 Ralf Baechle
+ * Copyright (C) 2001 MIPS Technologies, Inc.
+ */
+ #ifndef __ASM_IOCTLS_H
+ #define __ASM_IOCTLS_H
+
+ #include <asm/ioctl.h>
+
+ #define TCGETA 0x5401
+ #define TCSETA 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
+ #define TCSETAW 0x5403
+ #define TCSETAF 0x5404
+
+ #define TCSBRK 0x5405
+ #define TCXONC 0x5406
+ #define TCFLSH 0x5407
+
+ #define TCGETS 0x540d
+ #define TCSETS 0x540e
+ #define TCSETSW 0x540f
+ #define TCSETSF 0x5410
+
+ #define TIOCEXCL 0x740d /* set exclusive use of tty */
+ #define TIOCNXCL 0x740e /* reset exclusive use of tty */
+ #define TIOCOUTQ 0x7472 /* output queue size */
+ #define TIOCSTI 0x5472 /* simulate terminal input */
+ #define TIOCMGET 0x741d /* get all modem bits */
+ #define TIOCMBIS 0x741b /* bis modem bits */
+ #define TIOCMBIC 0x741c /* bic modem bits */
+ #define TIOCMSET 0x741a /* set all modem bits */
+ #define TIOCPKT 0x5470 /* pty: set/clear packet mode */
+ #define TIOCPKT_DATA 0x00 /* data packet */
+ #define TIOCPKT_FLUSHREAD 0x01 /* flush packet */
+ #define TIOCPKT_FLUSHWRITE 0x02 /* flush packet */
+ #define TIOCPKT_STOP 0x04 /* stop output */
+ #define TIOCPKT_START 0x08 /* start output */
+ #define TIOCPKT_NOSTOP 0x10 /* no more ^S, ^Q */
+ #define TIOCPKT_DOSTOP 0x20 /* now do ^S ^Q */
+ /* #define TIOCPKT_IOCTL 0x40 state change of pty driver */
+ #define TIOCSWINSZ _IOW('t', 103, struct winsize) /* set window size */
+ #define TIOCGWINSZ _IOR('t', 104, struct winsize) /* get window size */
+ #define TIOCNOTTY 0x5471 /* void tty association */
+ #define TIOCSETD 0x7401
+ #define TIOCGETD 0x7400
+
+ #define FIOCLEX 0x6601
+ #define FIONCLEX 0x6602
+ #define FIOASYNC 0x667d
+ #define FIONBIO 0x667e
+ #define FIOQSIZE 0x667f
+
+ #define TIOCGLTC 0x7474 /* get special local chars */
+ #define TIOCSLTC 0x7475 /* set special local chars */
+ #define TIOCSPGRP _IOW('t', 118, int) /* set pgrp of tty */
+ #define TIOCGPGRP _IOR('t', 119, int) /* get pgrp of tty */
+ #define TIOCCONS _IOW('t', 120, int) /* become virtual console */
+
+ #define FIONREAD 0x467f
+ #define TIOCINQ FIONREAD
+
+ #define TIOCGETP 0x7408
+ #define TIOCSETP 0x7409
+ #define TIOCSETN 0x740a /* TIOCSETP wo flush */
+
+ /* #define TIOCSETA _IOW('t', 20, struct termios) set termios struct */
+ /* #define TIOCSETAW _IOW('t', 21, struct termios) drain output, set */
+ /* #define TIOCSETAF _IOW('t', 22, struct termios) drn out, fls in, set */
+ /* #define TIOCGETD _IOR('t', 26, int) get line discipline */
+ /* #define TIOCSETD _IOW('t', 27, int) set line discipline */
+ /* 127-124 compat */
+
+ #define TIOCSBRK 0x5427 /* BSD compatibility */
+ #define TIOCCBRK 0x5428 /* BSD compatibility */
+ #define TIOCGSID 0x7416 /* Return the session ID of FD */
+ #define TCGETS2 _IOR('T', 0x2A, struct termios2)
+ #define TCSETS2 _IOW('T', 0x2B, struct termios2)
+ #define TCSETSW2 _IOW('T', 0x2C, struct termios2)
+ #define TCSETSF2 _IOW('T', 0x2D, struct termios2)
+ #define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
++#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get real dev no below /dev/console */
+
+ /* I hope the range from 0x5480 on is free ... */
+ #define TIOCSCTTY 0x5480 /* become controlling tty */
+ #define TIOCGSOFTCAR 0x5481
+ #define TIOCSSOFTCAR 0x5482
+ #define TIOCLINUX 0x5483
+ #define TIOCGSERIAL 0x5484
+ #define TIOCSSERIAL 0x5485
+ #define TCSBRKP 0x5486 /* Needed for POSIX tcsendbreak() */
+ #define TIOCSERCONFIG 0x5488
+ #define TIOCSERGWILD 0x5489
+ #define TIOCSERSWILD 0x548a
+ #define TIOCGLCKTRMIOS 0x548b
+ #define TIOCSLCKTRMIOS 0x548c
+ #define TIOCSERGSTRUCT 0x548d /* For debugging only */
+ #define TIOCSERGETLSR 0x548e /* Get line status register */
+ #define TIOCSERGETMULTI 0x548f /* Get multiport config */
+ #define TIOCSERSETMULTI 0x5490 /* Set multiport config */
+ #define TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */
+ #define TIOCGICOUNT 0x5492 /* read serial port inline interrupt counts */
+ #define TIOCGHAYESESP 0x5493 /* Get Hayes ESP configuration */
+ #define TIOCSHAYESESP 0x5494 /* Set Hayes ESP configuration */
+
+ #endif /* __ASM_IOCTLS_H */
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * This file contains mips64 specific definitions for the perfmon
++ * interface.
++ *
++ * This file MUST never be included directly. Use linux/perfmon.h.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ */
++#ifndef _ASM_MIPS64_PERFMON_H_
++#define _ASM_MIPS64_PERFMON_H_
++
++/*
++ * arch-specific user visible interface definitions
++ */
++
++#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
++#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
++
++#endif /* _ASM_MIPS64_PERFMON_H_ */
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2005 Philip Mucci.
++ *
++ * Based on other versions:
++ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * This file contains mips64 specific definitions for the perfmon
++ * interface.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ */
++#ifndef _ASM_MIPS64_PERFMON_KERN_H_
++#define _ASM_MIPS64_PERFMON_KERN_H_
++
++#ifdef __KERNEL__
++
++#ifdef CONFIG_PERFMON
++#include <linux/unistd.h>
++#include <asm/cacheflush.h>
++
++#define PFM_ARCH_PMD_STK_ARG 2
++#define PFM_ARCH_PMC_STK_ARG 2
++
++struct pfm_arch_pmu_info {
++ u32 pmu_style;
++};
++
++#define MIPS64_CONFIG_PMC_MASK (1 << 4)
++#define MIPS64_PMC_INT_ENABLE_MASK (1 << 4)
++#define MIPS64_PMC_CNT_ENABLE_MASK (0xf)
++#define MIPS64_PMC_EVT_MASK (0x7 << 6)
++#define MIPS64_PMC_CTR_MASK (1 << 31)
++#define MIPS64_PMD_INTERRUPT (1 << 31)
++
++/* Coprocessor register 25 contains the PMU interface. */
++/* Sel 0 is control for counter 0 */
++/* Sel 1 is count for counter 0. */
++/* Sel 2 is control for counter 1. */
++/* Sel 3 is count for counter 1. */
++
++/*
++
++31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
++M 0--------------------------------------------------------------0 Event-- IE U S K EXL
++
++M 31 If this bit is one, another pair of Performance Control
++and Counter registers is implemented at a MTC0/MFC0 select value of n+2
++
++Event 8:5 Counter event enabled for this counter. Possible events
++are listed in Table 6-30. R/W Undefined
++
++IE 4 Counter Interrupt Enable. This bit masks bit 31 of the
++associated count register from the interrupt exception
++request output. R/W 0
++
++U 3 Count in User Mode. When this bit is set, the specified
++event is counted in User Mode. R/W Undefined
++
++S 2 Count in Supervisor Mode. When this bit is set, the
++specified event is counted in Supervisor Mode. R/W Undefined
++
++K 1 Count in Kernel Mode. When this bit is set, count the
++event in Kernel Mode when EXL and ERL both are 0. R/W Undefined
++
++EXL 0 Count when EXL. When this bit is set, count the event
++when EXL = 1 and ERL = 0. R/W Undefined
++*/
++
++static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
++{}
++
++static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{}
++
++static inline void pfm_arch_serialize(void)
++{}
++
++
++/*
++ * MIPS does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
++ * this routine needs to do it when switching sets on overflow
++ */
++static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ pfm_save_pmds(ctx, set);
++}
++
++static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
++ unsigned int cnum, u64 value)
++{
++ /*
++ * we only write to the actual register when monitoring is
++ * active (pfm_start was issued)
++ */
++ if (ctx && (ctx->flags.started == 0))
++ return;
++
++ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
++ case 0:
++ write_c0_perfctrl0(value);
++ break;
++ case 1:
++ write_c0_perfctrl1(value);
++ break;
++ case 2:
++ write_c0_perfctrl2(value);
++ break;
++ case 3:
++ write_c0_perfctrl3(value);
++ break;
++ default:
++ BUG();
++ }
++}
++
++static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
++ unsigned int cnum, u64 value)
++{
++ value &= pfm_pmu_conf->ovfl_mask;
++
++ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
++ case 0:
++ write_c0_perfcntr0(value);
++ break;
++ case 1:
++ write_c0_perfcntr1(value);
++ break;
++ case 2:
++ write_c0_perfcntr2(value);
++ break;
++ case 3:
++ write_c0_perfcntr3(value);
++ break;
++ default:
++ BUG();
++ }
++}
++
++static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
++{
++ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
++ case 0:
++ return read_c0_perfcntr0();
++ break;
++ case 1:
++ return read_c0_perfcntr1();
++ break;
++ case 2:
++ return read_c0_perfcntr2();
++ break;
++ case 3:
++ return read_c0_perfcntr3();
++ break;
++ default:
++ BUG();
++ return 0;
++ }
++}
++
++static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
++{
++ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
++ case 0:
++ return read_c0_perfctrl0();
++ break;
++ case 1:
++ return read_c0_perfctrl1();
++ break;
++ case 2:
++ return read_c0_perfctrl2();
++ break;
++ case 3:
++ return read_c0_perfctrl3();
++ break;
++ default:
++ BUG();
++ return 0;
++ }
++}
++
++/*
++ * For some CPUs, the upper bits of a counter must be set in order for the
++ * overflow interrupt to happen. On overflow, the counter has wrapped around,
++ * and the upper bits are cleared. This function may be used to set them back.
++ */
++static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
++ unsigned int cnum)
++{
++ u64 val;
++ val = pfm_arch_read_pmd(ctx, cnum);
++ /* This masks out overflow bit 31 */
++ pfm_arch_write_pmd(ctx, cnum, val);
++}
++
++/*
++ * At certain points, perfmon needs to know if monitoring has been
++ * explicitly started/stopped by the user via pfm_start/pfm_stop. The
++ * information is tracked in ctx.flags.started. However on certain
++ * architectures, it may be possible to start/stop directly from
++ * user level with a single assembly instruction bypassing
++ * the kernel. This function must be used to determine by
++ * arch-specific means whether monitoring is actually started/stopped.
++ */
++static inline int pfm_arch_is_active(struct pfm_context *ctx)
++{
++ return ctx->flags.started;
++}
++
++static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
++ struct pfm_context *ctx)
++{}
++
++static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
++ struct pfm_context *ctx)
++{}
++
++static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
++ struct pfm_context *ctx)
++{}
++int pfm_arch_ctxswout_thread(struct task_struct *task,
++ struct pfm_context *ctx);
++
++int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
++void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
++void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
++void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
++void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
++char *pfm_arch_get_pmu_module_name(void);
++
++static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ pfm_arch_stop(current, ctx);
++ /*
++ * we mark monitoring as stopped to avoid
++ * certain side effects especially in
++ * pfm_switch_sets_from_intr() on
++ * pfm_arch_restore_pmcs()
++ */
++ ctx->flags.started = 0;
++}
++
++/*
++ * unfreeze PMU from pfm_do_interrupt_handler()
++ * ctx may be NULL for spurious
++ */
++static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
++{
++ if (!ctx)
++ return;
++
++ PFM_DBG_ovfl("state=%d", ctx->state);
++
++ ctx->flags.started = 1;
++
++ if (ctx->state == PFM_CTX_MASKED)
++ return;
++
++ pfm_arch_restore_pmcs(ctx, ctx->active_set);
++}
++
++/*
++ * this function is called from the PMU interrupt handler ONLY.
++ * On MIPS, the PMU is frozen via pfm_arch_stop(); masking would be
++ * implemented the same way. Given that the PMU is already stopped when
++ * entering the interrupt handler, we do not need to stop it again, so
++ * this function is a nop.
++ */
++static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{}
++
++/*
++ * on MIPS masking/unmasking uses the start/stop mechanism, so we simply
++ * need to start here.
++ */
++static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ pfm_arch_start(current, ctx);
++}
++
++static inline int pfm_arch_context_create(struct pfm_context *ctx,
++ u32 ctx_flags)
++{
++ return 0;
++}
++
++static inline void pfm_arch_context_free(struct pfm_context *ctx)
++{}
++
++
++
++
++
++/*
++ * function called from pfm_setfl_sane(). Context is locked
++ * and interrupts are masked.
++ * The value of flags is the value of ctx_flags as passed by
++ * user.
++ *
++ * function must check arch-specific set flags.
++ * Return:
++ * 1 when flags are valid
++ * 0 on error
++ */
++static inline int
++pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
++{
++ return 0;
++}
++
++static inline int pfm_arch_init(void)
++{
++ return 0;
++}
++
++static inline void pfm_arch_init_percpu(void)
++{}
++
++static inline int pfm_arch_load_context(struct pfm_context *ctx)
++{
++ return 0;
++}
++
++static inline void pfm_arch_unload_context(struct pfm_context *ctx)
++{}
++
++static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
++{
++ return 0;
++}
++
++static inline void pfm_arch_pmu_release(void)
++{}
++
++#ifdef CONFIG_PERFMON_FLUSH
++/*
++ * due to cache aliasing problem on MIPS, it is necessary to flush
++ * pages out of the cache when they are modified.
++ */
++static inline void pfm_cacheflush(void *addr, unsigned int len)
++{
++ unsigned long start, end;
++
++ start = (unsigned long)addr & PAGE_MASK;
++ end = ((unsigned long)addr + len + PAGE_SIZE - 1) & PAGE_MASK;
++
++ while (start < end) {
++ flush_data_cache_page(start);
++ start += PAGE_SIZE;
++ }
++}
++#else
++static inline void pfm_cacheflush(void *addr, unsigned int len)
++{}
++#endif
++
++static inline void pfm_arch_arm_handle_work(struct task_struct *task)
++{}
++
++static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
++{}
++
++static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
++{
++ return 0;
++}
++
++static inline int pfm_arch_get_base_syscall(void)
++{
++ if (test_thread_flag(TIF_32BIT_ADDR)) {
++ if (test_thread_flag(TIF_32BIT_REGS))
++ return __NR_O32_Linux+330;
++ return __NR_N32_Linux+293;
++ }
++ return __NR_64_Linux+289;
++}
++
++struct pfm_arch_context {
++ /* empty */
++};
++
++#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
++/*
++ * MIPS may need extra alignment requirements for the sampling buffer
++ */
++#ifdef CONFIG_PERFMON_SMPL_ALIGN
++#define PFM_ARCH_SMPL_ALIGN_SIZE 0x4000
++#else
++#define PFM_ARCH_SMPL_ALIGN_SIZE 0
++#endif
++
++#endif /* CONFIG_PERFMON */
++
++#endif /* __KERNEL__ */
++#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */
--- /dev/null
+ /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ */
+ #ifndef _ASM_SPINLOCK_H
+ #define _ASM_SPINLOCK_H
+
+ #include <linux/compiler.h>
+
+ #include <asm/barrier.h>
+ #include <asm/war.h>
+
+ /*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ *
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * These are fair FIFO ticket locks
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+
+ /*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourselves to the queue and noting our position), then waiting until the
+ * head becomes equal to the initial value of the tail.
+ */
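The idea in the comment above, as a minimal C sketch using GCC atomic
builtins; the MIPS code below implements the same scheme in hand-written
ll/sc assembly, packing both counters into a single lock word:

	typedef struct {
		volatile unsigned int head;	/* now serving */
		unsigned int tail;		/* next ticket to hand out */
	} ticket_lock_t;

	static void ticket_lock(ticket_lock_t *lock)
	{
		/* atomically take a ticket: note the tail, increment it */
		unsigned int me = __sync_fetch_and_add(&lock->tail, 1);

		/* spin until the head reaches our ticket */
		while (lock->head != me)
			;
	}

	static void ticket_unlock(ticket_lock_t *lock)
	{
		__sync_fetch_and_add(&lock->head, 1);	/* serve the next waiter */
	}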
+
+ static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+ {
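+	/*
+	 * lock word layout (from the masks used here and in the asm below):
+	 * bits [26:14] hold the next ticket (tail), bits [12:0] the
+	 * now-serving ticket (head); the lock is held while they differ.
+	 */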
+ unsigned int counters = ACCESS_ONCE(lock->lock);
+
+ return ((counters >> 14) ^ counters) & 0x1fff;
+ }
+
+ #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+ #define __raw_spin_unlock_wait(x) \
+ while (__raw_spin_is_locked(x)) { cpu_relax(); }
+
+ static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+ {
+ unsigned int counters = ACCESS_ONCE(lock->lock);
+
+ return (((counters >> 14) - counters) & 0x1fff) > 1;
+ }
+
+ static inline void __raw_spin_lock(raw_spinlock_t *lock)
+ {
+ int my_ticket;
+ int tmp;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__ (
+ " .set push # __raw_spin_lock \n"
+ " .set noreorder \n"
+ " \n"
+ "1: ll %[ticket], %[ticket_ptr] \n"
+ " addiu %[my_ticket], %[ticket], 0x4000 \n"
+ " sc %[my_ticket], %[ticket_ptr] \n"
+ " beqzl %[my_ticket], 1b \n"
+ " nop \n"
+ " srl %[my_ticket], %[ticket], 14 \n"
+ " andi %[my_ticket], %[my_ticket], 0x1fff \n"
+ " andi %[ticket], %[ticket], 0x1fff \n"
+ " bne %[ticket], %[my_ticket], 4f \n"
+ " subu %[ticket], %[my_ticket], %[ticket] \n"
+ "2: \n"
+ " .subsection 2 \n"
+ "4: andi %[ticket], %[ticket], 0x1fff \n"
+ "5: sll %[ticket], 5 \n"
+ " \n"
+ "6: bnez %[ticket], 6b \n"
+ " subu %[ticket], 1 \n"
+ " \n"
+ " lw %[ticket], %[ticket_ptr] \n"
+ " andi %[ticket], %[ticket], 0x1fff \n"
+ " beq %[ticket], %[my_ticket], 2b \n"
+ " subu %[ticket], %[my_ticket], %[ticket] \n"
+ " b 5b \n"
+ " subu %[ticket], %[ticket], 1 \n"
+ " .previous \n"
+ " .set pop \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp),
+ [my_ticket] "=&r" (my_ticket));
+ } else {
+ __asm__ __volatile__ (
+ " .set push # __raw_spin_lock \n"
+ " .set noreorder \n"
+ " \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ "1: addiu %[my_ticket], %[ticket], 0x4000 \n"
+ " sc %[my_ticket], %[ticket_ptr] \n"
+ " beqz %[my_ticket], 3f \n"
+ " nop \n"
+ " srl %[my_ticket], %[ticket], 14 \n"
+ " andi %[my_ticket], %[my_ticket], 0x1fff \n"
+ " andi %[ticket], %[ticket], 0x1fff \n"
+ " bne %[ticket], %[my_ticket], 4f \n"
+ " subu %[ticket], %[my_ticket], %[ticket] \n"
+ "2: \n"
+ " .subsection 2 \n"
+ "3: b 1b \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ " \n"
+ "4: andi %[ticket], %[ticket], 0x1fff \n"
+ "5: sll %[ticket], 5 \n"
+ " \n"
+ "6: bnez %[ticket], 6b \n"
+ " subu %[ticket], 1 \n"
+ " \n"
+ " lw %[ticket], %[ticket_ptr] \n"
+ " andi %[ticket], %[ticket], 0x1fff \n"
+ " beq %[ticket], %[my_ticket], 2b \n"
+ " subu %[ticket], %[my_ticket], %[ticket] \n"
+ " b 5b \n"
+ " subu %[ticket], %[ticket], 1 \n"
+ " .previous \n"
+ " .set pop \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp),
+ [my_ticket] "=&r" (my_ticket));
+ }
+
+ smp_llsc_mb();
+ }
+
+ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ int tmp;
+
+ smp_llsc_mb();
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__ (
+ " # __raw_spin_unlock \n"
+ "1: ll %[ticket], %[ticket_ptr] \n"
+ " addiu %[ticket], %[ticket], 1 \n"
+ " ori %[ticket], %[ticket], 0x2000 \n"
+ " xori %[ticket], %[ticket], 0x2000 \n"
+ " sc %[ticket], %[ticket_ptr] \n"
+ " beqzl %[ticket], 1b \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp));
+ } else {
+ __asm__ __volatile__ (
+ " .set push # __raw_spin_unlock \n"
+ " .set noreorder \n"
+ " \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ "1: addiu %[ticket], %[ticket], 1 \n"
+ " ori %[ticket], %[ticket], 0x2000 \n"
+ " xori %[ticket], %[ticket], 0x2000 \n"
+ " sc %[ticket], %[ticket_ptr] \n"
+ " beqz %[ticket], 2f \n"
+ " nop \n"
+ " \n"
+ " .subsection 2 \n"
+ "2: b 1b \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ " .previous \n"
+ " .set pop \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp));
+ }
+ }
+
+ static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ int tmp, tmp2, tmp3;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__ (
+ " .set push # __raw_spin_trylock \n"
+ " .set noreorder \n"
+ " \n"
+ "1: ll %[ticket], %[ticket_ptr] \n"
+ " srl %[my_ticket], %[ticket], 14 \n"
+ " andi %[my_ticket], %[my_ticket], 0x1fff \n"
+ " andi %[now_serving], %[ticket], 0x1fff \n"
+ " bne %[my_ticket], %[now_serving], 3f \n"
+ " addiu %[ticket], %[ticket], 0x4000 \n"
+ " sc %[ticket], %[ticket_ptr] \n"
+ " beqzl %[ticket], 1b \n"
+ " li %[ticket], 1 \n"
+ "2: \n"
+ " .subsection 2 \n"
+ "3: b 2b \n"
+ " li %[ticket], 0 \n"
+ " .previous \n"
+ " .set pop \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp),
+ [my_ticket] "=&r" (tmp2),
+ [now_serving] "=&r" (tmp3));
+ } else {
+ __asm__ __volatile__ (
+ " .set push # __raw_spin_trylock \n"
+ " .set noreorder \n"
+ " \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ "1: srl %[my_ticket], %[ticket], 14 \n"
+ " andi %[my_ticket], %[my_ticket], 0x1fff \n"
+ " andi %[now_serving], %[ticket], 0x1fff \n"
+ " bne %[my_ticket], %[now_serving], 3f \n"
+ " addiu %[ticket], %[ticket], 0x4000 \n"
+ " sc %[ticket], %[ticket_ptr] \n"
+ " beqz %[ticket], 4f \n"
+ " li %[ticket], 1 \n"
+ "2: \n"
+ " .subsection 2 \n"
+ "3: b 2b \n"
+ " li %[ticket], 0 \n"
+ "4: b 1b \n"
+ " ll %[ticket], %[ticket_ptr] \n"
+ " .previous \n"
+ " .set pop \n"
+ : [ticket_ptr] "+m" (lock->lock),
+ [ticket] "=&r" (tmp),
+ [my_ticket] "=&r" (tmp2),
+ [now_serving] "=&r" (tmp3));
+ }
+
+ smp_llsc_mb();
+
+ return tmp;
+ }
+
+ /*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
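A minimal usage sketch of the pattern this comment describes (lock and function names assumed for illustration): readers that may run in interrupt context can take the plain read lock, while any writer must use the irq-safe variant so an interrupt reader cannot deadlock against a held write lock:

        static DEFINE_RWLOCK(tbl_lock);

        /* Reader: safe even from interrupt context; readers share the lock. */
        static void lookup(void)
        {
                read_lock(&tbl_lock);
                /* ... read the table ... */
                read_unlock(&tbl_lock);
        }

        /* Writer: must block interrupt readers on this CPU. */
        static void update(void)
        {
                unsigned long flags;

                write_lock_irqsave(&tbl_lock, flags);
                /* ... modify the table ... */
                write_unlock_irqrestore(&tbl_lock, flags);
        }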
+
+ /*
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ #define __raw_read_can_lock(rw) ((rw)->lock >= 0)
+
+ /*
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ #define __raw_write_can_lock(rw) (!(rw)->lock)
+
+ static inline void __raw_read_lock(raw_rwlock_t *rw)
+ {
+ unsigned int tmp;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_read_lock \n"
+ "1: ll %1, %2 \n"
+ " bltz %1, 1b \n"
+ " addu %1, 1 \n"
+ " sc %1, %0 \n"
+ " beqzl %1, 1b \n"
+ " nop \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ } else {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_read_lock \n"
+ "1: ll %1, %2 \n"
+ " bltz %1, 2f \n"
+ " addu %1, 1 \n"
+ " sc %1, %0 \n"
+ " beqz %1, 1b \n"
+ " nop \n"
+ " .subsection 2 \n"
+ "2: ll %1, %2 \n"
+ " bltz %1, 2b \n"
+ " addu %1, 1 \n"
+ " b 1b \n"
+ " nop \n"
+ " .previous \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ }
+
+ smp_llsc_mb();
+ }
+
+ /* Note the use of sub, not subu, which will make the kernel die with an
+ overflow exception if we ever try to unlock an rwlock that is already
+ unlocked or is held by a writer. */
+ static inline void __raw_read_unlock(raw_rwlock_t *rw)
+ {
+ unsigned int tmp;
+
+ smp_llsc_mb();
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ "1: ll %1, %2 # __raw_read_unlock \n"
+ " sub %1, 1 \n"
+ " sc %1, %0 \n"
+ " beqzl %1, 1b \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ } else {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_read_unlock \n"
+ "1: ll %1, %2 \n"
+ " sub %1, 1 \n"
+ " sc %1, %0 \n"
+ " beqz %1, 2f \n"
+ " nop \n"
+ " .subsection 2 \n"
+ "2: b 1b \n"
+ " nop \n"
+ " .previous \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ }
+ }
+
+ static inline void __raw_write_lock(raw_rwlock_t *rw)
+ {
+ unsigned int tmp;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_write_lock \n"
+ "1: ll %1, %2 \n"
+ " bnez %1, 1b \n"
+ " lui %1, 0x8000 \n"
+ " sc %1, %0 \n"
+ " beqzl %1, 1b \n"
+ " nop \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ } else {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_write_lock \n"
+ "1: ll %1, %2 \n"
+ " bnez %1, 2f \n"
+ " lui %1, 0x8000 \n"
+ " sc %1, %0 \n"
+ " beqz %1, 2f \n"
+ " nop \n"
+ " .subsection 2 \n"
+ "2: ll %1, %2 \n"
+ " bnez %1, 2b \n"
+ " lui %1, 0x8000 \n"
+ " b 1b \n"
+ " nop \n"
+ " .previous \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp)
+ : "m" (rw->lock)
+ : "memory");
+ }
+
+ smp_llsc_mb();
+ }
+
+ static inline void __raw_write_unlock(raw_rwlock_t *rw)
+ {
+ smp_mb();
+
+ __asm__ __volatile__(
+ " # __raw_write_unlock \n"
+ " sw $0, %0 \n"
+ : "=m" (rw->lock)
+ : "m" (rw->lock)
+ : "memory");
+ }
+
+ static inline int __raw_read_trylock(raw_rwlock_t *rw)
+ {
+ unsigned int tmp;
+ int ret;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_read_trylock \n"
+ " li %2, 0 \n"
+ "1: ll %1, %3 \n"
+ " bltz %1, 2f \n"
+ " addu %1, 1 \n"
+ " sc %1, %0 \n"
+ " .set reorder \n"
+ " beqzl %1, 1b \n"
+ " nop \n"
+ __WEAK_LLSC_MB
+ " li %2, 1 \n"
+ "2: \n"
+ : "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
+ : "m" (rw->lock)
+ : "memory");
+ } else {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_read_trylock \n"
+ " li %2, 0 \n"
+ "1: ll %1, %3 \n"
+ " bltz %1, 2f \n"
+ " addu %1, 1 \n"
+ " sc %1, %0 \n"
+ " beqz %1, 1b \n"
+ " nop \n"
+ " .set reorder \n"
+ __WEAK_LLSC_MB
+ " li %2, 1 \n"
+ "2: \n"
+ : "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
+ : "m" (rw->lock)
+ : "memory");
+ }
+
+ return ret;
+ }
+
+ static inline int __raw_write_trylock(raw_rwlock_t *rw)
+ {
+ unsigned int tmp;
+ int ret;
+
+ if (R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_write_trylock \n"
+ " li %2, 0 \n"
+ "1: ll %1, %3 \n"
+ " bnez %1, 2f \n"
+ " lui %1, 0x8000 \n"
+ " sc %1, %0 \n"
+ " beqzl %1, 1b \n"
+ " nop \n"
+ __WEAK_LLSC_MB
+ " li %2, 1 \n"
+ " .set reorder \n"
+ "2: \n"
+ : "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
+ : "m" (rw->lock)
+ : "memory");
+ } else {
+ __asm__ __volatile__(
+ " .set noreorder # __raw_write_trylock \n"
+ " li %2, 0 \n"
+ "1: ll %1, %3 \n"
+ " bnez %1, 2f \n"
+ " lui %1, 0x8000 \n"
+ " sc %1, %0 \n"
+ " beqz %1, 3f \n"
+ " li %2, 1 \n"
+ "2: \n"
+ __WEAK_LLSC_MB
+ " .subsection 2 \n"
+ "3: b 1b \n"
+ " li %2, 0 \n"
+ " .previous \n"
+ " .set reorder \n"
+ : "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
+ : "m" (rw->lock)
+ : "memory");
+ }
+
+ return ret;
+ }
+
++#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
++#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+
+ #define _raw_spin_relax(lock) cpu_relax()
+ #define _raw_read_relax(lock) cpu_relax()
+ #define _raw_write_relax(lock) cpu_relax()
+
+ #endif /* _ASM_SPINLOCK_H */
--- /dev/null
+ /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle
+ * Copyright (C) 1996 by Paul M. Antoine
+ * Copyright (C) 1999 Silicon Graphics
+ * Kevin D. Kissell, kevink@mips.org and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ */
+ #ifndef _ASM_SYSTEM_H
+ #define _ASM_SYSTEM_H
+
+ #include <linux/types.h>
+ #include <linux/irqflags.h>
+
+ #include <asm/addrspace.h>
+ #include <asm/barrier.h>
+ #include <asm/cmpxchg.h>
+ #include <asm/cpu-features.h>
+ #include <asm/dsp.h>
+ #include <asm/watch.h>
+ #include <asm/war.h>
+
+
+ /*
+ * switch_to(n) should switch tasks to task nr n, first
+ * checking that n isn't the current task, in which case it does nothing.
+ */
+ extern asmlinkage void *resume(void *last, void *next, void *next_ti);
+
+ struct task_struct;
+
+ #ifdef CONFIG_MIPS_MT_FPAFF
+
+ /*
+ * Handle the scheduler resume end of FPU affinity management. We do this
+ * inline to try to keep the overhead down. If we have been forced to run on
+ * a "CPU" with an FPU because of a previous high level of FP computation,
+ * but did not actually use the FPU during the most recent time-slice (CU1
+ * isn't set), we undo the restriction on cpus_allowed.
+ *
+ * We're not calling set_cpus_allowed() here, because we have no need to
+ * force prompt migration - we're already switching the current CPU to a
+ * different thread.
+ */
+
+ #define __mips_mt_fpaff_switch_to(prev) \
+ do { \
+ struct thread_info *__prev_ti = task_thread_info(prev); \
+ \
+ if (cpu_has_fpu && \
+ test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
+ (!(KSTK_STATUS(prev) & ST0_CU1))) { \
+ clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
+ prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ } \
+ next->thread.emulated_fp = 0; \
+ } while(0)
+
+ #else
+ #define __mips_mt_fpaff_switch_to(prev) do { (void) (prev); } while (0)
+ #endif
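One subtlety worth noting: the macro body also assigns next->thread.emulated_fp even though next is not a parameter; it relies on being expanded inside switch_to() below, where next is in scope.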
+
+ #define switch_to(prev, next, last) \
+ do { \
+ __mips_mt_fpaff_switch_to(prev); \
+ if (cpu_has_dsp) \
+ __save_dsp(prev); \
++ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
++ pfm_ctxsw_out(prev, next); \
++ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
++ pfm_ctxsw_in(prev, next); \
+ (last) = resume(prev, next, task_thread_info(next)); \
+ } while (0)
+
+ #define finish_arch_switch(prev) \
+ do { \
+ if (cpu_has_dsp) \
+ __restore_dsp(current); \
+ if (cpu_has_userlocal) \
+ write_c0_userlocal(current_thread_info()->tp_value); \
+ __restore_watch(); \
+ } while (0)
+
+ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
+ {
+ __u32 retval;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %3 # xchg_u32 \n"
+ " .set mips0 \n"
+ " move %2, %z4 \n"
+ " .set mips3 \n"
+ " sc %2, %1 \n"
+ " beqzl %2, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (retval), "=m" (*m), "=&r" (dummy)
+ : "R" (*m), "Jr" (val)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %0, %3 # xchg_u32 \n"
+ " .set mips0 \n"
+ " move %2, %z4 \n"
+ " .set mips3 \n"
+ " sc %2, %1 \n"
+ " beqz %2, 2f \n"
+ " .subsection 2 \n"
+ "2: b 1b \n"
+ " .previous \n"
+ " .set mips0 \n"
+ : "=&r" (retval), "=m" (*m), "=&r" (dummy)
+ : "R" (*m), "Jr" (val)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ retval = *m;
+ *m = val;
+ raw_local_irq_restore(flags); /* implies memory barrier */
+ }
+
+ smp_llsc_mb();
+
+ return retval;
+ }
+
+ #ifdef CONFIG_64BIT
+ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
+ {
+ __u64 retval;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %0, %3 # xchg_u64 \n"
+ " move %2, %z4 \n"
+ " scd %2, %1 \n"
+ " beqzl %2, 1b \n"
+ " .set mips0 \n"
+ : "=&r" (retval), "=m" (*m), "=&r" (dummy)
+ : "R" (*m), "Jr" (val)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %0, %3 # xchg_u64 \n"
+ " move %2, %z4 \n"
+ " scd %2, %1 \n"
+ " beqz %2, 2f \n"
+ " .subsection 2 \n"
+ "2: b 1b \n"
+ " .previous \n"
+ " .set mips0 \n"
+ : "=&r" (retval), "=m" (*m), "=&r" (dummy)
+ : "R" (*m), "Jr" (val)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ retval = *m;
+ *m = val;
+ raw_local_irq_restore(flags); /* implies memory barrier */
+ }
+
+ smp_llsc_mb();
+
+ return retval;
+ }
+ #else
+ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 val);
+ #define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels
+ #endif
+
+ /* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid xchg(). */
+ extern void __xchg_called_with_bad_pointer(void);
+
+ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+ {
+ switch (size) {
+ case 4:
+ return __xchg_u32(ptr, x);
+ case 8:
+ return __xchg_u64(ptr, x);
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+ }
+
+ #define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
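The undefined __xchg_called_with_bad_pointer() turns a size error into a link failure rather than a silent runtime bug. A sketch of both sides (variable names assumed for illustration):

        unsigned int word;
        unsigned short half;

        unsigned int old = xchg(&word, 1);      /* OK: 4 bytes -> __xchg_u32() */

        /* Broken: 2 bytes. The switch in __xchg() falls through to the
         * undefined __xchg_called_with_bad_pointer(), so the build fails
         * at link time instead of misbehaving at run time. */
        unsigned short bad = xchg(&half, 1);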
+
+ extern void set_handler(unsigned long offset, void *addr, unsigned long len);
+ extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len);
+
+ typedef void (*vi_handler_t)(void);
+ extern void *set_vi_handler(int n, vi_handler_t addr);
+
+ extern void *set_except_vector(int n, void *addr);
+ extern unsigned long ebase;
+ extern void per_cpu_trap_init(void);
+
+ /*
+ * See include/asm-ia64/system.h; prevents deadlock on SMP
+ * systems.
+ */
+ #define __ARCH_WANT_UNLOCKED_CTXSW
+
+ extern unsigned long arch_align_stack(unsigned long sp);
+
+ #endif /* _ASM_SYSTEM_H */
--- /dev/null
+ /* thread_info.h: MIPS low-level thread information
+ *
+ * Copyright (C) 2002 David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
+ #ifndef _ASM_THREAD_INFO_H
+ #define _ASM_THREAD_INFO_H
+
+ #ifdef __KERNEL__
+
+
+ #ifndef __ASSEMBLY__
+
+ #include <asm/processor.h>
+
+ /*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants
+ * must also be changed
+ */
+ struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ unsigned long flags; /* low level flags */
+ unsigned long tp_value; /* thread pointer */
+ __u32 cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable, <0 => BUG */
+
+ mm_segment_t addr_limit; /* thread address space:
+ 0-0xBFFFFFFF for user-thread
+ 0-0xFFFFFFFF for kernel-thread
+ */
+ struct restart_block restart_block;
+ struct pt_regs *regs;
+ };
+
+ /*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+ #define INIT_THREAD_INFO(tsk) \
+ { \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = _TIF_FIXADE, \
+ .cpu = 0, \
+ .preempt_count = 1, \
+ .addr_limit = KERNEL_DS, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+ }
+
+ #define init_thread_info (init_thread_union.thread_info)
+ #define init_stack (init_thread_union.stack)
+
+ /* How to get the thread information struct from C. */
+ register struct thread_info *__current_thread_info __asm__("$28");
+ #define current_thread_info() __current_thread_info
+
+ /* thread information allocation */
+ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
+ #define THREAD_SIZE_ORDER (1)
+ #endif
+ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_64BIT)
+ #define THREAD_SIZE_ORDER (2)
+ #endif
+ #ifdef CONFIG_PAGE_SIZE_8KB
+ #define THREAD_SIZE_ORDER (1)
+ #endif
+ #ifdef CONFIG_PAGE_SIZE_16KB
+ #define THREAD_SIZE_ORDER (0)
+ #endif
+ #ifdef CONFIG_PAGE_SIZE_64KB
+ #define THREAD_SIZE_ORDER (0)
+ #endif
+
+ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+ #define THREAD_MASK (THREAD_SIZE - 1UL)
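Worked through, the table above gives: 4 KB pages on a 32-bit kernel yield THREAD_SIZE_ORDER 1, so THREAD_SIZE = 4 KB << 1 = 8 KB (THREAD_MASK = 0x1fff); 4 KB pages on 64-bit, 8 KB pages and 16 KB pages all give 16 KB stacks; 64 KB pages use a single 64 KB page.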
+
+ #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ #define alloc_thread_info(tsk) \
+ ({ \
+ struct thread_info *ret; \
+ \
+ ret = kzalloc(THREAD_SIZE, GFP_KERNEL); \
+ \
+ ret; \
+ })
+ #else
+ #define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+ #endif
+
+ #define free_thread_info(info) kfree(info)
+
+ #endif /* !__ASSEMBLY__ */
+
+ #define PREEMPT_ACTIVE 0x10000000
+
+ /*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ * access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+ #define TIF_SIGPENDING 1 /* signal pending */
+ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+ #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
+ #define TIF_SECCOMP 4 /* secure computing */
++#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */
+ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
+ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
+ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+ #define TIF_MEMDIE 18
+ #define TIF_FREEZE 19
+ #define TIF_FIXADE 20 /* Fix address errors in software */
+ #define TIF_LOGADE 21 /* Log address errors to syslog */
+ #define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */
+ #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
+ #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
+ #define TIF_LOAD_WATCH 25 /* If set, load watch registers */
++#define TIF_PERFMON_CTXSW 26 /* perfmon needs ctxsw calls */
+ #define TIF_SYSCALL_TRACE 31 /* syscall trace active */
+
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+ #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1<<TIF_SECCOMP)
+ #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+ #define _TIF_USEDFPU (1<<TIF_USEDFPU)
+ #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+ #define _TIF_FREEZE (1<<TIF_FREEZE)
+ #define _TIF_FIXADE (1<<TIF_FIXADE)
+ #define _TIF_LOGADE (1<<TIF_LOGADE)
+ #define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
+ #define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
+ #define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
+ #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
++#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
++#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
+
+ /* work to do on interrupt/exception return */
+ #define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP)
+ /* work to do on any return to u-space */
+ #define _TIF_ALLWORK_MASK (0x8000ffff & ~_TIF_SECCOMP)
+
+ #endif /* __KERNEL__ */
+
+ #endif /* _ASM_THREAD_INFO_H */
--- /dev/null
+ #ifndef __ASM_SPINLOCK_H
+ #define __ASM_SPINLOCK_H
+
+ #include <asm/system.h>
+ #include <asm/processor.h>
+ #include <asm/spinlock_types.h>
+
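For context: PA-RISC's only atomic memory primitive is ldcw ("load and clear word"), which atomically reads a word and stores zero back, and which requires a 16-byte-aligned operand — that is what __ldcw_align() selects. The polarity is therefore inverted relative to most ports: 1 means free and 0 means locked, which is why is_locked tests for zero and unlock simply stores 1. Roughly, modelled non-atomically:

        /* Approximate semantics of __ldcw(); the real operation is atomic. */
        static unsigned int ldcw_model(volatile unsigned int *a)
        {
                unsigned int old = *a;  /* load the word...            */
                *a = 0;                 /* ...and clear it             */
                return old;             /* 1: lock acquired, 0: busy   */
        }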
+ static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+ {
+ volatile unsigned int *a = __ldcw_align(x);
+ return *a == 0;
+ }
+
+ #define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
+ #define __raw_spin_unlock_wait(x) \
+ do { cpu_relax(); } while (__raw_spin_is_locked(x))
+
+ static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
+ unsigned long flags)
+ {
+ volatile unsigned int *a;
+
+ mb();
+ a = __ldcw_align(x);
+ while (__ldcw(a) == 0)
+ while (*a == 0)
+ if (flags & PSW_SM_I) {
+ local_irq_enable();
+ cpu_relax();
+ local_irq_disable();
+ } else
+ cpu_relax();
+ mb();
+ }
+
+ static inline void __raw_spin_unlock(raw_spinlock_t *x)
+ {
+ volatile unsigned int *a;
+ mb();
+ a = __ldcw_align(x);
+ *a = 1;
+ mb();
+ }
+
+ static inline int __raw_spin_trylock(raw_spinlock_t *x)
+ {
+ volatile unsigned int *a;
+ int ret;
+
+ mb();
+ a = __ldcw_align(x);
+ ret = __ldcw(a) != 0;
+ mb();
+
+ return ret;
+ }
+
+ /*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Linux rwlocks are unfair to writers; they can be starved for an indefinite
+ * time by readers. With care, they can also be taken in interrupt context.
+ *
+ * In the PA-RISC implementation, we have a spinlock and a counter.
+ * Readers use the lock to serialise their access to the counter (which
+ * records how many readers currently hold the lock).
+ * Writers hold the spinlock, preventing any readers or other writers from
+ * grabbing the rwlock.
+ */
+
+ /* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+ static __inline__ void __raw_read_lock(raw_rwlock_t *rw)
+ {
+ unsigned long flags;
+ local_irq_save(flags);
+ __raw_spin_lock_flags(&rw->lock, flags);
+ rw->counter++;
+ __raw_spin_unlock(&rw->lock);
+ local_irq_restore(flags);
+ }
+
+ /* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+ static __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
+ {
+ unsigned long flags;
+ local_irq_save(flags);
+ __raw_spin_lock_flags(&rw->lock, flags);
+ rw->counter--;
+ __raw_spin_unlock(&rw->lock);
+ local_irq_restore(flags);
+ }
+
+ /* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to grab the same read lock */
+ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
+ {
+ unsigned long flags;
+ retry:
+ local_irq_save(flags);
+ if (__raw_spin_trylock(&rw->lock)) {
+ rw->counter++;
+ __raw_spin_unlock(&rw->lock);
+ local_irq_restore(flags);
+ return 1;
+ }
+
+ local_irq_restore(flags);
+ /* If write-locked, we fail to acquire the lock */
+ if (rw->counter < 0)
+ return 0;
+
+ /* Wait until we have a realistic chance at the lock */
+ while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0)
+ cpu_relax();
+
+ goto retry;
+ }
+
+ /* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to read_trylock() this lock */
+ static __inline__ void __raw_write_lock(raw_rwlock_t *rw)
+ {
+ unsigned long flags;
+ retry:
+ local_irq_save(flags);
+ __raw_spin_lock_flags(&rw->lock, flags);
+
+ if (rw->counter != 0) {
+ __raw_spin_unlock(&rw->lock);
+ local_irq_restore(flags);
+
+ while (rw->counter != 0)
+ cpu_relax();
+
+ goto retry;
+ }
+
+ rw->counter = -1; /* mark as write-locked */
+ mb();
+ local_irq_restore(flags);
+ }
+
+ static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
+ {
+ rw->counter = 0;
+ __raw_spin_unlock(&rw->lock);
+ }
+
+ /* Note that we have to ensure interrupts are disabled in case we're
+ * interrupted by some other code that wants to read_trylock() this lock */
+ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
+ {
+ unsigned long flags;
+ int result = 0;
+
+ local_irq_save(flags);
+ if (__raw_spin_trylock(&rw->lock)) {
+ if (rw->counter == 0) {
+ rw->counter = -1;
+ result = 1;
+ } else {
+ /* Read-locked. Oh well. */
+ __raw_spin_unlock(&rw->lock);
+ }
+ }
+ local_irq_restore(flags);
+
+ return result;
+ }
+
+ /*
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw)
+ {
+ return rw->counter >= 0;
+ }
+
+ /*
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw)
+ {
+ return !rw->counter;
+ }
+
++#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
++#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
++
+ #define _raw_spin_relax(lock) cpu_relax()
+ #define _raw_read_relax(lock) cpu_relax()
+ #define _raw_write_relax(lock) cpu_relax()
+
+ #endif /* __ASM_SPINLOCK_H */
#define TIOCSBRK 0x5427 /* BSD compatibility */
#define TIOCCBRK 0x5428 /* BSD compatibility */
#define TIOCGSID 0x5429 /* Return the session ID of FD */
+ #define TIOCGRS485 0x542e /* Get RS485 configuration */
+ #define TIOCSRS485 0x542f /* Set RS485 configuration */
#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
+#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get real dev no below /dev/console */
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
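The new TIOCGRS485/TIOCSRS485 pair operate on a struct serial_rs485 from <linux/serial.h>. A user-space sketch, assuming the underlying serial driver implements these ioctls:

        #include <sys/ioctl.h>
        #include <linux/serial.h>       /* struct serial_rs485, SER_RS485_ENABLED */

        /* Enable RS485 mode on an already-open serial port fd. */
        static int enable_rs485(int fd)
        {
                struct serial_rs485 rs485;

                if (ioctl(fd, TIOCGRS485, &rs485) < 0)  /* read current config */
                        return -1;
                rs485.flags |= SER_RS485_ENABLED;
                return ioctl(fd, TIOCSRS485, &rs485);   /* write it back */
        }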
#include <linux/mqueue.h>
#include <linux/hardirq.h>
#include <linux/utsname.h>
+ #include <linux/kernel_stat.h>
+#include <linux/perfmon_kern.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
+#include <asm/time.h>
#include "pr_util.h"
- #define TRACE_ARRAY_SIZE 1024
#define SCALE_SHIFT 14
static u32 *samples;
--- /dev/null
+menu "Hardware Performance Monitoring support"
+config PERFMON
+ bool "Perfmon2 performance monitoring interface"
+ default n
+ help
+ Enables the perfmon2 interface to access the hardware
+ performance counters. See <http://perfmon2.sf.net/> for
+ more details.
+
+config PERFMON_DEBUG
+ bool "Perfmon debugging"
+ default n
+ depends on PERFMON
+ help
+ Enables perfmon debugging support.
+
+config PERFMON_DEBUG_FS
+ bool "Enable perfmon statistics reporting via debugfs"
+ default y
+ depends on PERFMON && DEBUG_FS
+ help
+ Enable collection and reporting of perfmon timing statistics under
+ debugfs. This is used for debugging and performance analysis of the
+ subsystem. The debugfs filesystem must be mounted.
+
+config PERFMON_POWER4
+ tristate "Support for Power4 hardware performance counters"
+ depends on PERFMON && PPC64
+ default n
+ help
+ Enables support for the Power4 hardware performance counters.
+ If unsure, say M.
+
+config PERFMON_POWER5
+ tristate "Support for Power5 hardware performance counters"
+ depends on PERFMON && PPC64
+ default n
+ help
+ Enables support for the Power5 hardware performance counters.
+ If unsure, say M.
+
+config PERFMON_POWER6
+ tristate "Support for Power6 hardware performance counters"
+ depends on PERFMON && PPC64
+ default n
+ help
+ Enables support for the Power6 hardware performance counters.
+ If unsure, say M.
+
+config PERFMON_PPC32
+ tristate "Support for PPC32 hardware performance counters"
+ depends on PERFMON && PPC32
+ default n
+ help
+ Enables support for the PPC32 hardware performance counters.
+ If unsure, say M.
+
+config PERFMON_CELL
+ tristate "Support for Cell hardware performance counters"
+ depends on PERFMON && PPC_CELL
+ select PS3_LPM if PPC_PS3
++ select SPU_BASE
+ default n
+ help
+ Enables support for the Cell hardware performance counters.
+ If unsure, say M.
+
+endmenu
--- /dev/null
+/*
+ * This file contains the POWER6 PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2007, IBM Corporation
+ *
+ * Based on perfmon_power5.c, and written by Carl Love <carll@us.ibm.com>
+ * and Kevin Corry <kevcorry@us.ibm.com>. Some fixes and refinement by
+ * Corey Ashford <cjashfor@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/perfmon_kern.h>
++#include <asm/time.h>
+
+MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
+MODULE_DESCRIPTION("POWER6 PMU description table");
+MODULE_LICENSE("GPL");
+
+static struct pfm_regmap_desc pfm_power6_pmc_desc[] = {
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
+};
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc)
+#define PFM_DELTA_TB 10000 /* Not real registers */
+#define PFM_DELTA_PURR 10001
+
+/*
+ * Counters wrap to zero at the transition from 2^32-1 to 0. Note: an
+ * interrupt is generated at the transition from 2^31-1 to 2^31.
+ */
+#define OVERFLOW_VALUE 0x100000000UL
+
+/* The TB and PURR registers are read-only. Also, note that the TB register
+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
+ */
+static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
+ /* On POWER6, PMC5 and PMC6 are not writable, do not
+ * generate interrupts, and do not qualify their counts
+ * based on problem mode, supervisor mode or hypervisor mode.
+ * These two counters are implemented as virtual counters
+ * to make them appear to work like the other counters. A
+ * kernel timer is used to sample the real PMC5 and PMC6 and
+ * update the virtual counters.
+ */
+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
+/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5),
+/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6),
+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
+/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB),
+/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR),
+};
+
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc)
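A side note on the TB reads used throughout this file: composing the 64-bit value from SPRN_TBRU and SPRN_TBRL as two separate mfspr operations can tear if the low half wraps between the reads. This driver depends on PPC64; for reference, the classic race-free idiom on 32-bit parts re-reads the upper half, roughly:

        /* Illustrative race-free 64-bit timebase read via the 32-bit halves. */
        static inline u64 read_tb64(void)
        {
                u32 hi, lo, hi2;

                do {
                        hi  = mfspr(SPRN_TBRU);
                        lo  = mfspr(SPRN_TBRL);
                        hi2 = mfspr(SPRN_TBRU); /* retry if the upper half moved */
                } while (hi != hi2);

                return ((u64)hi << 32) | lo;
        }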
+
+u32 pmc5_start_save[NR_CPUS];
+u32 pmc6_start_save[NR_CPUS];
+
+static struct timer_list pmc5_6_update[NR_CPUS];
+u64 enable_cntrs_cnt;
+u64 disable_cntrs_cnt;
+u64 call_delta;
+u64 pm5_6_interrupt;
+u64 pm1_4_interrupt;
+/* We need the ctx_arch for the kernel timer handler; it cannot be
+ * obtained from within the timer context itself.
+ */
+struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS];
+long int update_time;
+
+static void delta(int cpu_num, struct pfm_arch_context *ctx_arch)
+{
+ u32 tmp5, tmp6;
+
+ call_delta++;
+
+ tmp5 = (u32) mfspr(SPRN_PMC5);
+ tmp6 = (u32) mfspr(SPRN_PMC6);
+
+ /*
+ * The following difference calculation relies on 32-bit modular
+ * arithmetic for the deltas to come out correct (especially in the
+ * presence of a 32-bit counter wrap).
+ */
+ ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
+ ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
+
+ pmc5_start_save[cpu_num] = tmp5;
+ pmc6_start_save[cpu_num] = tmp6;
+
+ return;
+}
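The unsigned 32-bit subtractions above are what make the deltas come out right across a counter wrap. A worked example (values chosen for illustration):

        u32 prev = 0xfffffff0;  /* snapshot taken before the wrap */
        u32 cur  = 0x00000010;  /* current PMC value, after the wrap */

        /* (u32)(cur - prev) == 0x20: the true event count, even though
         * cur < prev numerically, thanks to modulo-2^32 arithmetic. */
        u64 d = (u64)(u32)(cur - prev);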
+
+
+static void pmc5_6_updater(unsigned long cpu_num)
+{
+ /* update the virtual pmd 5 and pmd 6 counters */
+
+ delta(cpu_num, pmc5_6_ctx_arch[cpu_num]);
+ mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time);
+}
+
+
+static int pfm_power6_probe_pmu(void)
+{
+ unsigned long pvr = mfspr(SPRN_PVR);
+
+ switch (PVR_VER(pvr)) {
+ case PV_POWER6:
+ return 0;
+ case PV_POWER5p:
+ /* If this is a POWER5+ and the revision is less than 0x300,
+ don't treat it as a POWER6. */
+ return (PVR_REV(pvr) < 0x300) ? -1 : 0;
+ default:
+ return -1;
+ }
+}
+
+static void pfm_power6_write_pmc(unsigned int cnum, u64 value)
+{
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
+ case SPRN_MMCR0:
+ mtspr(SPRN_MMCR0, value);
+ break;
+ case SPRN_MMCR1:
+ mtspr(SPRN_MMCR1, value);
+ break;
+ case SPRN_MMCRA:
+ mtspr(SPRN_MMCRA, value);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void pfm_power6_write_pmd(unsigned int cnum, u64 value)
+{
+ /* On POWER6, PMC5 and PMC6 are implemented as
+ * virtual counters. See comment in pfm_power6_pmd_desc
+ * definition.
+ */
+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
+
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+ case SPRN_PMC1:
+ mtspr(SPRN_PMC1, value & ovfl_mask);
+ break;
+ case SPRN_PMC2:
+ mtspr(SPRN_PMC2, value & ovfl_mask);
+ break;
+ case SPRN_PMC3:
+ mtspr(SPRN_PMC3, value & ovfl_mask);
+ break;
+ case SPRN_PMC4:
+ mtspr(SPRN_PMC4, value & ovfl_mask);
+ break;
+ case SPRN_TBRL:
+ case SPRN_PURR:
+ /* Ignore writes to read-only registers. */
+ break;
+ default:
+ BUG();
+ }
+}
+
+static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum)
+{
+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+ int cpu_num = smp_processor_id();
+
+ /* On POWER6, PMC5 and PMC6 are implemented as
+ * virtual counters. See comment in pfm_power6_pmd_desc
+ * definition.
+ */
+
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+ case SPRN_PMC5:
+ return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]);
+
+ case SPRN_PMC6:
+ return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]);
+
+ case PFM_DELTA_TB:
+ return ctx_arch->delta_tb
+ + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
+ - ctx_arch->delta_tb_start;
+
+ case PFM_DELTA_PURR:
+ return ctx_arch->delta_purr
+ + mfspr(SPRN_PURR)
+ - ctx_arch->delta_purr_start;
+
+ default:
+ BUG();
+ }
+}
+
+void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum,
+ u64 val)
+{
+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+ int cpu_num = smp_processor_id();
+
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+ case SPRN_PMC5:
+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
+ ctx_arch->powergs_pmc5 = val;
+ break;
+
+ case SPRN_PMC6:
+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+ ctx_arch->powergs_pmc6 = val;
+ break;
+
+ case PFM_DELTA_TB:
+ ctx_arch->delta_tb_start =
+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL));
+ ctx_arch->delta_tb = val;
+ break;
+
+ case PFM_DELTA_PURR:
+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
+ ctx_arch->delta_purr = val;
+ break;
+
+ default:
+ BUG();
+ }
+}
+
+static u64 pfm_power6_read_pmd(unsigned int cnum)
+{
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+ case SPRN_PMC1:
+ return mfspr(SPRN_PMC1);
+ case SPRN_PMC2:
+ return mfspr(SPRN_PMC2);
+ case SPRN_PMC3:
+ return mfspr(SPRN_PMC3);
+ case SPRN_PMC4:
+ return mfspr(SPRN_PMC4);
+ case SPRN_TBRL:
+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
+ case SPRN_PURR:
+ if (cpu_has_feature(CPU_FTR_PURR))
+ return mfspr(SPRN_PURR);
+ else
+ return 0;
+ default:
+ BUG();
+ }
+}
+
+
+/**
+ * pfm_power6_enable_counters
+ *
+ **/
+static void pfm_power6_enable_counters(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+
+ unsigned int i, max_pmc;
+ int cpu_num = smp_processor_id();
+ struct pfm_arch_context *ctx_arch;
+
+ enable_cntrs_cnt++;
+
+ /* need the ctx passed down to the routine */
+ ctx_arch = pfm_ctx_arch(ctx);
+ max_pmc = ctx->regs.max_pmc;
+
+ /* Write MMCR0 last, and a fairly easy way to do this is to write
+ the registers in reverse order. */
+ for (i = max_pmc; i != 0; i--)
+ if (test_bit(i - 1, set->used_pmcs))
+ pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
+
+ /* save current free running HW event count */
+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+
+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
+
+ if (cpu_has_feature(CPU_FTR_PURR))
+ ctx_arch->delta_tb_start =
+ ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
+ else
+ ctx_arch->delta_tb_start = 0;
+
+ /* Start kernel timer for this cpu to periodically update
+ * the virtual counters.
+ */
+ init_timer(&pmc5_6_update[cpu_num]);
+ pmc5_6_update[cpu_num].function = pmc5_6_updater;
+ pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
+ pmc5_6_update[cpu_num].expires = jiffies + update_time;
+ /* Save the context for this timer; the timer will be removed if
+ * the context is switched, because the counters are stopped first.
+ * NEEDS WORK: this is believed to be safe, but there may be a race
+ * between the kernel timer firing just as the counters are being
+ * stopped and the context switching.
+ */
+ pmc5_6_ctx_arch[cpu_num] = ctx_arch;
+ add_timer(&pmc5_6_update[cpu_num]);
+}
+
+/**
+ * pfm_power6_disable_counters
+ *
+ **/
+static void pfm_power6_disable_counters(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+ struct pfm_arch_context *ctx_arch;
+ int cpu_num = smp_processor_id();
+
+ disable_cntrs_cnt++;
+
+ /* Set the Freeze Counters bit */
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+ asm volatile ("sync");
+
+ /* delete kernel update timer */
+ del_timer_sync(&pmc5_6_update[cpu_num]);
+
+ /* Update the virtual pmd 5 and 6 counters from the free running
+ * HW counters
+ */
+ ctx_arch = pfm_ctx_arch(ctx);
+ delta(cpu_num, ctx_arch);
+
+ ctx_arch->delta_tb +=
+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
+ - ctx_arch->delta_tb_start;
+
+ ctx_arch->delta_purr += mfspr(SPRN_PURR)
+ - ctx_arch->delta_purr_start;
+}
+
+/**
+ * pfm_power6_get_ovfl_pmds
+ *
+ * Determine which counters in this set have overflowed and fill in the
+ * set->povfl_pmds mask and set->npend_ovfls count.
+ **/
+static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx,
+ struct pfm_event_set *set)
+{
+ unsigned int i;
+ unsigned int first_intr_pmd = ctx->regs.first_intr_pmd;
+ unsigned int max_intr_pmd = ctx->regs.max_intr_pmd;
+ u64 *used_pmds = set->used_pmds;
+ u64 *cntr_pmds = ctx->regs.cnt_pmds;
+ u64 width_mask = 1ULL << pfm_pmu_conf->counter_width;
+ u64 new_val, mask[PFM_PMD_BV];
+
+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd);
+
+ /* max_intr_pmd is actually the last interrupting pmd register + 1 */
+ for (i = first_intr_pmd; i < max_intr_pmd; i++) {
+ if (test_bit(i, mask)) {
+ new_val = pfm_power6_read_pmd(i);
+ if (new_val & width_mask) {
+ set_bit(i, set->povfl_pmds);
+ set->npend_ovfls++;
+ }
+ }
+ }
+}
+
+static void pfm_power6_irq_handler(struct pt_regs *regs,
+ struct pfm_context *ctx)
+{
+ u32 mmcr0;
+ u64 mmcra;
+
+ /* Disable the counters (set the freeze bit) so as not to pollute
+ * the counts.
+ */
+ mmcr0 = mfspr(SPRN_MMCR0);
+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
+ mmcra = mfspr(SPRN_MMCRA);
+
+ /* Set the PMM bit (see comment below). */
+ mtmsrd(mfmsr() | MSR_PMM);
+
+ pm1_4_interrupt++;
+
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
+
+ mmcr0 = mfspr(SPRN_MMCR0);
+
+ /*
+ * Reset the perfmon trigger if
+ * not in masking mode.
+ */
+ if (ctx->state != PFM_CTX_MASKED)
+ mmcr0 |= MMCR0_PMXE;
+
+ /*
+ * Clear the PMU Alert Occurred bit
+ */
+ mmcr0 &= ~MMCR0_PMAO;
+
+ /* Clear the appropriate bits in the MMCRA. */
+ mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER);
+ mtspr(SPRN_MMCRA, mmcra);
+
+ /*
+ * Now clear the freeze bit, counting will not start until we
+ * rfid from this exception, because only at that point will
+ * the PMM bit be cleared.
+ */
+ mmcr0 &= ~MMCR0_FC;
+ mtspr(SPRN_MMCR0, mmcr0);
+}
+
+static void pfm_power6_resend_irq(struct pfm_context *ctx)
+{
+ /*
+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we
+ * trigger the edge detection circuitry for PMAO.
+ */
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
+}
+
+struct pfm_arch_pmu_info pfm_power6_pmu_info = {
+ .pmu_style = PFM_POWERPC_PMU_POWER6,
+ .write_pmc = pfm_power6_write_pmc,
+ .write_pmd = pfm_power6_write_pmd,
+ .read_pmd = pfm_power6_read_pmd,
+ .irq_handler = pfm_power6_irq_handler,
+ .get_ovfl_pmds = pfm_power6_get_ovfl_pmds,
+ .enable_counters = pfm_power6_enable_counters,
+ .disable_counters = pfm_power6_disable_counters,
+ .resend_irq = pfm_power6_resend_irq
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static struct pfm_pmu_config pfm_power6_pmu_conf = {
+ .pmu_name = "POWER6",
+ .counter_width = 31,
+ .pmd_desc = pfm_power6_pmd_desc,
+ .pmc_desc = pfm_power6_pmc_desc,
+ .num_pmc_entries = PFM_PM_NUM_PMCS,
+ .num_pmd_entries = PFM_PM_NUM_PMDS,
+ .probe_pmu = pfm_power6_probe_pmu,
+ .pmu_info = &pfm_power6_pmu_info,
+ .pmd_sread = pfm_power6_sread,
+ .pmd_swrite = pfm_power6_swrite,
+ .flags = PFM_PMU_BUILTIN_FLAG,
+ .owner = THIS_MODULE
+};
+
+static int __init pfm_power6_pmu_init_module(void)
+{
+ int ret;
+ disable_cntrs_cnt = 0;
+ enable_cntrs_cnt = 0;
+ call_delta = 0;
+ pm5_6_interrupt = 0;
+ pm1_4_interrupt = 0;
+
+ /* calculate the time for updating counters 5 and 6 */
+
+ /*
+ * MAX_EVENT_RATE assumes a max instruction issue rate of 2
+ * instructions per clock cycle. Experience shows that this factor
+ * of 2 is more than adequate.
+ */
+
+# define MAX_EVENT_RATE (ppc_proc_freq * 2)
+
+ /*
+ * Calculate the time, in jiffies, it takes for event counter 5 or
+ * 6 to completely wrap when counting at the max event rate, and
+ * then figure on sampling at twice that rate.
+ */
+ update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
+ / ((unsigned long)MAX_EVENT_RATE)) / 2;
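+ /*
+ * Worked example (frequency assumed for illustration): with
+ * ppc_proc_freq = 4 GHz, MAX_EVENT_RATE is 8e9 events/sec; a 32-bit
+ * counter can then wrap in 2^32 / 8e9 = ~0.54s, and halving that
+ * gives an update_time of ~0.27s, about 67 jiffies at HZ=250.
+ */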
+
+ ret = pfm_pmu_register(&pfm_power6_pmu_conf);
+ return ret;
+}
+
+static void __exit pfm_power6_pmu_cleanup_module(void)
+{
+ pfm_pmu_unregister(&pfm_power6_pmu_conf);
+}
+
+module_init(pfm_power6_pmu_init_module);
+module_exit(pfm_power6_pmu_cleanup_module);
int ret;
struct spu_context *ctx = file->private_data;
++ /* pre-check for file position: if we'd return EOF, there's no point
++ * causing a deschedule */
++ if (*pos >= sizeof(ctx->csa.lscsa->gprs))
++ return 0;
++
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
source "fs/Kconfig"
++source "arch/sparc/perfmon/Kconfig"
++
source "arch/sparc/Kconfig.debug"
source "security/Kconfig"
drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/
++core-$(CONFIG_PERFMON) += arch/sparc/perfmon/
++
# Export what is needed by arch/sparc/boot/Makefile
- # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-)
- INIT_Y := $(patsubst %/, %/built-in.o, $(init-y))
- CORE_Y := $(core-y)
- CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
- CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y))
- DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y))
- NET_Y := $(patsubst %/, %/built-in.o, $(net-y))
- LIBS_Y1 := $(patsubst %/, %/lib.a, $(libs-y))
- LIBS_Y2 := $(patsubst %/, %/built-in.o, $(libs-y))
- LIBS_Y := $(LIBS_Y1) $(LIBS_Y2)
+ export VMLINUX_INIT VMLINUX_MAIN
+ VMLINUX_INIT := $(head-y) $(init-y)
+ VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+ VMLINUX_MAIN += $(patsubst %/, %/lib.a, $(libs-y)) $(libs-y)
+ VMLINUX_MAIN += $(drivers-y) $(net-y)
ifdef CONFIG_KALLSYMS
- kallsyms.o := .tmp_kallsyms2.o
+ export kallsyms.o := .tmp_kallsyms2.o
endif
- export INIT_Y CORE_Y DRIVERS_Y NET_Y LIBS_Y HEAD_Y kallsyms.o
-
- # Default target
- all: zImage
-
boot := arch/sparc/boot
- image zImage tftpboot.img: vmlinux
+ image zImage tftpboot.img vmlinux.aout: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
archclean:
#define sparc_cpu_model sun4u
- /* This cannot ever be a sun4c nor sun4 :) That's just history. */
- #define ARCH_SUN4C_SUN4 0
- #define ARCH_SUN4 0
+ /* This cannot ever be a sun4c :) That's just history. */
+ #define ARCH_SUN4C 0
+extern char *sparc_cpu_type;
+extern char *sparc_fpu_type;
+extern char *sparc_pmu_type;
extern char reboot_command[];
/* These are here in an effort to more fully work around Spitfire Errata
#define TIF_ABI_PENDING 12
#define TIF_MEMDIE 13
#define TIF_POLLING_NRFLAG 14
- #define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */
+ #define TIF_FREEZE 15 /* is freezing for suspend */
++#define TIF_PERFMON_CTXSW 16 /* perfmon needs ctxsw calls */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+ #define _TIF_FREEZE (1<<TIF_FREEZE)
+#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
_TIF_DO_NOTIFY_RESUME_MASK | \
- #ifndef ___ASM_SPARC_UNISTD_H
- #define ___ASM_SPARC_UNISTD_H
- #if defined(__sparc__) && defined(__arch64__)
- #include <asm/unistd_64.h>
+ #ifndef _SPARC_UNISTD_H
+ #define _SPARC_UNISTD_H
+
+ /*
+ * System calls under the Sparc.
+ *
+ * Don't be scared by the ugly clobbers, it is the only way I can
+ * think of right now to force the arguments into fixed registers
+ * before the trap into the system call with gcc 'asm' statements.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * SunOS compatibility based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+ #ifndef __32bit_syscall_numbers__
+ #ifndef __arch64__
+ #define __32bit_syscall_numbers__
+ #endif
+ #endif
+
+ #define __NR_restart_syscall 0 /* Linux Specific */
+ #define __NR_exit 1 /* Common */
+ #define __NR_fork 2 /* Common */
+ #define __NR_read 3 /* Common */
+ #define __NR_write 4 /* Common */
+ #define __NR_open 5 /* Common */
+ #define __NR_close 6 /* Common */
+ #define __NR_wait4 7 /* Common */
+ #define __NR_creat 8 /* Common */
+ #define __NR_link 9 /* Common */
+ #define __NR_unlink 10 /* Common */
+ #define __NR_execv 11 /* SunOS Specific */
+ #define __NR_chdir 12 /* Common */
+ #define __NR_chown 13 /* Common */
+ #define __NR_mknod 14 /* Common */
+ #define __NR_chmod 15 /* Common */
+ #define __NR_lchown 16 /* Common */
+ #define __NR_brk 17 /* Common */
+ #define __NR_perfctr 18 /* Performance counter operations */
+ #define __NR_lseek 19 /* Common */
+ #define __NR_getpid 20 /* Common */
+ #define __NR_capget 21 /* Linux Specific */
+ #define __NR_capset 22 /* Linux Specific */
+ #define __NR_setuid 23 /* Implemented via setreuid in SunOS */
+ #define __NR_getuid 24 /* Common */
+ #define __NR_vmsplice 25 /* ENOSYS under SunOS */
+ #define __NR_ptrace 26 /* Common */
+ #define __NR_alarm 27 /* Implemented via setitimer in SunOS */
+ #define __NR_sigaltstack 28 /* Common */
+ #define __NR_pause 29 /* Is sigblock(0)->sigpause() in SunOS */
+ #define __NR_utime 30 /* Implemented via utimes() under SunOS */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_lchown32 31 /* Linux sparc32 specific */
+ #define __NR_fchown32 32 /* Linux sparc32 specific */
+ #endif
+ #define __NR_access 33 /* Common */
+ #define __NR_nice 34 /* Implemented via get/setpriority() in SunOS */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_chown32 35 /* Linux sparc32 specific */
+ #endif
+ #define __NR_sync 36 /* Common */
+ #define __NR_kill 37 /* Common */
+ #define __NR_stat 38 /* Common */
+ #define __NR_sendfile 39 /* Linux Specific */
+ #define __NR_lstat 40 /* Common */
+ #define __NR_dup 41 /* Common */
+ #define __NR_pipe 42 /* Common */
+ #define __NR_times 43 /* Implemented via getrusage() in SunOS */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_getuid32 44 /* Linux sparc32 specific */
+ #endif
+ #define __NR_umount2 45 /* Linux Specific */
+ #define __NR_setgid 46 /* Implemented via setregid() in SunOS */
+ #define __NR_getgid 47 /* Common */
+ #define __NR_signal 48 /* Implemented via sigvec() in SunOS */
+ #define __NR_geteuid 49 /* SunOS calls getuid() */
+ #define __NR_getegid 50 /* SunOS calls getgid() */
+ #define __NR_acct 51 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_getgid32 53 /* Linux sparc32 specific */
+ #else
+ #define __NR_memory_ordering 52 /* Linux Specific */
+ #endif
+ #define __NR_ioctl 54 /* Common */
+ #define __NR_reboot 55 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_mmap2 56 /* Linux sparc32 Specific */
+ #endif
+ #define __NR_symlink 57 /* Common */
+ #define __NR_readlink 58 /* Common */
+ #define __NR_execve 59 /* Common */
+ #define __NR_umask 60 /* Common */
+ #define __NR_chroot 61 /* Common */
+ #define __NR_fstat 62 /* Common */
+ #define __NR_fstat64 63 /* Linux Specific */
+ #define __NR_getpagesize 64 /* Common */
+ #define __NR_msync 65 /* Common in newer 1.3.x revs... */
+ #define __NR_vfork 66 /* Common */
+ #define __NR_pread64 67 /* Linux Specific */
+ #define __NR_pwrite64 68 /* Linux Specific */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_geteuid32 69 /* Linux sparc32, sbrk under SunOS */
+ #define __NR_getegid32 70 /* Linux sparc32, sstk under SunOS */
+ #endif
+ #define __NR_mmap 71 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setreuid32 72 /* Linux sparc32, vadvise under SunOS */
+ #endif
+ #define __NR_munmap 73 /* Common */
+ #define __NR_mprotect 74 /* Common */
+ #define __NR_madvise 75 /* Common */
+ #define __NR_vhangup 76 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_truncate64 77 /* Linux sparc32 Specific */
+ #endif
+ #define __NR_mincore 78 /* Common */
+ #define __NR_getgroups 79 /* Common */
+ #define __NR_setgroups 80 /* Common */
+ #define __NR_getpgrp 81 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setgroups32 82 /* Linux sparc32, setpgrp under SunOS */
+ #endif
+ #define __NR_setitimer 83 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_ftruncate64 84 /* Linux sparc32 Specific */
+ #endif
+ #define __NR_swapon 85 /* Common */
+ #define __NR_getitimer 86 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setuid32 87 /* Linux sparc32, gethostname under SunOS */
+ #endif
+ #define __NR_sethostname 88 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setgid32 89 /* Linux sparc32, getdtablesize under SunOS */
+ #endif
+ #define __NR_dup2 90 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setfsuid32 91 /* Linux sparc32, getdopt under SunOS */
+ #endif
+ #define __NR_fcntl 92 /* Common */
+ #define __NR_select 93 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setfsgid32 94 /* Linux sparc32, setdopt under SunOS */
+ #endif
+ #define __NR_fsync 95 /* Common */
+ #define __NR_setpriority 96 /* Common */
+ #define __NR_socket 97 /* Common */
+ #define __NR_connect 98 /* Common */
+ #define __NR_accept 99 /* Common */
+ #define __NR_getpriority 100 /* Common */
+ #define __NR_rt_sigreturn 101 /* Linux Specific */
+ #define __NR_rt_sigaction 102 /* Linux Specific */
+ #define __NR_rt_sigprocmask 103 /* Linux Specific */
+ #define __NR_rt_sigpending 104 /* Linux Specific */
+ #define __NR_rt_sigtimedwait 105 /* Linux Specific */
+ #define __NR_rt_sigqueueinfo 106 /* Linux Specific */
+ #define __NR_rt_sigsuspend 107 /* Linux Specific */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_setresuid32 108 /* Linux Specific, sigvec under SunOS */
+ #define __NR_getresuid32 109 /* Linux Specific, sigblock under SunOS */
+ #define __NR_setresgid32 110 /* Linux Specific, sigsetmask under SunOS */
+ #define __NR_getresgid32 111 /* Linux Specific, sigpause under SunOS */
+ #define __NR_setregid32 112 /* Linux sparc32, sigstack under SunOS */
+ #else
+ #define __NR_setresuid 108 /* Linux Specific, sigvec under SunOS */
+ #define __NR_getresuid 109 /* Linux Specific, sigblock under SunOS */
+ #define __NR_setresgid 110 /* Linux Specific, sigsetmask under SunOS */
+ #define __NR_getresgid 111 /* Linux Specific, sigpause under SunOS */
+ #endif
+ #define __NR_recvmsg 113 /* Common */
+ #define __NR_sendmsg 114 /* Common */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_getgroups32 115 /* Linux sparc32, vtrace under SunOS */
+ #endif
+ #define __NR_gettimeofday 116 /* Common */
+ #define __NR_getrusage 117 /* Common */
+ #define __NR_getsockopt 118 /* Common */
+ #define __NR_getcwd 119 /* Linux Specific */
+ #define __NR_readv 120 /* Common */
+ #define __NR_writev 121 /* Common */
+ #define __NR_settimeofday 122 /* Common */
+ #define __NR_fchown 123 /* Common */
+ #define __NR_fchmod 124 /* Common */
+ #define __NR_recvfrom 125 /* Common */
+ #define __NR_setreuid 126 /* Common */
+ #define __NR_setregid 127 /* Common */
+ #define __NR_rename 128 /* Common */
+ #define __NR_truncate 129 /* Common */
+ #define __NR_ftruncate 130 /* Common */
+ #define __NR_flock 131 /* Common */
+ #define __NR_lstat64 132 /* Linux Specific */
+ #define __NR_sendto 133 /* Common */
+ #define __NR_shutdown 134 /* Common */
+ #define __NR_socketpair 135 /* Common */
+ #define __NR_mkdir 136 /* Common */
+ #define __NR_rmdir 137 /* Common */
+ #define __NR_utimes 138 /* SunOS Specific */
+ #define __NR_stat64 139 /* Linux Specific */
+ #define __NR_sendfile64 140 /* adjtime under SunOS */
+ #define __NR_getpeername 141 /* Common */
+ #define __NR_futex 142 /* gethostid under SunOS */
+ #define __NR_gettid 143 /* ENOSYS under SunOS */
+ #define __NR_getrlimit 144 /* Common */
+ #define __NR_setrlimit 145 /* Common */
+ #define __NR_pivot_root 146 /* Linux Specific, killpg under SunOS */
+ #define __NR_prctl 147 /* ENOSYS under SunOS */
+ #define __NR_pciconfig_read 148 /* ENOSYS under SunOS */
+ #define __NR_pciconfig_write 149 /* ENOSYS under SunOS */
+ #define __NR_getsockname 150 /* Common */
+ #define __NR_inotify_init 151 /* Linux specific */
+ #define __NR_inotify_add_watch 152 /* Linux specific */
+ #define __NR_poll 153 /* Common */
+ #define __NR_getdents64 154 /* Linux specific */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_fcntl64 155 /* Linux sparc32 Specific */
+ #endif
+ #define __NR_inotify_rm_watch 156 /* Linux specific */
+ #define __NR_statfs 157 /* Common */
+ #define __NR_fstatfs 158 /* Common */
+ #define __NR_umount 159 /* Common */
+ #define __NR_sched_set_affinity 160 /* Linux specific, async_daemon under SunOS */
+ #define __NR_sched_get_affinity 161 /* Linux specific, getfh under SunOS */
+ #define __NR_getdomainname 162 /* SunOS Specific */
+ #define __NR_setdomainname 163 /* Common */
+ #ifndef __32bit_syscall_numbers__
+ #define __NR_utrap_install 164 /* SYSV ABI/v9 required */
+ #endif
+ #define __NR_quotactl 165 /* Common */
+ #define __NR_set_tid_address 166 /* Linux specific, exportfs under SunOS */
+ #define __NR_mount 167 /* Common */
+ #define __NR_ustat 168 /* Common */
+ #define __NR_setxattr 169 /* SunOS: semsys */
+ #define __NR_lsetxattr 170 /* SunOS: msgsys */
+ #define __NR_fsetxattr 171 /* SunOS: shmsys */
+ #define __NR_getxattr 172 /* SunOS: auditsys */
+ #define __NR_lgetxattr 173 /* SunOS: rfssys */
+ #define __NR_getdents 174 /* Common */
+ #define __NR_setsid 175 /* Common */
+ #define __NR_fchdir 176 /* Common */
+ #define __NR_fgetxattr 177 /* SunOS: fchroot */
+ #define __NR_listxattr 178 /* SunOS: vpixsys */
+ #define __NR_llistxattr 179 /* SunOS: aioread */
+ #define __NR_flistxattr 180 /* SunOS: aiowrite */
+ #define __NR_removexattr 181 /* SunOS: aiowait */
+ #define __NR_lremovexattr 182 /* SunOS: aiocancel */
+ #define __NR_sigpending 183 /* Common */
+ #define __NR_query_module 184 /* Linux Specific */
+ #define __NR_setpgid 185 /* Common */
+ #define __NR_fremovexattr 186 /* SunOS: pathconf */
+ #define __NR_tkill 187 /* SunOS: fpathconf */
+ #define __NR_exit_group 188 /* Linux specific, sysconf under SunOS */
+ #define __NR_uname 189 /* Linux Specific */
+ #define __NR_init_module 190 /* Linux Specific */
+ #define __NR_personality 191 /* Linux Specific */
+ #define __NR_remap_file_pages 192 /* Linux Specific */
+ #define __NR_epoll_create 193 /* Linux Specific */
+ #define __NR_epoll_ctl 194 /* Linux Specific */
+ #define __NR_epoll_wait 195 /* Linux Specific */
+ #define __NR_ioprio_set 196 /* Linux Specific */
+ #define __NR_getppid 197 /* Linux Specific */
+ #define __NR_sigaction 198 /* Linux Specific */
+ #define __NR_sgetmask 199 /* Linux Specific */
+ #define __NR_ssetmask 200 /* Linux Specific */
+ #define __NR_sigsuspend 201 /* Linux Specific */
+ #define __NR_oldlstat 202 /* Linux Specific */
+ #define __NR_uselib 203 /* Linux Specific */
+ #define __NR_readdir 204 /* Linux Specific */
+ #define __NR_readahead 205 /* Linux Specific */
+ #define __NR_socketcall 206 /* Linux Specific */
+ #define __NR_syslog 207 /* Linux Specific */
+ #define __NR_lookup_dcookie 208 /* Linux Specific */
+ #define __NR_fadvise64 209 /* Linux Specific */
+ #define __NR_fadvise64_64 210 /* Linux Specific */
+ #define __NR_tgkill 211 /* Linux Specific */
+ #define __NR_waitpid 212 /* Linux Specific */
+ #define __NR_swapoff 213 /* Linux Specific */
+ #define __NR_sysinfo 214 /* Linux Specific */
+ #define __NR_ipc 215 /* Linux Specific */
+ #define __NR_sigreturn 216 /* Linux Specific */
+ #define __NR_clone 217 /* Linux Specific */
+ #define __NR_ioprio_get 218 /* Linux Specific */
+ #define __NR_adjtimex 219 /* Linux Specific */
+ #define __NR_sigprocmask 220 /* Linux Specific */
+ #define __NR_create_module 221 /* Linux Specific */
+ #define __NR_delete_module 222 /* Linux Specific */
+ #define __NR_get_kernel_syms 223 /* Linux Specific */
+ #define __NR_getpgid 224 /* Linux Specific */
+ #define __NR_bdflush 225 /* Linux Specific */
+ #define __NR_sysfs 226 /* Linux Specific */
+ #define __NR_afs_syscall 227 /* Linux Specific */
+ #define __NR_setfsuid 228 /* Linux Specific */
+ #define __NR_setfsgid 229 /* Linux Specific */
+ #define __NR__newselect 230 /* Linux Specific */
+ #ifdef __32bit_syscall_numbers__
+ #define __NR_time 231 /* Linux Specific */
#else
- #include <asm/unistd_32.h>
+ #ifdef __KERNEL__
+ #define __NR_time 231 /* Linux sparc32 */
+ #endif
+ #endif
+ #define __NR_splice 232 /* Linux Specific */
+ #define __NR_stime 233 /* Linux Specific */
+ #define __NR_statfs64 234 /* Linux Specific */
+ #define __NR_fstatfs64 235 /* Linux Specific */
+ #define __NR__llseek 236 /* Linux Specific */
+ #define __NR_mlock 237
+ #define __NR_munlock 238
+ #define __NR_mlockall 239
+ #define __NR_munlockall 240
+ #define __NR_sched_setparam 241
+ #define __NR_sched_getparam 242
+ #define __NR_sched_setscheduler 243
+ #define __NR_sched_getscheduler 244
+ #define __NR_sched_yield 245
+ #define __NR_sched_get_priority_max 246
+ #define __NR_sched_get_priority_min 247
+ #define __NR_sched_rr_get_interval 248
+ #define __NR_nanosleep 249
+ #define __NR_mremap 250
+ #define __NR__sysctl 251
+ #define __NR_getsid 252
+ #define __NR_fdatasync 253
+ #define __NR_nfsservctl 254
+ #define __NR_sync_file_range 255
+ #define __NR_clock_settime 256
+ #define __NR_clock_gettime 257
+ #define __NR_clock_getres 258
+ #define __NR_clock_nanosleep 259
+ #define __NR_sched_getaffinity 260
+ #define __NR_sched_setaffinity 261
+ #define __NR_timer_settime 262
+ #define __NR_timer_gettime 263
+ #define __NR_timer_getoverrun 264
+ #define __NR_timer_delete 265
+ #define __NR_timer_create 266
+ /* #define __NR_vserver 267 Reserved for VSERVER */
+ #define __NR_io_setup 268
+ #define __NR_io_destroy 269
+ #define __NR_io_submit 270
+ #define __NR_io_cancel 271
+ #define __NR_io_getevents 272
+ #define __NR_mq_open 273
+ #define __NR_mq_unlink 274
+ #define __NR_mq_timedsend 275
+ #define __NR_mq_timedreceive 276
+ #define __NR_mq_notify 277
+ #define __NR_mq_getsetattr 278
+ #define __NR_waitid 279
+ #define __NR_tee 280
+ #define __NR_add_key 281
+ #define __NR_request_key 282
+ #define __NR_keyctl 283
+ #define __NR_openat 284
+ #define __NR_mkdirat 285
+ #define __NR_mknodat 286
+ #define __NR_fchownat 287
+ #define __NR_futimesat 288
+ #define __NR_fstatat64 289
+ #define __NR_unlinkat 290
+ #define __NR_renameat 291
+ #define __NR_linkat 292
+ #define __NR_symlinkat 293
+ #define __NR_readlinkat 294
+ #define __NR_fchmodat 295
+ #define __NR_faccessat 296
+ #define __NR_pselect6 297
+ #define __NR_ppoll 298
+ #define __NR_unshare 299
+ #define __NR_set_robust_list 300
+ #define __NR_get_robust_list 301
+ #define __NR_migrate_pages 302
+ #define __NR_mbind 303
+ #define __NR_get_mempolicy 304
+ #define __NR_set_mempolicy 305
+ #define __NR_kexec_load 306
+ #define __NR_move_pages 307
+ #define __NR_getcpu 308
+ #define __NR_epoll_pwait 309
+ #define __NR_utimensat 310
+ #define __NR_signalfd 311
+ #define __NR_timerfd_create 312
+ #define __NR_eventfd 313
+ #define __NR_fallocate 314
+ #define __NR_timerfd_settime 315
+ #define __NR_timerfd_gettime 316
+ #define __NR_signalfd4 317
+ #define __NR_eventfd2 318
+ #define __NR_epoll_create1 319
+ #define __NR_dup3 320
+ #define __NR_pipe2 321
+ #define __NR_inotify_init1 322
+ #define __NR_accept4 323
++#define __NR_pfm_create_context 324
++#define __NR_pfm_write_pmcs 325
++#define __NR_pfm_write_pmds 326
++#define __NR_pfm_read_pmds 327
++#define __NR_pfm_load_context 328
++#define __NR_pfm_start 329
++#define __NR_pfm_stop 330
++#define __NR_pfm_restart 331
++#define __NR_pfm_create_evtsets 332
++#define __NR_pfm_getinfo_evtsets 333
++#define __NR_pfm_delete_evtsets 334
++#define __NR_pfm_unload_context 335
+
-#define NR_SYSCALLS 324
++#define NR_SYSCALLS 336
+
+ #ifdef __32bit_syscall_numbers__
+ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
+ * it never had the plain ones and there is no value to adding those
+ * old versions into the syscall table.
+ */
+ #define __IGNORE_setresuid
+ #define __IGNORE_getresuid
+ #define __IGNORE_setresgid
+ #define __IGNORE_getresgid
#endif
+
+ #ifdef __KERNEL__
+ #define __ARCH_WANT_IPC_PARSE_VERSION
+ #define __ARCH_WANT_OLD_READDIR
+ #define __ARCH_WANT_STAT64
+ #define __ARCH_WANT_SYS_ALARM
+ #define __ARCH_WANT_SYS_GETHOSTNAME
+ #define __ARCH_WANT_SYS_PAUSE
+ #define __ARCH_WANT_SYS_SGETMASK
+ #define __ARCH_WANT_SYS_SIGNAL
+ #define __ARCH_WANT_SYS_TIME
+ #define __ARCH_WANT_SYS_UTIME
+ #define __ARCH_WANT_SYS_WAITPID
+ #define __ARCH_WANT_SYS_SOCKETCALL
+ #define __ARCH_WANT_SYS_FADVISE64
+ #define __ARCH_WANT_SYS_GETPGRP
+ #define __ARCH_WANT_SYS_LLSEEK
+ #define __ARCH_WANT_SYS_NICE
+ #define __ARCH_WANT_SYS_OLDUMOUNT
+ #define __ARCH_WANT_SYS_SIGPENDING
+ #define __ARCH_WANT_SYS_SIGPROCMASK
+ #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+ #ifndef __32bit_syscall_numbers__
+ #define __ARCH_WANT_COMPAT_SYS_TIME
+ #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#endif
+
+ /*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+ #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+
+ #endif /* __KERNEL__ */
+ #endif /* _SPARC_UNISTD_H */
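The weak-alias trick the cond_syscall() comment above describes is easiest to see in isolation. A minimal GCC/ELF sketch (userspace, not kernel code; sys_ni_stub and my_call are made-up names) of the behavior the macro arranges:

#include <stdio.h>

long sys_ni_stub(void) { return -38; /* -ENOSYS */ }

/* Weak alias: resolves to the stub unless a strong my_call is linked in.
 * The kernel macro spells this in raw asm because the attribute form
 * does not work on all toolchains.
 */
long my_call(void) __attribute__((weak, alias("sys_ni_stub")));

int main(void)
{
	printf("my_call() = %ld\n", my_call()); /* -38 unless overridden */
	return 0;
}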
return 0;
}
+ static struct of_device_id __initdata apc_match[] = {
+ {
+ .name = APC_OBPNAME,
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, apc_match);
+
+ static struct of_platform_driver apc_driver = {
++ .owner = THIS_MODULE,
+ .name = "apc",
+ .match_table = apc_match,
+ .probe = apc_probe,
+ };
+
+ static int __init apc_init(void)
+ {
+ return of_register_driver(&apc_driver, &of_bus_type);
+ }
+
/* This driver is not critical to the boot process
* and is easiest to ioremap when SBus is already
* initialized, so we install ourselves thusly:
--- /dev/null
+ /* auxio.c: Probing for the Sparc AUXIO register at boot time.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net)
+ */
+
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+ #include <linux/ioport.h>
+ #include <linux/of_device.h>
+
+ #include <asm/prom.h>
+ #include <asm/io.h>
+ #include <asm/auxio.h>
+
+ void __iomem *auxio_register = NULL;
+ EXPORT_SYMBOL(auxio_register);
+
+ enum auxio_type {
+ AUXIO_TYPE_NODEV,
+ AUXIO_TYPE_SBUS,
+ AUXIO_TYPE_EBUS
+ };
+
+ static enum auxio_type auxio_devtype = AUXIO_TYPE_NODEV;
+ static DEFINE_SPINLOCK(auxio_lock);
+
+ static void __auxio_rmw(u8 bits_on, u8 bits_off, int ebus)
+ {
+ if (auxio_register) {
+ unsigned long flags;
+ u8 regval, newval;
+
+ spin_lock_irqsave(&auxio_lock, flags);
+
+ regval = (ebus ?
+ (u8) readl(auxio_register) :
+ sbus_readb(auxio_register));
+ newval = regval | bits_on;
+ newval &= ~bits_off;
+ if (!ebus)
+ newval &= ~AUXIO_AUX1_MASK;
+ if (ebus)
+ writel((u32) newval, auxio_register);
+ else
+ sbus_writeb(newval, auxio_register);
+
+ spin_unlock_irqrestore(&auxio_lock, flags);
+ }
+ }
+
+ static void __auxio_set_bit(u8 bit, int on, int ebus)
+ {
+ u8 bits_on = bit; /* caller picked the right bit for the bus type */
+ u8 bits_off = 0;
+
+ if (!on) {
+ u8 tmp = bits_off;
+ bits_off = bits_on;
+ bits_on = tmp;
+ }
+ __auxio_rmw(bits_on, bits_off, ebus);
+ }
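The mask swap above can read oddly at first. A standalone sketch (userspace, not driver code) of the same read-modify-write expression __auxio_rmw() applies, showing that swapping bits_on/bits_off turns a set into a clear:

#include <stdio.h>
#include <stdint.h>

/* Same RMW as __auxio_rmw(): set bits_on, then clear bits_off. */
static uint8_t rmw(uint8_t reg, uint8_t bits_on, uint8_t bits_off)
{
	return (uint8_t)((reg | bits_on) & ~bits_off);
}

int main(void)
{
	uint8_t led = 0x01, reg = 0x40;

	printf("on:  %#x\n", rmw(reg, led, 0)); /* 0x41: LED bit set */
	printf("off: %#x\n", rmw(reg, 0, led)); /* 0x40: LED bit cleared */
	return 0;
}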
+
+ void auxio_set_led(int on)
+ {
+ int ebus = auxio_devtype == AUXIO_TYPE_EBUS;
+ u8 bit;
+
+ bit = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED);
+ __auxio_set_bit(bit, on, ebus);
+ }
+ EXPORT_SYMBOL(auxio_set_led);
+
+ static void __auxio_sbus_set_lte(int on)
+ {
+ __auxio_set_bit(AUXIO_AUX1_LTE, on, 0);
+ }
+
+ void auxio_set_lte(int on)
+ {
+ switch(auxio_devtype) {
+ case AUXIO_TYPE_SBUS:
+ __auxio_sbus_set_lte(on);
+ break;
+ case AUXIO_TYPE_EBUS:
+ /* FALL-THROUGH */
+ default:
+ break;
+ }
+ }
+ EXPORT_SYMBOL(auxio_set_lte);
+
+ static struct of_device_id __initdata auxio_match[] = {
+ {
+ .name = "auxio",
+ },
+ {},
+ };
+
+ MODULE_DEVICE_TABLE(of, auxio_match);
+
+ static int __devinit auxio_probe(struct of_device *dev, const struct of_device_id *match)
+ {
+ struct device_node *dp = dev->node;
+ unsigned long size;
+
+ if (!strcmp(dp->parent->name, "ebus")) {
+ auxio_devtype = AUXIO_TYPE_EBUS;
+ size = sizeof(u32);
+ } else if (!strcmp(dp->parent->name, "sbus")) {
+ auxio_devtype = AUXIO_TYPE_SBUS;
+ size = 1;
+ } else {
+ printk("auxio: Unknown parent bus type [%s]\n",
+ dp->parent->name);
+ return -ENODEV;
+ }
+ auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio");
+ if (!auxio_register)
+ return -ENODEV;
+
+ printk(KERN_INFO "AUXIO: Found device at %s\n",
+ dp->full_name);
+
+ if (auxio_devtype == AUXIO_TYPE_EBUS)
+ auxio_set_led(AUXIO_LED_ON);
+
+ return 0;
+ }
+
+ static struct of_platform_driver auxio_driver = {
++ .owner = THIS_MODULE,
+ .match_table = auxio_match,
+ .probe = auxio_probe,
+ .driver = {
+ .name = "auxio",
+ },
+ };
+
+ static int __init auxio_init(void)
+ {
+ return of_register_driver(&auxio_driver, &of_platform_bus_type);
+ }
+
+ /* Must be after subsys_initcall() so that busses are probed. Must
+ * be before device_initcall() because things like the floppy driver
+ * need to use the AUXIO register.
+ */
+ fs_initcall(auxio_init);
--- /dev/null
+ /* central.c: Central FHC driver for Sunfire/Starfire/Wildfire.
+ *
+ * Copyright (C) 1997, 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
+
+ #include <asm/fhc.h>
+ #include <asm/upa.h>
+
+ struct clock_board {
+ void __iomem *clock_freq_regs;
+ void __iomem *clock_regs;
+ void __iomem *clock_ver_reg;
+ int num_slots;
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+ };
+
+ struct fhc {
+ void __iomem *pregs;
+ bool central;
+ bool jtag_master;
+ int board_num;
+ struct resource leds_resource;
+ struct platform_device leds_pdev;
+ };
+
+ static int __devinit clock_board_calc_nslots(struct clock_board *p)
+ {
+ u8 reg = upa_readb(p->clock_regs + CLOCK_STAT1) & 0xc0;
+
+ switch (reg) {
+ case 0x40:
+ return 16;
+
+ case 0xc0:
+ return 8;
+
+ case 0x80:
+ reg = 0;
+ if (p->clock_ver_reg)
+ reg = upa_readb(p->clock_ver_reg);
+ if (reg) {
+ if (reg & 0x80)
+ return 4;
+ else
+ return 5;
+ }
+ /* Fallthrough */
+ default:
+ return 4;
+ }
+ }
+
+ static int __devinit clock_board_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct clock_board *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ int err = -ENOMEM;
+
+ if (!p) {
+ printk(KERN_ERR "clock_board: Cannot allocate struct clock_board\n");
+ goto out;
+ }
+
+ p->clock_freq_regs = of_ioremap(&op->resource[0], 0,
+ resource_size(&op->resource[0]),
+ "clock_board_freq");
+ if (!p->clock_freq_regs) {
+ printk(KERN_ERR "clock_board: Cannot map clock_freq_regs\n");
+ goto out_free;
+ }
+
+ p->clock_regs = of_ioremap(&op->resource[1], 0,
+ resource_size(&op->resource[1]),
+ "clock_board_regs");
+ if (!p->clock_regs) {
+ printk(KERN_ERR "clock_board: Cannot map clock_regs\n");
+ goto out_unmap_clock_freq_regs;
+ }
+
+ if (op->resource[2].flags) {
+ p->clock_ver_reg = of_ioremap(&op->resource[2], 0,
+ resource_size(&op->resource[2]),
+ "clock_ver_reg");
+ if (!p->clock_ver_reg) {
+ printk(KERN_ERR "clock_board: Cannot map clock_ver_reg\n");
+ goto out_unmap_clock_regs;
+ }
+ }
+
+ p->num_slots = clock_board_calc_nslots(p);
+
+ p->leds_resource.start = (unsigned long)
+ (p->clock_regs + CLOCK_CTRL);
+ p->leds_resource.end = p->leds_resource.start;
+ p->leds_resource.name = "leds";
+
+ p->leds_pdev.name = "sunfire-clockboard-leds";
+ p->leds_pdev.id = -1;
+ p->leds_pdev.resource = &p->leds_resource;
+ p->leds_pdev.num_resources = 1;
+ p->leds_pdev.dev.parent = &op->dev;
+
+ err = platform_device_register(&p->leds_pdev);
+ if (err) {
+ printk(KERN_ERR "clock_board: Could not register LEDS "
+ "platform device\n");
+ goto out_unmap_clock_ver_reg;
+ }
+
+ printk(KERN_INFO "clock_board: Detected %d slot Enterprise system.\n",
+ p->num_slots);
+
+ err = 0;
+ out:
+ return err;
+
+ out_unmap_clock_ver_reg:
+ if (p->clock_ver_reg)
+ of_iounmap(&op->resource[2], p->clock_ver_reg,
+ resource_size(&op->resource[2]));
+
+ out_unmap_clock_regs:
+ of_iounmap(&op->resource[1], p->clock_regs,
+ resource_size(&op->resource[1]));
+
+ out_unmap_clock_freq_regs:
+ of_iounmap(&op->resource[0], p->clock_freq_regs,
+ resource_size(&op->resource[0]));
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static struct of_device_id __initdata clock_board_match[] = {
+ {
+ .name = "clock-board",
+ },
+ {},
+ };
+
+ static struct of_platform_driver clock_board_driver = {
++ .owner = THIS_MODULE,
+ .match_table = clock_board_match,
+ .probe = clock_board_probe,
+ .driver = {
+ .name = "clock_board",
+ },
+ };
+
+ static int __devinit fhc_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct fhc *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ int err = -ENOMEM;
+ u32 reg;
+
+ if (!p) {
+ printk(KERN_ERR "fhc: Cannot allocate struct fhc\n");
+ goto out;
+ }
+
+ if (!strcmp(op->node->parent->name, "central"))
+ p->central = true;
+
+ p->pregs = of_ioremap(&op->resource[0], 0,
+ resource_size(&op->resource[0]),
+ "fhc_pregs");
+ if (!p->pregs) {
+ printk(KERN_ERR "fhc: Cannot map pregs\n");
+ goto out_free;
+ }
+
+ if (p->central) {
+ reg = upa_readl(p->pregs + FHC_PREGS_BSR);
+ p->board_num = ((reg >> 16) & 1) | ((reg >> 12) & 0x0e);
+ } else {
+ p->board_num = of_getintprop_default(op->node, "board#", -1);
+ if (p->board_num == -1) {
+ printk(KERN_ERR "fhc: No board# property\n");
+ goto out_unmap_pregs;
+ }
+ if (upa_readl(p->pregs + FHC_PREGS_JCTRL) & FHC_JTAG_CTRL_MENAB)
+ p->jtag_master = true;
+ }
+
+ if (!p->central) {
+ p->leds_resource.start = (unsigned long)
+ (p->pregs + FHC_PREGS_CTRL);
+ p->leds_resource.end = p->leds_resource.start;
+ p->leds_resource.name = "leds";
+
+ p->leds_pdev.name = "sunfire-fhc-leds";
+ p->leds_pdev.id = p->board_num;
+ p->leds_pdev.resource = &p->leds_resource;
+ p->leds_pdev.num_resources = 1;
+ p->leds_pdev.dev.parent = &op->dev;
+
+ err = platform_device_register(&p->leds_pdev);
+ if (err) {
+ printk(KERN_ERR "fhc: Could not register LEDS "
+ "platform device\n");
+ goto out_unmap_pregs;
+ }
+ }
+ reg = upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+ if (!p->central)
+ reg |= FHC_CONTROL_IXIST;
+
+ reg &= ~(FHC_CONTROL_AOFF |
+ FHC_CONTROL_BOFF |
+ FHC_CONTROL_SLINE);
+
+ upa_writel(reg, p->pregs + FHC_PREGS_CTRL);
+ upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+ reg = upa_readl(p->pregs + FHC_PREGS_ID);
+ printk(KERN_INFO "fhc: Board #%d, Version[%x] PartID[%x] Manuf[%x] %s\n",
+ p->board_num,
+ (reg & FHC_ID_VERS) >> 28,
+ (reg & FHC_ID_PARTID) >> 12,
+ (reg & FHC_ID_MANUF) >> 1,
+ (p->jtag_master ?
+ "(JTAG Master)" :
+ (p->central ? "(Central)" : "")));
+
+ err = 0;
+
+ out:
+ return err;
+
+ out_unmap_pregs:
+ of_iounmap(&op->resource[0], p->pregs, resource_size(&op->resource[0]));
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static struct of_device_id __initdata fhc_match[] = {
+ {
+ .name = "fhc",
+ },
+ {},
+ };
+
+ static struct of_platform_driver fhc_driver = {
++ .owner = THIS_MODULE,
+ .match_table = fhc_match,
+ .probe = fhc_probe,
+ .driver = {
+ .name = "fhc",
+ },
+ };
+
+ static int __init sunfire_init(void)
+ {
+ (void) of_register_driver(&fhc_driver, &of_platform_bus_type);
+ (void) of_register_driver(&clock_board_driver, &of_platform_bus_type);
+ return 0;
+ }
+
+ subsys_initcall(sunfire_init);
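Both probe routines above use the same unwind idiom: the success path sets err to 0 and falls into a single exit label, while each failure jumps to a label that releases only what was already acquired, in reverse order. A minimal standalone sketch of that shape (malloc/free stand in for the ioremap and platform-device steps):

#include <errno.h>
#include <stdlib.h>

int probe_sketch(void)
{
	int err = -ENOMEM;
	char *a, *b;

	a = malloc(16);
	if (!a)
		goto out;
	b = malloc(16);
	if (!b)
		goto out_free_a;

	/* ... device set up; resources stay held on success ... */
	err = 0;
out:
	return err;

out_free_a:
	free(a);
	goto out;
}

int main(void)
{
	return probe_sketch();
}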
--- /dev/null
+ /* chmc.c: Driver for UltraSPARC-III memory controller.
+ *
+ * Copyright (C) 2001, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/slab.h>
+ #include <linux/list.h>
+ #include <linux/string.h>
+ #include <linux/sched.h>
+ #include <linux/smp.h>
+ #include <linux/errno.h>
+ #include <linux/init.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+ #include <asm/spitfire.h>
+ #include <asm/chmctrl.h>
+ #include <asm/cpudata.h>
+ #include <asm/oplib.h>
+ #include <asm/prom.h>
+ #include <asm/head.h>
+ #include <asm/io.h>
+ #include <asm/memctrl.h>
+
+ #define DRV_MODULE_NAME "chmc"
+ #define PFX DRV_MODULE_NAME ": "
+ #define DRV_MODULE_VERSION "0.2"
+
+ MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+ MODULE_DESCRIPTION("UltraSPARC-III memory controller driver");
+ MODULE_LICENSE("GPL");
+ MODULE_VERSION(DRV_MODULE_VERSION);
+
+ static int mc_type;
+ #define MC_TYPE_SAFARI 1
+ #define MC_TYPE_JBUS 2
+
+ static dimm_printer_t us3mc_dimm_printer;
+
+ #define CHMCTRL_NDGRPS 2
+ #define CHMCTRL_NDIMMS 4
+
+ #define CHMC_DIMMS_PER_MC (CHMCTRL_NDGRPS * CHMCTRL_NDIMMS)
+
+ /* OBP memory-layout property format. */
+ struct chmc_obp_map {
+ unsigned char dimm_map[144];
+ unsigned char pin_map[576];
+ };
+
+ #define DIMM_LABEL_SZ 8
+
+ struct chmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[CHMC_DIMMS_PER_MC][DIMM_LABEL_SZ];
+
+ /* If symmetric use map[0], else it is
+ * asymmetric and map[1] should be used.
+ */
+ char symmetric;
+
+ struct chmc_obp_map map[2];
+ };
+
+ #define CHMCTRL_NBANKS 4
+
+ struct chmc_bank_info {
+ struct chmc *p;
+ int bank_id;
+
+ u64 raw_reg;
+ int valid;
+ int uk;
+ int um;
+ int lk;
+ int lm;
+ int interleave;
+ unsigned long base;
+ unsigned long size;
+ };
+
+ struct chmc {
+ struct list_head list;
+ int portid;
+
+ struct chmc_obp_mem_layout layout_prop;
+ int layout_size;
+
+ void __iomem *regs;
+
+ u64 timing_control1;
+ u64 timing_control2;
+ u64 timing_control3;
+ u64 timing_control4;
+ u64 memaddr_control;
+
+ struct chmc_bank_info logical_banks[CHMCTRL_NBANKS];
+ };
+
+ #define JBUSMC_REGS_SIZE 8
+
+ #define JB_MC_REG1_DIMM2_BANK3 0x8000000000000000UL
+ #define JB_MC_REG1_DIMM1_BANK1 0x4000000000000000UL
+ #define JB_MC_REG1_DIMM2_BANK2 0x2000000000000000UL
+ #define JB_MC_REG1_DIMM1_BANK0 0x1000000000000000UL
+ #define JB_MC_REG1_XOR 0x0000010000000000UL
+ #define JB_MC_REG1_ADDR_GEN_2 0x000000e000000000UL
+ #define JB_MC_REG1_ADDR_GEN_2_SHIFT 37
+ #define JB_MC_REG1_ADDR_GEN_1 0x0000001c00000000UL
+ #define JB_MC_REG1_ADDR_GEN_1_SHIFT 34
+ #define JB_MC_REG1_INTERLEAVE 0x0000000001800000UL
+ #define JB_MC_REG1_INTERLEAVE_SHIFT 23
+ #define JB_MC_REG1_DIMM2_PTYPE 0x0000000000200000UL
+ #define JB_MC_REG1_DIMM2_PTYPE_SHIFT 21
+ #define JB_MC_REG1_DIMM1_PTYPE 0x0000000000100000UL
+ #define JB_MC_REG1_DIMM1_PTYPE_SHIFT 20
+
+ #define PART_TYPE_X8 0
+ #define PART_TYPE_X4 1
+
+ #define INTERLEAVE_NONE 0
+ #define INTERLEAVE_SAME 1
+ #define INTERLEAVE_INTERNAL 2
+ #define INTERLEAVE_BOTH 3
+
+ #define ADDR_GEN_128MB 0
+ #define ADDR_GEN_256MB 1
+ #define ADDR_GEN_512MB 2
+ #define ADDR_GEN_1GB 3
+
+ #define JB_NUM_DIMM_GROUPS 2
+ #define JB_NUM_DIMMS_PER_GROUP 2
+ #define JB_NUM_DIMMS (JB_NUM_DIMM_GROUPS * JB_NUM_DIMMS_PER_GROUP)
+
+ struct jbusmc_obp_map {
+ unsigned char dimm_map[18];
+ unsigned char pin_map[144];
+ };
+
+ struct jbusmc_obp_mem_layout {
+ /* One max 8-byte string label per DIMM. Usually
+ * this matches the label on the motherboard where
+ * that DIMM resides.
+ */
+ char dimm_labels[JB_NUM_DIMMS][DIMM_LABEL_SZ];
+
+ /* If symmetric use map[0], else it is
+ * asymmetric and map[1] should be used.
+ */
+ char symmetric;
+
+ struct jbusmc_obp_map map;
+
+ char _pad;
+ };
+
+ struct jbusmc_dimm_group {
+ struct jbusmc *controller;
+ int index;
+ u64 base_addr;
+ u64 size;
+ };
+
+ struct jbusmc {
+ void __iomem *regs;
+ u64 mc_reg_1;
+ u32 portid;
+ struct jbusmc_obp_mem_layout layout;
+ int layout_len;
+ int num_dimm_groups;
+ struct jbusmc_dimm_group dimm_groups[JB_NUM_DIMM_GROUPS];
+ struct list_head list;
+ };
+
+ static DEFINE_SPINLOCK(mctrl_list_lock);
+ static LIST_HEAD(mctrl_list);
+
+ static void mc_list_add(struct list_head *list)
+ {
+ spin_lock(&mctrl_list_lock);
+ list_add(list, &mctrl_list);
+ spin_unlock(&mctrl_list_lock);
+ }
+
+ static void mc_list_del(struct list_head *list)
+ {
+ spin_lock(&mctrl_list_lock);
+ list_del_init(list);
+ spin_unlock(&mctrl_list_lock);
+ }
+
+ #define SYNDROME_MIN -1
+ #define SYNDROME_MAX 144
+
+ /* Convert a syndrome code into the way the bits are positioned
+ * on the bus.
+ */
+ static int syndrome_to_qword_code(int syndrome_code)
+ {
+ if (syndrome_code < 128)
+ syndrome_code += 16;
+ else if (syndrome_code < 128 + 9)
+ syndrome_code -= (128 - 7);
+ else if (syndrome_code < (128 + 9 + 3))
+ syndrome_code -= (128 + 9 - 4);
+ else
+ syndrome_code -= (128 + 9 + 3);
+ return syndrome_code;
+ }
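A quick standalone check (restating the function above verbatim) that the three adjustments permute syndrome codes 0..143 onto qword bit positions 0..143 with no gaps or collisions:

#include <assert.h>
#include <string.h>

static int syndrome_to_qword_code(int s)
{
	if (s < 128)
		s += 16;
	else if (s < 128 + 9)
		s -= (128 - 7);
	else if (s < (128 + 9 + 3))
		s -= (128 + 9 - 4);
	else
		s -= (128 + 9 + 3);
	return s;
}

int main(void)
{
	char seen[144];
	int i;

	memset(seen, 0, sizeof(seen));
	for (i = 0; i < 144; i++)
		seen[syndrome_to_qword_code(i)]++;
	for (i = 0; i < 144; i++)
		assert(seen[i] == 1); /* each position hit exactly once */
	return 0;
}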
+
+ /* All this magic has to do with how a cache line comes over the wire
+ * on Safari and JBUS. A 64-byte line comes over in one or more quadword
+ * cycles, each of which transmits ECC/MTAG info as well as the actual
+ * data.
+ */
+ #define L2_LINE_SIZE 64
+ #define L2_LINE_ADDR_MSK (L2_LINE_SIZE - 1)
+ #define QW_PER_LINE 4
+ #define QW_BYTES (L2_LINE_SIZE / QW_PER_LINE)
+ #define QW_BITS 144
+ #define SAFARI_LAST_BIT (576 - 1)
+ #define JBUS_LAST_BIT (144 - 1)
+
+ static void get_pin_and_dimm_str(int syndrome_code, unsigned long paddr,
+ int *pin_p, char **dimm_str_p, void *_prop,
+ int base_dimm_offset)
+ {
+ int qword_code = syndrome_to_qword_code(syndrome_code);
+ int cache_line_offset;
+ int offset_inverse;
+ int dimm_map_index;
+ int map_val;
+
+ if (mc_type == MC_TYPE_JBUS) {
+ struct jbusmc_obp_mem_layout *p = _prop;
+
+ /* JBUS */
+ cache_line_offset = qword_code;
+ offset_inverse = (JBUS_LAST_BIT - cache_line_offset);
+ dimm_map_index = offset_inverse / 8;
+ map_val = p->map.dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((7 - (offset_inverse & 7)))) & 1);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = p->map.pin_map[cache_line_offset];
+ } else {
+ struct chmc_obp_mem_layout *p = _prop;
+ struct chmc_obp_map *mp;
+ int qword;
+
+ /* Safari */
+ if (p->symmetric)
+ mp = &p->map[0];
+ else
+ mp = &p->map[1];
+
+ qword = (paddr & L2_LINE_ADDR_MSK) / QW_BYTES;
+ cache_line_offset = ((3 - qword) * QW_BITS) + qword_code;
+ offset_inverse = (SAFARI_LAST_BIT - cache_line_offset);
+ dimm_map_index = offset_inverse >> 2;
+ map_val = mp->dimm_map[dimm_map_index];
+ map_val = ((map_val >> ((3 - (offset_inverse & 3)) << 1)) & 0x3);
+ *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+ *pin_p = mp->pin_map[cache_line_offset];
+ }
+ }
+
+ static struct jbusmc_dimm_group *jbusmc_find_dimm_group(unsigned long phys_addr)
+ {
+ struct jbusmc *p;
+
+ list_for_each_entry(p, &mctrl_list, list) {
+ int i;
+
+ for (i = 0; i < p->num_dimm_groups; i++) {
+ struct jbusmc_dimm_group *dp = &p->dimm_groups[i];
+
+ if (phys_addr < dp->base_addr ||
+ (dp->base_addr + dp->size) <= phys_addr)
+ continue;
+
+ return dp;
+ }
+ }
+ return NULL;
+ }
+
+ static int jbusmc_print_dimm(int syndrome_code,
+ unsigned long phys_addr,
+ char *buf, int buflen)
+ {
+ struct jbusmc_obp_mem_layout *prop;
+ struct jbusmc_dimm_group *dp;
+ struct jbusmc *p;
+ int first_dimm;
+
+ dp = jbusmc_find_dimm_group(phys_addr);
+ if (dp == NULL ||
+ syndrome_code < SYNDROME_MIN ||
+ syndrome_code > SYNDROME_MAX) {
+ buf[0] = '?';
+ buf[1] = '?';
+ buf[2] = '?';
+ buf[3] = '\0';
+ return 0;
+ }
+ p = dp->controller;
+ prop = &p->layout;
+
+ first_dimm = dp->index * JB_NUM_DIMMS_PER_GROUP;
+
+ if (syndrome_code != SYNDROME_MIN) {
+ char *dimm_str;
+ int pin;
+
+ get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+ &dimm_str, prop, first_dimm);
+ sprintf(buf, "%s, pin %3d", dimm_str, pin);
+ } else {
+ int dimm;
+
+ /* Multi-bit error, we just dump out all the
+ * dimm labels associated with this dimm group.
+ */
+ for (dimm = 0; dimm < JB_NUM_DIMMS_PER_GROUP; dimm++) {
+ sprintf(buf, "%s ",
+ prop->dimm_labels[first_dimm + dimm]);
+ buf += strlen(buf);
+ }
+ }
+
+ return 0;
+ }
+
+ static u64 __devinit jbusmc_dimm_group_size(u64 base,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+ {
+ u64 max = base + (8UL * 1024 * 1024 * 1024);
+ u64 max_seen = base;
+ int i;
+
+ for (i = 0; i < num_mem_regs; i++) {
+ const struct linux_prom64_registers *ent;
+ u64 this_base;
+ u64 this_end;
+
+ ent = &mem_regs[i];
+ this_base = ent->phys_addr;
+ this_end = this_base + ent->reg_size;
+ if (base < this_base || base >= this_end)
+ continue;
+ if (this_end > max)
+ this_end = max;
+ if (this_end > max_seen)
+ max_seen = this_end;
+ }
+
+ return max_seen - base;
+ }
+
+ static void __devinit jbusmc_construct_one_dimm_group(struct jbusmc *p,
+ unsigned long index,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+ {
+ struct jbusmc_dimm_group *dp = &p->dimm_groups[index];
+
+ dp->controller = p;
+ dp->index = index;
+
+ dp->base_addr = (p->portid * (64UL * 1024 * 1024 * 1024));
+ dp->base_addr += (index * (8UL * 1024 * 1024 * 1024));
+ dp->size = jbusmc_dimm_group_size(dp->base_addr, mem_regs, num_mem_regs);
+ }
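The address math above places each JBUS port 64GB apart with 8GB per DIMM group. A worked example with hypothetical portid and group values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t portid = 2, index = 1; /* hypothetical values */
	uint64_t base = portid * (64ULL << 30) + index * (8ULL << 30);

	/* 2 * 64GB + 1 * 8GB = 0x2200000000 */
	assert(base == 0x2200000000ULL);
	return 0;
}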
+
+ static void __devinit jbusmc_construct_dimm_groups(struct jbusmc *p,
+ const struct linux_prom64_registers *mem_regs,
+ int num_mem_regs)
+ {
+ if (p->mc_reg_1 & JB_MC_REG1_DIMM1_BANK0) {
+ jbusmc_construct_one_dimm_group(p, 0, mem_regs, num_mem_regs);
+ p->num_dimm_groups++;
+ }
+ if (p->mc_reg_1 & JB_MC_REG1_DIMM2_BANK2) {
+ jbusmc_construct_one_dimm_group(p, 1, mem_regs, num_mem_regs);
+ p->num_dimm_groups++;
+ }
+ }
+
+ static int __devinit jbusmc_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ const struct linux_prom64_registers *mem_regs;
+ struct device_node *mem_node;
+ int err, len, num_mem_regs;
+ struct jbusmc *p;
+ const u32 *prop;
+ const void *ml;
+
+ err = -ENODEV;
+ mem_node = of_find_node_by_path("/memory");
+ if (!mem_node) {
+ printk(KERN_ERR PFX "Cannot find /memory node.\n");
+ goto out;
+ }
+ mem_regs = of_get_property(mem_node, "reg", &len);
+ if (!mem_regs) {
+ printk(KERN_ERR PFX "Cannot get reg property of /memory node.\n");
+ goto out;
+ }
+ num_mem_regs = len / sizeof(*mem_regs);
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR PFX "Cannot allocate struct jbusmc.\n");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&p->list);
+
+ err = -ENODEV;
+ prop = of_get_property(op->node, "portid", &len);
+ if (!prop || len != 4) {
+ printk(KERN_ERR PFX "Cannot find portid.\n");
+ goto out_free;
+ }
+
+ p->portid = *prop;
+
+ prop = of_get_property(op->node, "memory-control-register-1", &len);
+ if (!prop || len != 8) {
+ printk(KERN_ERR PFX "Cannot get memory control register 1.\n");
+ goto out_free;
+ }
+
+ p->mc_reg_1 = ((u64)prop[0] << 32) | (u64) prop[1];
+
+ err = -ENOMEM;
+ p->regs = of_ioremap(&op->resource[0], 0, JBUSMC_REGS_SIZE, "jbusmc");
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Cannot map jbusmc regs.\n");
+ goto out_free;
+ }
+
+ err = -ENODEV;
+ ml = of_get_property(op->node, "memory-layout", &p->layout_len);
+ if (!ml) {
+ printk(KERN_ERR PFX "Cannot get memory layout property.\n");
+ goto out_iounmap;
+ }
+ if (p->layout_len > sizeof(p->layout)) {
+ printk(KERN_ERR PFX "Unexpected memory-layout size %d\n",
+ p->layout_len);
+ goto out_iounmap;
+ }
+ memcpy(&p->layout, ml, p->layout_len);
+
+ jbusmc_construct_dimm_groups(p, mem_regs, num_mem_regs);
+
+ mc_list_add(&p->list);
+
+ printk(KERN_INFO PFX "UltraSPARC-IIIi memory controller at %s\n",
+ op->node->full_name);
+
+ dev_set_drvdata(&op->dev, p);
+
+ err = 0;
+
+ out:
+ return err;
+
+ out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ /* Does BANK decode PHYS_ADDR? */
+ static int chmc_bank_match(struct chmc_bank_info *bp, unsigned long phys_addr)
+ {
+ unsigned long upper_bits = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT;
+ unsigned long lower_bits = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT;
+
+ /* Bank must be enabled to match. */
+ if (bp->valid == 0)
+ return 0;
+
+ /* Would BANK match upper bits? */
+ upper_bits ^= bp->um; /* What bits are different? */
+ upper_bits = ~upper_bits; /* Invert. */
+ upper_bits |= bp->uk; /* What bits don't matter for matching? */
+ upper_bits = ~upper_bits; /* Invert. */
+
+ if (upper_bits)
+ return 0;
+
+ /* Would BANK match lower bits? */
+ lower_bits ^= bp->lm; /* What bits are different? */
+ lower_bits = ~lower_bits; /* Invert. */
+ lower_bits |= bp->lk; /* What bits don't matter for matching? */
+ lower_bits = ~lower_bits; /* Invert. */
+
+ if (lower_bits)
+ return 0;
+
+ /* I always knew you'd be the one. */
+ return 1;
+ }
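The double-invert sequence above is De Morgan in disguise: the residue it computes is (addr_bits ^ um) & ~uk, so the bank matches when every bit differing from UM is a "don't care" bit in UK. A standalone check of that identity (sample values are arbitrary):

#include <assert.h>
#include <stdint.h>

static uint64_t residue(uint64_t bits, uint64_t um, uint64_t uk)
{
	bits ^= um;   /* What bits are different? */
	bits = ~bits; /* Invert. */
	bits |= uk;   /* What bits don't matter for matching? */
	bits = ~bits; /* Invert. */
	return bits;  /* zero => match */
}

int main(void)
{
	uint64_t bits = 0x12, um = 0x10, uk = 0x0f;

	assert(residue(bits, um, uk) == ((bits ^ um) & ~uk));
	assert(residue(bits, um, uk) == 0); /* diff 0x02 is masked by UK */
	return 0;
}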
+
+ /* Given PHYS_ADDR, search memory controller banks for a match. */
+ static struct chmc_bank_info *chmc_find_bank(unsigned long phys_addr)
+ {
+ struct chmc *p;
+
+ list_for_each_entry(p, &mctrl_list, list) {
+ int bank_no;
+
+ for (bank_no = 0; bank_no < CHMCTRL_NBANKS; bank_no++) {
+ struct chmc_bank_info *bp;
+
+ bp = &p->logical_banks[bank_no];
+ if (chmc_bank_match(bp, phys_addr))
+ return bp;
+ }
+ }
+
+ return NULL;
+ }
+
+ /* This is the main purpose of this driver. */
+ static int chmc_print_dimm(int syndrome_code,
+ unsigned long phys_addr,
+ char *buf, int buflen)
+ {
+ struct chmc_bank_info *bp;
+ struct chmc_obp_mem_layout *prop;
+ int bank_in_controller, first_dimm;
+
+ bp = chmc_find_bank(phys_addr);
+ if (bp == NULL ||
+ syndrome_code < SYNDROME_MIN ||
+ syndrome_code > SYNDROME_MAX) {
+ buf[0] = '?';
+ buf[1] = '?';
+ buf[2] = '?';
+ buf[3] = '\0';
+ return 0;
+ }
+
+ prop = &bp->p->layout_prop;
+ bank_in_controller = bp->bank_id & (CHMCTRL_NBANKS - 1);
+ first_dimm = (bank_in_controller & (CHMCTRL_NDGRPS - 1));
+ first_dimm *= CHMCTRL_NDIMMS;
+
+ if (syndrome_code != SYNDROME_MIN) {
+ char *dimm_str;
+ int pin;
+
+ get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+ &dimm_str, prop, first_dimm);
+ sprintf(buf, "%s, pin %3d", dimm_str, pin);
+ } else {
+ int dimm;
+
+ /* Multi-bit error, we just dump out all the
+ * dimm labels associated with this bank.
+ */
+ for (dimm = 0; dimm < CHMCTRL_NDIMMS; dimm++) {
+ sprintf(buf, "%s ",
+ prop->dimm_labels[first_dimm + dimm]);
+ buf += strlen(buf);
+ }
+ }
+ return 0;
+ }
+
+ /* Accessing the registers is slightly complicated. If you want
+ * to get at the memory controller on the same processor the code
+ * is executing on, you must use the special ASI load/store
+ * instructions; otherwise you go through the global mapping.
+ */
+ static u64 chmc_read_mcreg(struct chmc *p, unsigned long offset)
+ {
+ unsigned long ret, this_cpu;
+
+ preempt_disable();
+
+ this_cpu = real_hard_smp_processor_id();
+
+ if (p->portid == this_cpu) {
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (offset), "i" (ASI_MCU_CTRL_REG));
+ } else {
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (ret)
+ : "r" (p->regs + offset),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+
+ preempt_enable();
+
+ return ret;
+ }
+
+ #if 0 /* currently unused */
+ static void chmc_write_mcreg(struct chmc *p, unsigned long offset, u64 val)
+ {
+ if (p->portid == smp_processor_id()) {
+ __asm__ __volatile__("stxa %0, [%1] %2"
+ : : "r" (val),
+ "r" (offset), "i" (ASI_MCU_CTRL_REG));
+ } else {
+ __asm__ __volatile__("ldxa %0, [%1] %2"
+ : : "r" (val),
+ "r" (p->regs + offset),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+ }
+ #endif
+
+ static void chmc_interpret_one_decode_reg(struct chmc *p, int which_bank, u64 val)
+ {
+ struct chmc_bank_info *bp = &p->logical_banks[which_bank];
+
+ bp->p = p;
+ bp->bank_id = (CHMCTRL_NBANKS * p->portid) + which_bank;
+ bp->raw_reg = val;
+ bp->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT;
+ bp->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT;
+ bp->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT;
+ bp->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT;
+ bp->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT;
+
+ bp->base = (bp->um);
+ bp->base &= ~(bp->uk);
+ bp->base <<= PA_UPPER_BITS_SHIFT;
+
+ switch(bp->lk) {
+ case 0xf:
+ default:
+ bp->interleave = 1;
+ break;
+
+ case 0xe:
+ bp->interleave = 2;
+ break;
+
+ case 0xc:
+ bp->interleave = 4;
+ break;
+
+ case 0x8:
+ bp->interleave = 8;
+ break;
+
+ case 0x0:
+ bp->interleave = 16;
+ break;
+ }
+
+ /* UK[10] is reserved, and UK[11] is not set for the SDRAM
+ * bank size definition.
+ */
+ bp->size = (((unsigned long)bp->uk &
+ ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT;
+ bp->size /= bp->interleave;
+ }
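For the LK encodings the switch above handles (0xf, 0xe, 0xc, 0x8, 0x0), the interleave factor works out to 2^(number of cleared bits in the low nibble of LK). A standalone check:

#include <assert.h>

static int lk_to_interleave(int lk)
{
	int zeros = 0, i;

	for (i = 0; i < 4; i++)
		if (!(lk & (1 << i)))
			zeros++;
	return 1 << zeros;
}

int main(void)
{
	assert(lk_to_interleave(0xf) == 1);
	assert(lk_to_interleave(0xe) == 2);
	assert(lk_to_interleave(0xc) == 4);
	assert(lk_to_interleave(0x8) == 8);
	assert(lk_to_interleave(0x0) == 16);
	return 0;
}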
+
+ static void chmc_fetch_decode_regs(struct chmc *p)
+ {
+ if (p->layout_size == 0)
+ return;
+
+ chmc_interpret_one_decode_reg(p, 0,
+ chmc_read_mcreg(p, CHMCTRL_DECODE1));
+ chmc_interpret_one_decode_reg(p, 1,
+ chmc_read_mcreg(p, CHMCTRL_DECODE2));
+ chmc_interpret_one_decode_reg(p, 2,
+ chmc_read_mcreg(p, CHMCTRL_DECODE3));
+ chmc_interpret_one_decode_reg(p, 3,
+ chmc_read_mcreg(p, CHMCTRL_DECODE4));
+ }
+
+ static int __devinit chmc_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct device_node *dp = op->node;
+ unsigned long ver;
+ const void *pval;
+ int len, portid;
+ struct chmc *p;
+ int err;
+
+ err = -ENODEV;
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID)
+ goto out;
+
+ portid = of_getintprop_default(dp, "portid", -1);
+ if (portid == -1)
+ goto out;
+
+ pval = of_get_property(dp, "memory-layout", &len);
+ if (pval && len > sizeof(p->layout_prop)) {
+ printk(KERN_ERR PFX "Unexpected memory-layout property "
+ "size %d.\n", len);
+ goto out;
+ }
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR PFX "Could not allocate struct chmc.\n");
+ goto out;
+ }
+
+ p->portid = portid;
+ p->layout_size = len;
+ if (!pval)
+ p->layout_size = 0;
+ else
+ memcpy(&p->layout_prop, pval, len);
+
+ p->regs = of_ioremap(&op->resource[0], 0, 0x48, "chmc");
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Could not map registers.\n");
+ goto out_free;
+ }
+
+ if (p->layout_size != 0UL) {
+ p->timing_control1 = chmc_read_mcreg(p, CHMCTRL_TCTRL1);
+ p->timing_control2 = chmc_read_mcreg(p, CHMCTRL_TCTRL2);
+ p->timing_control3 = chmc_read_mcreg(p, CHMCTRL_TCTRL3);
+ p->timing_control4 = chmc_read_mcreg(p, CHMCTRL_TCTRL4);
+ p->memaddr_control = chmc_read_mcreg(p, CHMCTRL_MACTRL);
+ }
+
+ chmc_fetch_decode_regs(p);
+
+ mc_list_add(&p->list);
+
+ printk(KERN_INFO PFX "UltraSPARC-III memory controller at %s [%s]\n",
+ dp->full_name,
+ (p->layout_size ? "ACTIVE" : "INACTIVE"));
+
+ dev_set_drvdata(&op->dev, p);
+
+ err = 0;
+
+ out:
+ return err;
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static int __devinit us3mc_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ if (mc_type == MC_TYPE_SAFARI)
+ return chmc_probe(op, match);
+ else if (mc_type == MC_TYPE_JBUS)
+ return jbusmc_probe(op, match);
+ return -ENODEV;
+ }
+
+ static void __devexit chmc_destroy(struct of_device *op, struct chmc *p)
+ {
+ list_del(&p->list);
+ of_iounmap(&op->resource[0], p->regs, 0x48);
+ kfree(p);
+ }
+
+ static void __devexit jbusmc_destroy(struct of_device *op, struct jbusmc *p)
+ {
+ mc_list_del(&p->list);
+ of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+ kfree(p);
+ }
+
+ static int __devexit us3mc_remove(struct of_device *op)
+ {
+ void *p = dev_get_drvdata(&op->dev);
+
+ if (p) {
+ if (mc_type == MC_TYPE_SAFARI)
+ chmc_destroy(op, p);
+ else if (mc_type == MC_TYPE_JBUS)
+ jbusmc_destroy(op, p);
+ }
+ return 0;
+ }
+
+ static const struct of_device_id us3mc_match[] = {
+ {
+ .name = "memory-controller",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, us3mc_match);
+
+ static struct of_platform_driver us3mc_driver = {
++ .owner = THIS_MODULE,
+ .name = "us3mc",
+ .match_table = us3mc_match,
+ .probe = us3mc_probe,
+ .remove = __devexit_p(us3mc_remove),
+ };
+
+ static inline bool us3mc_platform(void)
+ {
+ if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ return true;
+ return false;
+ }
+
+ static int __init us3mc_init(void)
+ {
+ unsigned long ver;
+ int ret;
+
+ if (!us3mc_platform())
+ return -ENODEV;
+
+ __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID) {
+ mc_type = MC_TYPE_JBUS;
+ us3mc_dimm_printer = jbusmc_print_dimm;
+ } else {
+ mc_type = MC_TYPE_SAFARI;
+ us3mc_dimm_printer = chmc_print_dimm;
+ }
+
+ ret = register_dimm_printer(us3mc_dimm_printer);
+
+ if (!ret) {
+ ret = of_register_driver(&us3mc_driver, &of_bus_type);
+ if (ret)
+ unregister_dimm_printer(us3mc_dimm_printer);
+ }
+ return ret;
+ }
+
+ static void __exit us3mc_cleanup(void)
+ {
+ if (us3mc_platform()) {
+ unregister_dimm_printer(us3mc_dimm_printer);
+ of_unregister_driver(&us3mc_driver);
+ }
+ }
+
+ module_init(us3mc_init);
+ module_exit(us3mc_cleanup);
#include <asm/mbus.h>
#include <asm/cpudata.h>
+ #include "kernel.h"
+
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
+ EXPORT_PER_CPU_SYMBOL(__cpu_data);
- struct cpu_iu_info {
- int psr_impl;
- int psr_vers;
- char* cpu_name; /* should be enough I hope... */
+ struct cpu_info {
+ int psr_vers;
+ const char *name;
++ char *pmu_name;
};
- struct cpu_fp_info {
- int psr_impl;
- int fp_vers;
- char* fp_name;
+ struct fpu_info {
+ int fp_vers;
+ const char *name;
};
+ #define NOCPU 8
+ #define NOFPU 8
+
+ struct manufacturer_info {
+ int psr_impl;
+ struct cpu_info cpu_info[NOCPU];
+ struct fpu_info fpu_info[NOFPU];
+ };
+
+ #define CPU(ver, _name) \
-{ .psr_vers = ver, .name = _name }
++{ .psr_vers = ver, .name = _name, .pmu_name = NULL }
++
++#define CPUPMU(ver, _name, _pmu_name) \
++{ .psr_vers = ver, .name = _name, .pmu_name = _pmu_name }
+
+ #define FPU(ver, _name) \
+ { .fp_vers = ver, .name = _name }
+
+ static const struct manufacturer_info __initconst manufacturer_info[] = {
+ {
+ 0,
+ /* Sun4/100, 4/200, SLC */
+ .cpu_info = {
+ CPU(0, "Fujitsu MB86900/1A or LSI L64831 SparcKIT-40"),
+ /* born as STP1012PGA */
+ CPU(4, "Fujitsu MB86904"),
+ CPU(5, "Fujitsu TurboSparc MB86907"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "Fujitsu MB86910 or Weitek WTL1164/5"),
+ FPU(1, "Fujitsu MB86911 or Weitek WTL1164/5 or LSI L64831"),
+ FPU(2, "LSI Logic L64802 or Texas Instruments ACT8847"),
+ /* SparcStation SLC, SparcStation1 */
+ FPU(3, "Weitek WTL3170/2"),
+ /* SPARCstation-5 */
+ FPU(4, "Lsi Logic/Meiko L64804 or compatible"),
+ FPU(-1, NULL)
+ }
+ },{
+ 1,
+ .cpu_info = {
+ /* SparcStation2, SparcServer 490 & 690 */
+ CPU(0, "LSI Logic Corporation - L64811"),
+ /* SparcStation2 */
+ CPU(1, "Cypress/ROSS CY7C601"),
+ /* Embedded controller */
+ CPU(3, "Cypress/ROSS CY7C611"),
+ /* Ross Technologies HyperSparc */
+ CPU(0xf, "ROSS HyperSparc RT620"),
+ CPU(0xe, "ROSS HyperSparc RT625 or RT626"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "ROSS HyperSparc combined IU/FPU"),
+ FPU(1, "Lsi Logic L64814"),
+ FPU(2, "Texas Instruments TMS390-C602A"),
+ FPU(3, "Cypress CY7C602 FPU"),
+ FPU(-1, NULL)
+ }
+ },{
+ 2,
+ .cpu_info = {
+ /* ECL Implementation, CRAY S-MP Supercomputer... AIEEE! */
+ /* Someone please write the code to support this beast! ;) */
+ CPU(0, "Bipolar Integrated Technology - B5010"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+ },{
+ 3,
+ .cpu_info = {
+ CPU(0, "LSI Logic Corporation - unknown-type"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+ },{
+ 4,
+ .cpu_info = {
+ CPU(0, "Texas Instruments, Inc. - SuperSparc-(II)"),
+ /* SparcClassic -- born as STP1010TAB-50 */
+ CPU(1, "Texas Instruments, Inc. - MicroSparc"),
+ CPU(2, "Texas Instruments, Inc. - MicroSparc II"),
+ CPU(3, "Texas Instruments, Inc. - SuperSparc 51"),
+ CPU(4, "Texas Instruments, Inc. - SuperSparc 61"),
+ CPU(5, "Texas Instruments, Inc. - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ /* SuperSparc 50 module */
+ FPU(0, "SuperSparc on-chip FPU"),
+ /* SparcClassic */
+ FPU(4, "TI MicroSparc on chip FPU"),
+ FPU(-1, NULL)
+ }
+ },{
+ 5,
+ .cpu_info = {
+ CPU(0, "Matsushita - MN10501"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0, "Matsushita MN10501"),
+ FPU(-1, NULL)
+ }
+ },{
+ 6,
+ .cpu_info = {
+ CPU(0, "Philips Corporation - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+ },{
+ 7,
+ .cpu_info = {
+ CPU(0, "Harvest VLSI Design Center, Inc. - unknown"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+ },{
+ 8,
+ .cpu_info = {
+ CPU(0, "Systems and Processes Engineering Corporation (SPEC)"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(-1, NULL)
+ }
+ },{
+ 9,
+ .cpu_info = {
+ /* Gallium arsenide 200MHz, BOOOOGOOOOMIPS!!! */
+ CPU(0, "Fujitsu or Weitek Power-UP"),
+ CPU(1, "Fujitsu or Weitek Power-UP"),
+ CPU(2, "Fujitsu or Weitek Power-UP"),
+ CPU(3, "Fujitsu or Weitek Power-UP"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(3, "Fujitsu or Weitek on-chip FPU"),
+ FPU(-1, NULL)
+ }
+ },{
+ 0x17,
+ .cpu_info = {
- CPU(0x10, "TI UltraSparc I (SpitFire)"),
- CPU(0x11, "TI UltraSparc II (BlackBird)"),
- CPU(0x12, "TI UltraSparc IIi (Sabre)"),
- CPU(0x13, "TI UltraSparc IIe (Hummingbird)"),
++ CPUPMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"),
++ CPUPMU(0x11, "TI UltraSparc II (BlackBird)", "ultra12"),
++ CPUPMU(0x12, "TI UltraSparc IIi (Sabre)", "ultra12"),
++ CPUPMU(0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x10, "UltraSparc I integrated FPU"),
+ FPU(0x11, "UltraSparc II integrated FPU"),
+ FPU(0x12, "UltraSparc IIi integrated FPU"),
+ FPU(0x13, "UltraSparc IIe integrated FPU"),
+ FPU(-1, NULL)
+ }
+ },{
+ 0x22,
+ .cpu_info = {
- CPU(0x10, "TI UltraSparc I (SpitFire)"),
++ CPUPMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x10, "UltraSparc I integrated FPU"),
+ FPU(-1, NULL)
+ }
+ },{
+ 0x3e,
+ .cpu_info = {
- CPU(0x14, "TI UltraSparc III (Cheetah)"),
- CPU(0x15, "TI UltraSparc III+ (Cheetah+)"),
- CPU(0x16, "TI UltraSparc IIIi (Jalapeno)"),
- CPU(0x18, "TI UltraSparc IV (Jaguar)"),
- CPU(0x19, "TI UltraSparc IV+ (Panther)"),
- CPU(0x22, "TI UltraSparc IIIi+ (Serrano)"),
++ CPUPMU(0x14, "TI UltraSparc III (Cheetah)", "ultra3"),
++ CPUPMU(0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"),
++ CPUPMU(0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"),
++ CPUPMU(0x18, "TI UltraSparc IV (Jaguar)", "ultra4"),
++ CPUPMU(0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"),
++ CPUPMU(0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3+"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(0x14, "UltraSparc III integrated FPU"),
+ FPU(0x15, "UltraSparc III+ integrated FPU"),
+ FPU(0x16, "UltraSparc IIIi integrated FPU"),
+ FPU(0x18, "UltraSparc IV integrated FPU"),
+ FPU(0x19, "UltraSparc IV+ integrated FPU"),
+ FPU(0x22, "UltraSparc IIIi+ integrated FPU"),
+ FPU(-1, NULL)
+ }
+ }};
+
/* In order to get the fpu type correct, you need to take the IDPROM's
* machine type value into consideration too. I will fix this.
*/
- static struct cpu_fp_info linux_sparc_fpu[] = {
- { 0, 0, "Fujitsu MB86910 or Weitek WTL1164/5"},
- { 0, 1, "Fujitsu MB86911 or Weitek WTL1164/5 or LSI L64831"},
- { 0, 2, "LSI Logic L64802 or Texas Instruments ACT8847"},
- /* SparcStation SLC, SparcStation1 */
- { 0, 3, "Weitek WTL3170/2"},
- /* SPARCstation-5 */
- { 0, 4, "Lsi Logic/Meiko L64804 or compatible"},
- { 0, 5, "reserved"},
- { 0, 6, "reserved"},
- { 0, 7, "No FPU"},
- { 1, 0, "ROSS HyperSparc combined IU/FPU"},
- { 1, 1, "Lsi Logic L64814"},
- { 1, 2, "Texas Instruments TMS390-C602A"},
- { 1, 3, "Cypress CY7C602 FPU"},
- { 1, 4, "reserved"},
- { 1, 5, "reserved"},
- { 1, 6, "reserved"},
- { 1, 7, "No FPU"},
- { 2, 0, "BIT B5010 or B5110/20 or B5210"},
- { 2, 1, "reserved"},
- { 2, 2, "reserved"},
- { 2, 3, "reserved"},
- { 2, 4, "reserved"},
- { 2, 5, "reserved"},
- { 2, 6, "reserved"},
- { 2, 7, "No FPU"},
- /* SuperSparc 50 module */
- { 4, 0, "SuperSparc on-chip FPU"},
- /* SparcClassic */
- { 4, 4, "TI MicroSparc on chip FPU"},
- { 5, 0, "Matsushita MN10501"},
- { 5, 1, "reserved"},
- { 5, 2, "reserved"},
- { 5, 3, "reserved"},
- { 5, 4, "reserved"},
- { 5, 5, "reserved"},
- { 5, 6, "reserved"},
- { 5, 7, "No FPU"},
- { 9, 3, "Fujitsu or Weitek on-chip FPU"},
- };
- #define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu)
-
- static struct cpu_iu_info linux_sparc_chips[] = {
- /* Sun4/100, 4/200, SLC */
- { 0, 0, "Fujitsu MB86900/1A or LSI L64831 SparcKIT-40"},
- /* borned STP1012PGA */
- { 0, 4, "Fujitsu MB86904"},
- { 0, 5, "Fujitsu TurboSparc MB86907"},
- /* SparcStation2, SparcServer 490 & 690 */
- { 1, 0, "LSI Logic Corporation - L64811"},
- /* SparcStation2 */
- { 1, 1, "Cypress/ROSS CY7C601"},
- /* Embedded controller */
- { 1, 3, "Cypress/ROSS CY7C611"},
- /* Ross Technologies HyperSparc */
- { 1, 0xf, "ROSS HyperSparc RT620"},
- { 1, 0xe, "ROSS HyperSparc RT625 or RT626"},
- /* ECL Implementation, CRAY S-MP Supercomputer... AIEEE! */
- /* Someone please write the code to support this beast! ;) */
- { 2, 0, "Bipolar Integrated Technology - B5010"},
- { 3, 0, "LSI Logic Corporation - unknown-type"},
- { 4, 0, "Texas Instruments, Inc. - SuperSparc-(II)"},
- /* SparcClassic -- borned STP1010TAB-50*/
- { 4, 1, "Texas Instruments, Inc. - MicroSparc"},
- { 4, 2, "Texas Instruments, Inc. - MicroSparc II"},
- { 4, 3, "Texas Instruments, Inc. - SuperSparc 51"},
- { 4, 4, "Texas Instruments, Inc. - SuperSparc 61"},
- { 4, 5, "Texas Instruments, Inc. - unknown"},
- { 5, 0, "Matsushita - MN10501"},
- { 6, 0, "Philips Corporation - unknown"},
- { 7, 0, "Harvest VLSI Design Center, Inc. - unknown"},
- /* Gallium arsenide 200MHz, BOOOOGOOOOMIPS!!! */
- { 8, 0, "Systems and Processes Engineering Corporation (SPEC)"},
- { 9, 0, "Fujitsu or Weitek Power-UP"},
- { 9, 1, "Fujitsu or Weitek Power-UP"},
- { 9, 2, "Fujitsu or Weitek Power-UP"},
- { 9, 3, "Fujitsu or Weitek Power-UP"},
- { 0xa, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xb, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xc, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xd, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xe, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- { 0xf, 0, "UNKNOWN CPU-VENDOR/TYPE"},
- };
+ const char *sparc_cpu_type;
+ const char *sparc_fpu_type;
++const char *sparc_pmu_type;
+
+ unsigned int fsr_storage;
- #define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
+ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+ {
+ const struct manufacturer_info *manuf;
+ int i;
- char *sparc_cpu_type;
- char *sparc_fpu_type;
+ sparc_cpu_type = NULL;
+ sparc_fpu_type = NULL;
+ manuf = NULL;
- unsigned int fsr_storage;
+ for (i = 0; i < ARRAY_SIZE(manufacturer_info); i++)
+ {
+ if (psr_impl == manufacturer_info[i].psr_impl) {
+ manuf = &manufacturer_info[i];
+ break;
+ }
+ }
+ if (manuf != NULL)
+ {
+ const struct cpu_info *cpu;
+ const struct fpu_info *fpu;
+
+ cpu = &manuf->cpu_info[0];
+ while (cpu->psr_vers != -1)
+ {
+ if (cpu->psr_vers == psr_vers) {
+ sparc_cpu_type = cpu->name;
++ sparc_pmu_type = cpu->pmu_name;
+ sparc_fpu_type = "No FPU";
+ break;
+ }
+ cpu++;
+ }
+ fpu = &manuf->fpu_info[0];
+ while (fpu->fp_vers != -1)
+ {
+ if (fpu->fp_vers == fpu_vers) {
+ sparc_fpu_type = fpu->name;
+ break;
+ }
+ fpu++;
+ }
+ }
+ if (sparc_cpu_type == NULL)
+ {
+ printk(KERN_ERR "CPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+ psr_impl, psr_vers);
+ sparc_cpu_type = "Unknown CPU";
+ }
++ if (sparc_pmu_type == NULL)
++ {
++ printk(KERN_ERR "PMU: Unknown chip, impl[0x%x] vers[0x%x]\n",
++ psr_impl, psr_vers);
++ sparc_pmu_type = "Unknown PMU";
++ }
+ if (sparc_fpu_type == NULL)
+ {
+ printk(KERN_ERR "FPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+ psr_impl, fpu_vers);
+ sparc_fpu_type = "Unknown FPU";
+ }
+ }
- void __init cpu_probe(void)
+ #ifdef CONFIG_SPARC32
+ void __cpuinit cpu_probe(void)
{
int psr_impl, psr_vers, fpu_vers;
- int i, psr;
+ int psr;
- psr_impl = ((get_psr()>>28)&0xf);
- psr_vers = ((get_psr()>>24)&0xf);
+ psr_impl = ((get_psr() >> 28) & 0xf);
+ psr_vers = ((get_psr() >> 24) & 0xf);
psr = get_psr();
put_psr(psr | PSR_EF);
- fpu_vers = ((get_fsr()>>17)&0x7);
+ fpu_vers = ((get_fsr() >> 17) & 0x7);
put_psr(psr);
- for(i = 0; i<NSPARCCHIPS; i++) {
- if(linux_sparc_chips[i].psr_impl == psr_impl)
- if(linux_sparc_chips[i].psr_vers == psr_vers) {
- sparc_cpu_type = linux_sparc_chips[i].cpu_name;
- break;
- }
- }
+ set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+ }
+ #else
+ static void __init sun4v_cpu_probe(void)
+ {
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ sparc_cpu_type = "UltraSparc T1 (Niagara)";
+ sparc_fpu_type = "UltraSparc T1 integrated FPU";
++ sparc_pmu_type = "niagara";
+ break;
- if(i==NSPARCCHIPS)
- printk("DEBUG: psr.impl = 0x%x psr.vers = 0x%x\n", psr_impl,
- psr_vers);
+ case SUN4V_CHIP_NIAGARA2:
+ sparc_cpu_type = "UltraSparc T2 (Niagara2)";
+ sparc_fpu_type = "UltraSparc T2 integrated FPU";
++ sparc_pmu_type = "niagara2";
+ break;
- for(i = 0; i<NSPARCFPU; i++) {
- if(linux_sparc_fpu[i].psr_impl == psr_impl)
- if(linux_sparc_fpu[i].fp_vers == fpu_vers) {
- sparc_fpu_type = linux_sparc_fpu[i].fp_name;
- break;
- }
+ default:
+ printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+ prom_cpu_compatible);
+ sparc_cpu_type = "Unknown SUN4V CPU";
+ sparc_fpu_type = "Unknown SUN4V FPU";
++ sparc_pmu_type = "Unknown SUN4V PMU";
+ break;
}
+ }
+
+ static int __init cpu_type_probe(void)
+ {
+ if (tlb_type == hypervisor) {
+ sun4v_cpu_probe();
+ } else {
+ unsigned long ver;
+ int manuf, impl;
+
+ __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
- if(i == NSPARCFPU) {
- printk("DEBUG: psr.impl = 0x%x fsr.vers = 0x%x\n", psr_impl,
- fpu_vers);
- sparc_fpu_type = linux_sparc_fpu[31].fp_name;
+ manuf = ((ver >> 48) & 0xffff);
+ impl = ((ver >> 32) & 0xffff);
+ set_cpu_and_fpu(manuf, impl, impl);
}
+ return 0;
}
+
+ arch_initcall(cpu_type_probe);
+ #endif
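The sparc32 probe above decodes the implementation and version fields straight out of %psr. A standalone sketch of the bit extraction with an assumed sample value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t psr = 0x1e000000;        /* hypothetical %psr value */
	int psr_impl = (psr >> 28) & 0xf; /* 0x1: manufacturer table 1 */
	int psr_vers = (psr >> 24) & 0xf; /* 0xe: ROSS HyperSparc RT625/626 */

	printf("impl=%#x vers=%#x\n", psr_impl, psr_vers);
	return 0;
}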
--- /dev/null
+ /* pci_fire.c: Sun4u platform PCI-E controller support.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+ #include <linux/kernel.h>
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <linux/init.h>
+ #include <linux/msi.h>
+ #include <linux/irq.h>
+ #include <linux/of_device.h>
+
+ #include <asm/prom.h>
+ #include <asm/irq.h>
+ #include <asm/upa.h>
+
+ #include "pci_impl.h"
+
+ #define DRIVER_NAME "fire"
+ #define PFX DRIVER_NAME ": "
+
+ #define FIRE_IOMMU_CONTROL 0x40000UL
+ #define FIRE_IOMMU_TSBBASE 0x40008UL
+ #define FIRE_IOMMU_FLUSH 0x40100UL
+ #define FIRE_IOMMU_FLUSHINV 0x40108UL
+
+ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
+ {
+ struct iommu *iommu = pbm->iommu;
+ u32 vdma[2], dma_mask;
+ u64 control;
+ int tsbsize, err;
+
+ /* No virtual-dma property on these guys, use largest size. */
+ vdma[0] = 0xc0000000; /* base */
+ vdma[1] = 0x40000000; /* size */
+ dma_mask = 0xffffffff;
+ tsbsize = 128;
+
+ /* Register addresses. */
+ iommu->iommu_control = pbm->pbm_regs + FIRE_IOMMU_CONTROL;
+ iommu->iommu_tsbbase = pbm->pbm_regs + FIRE_IOMMU_TSBBASE;
+ iommu->iommu_flush = pbm->pbm_regs + FIRE_IOMMU_FLUSH;
+ iommu->iommu_flushinv = pbm->pbm_regs + FIRE_IOMMU_FLUSHINV;
+
+ /* We use the main control/status register of FIRE as the write
+ * completion register.
+ */
+ iommu->write_complete_reg = pbm->controller_regs + 0x410000UL;
+
+ /*
+ * Invalidate TLB Entries.
+ */
+ upa_writeq(~(u64)0, iommu->iommu_flushinv);
+
+ err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ pbm->numa_node);
+ if (err)
+ return err;
+
+ upa_writeq(__pa(iommu->page_table) | 0x7UL, iommu->iommu_tsbbase);
+
+ control = upa_readq(iommu->iommu_control);
+ control |= (0x00000400 /* TSB cache snoop enable */ |
+ 0x00000300 /* Cache mode */ |
+ 0x00000002 /* Bypass enable */ |
+ 0x00000001 /* Translation enable */);
+ upa_writeq(control, iommu->iommu_control);
+
+ return 0;
+ }
+
+ #ifdef CONFIG_PCI_MSI
+ struct pci_msiq_entry {
+ u64 word0;
+ #define MSIQ_WORD0_RESV 0x8000000000000000UL
+ #define MSIQ_WORD0_FMT_TYPE 0x7f00000000000000UL
+ #define MSIQ_WORD0_FMT_TYPE_SHIFT 56
+ #define MSIQ_WORD0_LEN 0x00ffc00000000000UL
+ #define MSIQ_WORD0_LEN_SHIFT 46
+ #define MSIQ_WORD0_ADDR0 0x00003fff00000000UL
+ #define MSIQ_WORD0_ADDR0_SHIFT 32
+ #define MSIQ_WORD0_RID 0x00000000ffff0000UL
+ #define MSIQ_WORD0_RID_SHIFT 16
+ #define MSIQ_WORD0_DATA0 0x000000000000ffffUL
+ #define MSIQ_WORD0_DATA0_SHIFT 0
+
+ #define MSIQ_TYPE_MSG 0x6
+ #define MSIQ_TYPE_MSI32 0xb
+ #define MSIQ_TYPE_MSI64 0xf
+
+ u64 word1;
+ #define MSIQ_WORD1_ADDR1 0xffffffffffff0000UL
+ #define MSIQ_WORD1_ADDR1_SHIFT 16
+ #define MSIQ_WORD1_DATA1 0x000000000000ffffUL
+ #define MSIQ_WORD1_DATA1_SHIFT 0
+
+ u64 resv[6];
+ };
+
+ /* All MSI registers are offset from pbm->pbm_regs */
+ #define EVENT_QUEUE_BASE_ADDR_REG 0x010000UL
+ #define EVENT_QUEUE_BASE_ADDR_ALL_ONES 0xfffc000000000000UL
+
+ #define EVENT_QUEUE_CONTROL_SET(EQ) (0x011000UL + (EQ) * 0x8UL)
+ #define EVENT_QUEUE_CONTROL_SET_OFLOW 0x0200000000000000UL
+ #define EVENT_QUEUE_CONTROL_SET_EN 0x0000100000000000UL
+
+ #define EVENT_QUEUE_CONTROL_CLEAR(EQ) (0x011200UL + (EQ) * 0x8UL)
+ #define EVENT_QUEUE_CONTROL_CLEAR_OF 0x0200000000000000UL
+ #define EVENT_QUEUE_CONTROL_CLEAR_E2I 0x0000800000000000UL
+ #define EVENT_QUEUE_CONTROL_CLEAR_DIS 0x0000100000000000UL
+
+ #define EVENT_QUEUE_STATE(EQ) (0x011400UL + (EQ) * 0x8UL)
+ #define EVENT_QUEUE_STATE_MASK 0x0000000000000007UL
+ #define EVENT_QUEUE_STATE_IDLE 0x0000000000000001UL
+ #define EVENT_QUEUE_STATE_ACTIVE 0x0000000000000002UL
+ #define EVENT_QUEUE_STATE_ERROR 0x0000000000000004UL
+
+ #define EVENT_QUEUE_TAIL(EQ) (0x011600UL + (EQ) * 0x8UL)
+ #define EVENT_QUEUE_TAIL_OFLOW 0x0200000000000000UL
+ #define EVENT_QUEUE_TAIL_VAL 0x000000000000007fUL
+
+ #define EVENT_QUEUE_HEAD(EQ) (0x011800UL + (EQ) * 0x8UL)
+ #define EVENT_QUEUE_HEAD_VAL 0x000000000000007fUL
+
+ #define MSI_MAP(MSI) (0x020000UL + (MSI) * 0x8UL)
+ #define MSI_MAP_VALID 0x8000000000000000UL
+ #define MSI_MAP_EQWR_N 0x4000000000000000UL
+ #define MSI_MAP_EQNUM 0x000000000000003fUL
+
+ #define MSI_CLEAR(MSI) (0x028000UL + (MSI) * 0x8UL)
+ #define MSI_CLEAR_EQWR_N 0x4000000000000000UL
+
+ #define IMONDO_DATA0 0x02C000UL
+ #define IMONDO_DATA0_DATA 0xffffffffffffffc0UL
+
+ #define IMONDO_DATA1 0x02C008UL
+ #define IMONDO_DATA1_DATA 0xffffffffffffffffUL
+
+ #define MSI_32BIT_ADDR 0x034000UL
+ #define MSI_32BIT_ADDR_VAL 0x00000000ffff0000UL
+
+ #define MSI_64BIT_ADDR 0x034008UL
+ #define MSI_64BIT_ADDR_VAL 0xffffffffffff0000UL
+
+ static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head)
+ {
+ *head = upa_readq(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+ return 0;
+ }
+
+ static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head, unsigned long *msi)
+ {
+ unsigned long type_fmt, type, msi_num;
+ struct pci_msiq_entry *base, *ep;
+
+ base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+ ep = &base[*head];
+
+ if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0)
+ return 0;
+
+ type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+ MSIQ_WORD0_FMT_TYPE_SHIFT);
+ type = (type_fmt >> 3);
+ if (unlikely(type != MSIQ_TYPE_MSI32 &&
+ type != MSIQ_TYPE_MSI64))
+ return -EINVAL;
+
+ *msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+ MSIQ_WORD0_DATA0_SHIFT);
+
+ upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi_num));
+
+ /* Clear the entry. */
+ ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+ /* Go to next entry in ring. */
+ (*head)++;
+ if (*head >= pbm->msiq_ent_count)
+ *head = 0;
+
+ return 1;
+ }
+
+ static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long head)
+ {
+ upa_writeq(head, pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+ return 0;
+ }
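The three helpers above implement a consumed-in-place ring: the shared sparc64 MSI code (an assumption about the caller, which is not shown here) reads the hardware head, drains entries until dequeue reports empty, then publishes the head back. A standalone model of that drain loop:

#include <stdio.h>

#define RING_SIZE 8

/* A nonzero word marks a valid entry, as MSIQ_WORD0_FMT_TYPE does above. */
static unsigned long ring[RING_SIZE] = { 11, 22, 33, 0 };

static int dequeue(unsigned long *head, unsigned long *msi)
{
	if (ring[*head] == 0)
		return 0;            /* empty slot: nothing pending */
	*msi = ring[*head];
	ring[*head] = 0;             /* clear the entry */
	if (++(*head) >= RING_SIZE)
		*head = 0;           /* wrap, like pci_fire_dequeue_msi() */
	return 1;
}

int main(void)
{
	unsigned long head = 0, msi;

	while (dequeue(&head, &msi))
		printf("msi %lu\n", msi);
	printf("head now %lu\n", head); /* would be written back to hw */
	return 0;
}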
+
+ static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long msi, int is_msi64)
+ {
+ u64 val;
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ val &= ~(MSI_MAP_EQNUM);
+ val |= msiqid;
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi));
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ val |= MSI_MAP_VALID;
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ return 0;
+ }
+
+ static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+ {
+ unsigned long msiqid;
+ u64 val;
+
+ val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+ msiqid = (val & MSI_MAP_EQNUM);
+
+ val &= ~MSI_MAP_VALID;
+
+ upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+ return 0;
+ }
+
+ static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm)
+ {
+ unsigned long pages, order, i;
+
+ order = get_order(512 * 1024);
+ pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+ if (pages == 0UL) {
+ printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+ order);
+ return -ENOMEM;
+ }
+ memset((char *)pages, 0, PAGE_SIZE << order);
+ pbm->msi_queues = (void *) pages;
+
+ upa_writeq((EVENT_QUEUE_BASE_ADDR_ALL_ONES |
+ __pa(pbm->msi_queues)),
+ pbm->pbm_regs + EVENT_QUEUE_BASE_ADDR_REG);
+
+ upa_writeq(pbm->portid << 6, pbm->pbm_regs + IMONDO_DATA0);
+ upa_writeq(0, pbm->pbm_regs + IMONDO_DATA1);
+
+ upa_writeq(pbm->msi32_start, pbm->pbm_regs + MSI_32BIT_ADDR);
+ upa_writeq(pbm->msi64_start, pbm->pbm_regs + MSI_64BIT_ADDR);
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_HEAD(i));
+ upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_TAIL(i));
+ }
+
+ return 0;
+ }
+
+ static void pci_fire_msiq_free(struct pci_pbm_info *pbm)
+ {
+ unsigned long pages, order;
+
+ order = get_order(512 * 1024);
+ pages = (unsigned long) pbm->msi_queues;
+
+ free_pages(pages, order);
+
+ pbm->msi_queues = NULL;
+ }
+
+ static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm,
+ unsigned long msiqid,
+ unsigned long devino)
+ {
+ unsigned long cregs = (unsigned long) pbm->pbm_regs;
+ unsigned long imap_reg, iclr_reg, int_ctrlr;
+ unsigned int virt_irq;
+ int fixup;
+ u64 val;
+
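+ /* Interrupt map and clear registers for this device INO. */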
+ imap_reg = cregs + (0x001000UL + (devino * 0x08UL));
+ iclr_reg = cregs + (0x001400UL + (devino * 0x08UL));
+
+ /* XXX iterate amongst the 4 IRQ controllers XXX */
+ int_ctrlr = (1UL << 6);
+
+ val = upa_readq(imap_reg);
+ val |= (1UL << 63) | int_ctrlr;
+ upa_writeq(val, imap_reg);
+
+ fixup = ((pbm->portid << 6) | devino) - int_ctrlr;
+
+ virt_irq = build_irq(fixup, iclr_reg, imap_reg);
+ if (!virt_irq)
+ return -ENOMEM;
+
+ upa_writeq(EVENT_QUEUE_CONTROL_SET_EN,
+ pbm->pbm_regs + EVENT_QUEUE_CONTROL_SET(msiqid));
+
+ return virt_irq;
+ }
+
+ static const struct sparc64_msiq_ops pci_fire_msiq_ops = {
+ .get_head = pci_fire_get_head,
+ .dequeue_msi = pci_fire_dequeue_msi,
+ .set_head = pci_fire_set_head,
+ .msi_setup = pci_fire_msi_setup,
+ .msi_teardown = pci_fire_msi_teardown,
+ .msiq_alloc = pci_fire_msiq_alloc,
+ .msiq_free = pci_fire_msiq_free,
+ .msiq_build_irq = pci_fire_msiq_build_irq,
+ };
+
+ static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+ {
+ sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops);
+ }
+ #else /* CONFIG_PCI_MSI */
+ static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+ {
+ }
+ #endif /* !(CONFIG_PCI_MSI) */
+
+ /* Based at pbm->controller_regs */
+ #define FIRE_PARITY_CONTROL 0x470010UL
+ #define FIRE_PARITY_ENAB 0x8000000000000000UL
+ #define FIRE_FATAL_RESET_CTL 0x471028UL
+ #define FIRE_FATAL_RESET_SPARE 0x0000000004000000UL
+ #define FIRE_FATAL_RESET_MB 0x0000000002000000UL
+ #define FIRE_FATAL_RESET_CPE 0x0000000000008000UL
+ #define FIRE_FATAL_RESET_APE 0x0000000000004000UL
+ #define FIRE_FATAL_RESET_PIO 0x0000000000000040UL
+ #define FIRE_FATAL_RESET_JW 0x0000000000000004UL
+ #define FIRE_FATAL_RESET_JI 0x0000000000000002UL
+ #define FIRE_FATAL_RESET_JR 0x0000000000000001UL
+ #define FIRE_CORE_INTR_ENABLE 0x471800UL
+
+ /* Based at pbm->pbm_regs */
+ #define FIRE_TLU_CTRL 0x80000UL
+ #define FIRE_TLU_CTRL_TIM 0x00000000da000000UL
+ #define FIRE_TLU_CTRL_QDET 0x0000000000000100UL
+ #define FIRE_TLU_CTRL_CFG 0x0000000000000001UL
+ #define FIRE_TLU_DEV_CTRL 0x90008UL
+ #define FIRE_TLU_LINK_CTRL 0x90020UL
+ #define FIRE_TLU_LINK_CTRL_CLK 0x0000000000000040UL
+ #define FIRE_LPU_RESET 0xe2008UL
+ #define FIRE_LPU_LLCFG 0xe2200UL
+ #define FIRE_LPU_LLCFG_VC0 0x0000000000000100UL
+ #define FIRE_LPU_FCTRL_UCTRL 0xe2240UL
+ #define FIRE_LPU_FCTRL_UCTRL_N 0x0000000000000002UL
+ #define FIRE_LPU_FCTRL_UCTRL_P 0x0000000000000001UL
+ #define FIRE_LPU_TXL_FIFOP 0xe2430UL
+ #define FIRE_LPU_LTSSM_CFG2 0xe2788UL
+ #define FIRE_LPU_LTSSM_CFG3 0xe2790UL
+ #define FIRE_LPU_LTSSM_CFG4 0xe2798UL
+ #define FIRE_LPU_LTSSM_CFG5 0xe27a0UL
+ #define FIRE_DMC_IENAB 0x31800UL
+ #define FIRE_DMC_DBG_SEL_A 0x53000UL
+ #define FIRE_DMC_DBG_SEL_B 0x53008UL
+ #define FIRE_PEC_IENAB 0x51800UL
+
+ static void pci_fire_hw_init(struct pci_pbm_info *pbm)
+ {
+ u64 val;
+
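+ /* Enable parity checking, select which error conditions force
+ * a fatal reset, and unmask all core error interrupts.
+ */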
+ upa_writeq(FIRE_PARITY_ENAB,
+ pbm->controller_regs + FIRE_PARITY_CONTROL);
+
+ upa_writeq((FIRE_FATAL_RESET_SPARE |
+ FIRE_FATAL_RESET_MB |
+ FIRE_FATAL_RESET_CPE |
+ FIRE_FATAL_RESET_APE |
+ FIRE_FATAL_RESET_PIO |
+ FIRE_FATAL_RESET_JW |
+ FIRE_FATAL_RESET_JI |
+ FIRE_FATAL_RESET_JR),
+ pbm->controller_regs + FIRE_FATAL_RESET_CTL);
+
+ upa_writeq(~(u64)0, pbm->controller_regs + FIRE_CORE_INTR_ENABLE);
+
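+ /* TLU and link control setup. */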
+ val = upa_readq(pbm->pbm_regs + FIRE_TLU_CTRL);
+ val |= (FIRE_TLU_CTRL_TIM |
+ FIRE_TLU_CTRL_QDET |
+ FIRE_TLU_CTRL_CFG);
+ upa_writeq(val, pbm->pbm_regs + FIRE_TLU_CTRL);
+ upa_writeq(0, pbm->pbm_regs + FIRE_TLU_DEV_CTRL);
+ upa_writeq(FIRE_TLU_LINK_CTRL_CLK,
+ pbm->pbm_regs + FIRE_TLU_LINK_CTRL);
+
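+ /* LPU setup: take the link out of reset, enable VC0, turn on
+ * flow control updates, and program the LTSSM timers.
+ */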
+ upa_writeq(0, pbm->pbm_regs + FIRE_LPU_RESET);
+ upa_writeq(FIRE_LPU_LLCFG_VC0, pbm->pbm_regs + FIRE_LPU_LLCFG);
+ upa_writeq((FIRE_LPU_FCTRL_UCTRL_N | FIRE_LPU_FCTRL_UCTRL_P),
+ pbm->pbm_regs + FIRE_LPU_FCTRL_UCTRL);
+ upa_writeq(((0xffff << 16) | (0x0000 << 0)),
+ pbm->pbm_regs + FIRE_LPU_TXL_FIFOP);
+ upa_writeq(3000000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG2);
+ upa_writeq(500000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG3);
+ upa_writeq((2 << 16) | (140 << 8),
+ pbm->pbm_regs + FIRE_LPU_LTSSM_CFG4);
+ upa_writeq(0, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG5);
+
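+ /* Unmask DMC and PEC interrupts; clear the DMC debug selects. */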
+ upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_DMC_IENAB);
+ upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_A);
+ upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_B);
+
+ upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_PEC_IENAB);
+ }
+
+ static int __init pci_fire_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, u32 portid)
+ {
+ const struct linux_prom64_registers *regs;
+ struct device_node *dp = op->node;
+ int err;
+
+ pbm->numa_node = -1;
+
+ pbm->pci_ops = &sun4u_pci_ops;
+ pbm->config_space_reg_bits = 12;
+
+ pbm->index = pci_num_pbms++;
+
+ pbm->portid = portid;
+ pbm->op = op;
+ pbm->name = dp->full_name;
+
+ regs = of_get_property(dp, "reg", NULL);
+ pbm->pbm_regs = regs[0].phys_addr;
+ pbm->controller_regs = regs[1].phys_addr - 0x410000UL;
+
+ printk("%s: SUN4U PCIE Bus Module\n", pbm->name);
+
+ pci_determine_mem_io_space(pbm);
+
+ pci_get_pbm_props(pbm);
+
+ pci_fire_hw_init(pbm);
+
+ err = pci_fire_pbm_iommu_init(pbm);
+ if (err)
+ return err;
+
+ pci_fire_msi_init(pbm);
+
+ pbm->pci_bus = pci_scan_one_pbm(pbm, &op->dev);
+
+ /* XXX register error interrupt handlers XXX */
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ return 0;
+ }
+
+ static int __devinit fire_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ u32 portid;
+ int err;
+
+ portid = of_getintprop_default(dp, "portid", 0xff);
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbminfo.\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+
+ err = pci_fire_pbm_init(pbm, op, portid);
+ if (err)
+ goto out_free_iommu;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+ out_free_iommu:
+ kfree(pbm->iommu);
+
+ out_free_controller:
+ kfree(pbm);
+
+ out_err:
+ return err;
+ }
+
+ static struct of_device_id __initdata fire_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pciex108e,80f0",
+ },
+ {},
+ };
+
+ static struct of_platform_driver fire_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = fire_match,
+ .probe = fire_probe,
+ };
+
+ static int __init fire_init(void)
+ {
+ return of_register_driver(&fire_driver, &of_bus_type);
+ }
+
+ subsys_initcall(fire_init);
--- /dev/null
+ /* pci_sabre.c: Sabre specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/pci.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/interrupt.h>
+ #include <linux/of_device.h>
+
+ #include <asm/apb.h>
+ #include <asm/iommu.h>
+ #include <asm/irq.h>
+ #include <asm/prom.h>
+ #include <asm/upa.h>
+
+ #include "pci_impl.h"
+ #include "iommu_common.h"
+ #include "psycho_common.h"
+
+ #define DRIVER_NAME "sabre"
+ #define PFX DRIVER_NAME ": "
+
+ /* SABRE PCI controller register offsets and definitions. */
+ #define SABRE_UE_AFSR 0x0030UL
+ #define SABRE_UEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+ #define SABRE_UEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+ #define SABRE_UEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+ #define SABRE_UEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+ #define SABRE_UEAFSR_SDTE 0x0200000000000000UL /* Secondary DMA Translation Error */
+ #define SABRE_UEAFSR_PDTE 0x0100000000000000UL /* Primary DMA Translation Error */
+ #define SABRE_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+ #define SABRE_UEAFSR_OFF 0x00000000e0000000UL /* Offset (AFAR bits [5:3]) */
+ #define SABRE_UEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+ #define SABRE_UECE_AFAR 0x0038UL
+ #define SABRE_CE_AFSR 0x0040UL
+ #define SABRE_CEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */
+ #define SABRE_CEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */
+ #define SABRE_CEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */
+ #define SABRE_CEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */
+ #define SABRE_CEAFSR_ESYND 0x00ff000000000000UL /* ECC Syndrome */
+ #define SABRE_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */
+ #define SABRE_CEAFSR_OFF 0x00000000e0000000UL /* Offset */
+ #define SABRE_CEAFSR_BLK 0x0000000000800000UL /* Was block operation */
+ #define SABRE_UECE_AFAR_ALIAS 0x0048UL /* Aliases to 0x0038 */
+ #define SABRE_IOMMU_CONTROL 0x0200UL
+ #define SABRE_IOMMUCTRL_ERRSTS 0x0000000006000000UL /* Error status bits */
+ #define SABRE_IOMMUCTRL_ERR 0x0000000001000000UL /* Error present in IOTLB */
+ #define SABRE_IOMMUCTRL_LCKEN 0x0000000000800000UL /* IOTLB lock enable */
+ #define SABRE_IOMMUCTRL_LCKPTR 0x0000000000780000UL /* IOTLB lock pointer */
+ #define SABRE_IOMMUCTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+ #define SABRE_IOMMU_TSBSZ_1K 0x0000000000000000
+ #define SABRE_IOMMU_TSBSZ_2K 0x0000000000010000
+ #define SABRE_IOMMU_TSBSZ_4K 0x0000000000020000
+ #define SABRE_IOMMU_TSBSZ_8K 0x0000000000030000
+ #define SABRE_IOMMU_TSBSZ_16K 0x0000000000040000
+ #define SABRE_IOMMU_TSBSZ_32K 0x0000000000050000
+ #define SABRE_IOMMU_TSBSZ_64K 0x0000000000060000
+ #define SABRE_IOMMU_TSBSZ_128K 0x0000000000070000
+ #define SABRE_IOMMUCTRL_TBWSZ 0x0000000000000004UL /* TSB assumed page size */
+ #define SABRE_IOMMUCTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+ #define SABRE_IOMMUCTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+ #define SABRE_IOMMU_TSBBASE 0x0208UL
+ #define SABRE_IOMMU_FLUSH 0x0210UL
+ #define SABRE_IMAP_A_SLOT0 0x0c00UL
+ #define SABRE_IMAP_B_SLOT0 0x0c20UL
+ #define SABRE_IMAP_SCSI 0x1000UL
+ #define SABRE_IMAP_ETH 0x1008UL
+ #define SABRE_IMAP_BPP 0x1010UL
+ #define SABRE_IMAP_AU_REC 0x1018UL
+ #define SABRE_IMAP_AU_PLAY 0x1020UL
+ #define SABRE_IMAP_PFAIL 0x1028UL
+ #define SABRE_IMAP_KMS 0x1030UL
+ #define SABRE_IMAP_FLPY 0x1038UL
+ #define SABRE_IMAP_SHW 0x1040UL
+ #define SABRE_IMAP_KBD 0x1048UL
+ #define SABRE_IMAP_MS 0x1050UL
+ #define SABRE_IMAP_SER 0x1058UL
+ #define SABRE_IMAP_UE 0x1070UL
+ #define SABRE_IMAP_CE 0x1078UL
+ #define SABRE_IMAP_PCIERR 0x1080UL
+ #define SABRE_IMAP_GFX 0x1098UL
+ #define SABRE_IMAP_EUPA 0x10a0UL
+ #define SABRE_ICLR_A_SLOT0 0x1400UL
+ #define SABRE_ICLR_B_SLOT0 0x1480UL
+ #define SABRE_ICLR_SCSI 0x1800UL
+ #define SABRE_ICLR_ETH 0x1808UL
+ #define SABRE_ICLR_BPP 0x1810UL
+ #define SABRE_ICLR_AU_REC 0x1818UL
+ #define SABRE_ICLR_AU_PLAY 0x1820UL
+ #define SABRE_ICLR_PFAIL 0x1828UL
+ #define SABRE_ICLR_KMS 0x1830UL
+ #define SABRE_ICLR_FLPY 0x1838UL
+ #define SABRE_ICLR_SHW 0x1840UL
+ #define SABRE_ICLR_KBD 0x1848UL
+ #define SABRE_ICLR_MS 0x1850UL
+ #define SABRE_ICLR_SER 0x1858UL
+ #define SABRE_ICLR_UE 0x1870UL
+ #define SABRE_ICLR_CE 0x1878UL
+ #define SABRE_ICLR_PCIERR 0x1880UL
+ #define SABRE_WRSYNC 0x1c20UL
+ #define SABRE_PCICTRL 0x2000UL
+ #define SABRE_PCICTRL_MRLEN 0x0000001000000000UL /* Use MemoryReadLine for block loads/stores */
+ #define SABRE_PCICTRL_SERR 0x0000000400000000UL /* Set when SERR asserted on PCI bus */
+ #define SABRE_PCICTRL_ARBPARK 0x0000000000200000UL /* Bus Parking 0=Ultra-IIi 1=prev-bus-owner */
+ #define SABRE_PCICTRL_CPUPRIO 0x0000000000100000UL /* Ultra-IIi granted every other bus cycle */
+ #define SABRE_PCICTRL_ARBPRIO 0x00000000000f0000UL /* Slot which is granted every other bus cycle */
+ #define SABRE_PCICTRL_ERREN 0x0000000000000100UL /* PCI Error Interrupt Enable */
+ #define SABRE_PCICTRL_RTRYWE 0x0000000000000080UL /* DMA Flow Control 0=wait-if-possible 1=retry */
+ #define SABRE_PCICTRL_AEN 0x000000000000000fUL /* Slot PCI arbitration enables */
+ #define SABRE_PIOAFSR 0x2010UL
+ #define SABRE_PIOAFSR_PMA 0x8000000000000000UL /* Primary Master Abort */
+ #define SABRE_PIOAFSR_PTA 0x4000000000000000UL /* Primary Target Abort */
+ #define SABRE_PIOAFSR_PRTRY 0x2000000000000000UL /* Primary Excessive Retries */
+ #define SABRE_PIOAFSR_PPERR 0x1000000000000000UL /* Primary Parity Error */
+ #define SABRE_PIOAFSR_SMA 0x0800000000000000UL /* Secondary Master Abort */
+ #define SABRE_PIOAFSR_STA 0x0400000000000000UL /* Secondary Target Abort */
+ #define SABRE_PIOAFSR_SRTRY 0x0200000000000000UL /* Secondary Excessive Retries */
+ #define SABRE_PIOAFSR_SPERR 0x0100000000000000UL /* Secondary Parity Error */
+ #define SABRE_PIOAFSR_BMSK 0x0000ffff00000000UL /* Byte Mask */
+ #define SABRE_PIOAFSR_BLK 0x0000000080000000UL /* Was Block Operation */
+ #define SABRE_PIOAFAR 0x2018UL
+ #define SABRE_PCIDIAG 0x2020UL
+ #define SABRE_PCIDIAG_DRTRY 0x0000000000000040UL /* Disable PIO Retry Limit */
+ #define SABRE_PCIDIAG_IPAPAR 0x0000000000000008UL /* Invert PIO Address Parity */
+ #define SABRE_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO Data Parity */
+ #define SABRE_PCIDIAG_IDDPAR 0x0000000000000002UL /* Invert DMA Data Parity */
+ #define SABRE_PCIDIAG_ELPBK 0x0000000000000001UL /* Loopback Enable - not supported */
+ #define SABRE_PCITASR 0x2028UL
+ #define SABRE_PCITASR_EF 0x0000000000000080UL /* Respond to 0xe0000000-0xffffffff */
+ #define SABRE_PCITASR_CD 0x0000000000000040UL /* Respond to 0xc0000000-0xdfffffff */
+ #define SABRE_PCITASR_AB 0x0000000000000020UL /* Respond to 0xa0000000-0xbfffffff */
+ #define SABRE_PCITASR_89 0x0000000000000010UL /* Respond to 0x80000000-0x9fffffff */
+ #define SABRE_PCITASR_67 0x0000000000000008UL /* Respond to 0x60000000-0x7fffffff */
+ #define SABRE_PCITASR_45 0x0000000000000004UL /* Respond to 0x40000000-0x5fffffff */
+ #define SABRE_PCITASR_23 0x0000000000000002UL /* Respond to 0x20000000-0x3fffffff */
+ #define SABRE_PCITASR_01 0x0000000000000001UL /* Respond to 0x00000000-0x1fffffff */
+ #define SABRE_PIOBUF_DIAG 0x5000UL
+ #define SABRE_DMABUF_DIAGLO 0x5100UL
+ #define SABRE_DMABUF_DIAGHI 0x51c0UL
+ #define SABRE_IMAP_GFX_ALIAS 0x6000UL /* Aliases to 0x1098 */
+ #define SABRE_IMAP_EUPA_ALIAS 0x8000UL /* Aliases to 0x10a0 */
+ #define SABRE_IOMMU_VADIAG 0xa400UL
+ #define SABRE_IOMMU_TCDIAG 0xa408UL
+ #define SABRE_IOMMU_TAG 0xa580UL
+ #define SABRE_IOMMUTAG_ERRSTS 0x0000000001800000UL /* Error status bits */
+ #define SABRE_IOMMUTAG_ERR 0x0000000000400000UL /* Error present */
+ #define SABRE_IOMMUTAG_WRITE 0x0000000000200000UL /* Page is writable */
+ #define SABRE_IOMMUTAG_STREAM 0x0000000000100000UL /* Streamable bit - unused */
+ #define SABRE_IOMMUTAG_SIZE 0x0000000000080000UL /* 0=8k 1=16k */
+ #define SABRE_IOMMUTAG_VPN 0x000000000007ffffUL /* Virtual Page Number [31:13] */
+ #define SABRE_IOMMU_DATA 0xa600UL
+ #define SABRE_IOMMUDATA_VALID 0x0000000040000000UL /* Valid */
+ #define SABRE_IOMMUDATA_USED 0x0000000020000000UL /* Used (for LRU algorithm) */
+ #define SABRE_IOMMUDATA_CACHE 0x0000000010000000UL /* Cacheable */
+ #define SABRE_IOMMUDATA_PPN 0x00000000001fffffUL /* Physical Page Number [33:13] */
+ #define SABRE_PCI_IRQSTATE 0xa800UL
+ #define SABRE_OBIO_IRQSTATE 0xa808UL
+ #define SABRE_FFBCFG 0xf000UL
+ #define SABRE_FFBCFG_SPRQS 0x000000000f000000 /* Slave P_RQST queue size */
+ #define SABRE_FFBCFG_ONEREAD 0x0000000000004000 /* Slave supports one outstanding read */
+ #define SABRE_MCCTRL0 0xf010UL
+ #define SABRE_MCCTRL0_RENAB 0x0000000080000000 /* Refresh Enable */
+ #define SABRE_MCCTRL0_EENAB 0x0000000010000000 /* Enable all ECC functions */
+ #define SABRE_MCCTRL0_11BIT 0x0000000000001000 /* Enable 11-bit column addressing */
+ #define SABRE_MCCTRL0_DPP 0x0000000000000f00 /* DIMM Pair Present Bits */
+ #define SABRE_MCCTRL0_RINTVL 0x00000000000000ff /* Refresh Interval */
+ #define SABRE_MCCTRL1 0xf018UL
+ #define SABRE_MCCTRL1_AMDC 0x0000000038000000 /* Advance Memdata Clock */
+ #define SABRE_MCCTRL1_ARDC 0x0000000007000000 /* Advance DRAM Read Data Clock */
+ #define SABRE_MCCTRL1_CSR 0x0000000000e00000 /* CAS to RAS delay for CBR refresh */
+ #define SABRE_MCCTRL1_CASRW 0x00000000001c0000 /* CAS length for read/write */
+ #define SABRE_MCCTRL1_RCD 0x0000000000038000 /* RAS to CAS delay */
+ #define SABRE_MCCTRL1_CP 0x0000000000007000 /* CAS Precharge */
+ #define SABRE_MCCTRL1_RP 0x0000000000000e00 /* RAS Precharge */
+ #define SABRE_MCCTRL1_RAS 0x00000000000001c0 /* Length of RAS for refresh */
+ #define SABRE_MCCTRL1_CASRW2 0x0000000000000038 /* Must be same as CASRW */
+ #define SABRE_MCCTRL1_RSC 0x0000000000000007 /* RAS after CAS hold time */
+ #define SABRE_RESETCTRL 0xf020UL
+
+ #define SABRE_CONFIGSPACE 0x001000000UL
+ #define SABRE_IOSPACE 0x002000000UL
+ #define SABRE_IOSPACE_SIZE 0x000ffffffUL
+ #define SABRE_MEMSPACE 0x100000000UL
+ #define SABRE_MEMSPACE_SIZE 0x07fffffffUL
+
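+ /* Non-zero when this controller is a Hummingbird (the UltraSPARC-IIe
+ * on-chip variant) rather than a plain Sabre.
+ */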
+ static int hummingbird_p;
+ static struct pci_bus *sabre_root_bus;
+
+ static irqreturn_t sabre_ue_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_UEAFSR_PDWR) ?
+ "DMA Write" : "???")),
+ ((error_bits & SABRE_UEAFSR_PDTE) ?
+ ":Translation Error" : ""));
+ printk("%s: bytemask[%04lx] dword_offset[%lx] was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_UEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_UEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_UEAFSR_BLK) ? 1 : 0));
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_UEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_UEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (afsr & SABRE_UEAFSR_SDTE) {
+ reported++;
+ printk("(Translation Error)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate IOMMU for error status. */
+ psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+
+ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t sabre_ce_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SABRE_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ ((error_bits & SABRE_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SABRE_CEAFSR_PDWR) ?
+ "DMA Write" : "???")));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+ "was_block(%d)\n",
+ pbm->name,
+ (afsr & SABRE_CEAFSR_ESYND) >> 48UL,
+ (afsr & SABRE_CEAFSR_BMSK) >> 32UL,
+ (afsr & SABRE_CEAFSR_OFF) >> 29UL,
+ ((afsr & SABRE_CEAFSR_BLK) ? 1 : 0));
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SABRE_CEAFSR_SDRD) {
+ reported++;
+ printk("(DMA Read)");
+ }
+ if (afsr & SABRE_CEAFSR_SDWR) {
+ reported++;
+ printk("(DMA Write)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+ }
+
+ static void sabre_register_error_handlers(struct pci_pbm_info *pbm)
+ {
+ struct device_node *dp = pbm->op->node;
+ struct of_device *op;
+ unsigned long base = pbm->controller_regs;
+ u64 tmp;
+ int err;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_SABRE)
+ dp = dp->parent;
+
+ op = of_find_device_by_node(dp);
+ if (!op)
+ return;
+
+ /* Sabre/Hummingbird IRQ property layout is:
+ * 0: PCI ERR
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: POWER FAIL
+ */
+ if (op->num_irqs < 4)
+ return;
+
+ /* We clear the error bits in the appropriate AFSR before
+ * registering the handler so that we don't get spurious
+ * interrupts.
+ */
+ upa_writeq((SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+ SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+ SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE),
+ base + SABRE_UE_AFSR);
+
+ err = request_irq(op->irqs[1], sabre_ue_intr, 0, "SABRE_UE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register UE, err=%d.\n",
+ pbm->name, err);
+
+ upa_writeq((SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+ SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR),
+ base + SABRE_CE_AFSR);
+
+ err = request_irq(op->irqs[2], sabre_ce_intr, 0, "SABRE_CE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register CE, err=%d.\n",
+ pbm->name, err);
+ err = request_irq(op->irqs[0], psycho_pcierr_intr, 0,
+ "SABRE_PCIERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Couldn't register PCIERR, err=%d.\n",
+ pbm->name, err);
+
+ tmp = upa_readq(base + SABRE_PCICTRL);
+ tmp |= SABRE_PCICTRL_ERREN;
+ upa_writeq(tmp, base + SABRE_PCICTRL);
+ }
+
+ static void apb_init(struct pci_bus *sabre_bus)
+ {
+ struct pci_dev *pdev;
+
+ list_for_each_entry(pdev, &sabre_bus->devices, bus_list) {
+ if (pdev->vendor == PCI_VENDOR_ID_SUN &&
+ pdev->device == PCI_DEVICE_ID_SUN_SIMBA) {
+ u16 word16;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &word16);
+ word16 |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+ PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY |
+ PCI_COMMAND_IO;
+ pci_write_config_word(pdev, PCI_COMMAND, word16);
+
+ /* Status register bits are "write 1 to clear". */
+ pci_write_config_word(pdev, PCI_STATUS, 0xffff);
+ pci_write_config_word(pdev, PCI_SEC_STATUS, 0xffff);
+
+ /* Use a primary/secondary latency timer value
+ * of 64.
+ */
+ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64);
+ pci_write_config_byte(pdev, PCI_SEC_LATENCY_TIMER, 64);
+
+ /* Enable reporting/forwarding of master aborts,
+ * parity, and SERR.
+ */
+ pci_write_config_byte(pdev, PCI_BRIDGE_CONTROL,
+ (PCI_BRIDGE_CTL_PARITY |
+ PCI_BRIDGE_CTL_SERR |
+ PCI_BRIDGE_CTL_MASTER_ABORT));
+ }
+ }
+ }
+
+ static void __init sabre_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+ {
+ static int once;
+
+ /* The APB bridge speaks to the Sabre host PCI bridge
+ * at 66MHz, but the front side of APB runs at 33MHz
+ * for both segments.
+ *
+ * Hummingbird systems do not use APB, so they run
+ * at 66MHz.
+ */
+ if (hummingbird_p)
+ pbm->is_66mhz_capable = 1;
+ else
+ pbm->is_66mhz_capable = 0;
+
+ /* This driver has not been verified to handle
+ * multiple SABREs yet, so trap this.
+ *
+ * Also note that the SABRE host bridge is hardwired
+ * to live at bus 0.
+ */
+ if (once != 0) {
+ printk(KERN_ERR PFX "Multiple controllers unsupported.\n");
+ return;
+ }
+ once++;
+
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+ if (!pbm->pci_bus)
+ return;
+
+ sabre_root_bus = pbm->pci_bus;
+
+ apb_init(pbm->pci_bus);
+
+ sabre_register_error_handlers(pbm);
+ }
+
+ static void __init sabre_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op)
+ {
+ psycho_pbm_init_common(pbm, op, "SABRE", PBM_CHIP_TYPE_SABRE);
+ pbm->pci_afsr = pbm->controller_regs + SABRE_PIOAFSR;
+ pbm->pci_afar = pbm->controller_regs + SABRE_PIOAFAR;
+ pbm->pci_csr = pbm->controller_regs + SABRE_PCICTRL;
+ sabre_scan_bus(pbm, &op->dev);
+ }
+
+ static int __devinit sabre_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ const struct linux_prom64_registers *pr_regs;
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ u32 upa_portid, dma_mask;
+ struct iommu *iommu;
+ int tsbsize, err;
+ const u32 *vdma;
+ u64 clear_irq;
+
+ hummingbird_p = (match->data != NULL);
+ if (!hummingbird_p) {
+ struct device_node *cpu_dp;
+
+ /* Of course, Sun has to encode things a thousand
+ * different ways, inconsistently.
+ */
+ for_each_node_by_type(cpu_dp, "cpu") {
+ if (!strcmp(cpu_dp->name, "SUNW,UltraSPARC-IIe"))
+ hummingbird_p = 1;
+ }
+ }
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+
+ upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+ pbm->portid = upa_portid;
+
+ /*
+ * Map in SABRE register set and report the presence of this SABRE.
+ */
+
+ pr_regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV;
+ if (!pr_regs) {
+ printk(KERN_ERR PFX "No reg property\n");
+ goto out_free_iommu;
+ }
+
+ /*
+ * First REG in property is base of entire SABRE register space.
+ */
+ pbm->controller_regs = pr_regs[0].phys_addr;
+
+ /* Clear interrupts */
+
+ /* PCI first */
+ for (clear_irq = SABRE_ICLR_A_SLOT0; clear_irq < SABRE_ICLR_B_SLOT0 + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Then OBIO */
+ for (clear_irq = SABRE_ICLR_SCSI; clear_irq < SABRE_ICLR_SCSI + 0x80; clear_irq += 8)
+ upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+ /* Error interrupts are enabled later after the bus scan. */
+ upa_writeq((SABRE_PCICTRL_MRLEN | SABRE_PCICTRL_SERR |
+ SABRE_PCICTRL_ARBPARK | SABRE_PCICTRL_AEN),
+ pbm->controller_regs + SABRE_PCICTRL);
+
+ /* Now map in PCI config space for entire SABRE. */
+ pbm->config_space = pbm->controller_regs + SABRE_CONFIGSPACE;
+
+ vdma = of_get_property(dp, "virtual-dma", NULL);
+ if (!vdma) {
+ printk(KERN_ERR PFX "No virtual-dma property\n");
+ goto out_free_iommu;
+ }
+
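+ /* The virtual-dma property gives (base, size); the size selects
+ * both the DMA mask and the IOMMU TSB size.
+ */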
+ dma_mask = vdma[0];
+ switch (vdma[1]) {
+ case 0x20000000:
+ dma_mask |= 0x1fffffff;
+ tsbsize = 64;
+ break;
+ case 0x40000000:
+ dma_mask |= 0x3fffffff;
+ tsbsize = 128;
+ break;
+ case 0x80000000:
+ dma_mask |= 0x7fffffff;
+ tsbsize = 128;
+ break;
+ default:
+ printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+ goto out_free_iommu;
+ }
+
+ err = psycho_iommu_init(pbm, tsbsize, vdma[0], dma_mask, SABRE_WRSYNC);
+ if (err)
+ goto out_free_iommu;
+
+ /*
+ * Look for APB underneath.
+ */
+ sabre_pbm_init(pbm, op);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+ out_free_iommu:
+ kfree(pbm->iommu);
+
+ out_free_controller:
+ kfree(pbm);
+
+ out_err:
+ return err;
+ }
+
+ static struct of_device_id __initdata sabre_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pci108e,a001",
+ .data = (void *) 1,
+ },
+ {
+ .name = "pci",
+ .compatible = "pci108e,a000",
+ },
+ {},
+ };
+
+ static struct of_platform_driver sabre_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = sabre_match,
+ .probe = sabre_probe,
+ };
+
+ static int __init sabre_init(void)
+ {
+ return of_register_driver(&sabre_driver, &of_bus_type);
+ }
+
+ subsys_initcall(sabre_init);
--- /dev/null
+ /* pci_schizo.c: SCHIZO/TOMATILLO specific PCI controller support.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/pci.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/interrupt.h>
+ #include <linux/of_device.h>
+
+ #include <asm/iommu.h>
+ #include <asm/irq.h>
+ #include <asm/pstate.h>
+ #include <asm/prom.h>
+ #include <asm/upa.h>
+
+ #include "pci_impl.h"
+ #include "iommu_common.h"
+
+ #define DRIVER_NAME "schizo"
+ #define PFX DRIVER_NAME ": "
+
+ /* This is a convention that at least Excalibur and Merlin
+ * follow. I suppose the SCHIZO used in Starcat and friends
+ * will do similar.
+ *
+ * The only way I could see this changing is if the newlink
+ * block requires more space in Schizo's address space than
+ * they predicted, thus requiring an address space reorg when
+ * the newer Schizo is taped out.
+ */
+
+ /* Streaming buffer control register. */
+ #define SCHIZO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */
+ #define SCHIZO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */
+ #define SCHIZO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */
+ #define SCHIZO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */
+ #define SCHIZO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */
+
+ /* IOMMU control register. */
+ #define SCHIZO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */
+ #define SCHIZO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */
+ #define SCHIZO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */
+ #define SCHIZO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */
+ #define SCHIZO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */
+ #define SCHIZO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */
+ #define SCHIZO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */
+ #define SCHIZO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+ #define SCHIZO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */
+ #define SCHIZO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+ #define SCHIZO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */
+ #define SCHIZO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */
+
+ /* Schizo config space address format is nearly identical to
+ * that of PSYCHO:
+ *
+ * 32 24 23 16 15 11 10 8 7 2 1 0
+ * ---------------------------------------------------------
+ * |0 0 0 0 0 0 0 0 0| bus | device | function | reg | 0 0 |
+ * ---------------------------------------------------------
+ */
+ #define SCHIZO_CONFIG_BASE(PBM) ((PBM)->config_space)
+ #define SCHIZO_CONFIG_ENCODE(BUS, DEVFN, REG) \
+ (((unsigned long)(BUS) << 16) | \
+ ((unsigned long)(DEVFN) << 8) | \
+ ((unsigned long)(REG)))
+
+ static void *schizo_pci_config_mkaddr(struct pci_pbm_info *pbm,
+ unsigned char bus,
+ unsigned int devfn,
+ int where)
+ {
+ if (!pbm)
+ return NULL;
+ bus -= pbm->pci_first_busno;
+ return (void *)
+ (SCHIZO_CONFIG_BASE(pbm) |
+ SCHIZO_CONFIG_ENCODE(bus, devfn, where));
+ }
+
+ /* SCHIZO error handling support. */
+ enum schizo_error_type {
+ UE_ERR, CE_ERR, PCI_ERR, SAFARI_ERR
+ };
+
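+ /* Snapshot buffers for streaming cache diagnostic state,
+ * protected by stc_buf_lock.
+ */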
+ static DEFINE_SPINLOCK(stc_buf_lock);
+ static unsigned long stc_error_buf[128];
+ static unsigned long stc_tag_buf[16];
+ static unsigned long stc_line_buf[16];
+
+ #define SCHIZO_UE_INO 0x30 /* Uncorrectable ECC error */
+ #define SCHIZO_CE_INO 0x31 /* Correctable ECC error */
+ #define SCHIZO_PCIERR_A_INO 0x32 /* PBM A PCI bus error */
+ #define SCHIZO_PCIERR_B_INO 0x33 /* PBM B PCI bus error */
+ #define SCHIZO_SERR_INO 0x34 /* Safari interface error */
+
+ #define SCHIZO_STC_ERR 0xb800UL /* --> 0xba00 */
+ #define SCHIZO_STC_TAG 0xba00UL /* --> 0xba80 */
+ #define SCHIZO_STC_LINE 0xbb00UL /* --> 0xbb80 */
+
+ #define SCHIZO_STCERR_WRITE 0x2UL
+ #define SCHIZO_STCERR_READ 0x1UL
+
+ #define SCHIZO_STCTAG_PPN 0x3fffffff00000000UL
+ #define SCHIZO_STCTAG_VPN 0x00000000ffffe000UL
+ #define SCHIZO_STCTAG_VALID 0x8000000000000000UL
+ #define SCHIZO_STCTAG_READ 0x4000000000000000UL
+
+ #define SCHIZO_STCLINE_LINDX 0x0000000007800000UL
+ #define SCHIZO_STCLINE_SPTR 0x000000000007e000UL
+ #define SCHIZO_STCLINE_LADDR 0x0000000000001fc0UL
+ #define SCHIZO_STCLINE_EPTR 0x000000000000003fUL
+ #define SCHIZO_STCLINE_VALID 0x0000000000600000UL
+ #define SCHIZO_STCLINE_FOFN 0x0000000000180000UL
+
+ static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+ {
+ struct strbuf *strbuf = &pbm->stc;
+ unsigned long regbase = pbm->pbm_regs;
+ unsigned long err_base, tag_base, line_base;
+ u64 control;
+ int i;
+
+ err_base = regbase + SCHIZO_STC_ERR;
+ tag_base = regbase + SCHIZO_STC_TAG;
+ line_base = regbase + SCHIZO_STC_LINE;
+
+ spin_lock(&stc_buf_lock);
+
+ /* This is __REALLY__ dangerous. When we put the
+ * streaming buffer into diagnostic mode to probe
+ * its tags and error status, we _must_ clear all
+ * of the line tag valid bits before re-enabling
+ * the streaming buffer. If any dirty data lives
+ * in the STC when we do this, we will end up
+ * invalidating it before it has a chance to reach
+ * main memory.
+ */
+ control = upa_readq(strbuf->strbuf_control);
+ upa_writeq((control | SCHIZO_STRBUF_CTRL_DENAB),
+ strbuf->strbuf_control);
+ for (i = 0; i < 128; i++) {
+ unsigned long val;
+
+ val = upa_readq(err_base + (i * 8UL));
+ upa_writeq(0UL, err_base + (i * 8UL));
+ stc_error_buf[i] = val;
+ }
+ for (i = 0; i < 16; i++) {
+ stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL));
+ stc_line_buf[i] = upa_readq(line_base + (i * 8UL));
+ upa_writeq(0UL, tag_base + (i * 8UL));
+ upa_writeq(0UL, line_base + (i * 8UL));
+ }
+
+ /* OK, state is logged, exit diagnostic mode. */
+ upa_writeq(control, strbuf->strbuf_control);
+
+ for (i = 0; i < 16; i++) {
+ int j, saw_error, first, last;
+
+ saw_error = 0;
+ first = i * 8;
+ last = first + 8;
+ for (j = first; j < last; j++) {
+ unsigned long errval = stc_error_buf[j];
+ if (errval != 0) {
+ saw_error++;
+ printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n",
+ pbm->name,
+ j,
+ (errval & SCHIZO_STCERR_WRITE) ? 1 : 0,
+ (errval & SCHIZO_STCERR_READ) ? 1 : 0);
+ }
+ }
+ if (saw_error != 0) {
+ unsigned long tagval = stc_tag_buf[i];
+ unsigned long lineval = stc_line_buf[i];
+ printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)R(%d)]\n",
+ pbm->name,
+ i,
+ ((tagval & SCHIZO_STCTAG_PPN) >> 19UL),
+ (tagval & SCHIZO_STCTAG_VPN),
+ ((tagval & SCHIZO_STCTAG_VALID) ? 1 : 0),
+ ((tagval & SCHIZO_STCTAG_READ) ? 1 : 0));
+
+ /* XXX Should spit out per-bank error information... -DaveM */
+ printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)"
+ "V(%d)FOFN(%d)]\n",
+ pbm->name,
+ i,
+ ((lineval & SCHIZO_STCLINE_LINDX) >> 23UL),
+ ((lineval & SCHIZO_STCLINE_SPTR) >> 13UL),
+ ((lineval & SCHIZO_STCLINE_LADDR) >> 6UL),
+ ((lineval & SCHIZO_STCLINE_EPTR) >> 0UL),
+ ((lineval & SCHIZO_STCLINE_VALID) ? 1 : 0),
+ ((lineval & SCHIZO_STCLINE_FOFN) ? 1 : 0));
+ }
+ }
+
+ spin_unlock(&stc_buf_lock);
+ }
+
+ /* IOMMU is per-PBM in Schizo, so interrogate both for anonymous
+ * controller level errors.
+ */
+
+ #define SCHIZO_IOMMU_TAG 0xa580UL
+ #define SCHIZO_IOMMU_DATA 0xa600UL
+
+ #define SCHIZO_IOMMU_TAG_CTXT 0x0000001ffe000000UL
+ #define SCHIZO_IOMMU_TAG_ERRSTS 0x0000000001800000UL
+ #define SCHIZO_IOMMU_TAG_ERR 0x0000000000400000UL
+ #define SCHIZO_IOMMU_TAG_WRITE 0x0000000000200000UL
+ #define SCHIZO_IOMMU_TAG_STREAM 0x0000000000100000UL
+ #define SCHIZO_IOMMU_TAG_SIZE 0x0000000000080000UL
+ #define SCHIZO_IOMMU_TAG_VPAGE 0x000000000007ffffUL
+
+ #define SCHIZO_IOMMU_DATA_VALID 0x0000000100000000UL
+ #define SCHIZO_IOMMU_DATA_CACHE 0x0000000040000000UL
+ #define SCHIZO_IOMMU_DATA_PPAGE 0x000000003fffffffUL
+
+ static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+ {
+ struct iommu *iommu = pbm->iommu;
+ unsigned long iommu_tag[16];
+ unsigned long iommu_data[16];
+ unsigned long flags;
+ u64 control;
+ int i;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ control = upa_readq(iommu->iommu_control);
+ if (control & SCHIZO_IOMMU_CTRL_XLTEERR) {
+ unsigned long base;
+ char *type_string;
+
+ /* Clear the error encountered bit. */
+ control &= ~SCHIZO_IOMMU_CTRL_XLTEERR;
+ upa_writeq(control, iommu->iommu_control);
+
+ switch ((control & SCHIZO_IOMMU_CTRL_XLTESTAT) >> 25UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ }
+ printk("%s: IOMMU Error, type[%s]\n",
+ pbm->name, type_string);
+
+ /* Put the IOMMU into diagnostic mode and probe
+ * its TLB for entries with error status.
+ *
+ * It is very possible for another DVMA to occur
+ * while we do this probe, and corrupt the system
+ * further. But we are so screwed at this point
+ * that we are likely to crash hard anyways, so
+ * get as much diagnostic information to the
+ * console as we can.
+ */
+ upa_writeq(control | SCHIZO_IOMMU_CTRL_DENAB,
+ iommu->iommu_control);
+
+ base = pbm->pbm_regs;
+
+ for (i = 0; i < 16; i++) {
+ iommu_tag[i] =
+ upa_readq(base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ iommu_data[i] =
+ upa_readq(base + SCHIZO_IOMMU_DATA + (i * 8UL));
+
+ /* Now clear out the entry. */
+ upa_writeq(0, base + SCHIZO_IOMMU_TAG + (i * 8UL));
+ upa_writeq(0, base + SCHIZO_IOMMU_DATA + (i * 8UL));
+ }
+
+ /* Leave diagnostic mode. */
+ upa_writeq(control, iommu->iommu_control);
+
+ for (i = 0; i < 16; i++) {
+ unsigned long tag, data;
+
+ tag = iommu_tag[i];
+ if (!(tag & SCHIZO_IOMMU_TAG_ERR))
+ continue;
+
+ data = iommu_data[i];
+ switch ((tag & SCHIZO_IOMMU_TAG_ERRSTS) >> 23UL) {
+ case 0:
+ type_string = "Protection Error";
+ break;
+ case 1:
+ type_string = "Invalid Error";
+ break;
+ case 2:
+ type_string = "TimeOut Error";
+ break;
+ case 3:
+ default:
+ type_string = "ECC Error";
+ break;
+ }
+ printk("%s: IOMMU TAG(%d)[error(%s) ctx(%x) wr(%d) str(%d) "
+ "sz(%dK) vpg(%08lx)]\n",
+ pbm->name, i, type_string,
+ (int)((tag & SCHIZO_IOMMU_TAG_CTXT) >> 25UL),
+ ((tag & SCHIZO_IOMMU_TAG_WRITE) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_STREAM) ? 1 : 0),
+ ((tag & SCHIZO_IOMMU_TAG_SIZE) ? 64 : 8),
+ (tag & SCHIZO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
+ printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n",
+ pbm->name, i,
+ ((data & SCHIZO_IOMMU_DATA_VALID) ? 1 : 0),
+ ((data & SCHIZO_IOMMU_DATA_CACHE) ? 1 : 0),
+ (data & SCHIZO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
+ }
+ }
+ if (pbm->stc.strbuf_enabled)
+ __schizo_check_stc_error_pbm(pbm, type);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ static void schizo_check_iommu_error(struct pci_pbm_info *pbm,
+ enum schizo_error_type type)
+ {
+ schizo_check_iommu_error_pbm(pbm, type);
+ if (pbm->sibling)
+ schizo_check_iommu_error_pbm(pbm->sibling, type);
+ }
+
+ /* Uncorrectable ECC error status gathering. */
+ #define SCHIZO_UE_AFSR 0x10030UL
+ #define SCHIZO_UE_AFAR 0x10038UL
+
+ #define SCHIZO_UEAFSR_PPIO 0x8000000000000000UL /* Safari */
+ #define SCHIZO_UEAFSR_PDRD 0x4000000000000000UL /* Safari/Tomatillo */
+ #define SCHIZO_UEAFSR_PDWR 0x2000000000000000UL /* Safari */
+ #define SCHIZO_UEAFSR_SPIO 0x1000000000000000UL /* Safari */
+ #define SCHIZO_UEAFSR_SDMA 0x0800000000000000UL /* Safari/Tomatillo */
+ #define SCHIZO_UEAFSR_ERRPNDG 0x0300000000000000UL /* Safari */
+ #define SCHIZO_UEAFSR_BMSK 0x000003ff00000000UL /* Safari */
+ #define SCHIZO_UEAFSR_QOFF 0x00000000c0000000UL /* Safari/Tomatillo */
+ #define SCHIZO_UEAFSR_AID 0x000000001f000000UL /* Safari/Tomatillo */
+ #define SCHIZO_UEAFSR_PARTIAL 0x0000000000800000UL /* Safari */
+ #define SCHIZO_UEAFSR_OWNEDIN 0x0000000000400000UL /* Safari */
+ #define SCHIZO_UEAFSR_MTAGSYND 0x00000000000f0000UL /* Safari */
+ #define SCHIZO_UEAFSR_MTAG 0x000000000000e000UL /* Safari */
+ #define SCHIZO_UEAFSR_ECCSYND 0x00000000000001ffUL /* Safari */
+
+ static irqreturn_t schizo_ue_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SCHIZO_UE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SCHIZO_UE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported, limit;
+
+ /* Latch uncorrectable error status. */
+ afar = upa_readq(afar_reg);
+
+ /* If either of the error pending bits are set in the
+ * AFSR, the error status is being actively updated by
+ * the hardware and we must re-read to get a clean value.
+ */
+ limit = 1000;
+ do {
+ afsr = upa_readq(afsr_reg);
+ } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit);
+
+ /* Clear the primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_UEAFSR_PPIO | SCHIZO_UEAFSR_PDRD | SCHIZO_UEAFSR_PDWR |
+ SCHIZO_UEAFSR_SPIO | SCHIZO_UEAFSR_SDMA);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Uncorrectable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_UEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & SCHIZO_UEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SCHIZO_UEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+ printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+ (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+ printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+ (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+ (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+ printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: UE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SCHIZO_UEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & SCHIZO_UEAFSR_SDMA) {
+ reported++;
+ printk("(DMA)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* Interrogate IOMMU for error status. */
+ schizo_check_iommu_error(pbm, UE_ERR);
+
+ return IRQ_HANDLED;
+ }
+
+ #define SCHIZO_CE_AFSR 0x10040UL
+ #define SCHIZO_CE_AFAR 0x10048UL
+
+ #define SCHIZO_CEAFSR_PPIO 0x8000000000000000UL
+ #define SCHIZO_CEAFSR_PDRD 0x4000000000000000UL
+ #define SCHIZO_CEAFSR_PDWR 0x2000000000000000UL
+ #define SCHIZO_CEAFSR_SPIO 0x1000000000000000UL
+ #define SCHIZO_CEAFSR_SDMA 0x0800000000000000UL
+ #define SCHIZO_CEAFSR_ERRPNDG 0x0300000000000000UL
+ #define SCHIZO_CEAFSR_BMSK 0x000003ff00000000UL
+ #define SCHIZO_CEAFSR_QOFF 0x00000000c0000000UL
+ #define SCHIZO_CEAFSR_AID 0x000000001f000000UL
+ #define SCHIZO_CEAFSR_PARTIAL 0x0000000000800000UL
+ #define SCHIZO_CEAFSR_OWNEDIN 0x0000000000400000UL
+ #define SCHIZO_CEAFSR_MTAGSYND 0x00000000000f0000UL
+ #define SCHIZO_CEAFSR_MTAG 0x000000000000e000UL
+ #define SCHIZO_CEAFSR_ECCSYND 0x00000000000001ffUL
+
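+ /* The CE AFSR layout mirrors the UE AFSR field for field. */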
+ static irqreturn_t schizo_ce_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg = pbm->controller_regs + SCHIZO_CE_AFSR;
+ unsigned long afar_reg = pbm->controller_regs + SCHIZO_CE_AFAR;
+ unsigned long afsr, afar, error_bits;
+ int reported, limit;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+
+ /* If either of the error pending bits are set in the
+ * AFSR, the error status is being actively updated by
+ * the hardware and we must re-read to get a clean value.
+ */
+ limit = 1000;
+ do {
+ afsr = upa_readq(afsr_reg);
+ } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_CEAFSR_PPIO | SCHIZO_CEAFSR_PDRD | SCHIZO_CEAFSR_PDWR |
+ SCHIZO_CEAFSR_SPIO | SCHIZO_CEAFSR_SDMA);
+ if (!error_bits)
+ return IRQ_NONE;
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: Correctable Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_CEAFSR_PPIO) ?
+ "PIO" :
+ ((error_bits & SCHIZO_CEAFSR_PDRD) ?
+ "DMA Read" :
+ ((error_bits & SCHIZO_CEAFSR_PDWR) ?
+ "DMA Write" : "???")))));
+
+ /* XXX Use syndrome and afar to print out module string just like
+ * XXX UDB CE trap handler does... -DaveM
+ */
+ printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+ (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+ printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n",
+ pbm->name,
+ (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+ (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+ (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+ (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+ printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+ printk("%s: CE Secondary errors [", pbm->name);
+ reported = 0;
+ if (afsr & SCHIZO_CEAFSR_SPIO) {
+ reported++;
+ printk("(PIO)");
+ }
+ if (afsr & SCHIZO_CEAFSR_SDMA) {
+ reported++;
+ printk("(DMA)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ return IRQ_HANDLED;
+ }
+
+ #define SCHIZO_PCI_AFSR 0x2010UL
+ #define SCHIZO_PCI_AFAR 0x2018UL
+
+ #define SCHIZO_PCIAFSR_PMA 0x8000000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_PTA 0x4000000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_PRTRY 0x2000000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_PPERR 0x1000000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_PTTO 0x0800000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_PUNUS 0x0400000000000000UL /* Schizo */
+ #define SCHIZO_PCIAFSR_SMA 0x0200000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_STA 0x0100000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_SRTRY 0x0080000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_SPERR 0x0040000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_STTO 0x0020000000000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_SUNUS 0x0010000000000000UL /* Schizo */
+ #define SCHIZO_PCIAFSR_BMSK 0x000003ff00000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_BLK 0x0000000080000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_CFG 0x0000000040000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_MEM 0x0000000020000000UL /* Schizo/Tomatillo */
+ #define SCHIZO_PCIAFSR_IO 0x0000000010000000UL /* Schizo/Tomatillo */
+
+ #define SCHIZO_PCI_CTRL (0x2000UL)
+ #define SCHIZO_PCICTRL_BUS_UNUS (1UL << 63UL) /* Safari */
+ #define SCHIZO_PCICTRL_DTO_INT (1UL << 61UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_ARB_PRIO (0x1ffUL << 52UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_ESLCK (1UL << 51UL) /* Safari */
+ #define SCHIZO_PCICTRL_ERRSLOT (7UL << 48UL) /* Safari */
+ #define SCHIZO_PCICTRL_TTO_ERR (1UL << 38UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_RTRY_ERR (1UL << 37UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_DTO_ERR (1UL << 36UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_SBH_ERR (1UL << 35UL) /* Safari */
+ #define SCHIZO_PCICTRL_SERR (1UL << 34UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PCISPD (1UL << 33UL) /* Safari */
+ #define SCHIZO_PCICTRL_MRM_PREF (1UL << 30UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_RDO_PREF (1UL << 29UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_RDL_PREF (1UL << 28UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_PTO (3UL << 24UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PTO_SHIFT 24UL
+ #define SCHIZO_PCICTRL_TRWSW (7UL << 21UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_F_TGT_A (1UL << 20UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_S_DTO_INT (1UL << 19UL) /* Safari */
+ #define SCHIZO_PCICTRL_F_TGT_RT (1UL << 19UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_SBH_INT (1UL << 18UL) /* Safari */
+ #define SCHIZO_PCICTRL_T_DTO_INT (1UL << 18UL) /* Tomatillo */
+ #define SCHIZO_PCICTRL_EEN (1UL << 17UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PARK (1UL << 16UL) /* Safari/Tomatillo */
+ #define SCHIZO_PCICTRL_PCIRST (1UL << 8UL) /* Safari */
+ #define SCHIZO_PCICTRL_ARB_S (0x3fUL << 0UL) /* Safari */
+ #define SCHIZO_PCICTRL_ARB_T (0xffUL << 0UL) /* Tomatillo */
+
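+ /* No primary or secondary AFSR error bits were set; look for other
+ * error causes in the PCI control register and in the bridge's
+ * PCI status register.
+ */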
+ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
+ {
+ unsigned long csr_reg, csr, csr_error_bits;
+ irqreturn_t ret = IRQ_NONE;
+ u16 stat;
+
+ csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL;
+ csr = upa_readq(csr_reg);
+ csr_error_bits =
+ csr & (SCHIZO_PCICTRL_BUS_UNUS |
+ SCHIZO_PCICTRL_TTO_ERR |
+ SCHIZO_PCICTRL_RTRY_ERR |
+ SCHIZO_PCICTRL_DTO_ERR |
+ SCHIZO_PCICTRL_SBH_ERR |
+ SCHIZO_PCICTRL_SERR);
+ if (csr_error_bits) {
+ /* Clear the errors. */
+ upa_writeq(csr, csr_reg);
+
+ /* Log 'em. */
+ if (csr_error_bits & SCHIZO_PCICTRL_BUS_UNUS)
+ printk("%s: Bus unusable error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_TTO_ERR)
+ printk("%s: PCI TRDY# timeout error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_RTRY_ERR)
+ printk("%s: PCI excessive retry error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_DTO_ERR)
+ printk("%s: PCI discard timeout error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_SBH_ERR)
+ printk("%s: PCI streaming byte hole error asserted.\n",
+ pbm->name);
+ if (csr_error_bits & SCHIZO_PCICTRL_SERR)
+ printk("%s: PCI SERR signal asserted.\n",
+ pbm->name);
+ ret = IRQ_HANDLED;
+ }
+ pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat);
+ if (stat & (PCI_STATUS_PARITY |
+ PCI_STATUS_SIG_TARGET_ABORT |
+ PCI_STATUS_REC_TARGET_ABORT |
+ PCI_STATUS_REC_MASTER_ABORT |
+ PCI_STATUS_SIG_SYSTEM_ERROR)) {
+ printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
+ pbm->name, stat);
+ pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff);
+ ret = IRQ_HANDLED;
+ }
+ return ret;
+ }
+
+ static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ unsigned long afsr_reg, afar_reg, base;
+ unsigned long afsr, afar, error_bits;
+ int reported;
+
+ base = pbm->pbm_regs;
+
+ afsr_reg = base + SCHIZO_PCI_AFSR;
+ afar_reg = base + SCHIZO_PCI_AFAR;
+
+ /* Latch error status. */
+ afar = upa_readq(afar_reg);
+ afsr = upa_readq(afsr_reg);
+
+ /* Clear primary/secondary error status bits. */
+ error_bits = afsr &
+ (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+ SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS);
+ if (!error_bits)
+ return schizo_pcierr_intr_other(pbm);
+ upa_writeq(error_bits, afsr_reg);
+
+ /* Log the error. */
+ printk("%s: PCI Error, primary error type[%s]\n",
+ pbm->name,
+ (((error_bits & SCHIZO_PCIAFSR_PMA) ?
+ "Master Abort" :
+ ((error_bits & SCHIZO_PCIAFSR_PTA) ?
+ "Target Abort" :
+ ((error_bits & SCHIZO_PCIAFSR_PRTRY) ?
+ "Excessive Retries" :
+ ((error_bits & SCHIZO_PCIAFSR_PPERR) ?
+ "Parity Error" :
+ ((error_bits & SCHIZO_PCIAFSR_PTTO) ?
+ "Timeout" :
+ ((error_bits & SCHIZO_PCIAFSR_PUNUS) ?
+ "Bus Unusable" : "???"))))))));
+ printk("%s: bytemask[%04lx] was_block(%d) space(%s)\n",
+ pbm->name,
+ (afsr & SCHIZO_PCIAFSR_BMSK) >> 32UL,
+ (afsr & SCHIZO_PCIAFSR_BLK) ? 1 : 0,
+ ((afsr & SCHIZO_PCIAFSR_CFG) ?
+ "Config" :
+ ((afsr & SCHIZO_PCIAFSR_MEM) ?
+ "Memory" :
+ ((afsr & SCHIZO_PCIAFSR_IO) ?
+ "I/O" : "???"))));
+ printk("%s: PCI AFAR [%016lx]\n",
+ pbm->name, afar);
+ printk("%s: PCI Secondary errors [",
+ pbm->name);
+ reported = 0;
+ if (afsr & SCHIZO_PCIAFSR_SMA) {
+ reported++;
+ printk("(Master Abort)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_STA) {
+ reported++;
+ printk("(Target Abort)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SRTRY) {
+ reported++;
+ printk("(Excessive Retries)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SPERR) {
+ reported++;
+ printk("(Parity Error)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_STTO) {
+ reported++;
+ printk("(Timeout)");
+ }
+ if (afsr & SCHIZO_PCIAFSR_SUNUS) {
+ reported++;
+ printk("(Bus Unusable)");
+ }
+ if (!reported)
+ printk("(none)");
+ printk("]\n");
+
+ /* For the error types shown, scan PBM's PCI bus for devices
+ * which have logged that error type.
+ */
+
+ /* If we see a Target Abort, this could be the result of an
+ * IOMMU translation error of some sort. It is extremely
+ * useful to log this information as usually it indicates
+ * a bug in the IOMMU support code or a PCI device driver.
+ */
+ if (error_bits & (SCHIZO_PCIAFSR_PTA | SCHIZO_PCIAFSR_STA)) {
+ schizo_check_iommu_error(pbm, PCI_ERR);
+ pci_scan_for_target_abort(pbm, pbm->pci_bus);
+ }
+ if (error_bits & (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_SMA))
+ pci_scan_for_master_abort(pbm, pbm->pci_bus);
+
+ /* For excessive retries, PSYCHO/PBM will abort the device
+ * and there is no way to specifically check for excessive
+ * retries in the config space status registers. So what
+ * we hope is that we'll catch it via the master/target
+ * abort events.
+ */
+
+ if (error_bits & (SCHIZO_PCIAFSR_PPERR | SCHIZO_PCIAFSR_SPERR))
+ pci_scan_for_parity_error(pbm, pbm->pci_bus);
+
+ return IRQ_HANDLED;
+ }
+
+ #define SCHIZO_SAFARI_ERRLOG 0x10018UL
+
+ #define SAFARI_ERRLOG_ERROUT 0x8000000000000000UL
+
+ #define BUS_ERROR_BADCMD 0x4000000000000000UL /* Schizo/Tomatillo */
+ #define BUS_ERROR_SSMDIS 0x2000000000000000UL /* Safari */
+ #define BUS_ERROR_BADMA 0x1000000000000000UL /* Safari */
+ #define BUS_ERROR_BADMB 0x0800000000000000UL /* Safari */
+ #define BUS_ERROR_BADMC 0x0400000000000000UL /* Safari */
+ #define BUS_ERROR_SNOOP_GR 0x0000000000200000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_PCI 0x0000000000100000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_RD 0x0000000000080000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_RDS 0x0000000000020000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_RDSA 0x0000000000010000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_OWN 0x0000000000008000UL /* Tomatillo */
+ #define BUS_ERROR_SNOOP_RDO 0x0000000000004000UL /* Tomatillo */
+ #define BUS_ERROR_CPU1PS 0x0000000000002000UL /* Safari */
+ #define BUS_ERROR_WDATA_PERR 0x0000000000002000UL /* Tomatillo */
+ #define BUS_ERROR_CPU1PB 0x0000000000001000UL /* Safari */
+ #define BUS_ERROR_CTRL_PERR 0x0000000000001000UL /* Tomatillo */
+ #define BUS_ERROR_CPU0PS 0x0000000000000800UL /* Safari */
+ #define BUS_ERROR_SNOOP_ERR 0x0000000000000800UL /* Tomatillo */
+ #define BUS_ERROR_CPU0PB 0x0000000000000400UL /* Safari */
+ #define BUS_ERROR_JBUS_ILL_B 0x0000000000000400UL /* Tomatillo */
+ #define BUS_ERROR_CIQTO 0x0000000000000200UL /* Safari */
+ #define BUS_ERROR_LPQTO 0x0000000000000100UL /* Safari */
+ #define BUS_ERROR_JBUS_ILL_C 0x0000000000000100UL /* Tomatillo */
+ #define BUS_ERROR_SFPQTO 0x0000000000000080UL /* Safari */
+ #define BUS_ERROR_UFPQTO 0x0000000000000040UL /* Safari */
+ #define BUS_ERROR_RD_PERR 0x0000000000000040UL /* Tomatillo */
+ #define BUS_ERROR_APERR 0x0000000000000020UL /* Safari/Tomatillo */
+ #define BUS_ERROR_UNMAP 0x0000000000000010UL /* Safari/Tomatillo */
+ #define BUS_ERROR_BUSERR 0x0000000000000004UL /* Safari/Tomatillo */
+ #define BUS_ERROR_TIMEOUT 0x0000000000000002UL /* Safari/Tomatillo */
+ #define BUS_ERROR_ILL 0x0000000000000001UL /* Safari */
+
+ /* We only expect UNMAP errors here. The rest of the Safari errors
+ * are marked fatal and thus cause a system reset.
+ */
+ static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id)
+ {
+ struct pci_pbm_info *pbm = dev_id;
+ u64 errlog;
+
+ errlog = upa_readq(pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+ upa_writeq(errlog & ~(SAFARI_ERRLOG_ERROUT),
+ pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+
+ if (!(errlog & BUS_ERROR_UNMAP)) {
+ printk("%s: Unexpected Safari/JBUS error interrupt, errlog[%016llx]\n",
+ pbm->name, errlog);
+
+ return IRQ_HANDLED;
+ }
+
+ printk("%s: Safari/JBUS interrupt, UNMAPPED error, interrogating IOMMUs.\n",
+ pbm->name);
+ schizo_check_iommu_error(pbm, SAFARI_ERR);
+
+ return IRQ_HANDLED;
+ }
+
+ /* Nearly identical to PSYCHO equivalents... */
+ #define SCHIZO_ECC_CTRL 0x10020UL
+ #define SCHIZO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */
+ #define SCHIZO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */
+ #define SCHIZO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE Interrupts */
+
+ #define SCHIZO_SAFARI_ERRCTRL 0x10008UL
+ #define SCHIZO_SAFERRCTRL_EN 0x8000000000000000UL
+ #define SCHIZO_SAFARI_IRQCTRL 0x10010UL
+ #define SCHIZO_SAFIRQCTRL_EN 0x8000000000000000UL
+
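+ /* Each PBM's ino_bitmap has one bit set per interrupt number (INO)
+ * routed to that PBM.
+ */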
+ static int pbm_routes_this_ino(struct pci_pbm_info *pbm, u32 ino)
+ {
+ ino &= IMAP_INO;
+
+ if (pbm->ino_bitmap & (1UL << ino))
+ return 1;
+
+ return 0;
+ }
+
+ /* How the Tomatillo IRQs are routed around is pure guesswork here.
+ *
+ * All the Tomatillo devices I see in prtconf dumps seem to have only
+ * a single PCI bus unit attached to them. It would seem they are
+ * separate devices because their PortID (ie. JBUS ID) values are all
+ * different and thus the registers are mapped to totally different
+ * locations.
+ *
+ * However, two Tomatillos look "similar" in that the only difference
+ * in their PortID is the lowest bit.
+ *
+ * So if we were to ignore this lower bit, it certainly looks like two
+ * PCI bus units of the same Tomatillo. I still have not really
+ * figured this out...
+ */
+ static void tomatillo_register_error_handlers(struct pci_pbm_info *pbm)
+ {
+ struct of_device *op = of_find_device_by_node(pbm->op->node);
+ u64 tmp, err_mask, err_no_mask;
+ int err;
+
+ /* Tomatillo IRQ property layout is:
+ * 0: PCIERR
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: SERR
+ * 4: POWER FAIL?
+ */
+
+ if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+ err = request_irq(op->irqs[1], schizo_ue_intr, 0,
+ "TOMATILLO_UE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register UE, "
+ "err=%d\n", pbm->name, err);
+ }
+ if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+ err = request_irq(op->irqs[2], schizo_ce_intr, 0,
+ "TOMATILLO_CE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register CE, "
+ "err=%d\n", pbm->name, err);
+ }
+ err = 0;
+ if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) {
+ err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ "TOMATILLO_PCIERR", pbm);
+ } else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+ err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ "TOMATILLO_PCIERR", pbm);
+ }
+ if (err)
+ printk(KERN_WARNING "%s: Could not register PCIERR, "
+ "err=%d\n", pbm->name, err);
+
+ if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+ err = request_irq(op->irqs[3], schizo_safarierr_intr, 0,
+ "TOMATILLO_SERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register SERR, "
+ "err=%d\n", pbm->name, err);
+ }
+
+ /* Enable UE and CE interrupts for controller. */
+ upa_writeq((SCHIZO_ECCCTRL_EE |
+ SCHIZO_ECCCTRL_UE |
+ SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+ /* Enable PCI Error interrupts and clear error
+ * bits.
+ */
+ err_mask = (SCHIZO_PCICTRL_BUS_UNUS |
+ SCHIZO_PCICTRL_TTO_ERR |
+ SCHIZO_PCICTRL_RTRY_ERR |
+ SCHIZO_PCICTRL_SERR |
+ SCHIZO_PCICTRL_EEN);
+
+ err_no_mask = SCHIZO_PCICTRL_DTO_ERR;
+
+ tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+ tmp |= err_mask;
+ tmp &= ~err_no_mask;
+ upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ err_mask = (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ SCHIZO_PCIAFSR_PTTO |
+ SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ SCHIZO_PCIAFSR_STTO);
+
+ upa_writeq(err_mask, pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+ err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SNOOP_GR |
+ BUS_ERROR_SNOOP_PCI | BUS_ERROR_SNOOP_RD |
+ BUS_ERROR_SNOOP_RDS | BUS_ERROR_SNOOP_RDSA |
+ BUS_ERROR_SNOOP_OWN | BUS_ERROR_SNOOP_RDO |
+ BUS_ERROR_WDATA_PERR | BUS_ERROR_CTRL_PERR |
+ BUS_ERROR_SNOOP_ERR | BUS_ERROR_JBUS_ILL_B |
+ BUS_ERROR_JBUS_ILL_C | BUS_ERROR_RD_PERR |
+ BUS_ERROR_APERR | BUS_ERROR_UNMAP |
+ BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT);
+
+ upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask),
+ pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+
+ upa_writeq((SCHIZO_SAFIRQCTRL_EN | (BUS_ERROR_UNMAP)),
+ pbm->controller_regs + SCHIZO_SAFARI_IRQCTRL);
+ }
+
+ static void schizo_register_error_handlers(struct pci_pbm_info *pbm)
+ {
+ struct of_device *op = of_find_device_by_node(pbm->op->node);
+ u64 tmp, err_mask, err_no_mask;
+ int err;
+
+ /* Schizo IRQ property layout is:
+ * 0: PCIERR
+ * 1: UE ERR
+ * 2: CE ERR
+ * 3: SERR
+ * 4: POWER FAIL?
+ */
+
+ if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+ err = request_irq(op->irqs[1], schizo_ue_intr, 0,
+ "SCHIZO_UE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register UE, "
+ "err=%d\n", pbm->name, err);
+ }
+ if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+ err = request_irq(op->irqs[2], schizo_ce_intr, 0,
+ "SCHIZO_CE", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register CE, "
+ "err=%d\n", pbm->name, err);
+ }
+ err = 0;
+ if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) {
+ err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ "SCHIZO_PCIERR", pbm);
+ } else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+ err = request_irq(op->irqs[0], schizo_pcierr_intr, 0,
+ "SCHIZO_PCIERR", pbm);
+ }
+ if (err)
+ printk(KERN_WARNING "%s: Could not register PCIERR, "
+ "err=%d\n", pbm->name, err);
+
+ if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+ err = request_irq(op->irqs[3], schizo_safarierr_intr, 0,
+ "SCHIZO_SERR", pbm);
+ if (err)
+ printk(KERN_WARNING "%s: Could not register SERR, "
+ "err=%d\n", pbm->name, err);
+ }
+
+ /* Enable UE and CE interrupts for controller. */
+ upa_writeq((SCHIZO_ECCCTRL_EE |
+ SCHIZO_ECCCTRL_UE |
+ SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+ err_mask = (SCHIZO_PCICTRL_BUS_UNUS |
+ SCHIZO_PCICTRL_ESLCK |
+ SCHIZO_PCICTRL_TTO_ERR |
+ SCHIZO_PCICTRL_RTRY_ERR |
+ SCHIZO_PCICTRL_SBH_ERR |
+ SCHIZO_PCICTRL_SERR |
+ SCHIZO_PCICTRL_EEN);
+
+ err_no_mask = (SCHIZO_PCICTRL_DTO_ERR |
+ SCHIZO_PCICTRL_SBH_INT);
+
+ /* Enable PCI Error interrupts and clear error
+ * bits for each PBM.
+ */
+ tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+ tmp |= err_mask;
+ tmp &= ~err_no_mask;
+ upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ upa_writeq((SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+ SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+ SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+ SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+ SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+ SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS),
+ pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+ /* Make all Safari error conditions fatal except unmapped
+ * errors, which we configure to generate interrupts instead.
+ */
+ err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SSMDIS |
+ BUS_ERROR_BADMA | BUS_ERROR_BADMB |
+ BUS_ERROR_BADMC |
+ BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+ BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB |
+ BUS_ERROR_CIQTO |
+ BUS_ERROR_LPQTO | BUS_ERROR_SFPQTO |
+ BUS_ERROR_UFPQTO | BUS_ERROR_APERR |
+ BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT |
+ BUS_ERROR_ILL);
+ #if 1
+ /* XXX Something wrong with some Excalibur systems
+ * XXX Sun is shipping. The behavior on a 2-cpu
+ * XXX machine is that both CPU1 parity error bits
+ * XXX are set and are immediately set again when
+ * XXX their error status bits are cleared. Just
+ * XXX ignore them for now. -DaveM
+ */
+ err_mask &= ~(BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+ BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB);
+ #endif
+
+ upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask),
+ pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+ }
+
+ static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+ {
+ u8 *addr;
+
+ /* Set cache-line size to 64 bytes; this is actually
+ * a nop, but I do it for completeness.
+ */
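+ /* Note: PCI_CACHE_LINE_SIZE is programmed in units of 32-bit
+ * words, hence the 64 / sizeof(u32) below.
+ */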
+ addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 0, PCI_CACHE_LINE_SIZE);
+ pci_config_write8(addr, 64 / sizeof(u32));
+
+ /* Set PBM latency timer to 64 PCI clocks. */
+ addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+ 0, PCI_LATENCY_TIMER);
+ pci_config_write8(addr, 64);
+ }
+
+ static void __devinit schizo_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+ {
+ pbm_config_busmastering(pbm);
+ pbm->is_66mhz_capable =
+ (of_find_property(pbm->op->node, "66mhz-capable", NULL)
+ != NULL);
+
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO)
+ tomatillo_register_error_handlers(pbm);
+ else
+ schizo_register_error_handlers(pbm);
+ }
+
+ #define SCHIZO_STRBUF_CONTROL (0x02800UL)
+ #define SCHIZO_STRBUF_FLUSH (0x02808UL)
+ #define SCHIZO_STRBUF_FSYNC (0x02810UL)
+ #define SCHIZO_STRBUF_CTXFLUSH (0x02818UL)
+ #define SCHIZO_STRBUF_CTXMATCH (0x10000UL)
+
+ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm)
+ {
+ unsigned long base = pbm->pbm_regs;
+ u64 control;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+ /* TOMATILLO lacks streaming cache. */
+ return;
+ }
+
+ /* SCHIZO has context flushing. */
+ pbm->stc.strbuf_control = base + SCHIZO_STRBUF_CONTROL;
+ pbm->stc.strbuf_pflush = base + SCHIZO_STRBUF_FLUSH;
+ pbm->stc.strbuf_fsync = base + SCHIZO_STRBUF_FSYNC;
+ pbm->stc.strbuf_ctxflush = base + SCHIZO_STRBUF_CTXFLUSH;
+ pbm->stc.strbuf_ctxmatch_base = base + SCHIZO_STRBUF_CTXMATCH;
+
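+ /* Round the flush flag address up to the next 64-byte boundary
+ * within the buffer.
+ */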
+ pbm->stc.strbuf_flushflag = (volatile unsigned long *)
+ ((((unsigned long)&pbm->stc.__flushflag_buf[0])
+ + 63UL)
+ & ~63UL);
+ pbm->stc.strbuf_flushflag_pa = (unsigned long)
+ __pa(pbm->stc.strbuf_flushflag);
+
+ /* Turn off LRU locking and diag mode, enable the
+ * streaming buffer and leave the rerun-disable
+ * setting however OBP set it.
+ */
+ control = upa_readq(pbm->stc.strbuf_control);
+ control &= ~(SCHIZO_STRBUF_CTRL_LPTR |
+ SCHIZO_STRBUF_CTRL_LENAB |
+ SCHIZO_STRBUF_CTRL_DENAB);
+ control |= SCHIZO_STRBUF_CTRL_ENAB;
+ upa_writeq(control, pbm->stc.strbuf_control);
+
+ pbm->stc.strbuf_enabled = 1;
+ }
+
+ #define SCHIZO_IOMMU_CONTROL (0x00200UL)
+ #define SCHIZO_IOMMU_TSBBASE (0x00208UL)
+ #define SCHIZO_IOMMU_FLUSH (0x00210UL)
+ #define SCHIZO_IOMMU_CTXFLUSH (0x00218UL)
+
+ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
+ {
+ static const u32 vdma_default[] = { 0xc0000000, 0x40000000 };
+ unsigned long i, tagbase, database;
+ struct iommu *iommu = pbm->iommu;
+ int tsbsize, err;
+ const u32 *vdma;
+ u32 dma_mask;
+ u64 control;
+
+ vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
+ if (!vdma)
+ vdma = vdma_default;
+
+ dma_mask = vdma[0];
+ switch (vdma[1]) {
+ case 0x20000000:
+ dma_mask |= 0x1fffffff;
+ tsbsize = 64;
+ break;
+
+ case 0x40000000:
+ dma_mask |= 0x3fffffff;
+ tsbsize = 128;
+ break;
+
+ case 0x80000000:
+ dma_mask |= 0x7fffffff;
+ tsbsize = 128;
+ break;
+
+ default:
+ printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+ return -EINVAL;
+ }
+
+ /* Register addresses, SCHIZO has iommu ctx flushing. */
+ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
+ iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE;
+ iommu->iommu_flush = pbm->pbm_regs + SCHIZO_IOMMU_FLUSH;
+ iommu->iommu_tags = iommu->iommu_flush + (0xa580UL - 0x0210UL);
+ iommu->iommu_ctxflush = pbm->pbm_regs + SCHIZO_IOMMU_CTXFLUSH;
+
+ /* We use the main control/status register of SCHIZO as the write
+ * completion register.
+ */
+ iommu->write_complete_reg = pbm->controller_regs + 0x10000UL;
+
+ /*
+ * Invalidate TLB Entries.
+ */
+ control = upa_readq(iommu->iommu_control);
+ control |= SCHIZO_IOMMU_CTRL_DENAB;
+ upa_writeq(control, iommu->iommu_control);
+
+ tagbase = SCHIZO_IOMMU_TAG, database = SCHIZO_IOMMU_DATA;
+
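+ /* With diag mode enabled above, zero all 16 IOMMU TLB
+ * tag/data pairs.
+ */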
+ for (i = 0; i < 16; i++) {
+ upa_writeq(0, pbm->pbm_regs + tagbase + (i * 8UL));
+ upa_writeq(0, pbm->pbm_regs + database + (i * 8UL));
+ }
+
+ /* Leave diag mode enabled for full-flushing done
+ * in pci_iommu.c
+ */
+ err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ pbm->numa_node);
+ if (err) {
+ printk(KERN_ERR PFX "iommu_table_init() fails with %d\n", err);
+ return err;
+ }
+
+ upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase);
+
+ control = upa_readq(iommu->iommu_control);
+ control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ);
+ switch (tsbsize) {
+ case 64:
+ control |= SCHIZO_IOMMU_TSBSZ_64K;
+ break;
+ case 128:
+ control |= SCHIZO_IOMMU_TSBSZ_128K;
+ break;
+ }
+
+ control |= SCHIZO_IOMMU_CTRL_ENAB;
+ upa_writeq(control, iommu->iommu_control);
+
+ return 0;
+ }
+
+ #define SCHIZO_PCI_IRQ_RETRY (0x1a00UL)
+ #define SCHIZO_IRQ_RETRY_INF 0xffUL
+
+ #define SCHIZO_PCI_DIAG (0x2020UL)
+ #define SCHIZO_PCIDIAG_D_BADECC (1UL << 10UL) /* Disable BAD ECC errors (Schizo) */
+ #define SCHIZO_PCIDIAG_D_BYPASS (1UL << 9UL) /* Disable MMU bypass mode (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_D_TTO (1UL << 8UL) /* Disable TTO errors (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_D_RTRYARB (1UL << 7UL) /* Disable retry arbitration (Schizo) */
+ #define SCHIZO_PCIDIAG_D_RETRY (1UL << 6UL) /* Disable retry limit (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_D_INTSYNC (1UL << 5UL) /* Disable interrupt/DMA synch (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_I_DMA_PARITY (1UL << 3UL) /* Invert DMA parity (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_I_PIOD_PARITY (1UL << 2UL) /* Invert PIO data parity (Schizo/Tomatillo) */
+ #define SCHIZO_PCIDIAG_I_PIOA_PARITY (1UL << 1UL) /* Invert PIO address parity (Schizo/Tomatillo) */
+
+ #define TOMATILLO_PCI_IOC_CSR (0x2248UL)
+ #define TOMATILLO_IOC_PART_WPENAB 0x0000000000080000UL
+ #define TOMATILLO_IOC_RDMULT_PENAB 0x0000000000040000UL
+ #define TOMATILLO_IOC_RDONE_PENAB 0x0000000000020000UL
+ #define TOMATILLO_IOC_RDLINE_PENAB 0x0000000000010000UL
+ #define TOMATILLO_IOC_RDMULT_PLEN 0x000000000000c000UL
+ #define TOMATILLO_IOC_RDMULT_PLEN_SHIFT 14UL
+ #define TOMATILLO_IOC_RDONE_PLEN 0x0000000000003000UL
+ #define TOMATILLO_IOC_RDONE_PLEN_SHIFT 12UL
+ #define TOMATILLO_IOC_RDLINE_PLEN 0x0000000000000c00UL
+ #define TOMATILLO_IOC_RDLINE_PLEN_SHIFT 10UL
+ #define TOMATILLO_IOC_PREF_OFF 0x00000000000003f8UL
+ #define TOMATILLO_IOC_PREF_OFF_SHIFT 3UL
+ #define TOMATILLO_IOC_RDMULT_CPENAB 0x0000000000000004UL
+ #define TOMATILLO_IOC_RDONE_CPENAB 0x0000000000000002UL
+ #define TOMATILLO_IOC_RDLINE_CPENAB 0x0000000000000001UL
+
+ #define TOMATILLO_PCI_IOC_TDIAG (0x2250UL)
+ #define TOMATILLO_PCI_IOC_DDIAG (0x2290UL)
+
+ static void schizo_pbm_hw_init(struct pci_pbm_info *pbm)
+ {
+ u64 tmp;
+
+ upa_writeq(5, pbm->pbm_regs + SCHIZO_PCI_IRQ_RETRY);
+
+ tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ /* Enable arbiter for all PCI slots. */
+ tmp |= 0xff;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO &&
+ pbm->chip_version >= 0x2)
+ tmp |= 0x3UL << SCHIZO_PCICTRL_PTO_SHIFT;
+
+ if (!of_find_property(pbm->op->node, "no-bus-parking", NULL))
+ tmp |= SCHIZO_PCICTRL_PARK;
+ else
+ tmp &= ~SCHIZO_PCICTRL_PARK;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO &&
+ pbm->chip_version <= 0x1)
+ tmp |= SCHIZO_PCICTRL_DTO_INT;
+ else
+ tmp &= ~SCHIZO_PCICTRL_DTO_INT;
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO)
+ tmp |= (SCHIZO_PCICTRL_MRM_PREF |
+ SCHIZO_PCICTRL_RDO_PREF |
+ SCHIZO_PCICTRL_RDL_PREF);
+
+ upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+ tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_DIAG);
+ tmp &= ~(SCHIZO_PCIDIAG_D_RTRYARB |
+ SCHIZO_PCIDIAG_D_RETRY |
+ SCHIZO_PCIDIAG_D_INTSYNC);
+ upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_DIAG);
+
+ if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+ /* Clear prefetch lengths to work around a bug in
+ * Jalapeno...
+ */
+ tmp = (TOMATILLO_IOC_PART_WPENAB |
+ (1 << TOMATILLO_IOC_PREF_OFF_SHIFT) |
+ TOMATILLO_IOC_RDMULT_CPENAB |
+ TOMATILLO_IOC_RDONE_CPENAB |
+ TOMATILLO_IOC_RDLINE_CPENAB);
+
+ upa_writeq(tmp, pbm->pbm_regs + TOMATILLO_PCI_IOC_CSR);
+ }
+ }
+
+ static int __devinit schizo_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, u32 portid,
+ int chip_type)
+ {
+ const struct linux_prom64_registers *regs;
+ struct device_node *dp = op->node;
+ const char *chipset_name;
+ int is_pbm_a, err;
+
+ switch (chip_type) {
+ case PBM_CHIP_TYPE_TOMATILLO:
+ chipset_name = "TOMATILLO";
+ break;
+
+ case PBM_CHIP_TYPE_SCHIZO_PLUS:
+ chipset_name = "SCHIZO+";
+ break;
+
+ case PBM_CHIP_TYPE_SCHIZO:
+ default:
+ chipset_name = "SCHIZO";
+ break;
+ }
+
+ /* For SCHIZO, three OBP regs:
+ * 1) PBM controller regs
+ * 2) Schizo front-end controller regs (same for both PBMs)
+ * 3) PBM PCI config space
+ *
+ * For TOMATILLO, four OBP regs:
+ * 1) PBM controller regs
+ * 2) Tomatillo front-end controller regs
+ * 3) PBM PCI config space
+ * 4) Ichip regs
+ */
+ regs = of_get_property(dp, "reg", NULL);
+
+ is_pbm_a = ((regs[0].phys_addr & 0x00700000) == 0x00600000);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ pbm->numa_node = -1;
+
+ pbm->pci_ops = &sun4u_pci_ops;
+ pbm->config_space_reg_bits = 8;
+
+ pbm->index = pci_num_pbms++;
+
+ pbm->portid = portid;
+ pbm->op = op;
+
+ pbm->chip_type = chip_type;
+ pbm->chip_version = of_getintprop_default(dp, "version#", 0);
+ pbm->chip_revision = of_getintprop_default(dp, "module-version#", 0);
+
+ pbm->pbm_regs = regs[0].phys_addr;
+ pbm->controller_regs = regs[1].phys_addr - 0x10000UL;
+
+ if (chip_type == PBM_CHIP_TYPE_TOMATILLO)
+ pbm->sync_reg = regs[3].phys_addr + 0x1a18UL;
+
+ pbm->name = dp->full_name;
+
+ printk("%s: %s PCI Bus Module ver[%x:%x]\n",
+ pbm->name, chipset_name,
+ pbm->chip_version, pbm->chip_revision);
+
+ schizo_pbm_hw_init(pbm);
+
+ pci_determine_mem_io_space(pbm);
+
+ pci_get_pbm_props(pbm);
+
+ err = schizo_pbm_iommu_init(pbm);
+ if (err)
+ return err;
+
+ schizo_pbm_strbuf_init(pbm);
+
+ schizo_scan_bus(pbm, &op->dev);
+
+ return 0;
+ }
+
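+ /* Sibling Tomatillo PBMs have port IDs that differ only in the
+ * lowest bit, as noted above.
+ */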
+ static inline int portid_compare(u32 x, u32 y, int chip_type)
+ {
+ if (chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+ if (x == (y ^ 1))
+ return 1;
+ return 0;
+ }
+ return (x == y);
+ }
+
+ static struct pci_pbm_info * __devinit schizo_find_sibling(u32 portid,
+ int chip_type)
+ {
+ struct pci_pbm_info *pbm;
+
+ for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
+ if (portid_compare(pbm->portid, portid, chip_type))
+ return pbm;
+ }
+ return NULL;
+ }
+
+ static int __devinit __schizo_init(struct of_device *op, unsigned long chip_type)
+ {
+ struct device_node *dp = op->node;
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ u32 portid;
+ int err;
+
+ portid = of_getintprop_default(dp, "portid", 0xff);
+
+ err = -ENOMEM;
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+ goto out_err;
+ }
+
+ pbm->sibling = schizo_find_sibling(portid, chip_type);
+
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Cannot allocate PBM A iommu.\n");
+ goto out_free_pbm;
+ }
+
+ pbm->iommu = iommu;
+
+ if (schizo_pbm_init(pbm, op, portid, chip_type))
+ goto out_free_iommu;
+
+ if (pbm->sibling)
+ pbm->sibling->sibling = pbm;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+ out_free_iommu:
+ kfree(pbm->iommu);
+
+ out_free_pbm:
+ kfree(pbm);
+
+ out_err:
+ return err;
+ }
+
+ static int __devinit schizo_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ return __schizo_init(op, (unsigned long) match->data);
+ }
+
+ /* The ordering of this table is very important. Some Tomatillo
+ * nodes announce that they are compatible with both pci108e,a801
+ * and pci108e,8001. So list the chips in reverse chronological
+ * order.
+ */
+ static struct of_device_id __initdata schizo_match[] = {
+ {
+ .name = "pci",
+ .compatible = "pci108e,a801",
+ .data = (void *) PBM_CHIP_TYPE_TOMATILLO,
+ },
+ {
+ .name = "pci",
+ .compatible = "pci108e,8002",
+ .data = (void *) PBM_CHIP_TYPE_SCHIZO_PLUS,
+ },
+ {
+ .name = "pci",
+ .compatible = "pci108e,8001",
+ .data = (void *) PBM_CHIP_TYPE_SCHIZO,
+ },
+ {},
+ };
+
+ static struct of_platform_driver schizo_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = schizo_match,
+ .probe = schizo_probe,
+ };
+
+ static int __init schizo_init(void)
+ {
+ return of_register_driver(&schizo_driver, &of_bus_type);
+ }
+
+ subsys_initcall(schizo_init);
--- /dev/null
+ /* pci_sun4v.c: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/pci.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/interrupt.h>
+ #include <linux/percpu.h>
+ #include <linux/irq.h>
+ #include <linux/msi.h>
+ #include <linux/log2.h>
+ #include <linux/of_device.h>
+
+ #include <asm/iommu.h>
+ #include <asm/irq.h>
+ #include <asm/hypervisor.h>
+ #include <asm/prom.h>
+
+ #include "pci_impl.h"
+ #include "iommu_common.h"
+
+ #include "pci_sun4v.h"
+
+ #define DRIVER_NAME "pci_sun4v"
+ #define PFX DRIVER_NAME ": "
+
+ static unsigned long vpci_major = 1;
+ static unsigned long vpci_minor = 1;
+
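+ /* One page worth of 64-bit physical page addresses per batch. */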
+ #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
+
+ struct iommu_batch {
+ struct device *dev; /* Device the mapping is for. */
+ unsigned long prot; /* IOMMU page protections */
+ unsigned long entry; /* Index into IOTSB. */
+ u64 *pglist; /* List of physical pages */
+ unsigned long npages; /* Number of pages in list. */
+ };
+
+ static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
+ static int iommu_batch_initialized;
+
+ /* Interrupts must be disabled. */
+ static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
+ {
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ p->dev = dev;
+ p->prot = prot;
+ p->entry = entry;
+ p->npages = 0;
+ }
+
+ /* Interrupts must be disabled. */
+ static long iommu_batch_flush(struct iommu_batch *p)
+ {
+ struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+ unsigned long devhandle = pbm->devhandle;
+ unsigned long prot = p->prot;
+ unsigned long entry = p->entry;
+ u64 *pglist = p->pglist;
+ unsigned long npages = p->npages;
+
+ while (npages != 0) {
+ long num;
+
+ num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist));
+ if (unlikely(num < 0)) {
+ if (printk_ratelimit())
+ printk("iommu_batch_flush: IOMMU map of "
+ "[%08lx:%08llx:%lx:%lx:%lx] failed with "
+ "status %ld\n",
+ devhandle, HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist), num);
+ return -1;
+ }
+
+ entry += num;
+ npages -= num;
+ pglist += num;
+ }
+
+ p->entry = entry;
+ p->npages = 0;
+
+ return 0;
+ }
+
+ static inline void iommu_batch_new_entry(unsigned long entry)
+ {
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ if (p->entry + p->npages == entry)
+ return;
+ if (p->entry != ~0UL)
+ iommu_batch_flush(p);
+ p->entry = entry;
+ }
+
+ /* Interrupts must be disabled. */
+ static inline long iommu_batch_add(u64 phys_page)
+ {
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ BUG_ON(p->npages >= PGLIST_NENTS);
+
+ p->pglist[p->npages++] = phys_page;
+ if (p->npages == PGLIST_NENTS)
+ return iommu_batch_flush(p);
+
+ return 0;
+ }
+
+ /* Interrupts must be disabled. */
+ static inline long iommu_batch_end(void)
+ {
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ BUG_ON(p->npages >= PGLIST_NENTS);
+
+ return iommu_batch_flush(p);
+ }
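+
+ /* Usage pattern: iommu_batch_start(), then iommu_batch_add() once
+ * per page, then iommu_batch_end(), all with interrupts disabled.
+ * The page list is flushed to the hypervisor whenever it fills
+ * (PGLIST_NENTS entries) and finally at iommu_batch_end().
+ */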
+
+ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp)
+ {
+ unsigned long flags, order, first_page, npages, n;
+ struct iommu *iommu;
+ struct page *page;
+ void *ret;
+ long entry;
+ int nid;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (unlikely(order >= MAX_ORDER))
+ return NULL;
+
+ npages = size >> IO_PAGE_SHIFT;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(entry == DMA_ERROR_CODE))
+ goto range_alloc_fail;
+
+ *dma_addrp = (iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ first_page = __pa(first_page);
+
+ local_irq_save(flags);
+
+ iommu_batch_start(dev,
+ (HV_PCI_MAP_ATTR_READ |
+ HV_PCI_MAP_ATTR_WRITE),
+ entry);
+
+ for (n = 0; n < npages; n++) {
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+
+ if (unlikely(iommu_batch_end() < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+ iommu_map_fail:
+ /* Interrupts are disabled. */
+ spin_lock(&iommu->lock);
+ iommu_range_free(iommu, *dma_addrp, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ range_alloc_fail:
+ free_pages(first_page, order);
+ return NULL;
+ }
+
+ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
+ dma_addr_t dvma)
+ {
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ unsigned long flags, order, npages, entry;
+ u32 devhandle;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+ pbm = dev->archdata.host_controller;
+ devhandle = pbm->devhandle;
+ entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ iommu_range_free(iommu, dvma, npages);
+
+ do {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ } while (npages != 0);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+ }
+
+ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
+ enum dma_data_direction direction)
+ {
+ struct iommu *iommu;
+ unsigned long flags, npages, oaddr;
+ unsigned long i, base_paddr;
+ u32 bus_addr, ret;
+ unsigned long prot;
+ long entry;
+
+ iommu = dev->archdata.iommu;
+
+ if (unlikely(direction == DMA_NONE))
+ goto bad;
+
+ oaddr = (unsigned long)ptr;
+ npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(entry == DMA_ERROR_CODE))
+ goto bad;
+
+ bus_addr = (iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT));
+ ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+ base_paddr = __pa(oaddr & IO_PAGE_MASK);
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != DMA_TO_DEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
+
+ local_irq_save(flags);
+
+ iommu_batch_start(dev, prot, entry);
+
+ for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
+ long err = iommu_batch_add(base_paddr);
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+ if (unlikely(iommu_batch_end() < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+ bad:
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+
+ iommu_map_fail:
+ /* Interrupts are disabled. */
+ spin_lock(&iommu->lock);
+ iommu_range_free(iommu, bus_addr, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return DMA_ERROR_CODE;
+ }
+
+ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
+ size_t sz, enum dma_data_direction direction)
+ {
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ unsigned long flags, npages;
+ long entry;
+ u32 devhandle;
+
+ if (unlikely(direction == DMA_NONE)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return;
+ }
+
+ iommu = dev->archdata.iommu;
+ pbm = dev->archdata.host_controller;
+ devhandle = pbm->devhandle;
+
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ iommu_range_free(iommu, bus_addr, npages);
+
+ entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+ do {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ } while (npages != 0);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+ {
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ unsigned long seg_boundary_size;
+ int outcount, incount, i;
+ struct iommu *iommu;
+ unsigned long base_shift;
+ long err;
+
+ BUG_ON(direction == DMA_NONE);
+
+ iommu = dev->archdata.iommu;
+ if (nelems == 0 || !iommu)
+ return 0;
+
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != DMA_TO_DEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ iommu_batch_start(dev, prot, ~0UL);
+
+ max_seg_size = dma_get_max_seg_size(dev);
+ seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
+ base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, out_entry = 0, slen;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
+
+ iommu_batch_new_entry(entry);
+
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+ /* Insert into HW table */
+ paddr &= IO_PAGE_MASK;
+ while (npages--) {
+ err = iommu_batch_add(paddr);
+ if (unlikely(err < 0L))
+ goto iommu_map_failed;
+ paddr += IO_PAGE_SIZE;
+ }
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size) ||
+ (is_span_boundary(out_entry, base_shift,
+ seg_boundary_size, outs, s))) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ out_entry = entry;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+ }
+
+ err = iommu_batch_end();
+
+ if (unlikely(err < 0L))
+ goto iommu_map_failed;
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
+
+ iommu_map_failed:
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length,
+ IO_PAGE_SIZE);
+ iommu_range_free(iommu, vaddr, npages);
+ /* XXX demap? XXX */
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return 0;
+ }
+
+ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+ {
+ struct pci_pbm_info *pbm;
+ struct scatterlist *sg;
+ struct iommu *iommu;
+ unsigned long flags;
+ u32 devhandle;
+
+ BUG_ON(direction == DMA_NONE);
+
+ iommu = dev->archdata.iommu;
+ pbm = dev->archdata.host_controller;
+ devhandle = pbm->devhandle;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+ iommu_range_free(iommu, dma_handle, npages);
+
+ entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ while (npages) {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ }
+
+ sg = sg_next(sg);
+ }
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ static void dma_4v_sync_single_for_cpu(struct device *dev,
+ dma_addr_t bus_addr, size_t sz,
+ enum dma_data_direction direction)
+ {
+ /* Nothing to do... */
+ }
+
+ static void dma_4v_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+ {
+ /* Nothing to do... */
+ }
+
+ static const struct dma_ops sun4v_dma_ops = {
+ .alloc_coherent = dma_4v_alloc_coherent,
+ .free_coherent = dma_4v_free_coherent,
+ .map_single = dma_4v_map_single,
+ .unmap_single = dma_4v_unmap_single,
+ .map_sg = dma_4v_map_sg,
+ .unmap_sg = dma_4v_unmap_sg,
+ .sync_single_for_cpu = dma_4v_sync_single_for_cpu,
+ .sync_sg_for_cpu = dma_4v_sync_sg_for_cpu,
+ };
+
+ static void __init pci_sun4v_scan_bus(struct pci_pbm_info *pbm,
+ struct device *parent)
+ {
+ struct property *prop;
+ struct device_node *dp;
+
+ dp = pbm->op->node;
+ prop = of_find_property(dp, "66mhz-capable", NULL);
+ pbm->is_66mhz_capable = (prop != NULL);
+ pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+ /* XXX register error interrupt handlers XXX */
+ }
+
+ static unsigned long __init probe_existing_entries(struct pci_pbm_info *pbm,
+ struct iommu *iommu)
+ {
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long i, cnt = 0;
+ u32 devhandle;
+
+ devhandle = pbm->devhandle;
+ for (i = 0; i < arena->limit; i++) {
+ unsigned long ret, io_attrs, ra;
+
+ ret = pci_sun4v_iommu_getmap(devhandle,
+ HV_PCI_TSBID(0, i),
+ &io_attrs, &ra);
+ if (ret == HV_EOK) {
+ if (page_in_phys_avail(ra)) {
+ pci_sun4v_iommu_demap(devhandle,
+ HV_PCI_TSBID(0, i), 1);
+ } else {
+ cnt++;
+ __set_bit(i, arena->map);
+ }
+ }
+ }
+
+ return cnt;
+ }
+
+ static int __init pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
+ {
+ static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
+ struct iommu *iommu = pbm->iommu;
+ unsigned long num_tsb_entries, sz, tsbsize;
+ u32 dma_mask, dma_offset;
+ const u32 *vdma;
+
+ vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
+ if (!vdma)
+ vdma = vdma_default;
+
+ if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
+ printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
+ vdma[0], vdma[1]);
+ return -EINVAL;
+ }
+
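+ /* The DMA mask covers the virtual-dma window size, rounded up to
+ * a power of two.
+ */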
+ dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
+ num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
+ tsbsize = num_tsb_entries * sizeof(iopte_t);
+
+ dma_offset = vdma[0];
+
+ /* Setup initial software IOMMU state. */
+ spin_lock_init(&iommu->lock);
+ iommu->ctx_lowest_free = 1;
+ iommu->page_table_map_base = dma_offset;
+ iommu->dma_addr_mask = dma_mask;
+
+ /* Allocate and initialize the free area map. */
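+ /* One bit per TSB entry, rounded up to a multiple of 8 bytes. */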
+ sz = (num_tsb_entries + 7) / 8;
+ sz = (sz + 7UL) & ~7UL;
+ iommu->arena.map = kzalloc(sz, GFP_KERNEL);
+ if (!iommu->arena.map) {
+ printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
+ return -ENOMEM;
+ }
+ iommu->arena.limit = num_tsb_entries;
+
+ sz = probe_existing_entries(pbm, iommu);
+ if (sz)
+ printk("%s: Imported %lu TSB entries from OBP\n",
+ pbm->name, sz);
+
+ return 0;
+ }
+
+ #ifdef CONFIG_PCI_MSI
+ struct pci_sun4v_msiq_entry {
+ u64 version_type;
+ #define MSIQ_VERSION_MASK 0xffffffff00000000UL
+ #define MSIQ_VERSION_SHIFT 32
+ #define MSIQ_TYPE_MASK 0x00000000000000ffUL
+ #define MSIQ_TYPE_SHIFT 0
+ #define MSIQ_TYPE_NONE 0x00
+ #define MSIQ_TYPE_MSG 0x01
+ #define MSIQ_TYPE_MSI32 0x02
+ #define MSIQ_TYPE_MSI64 0x03
+ #define MSIQ_TYPE_INTX 0x08
+ #define MSIQ_TYPE_NONE2 0xff
+
+ u64 intx_sysino;
+ u64 reserved1;
+ u64 stick;
+ u64 req_id; /* bus/device/func */
+ #define MSIQ_REQID_BUS_MASK 0xff00UL
+ #define MSIQ_REQID_BUS_SHIFT 8
+ #define MSIQ_REQID_DEVICE_MASK 0x00f8UL
+ #define MSIQ_REQID_DEVICE_SHIFT 3
+ #define MSIQ_REQID_FUNC_MASK 0x0007UL
+ #define MSIQ_REQID_FUNC_SHIFT 0
+
+ u64 msi_address;
+
+ /* The format of this value is message type dependent.
+ * For MSI bits 15:0 are the data from the MSI packet.
+ * For MSI-X bits 31:0 are the data from the MSI packet.
+ * For MSG, the value holds the message code and message routing
+ * code, where:
+ * bits 39:32 are the bus/device/fn of the msg target-id
+ * bits 18:16 are the message routing code
+ * bits 7:0 are the message code
+ * For INTx the low order 2-bits are:
+ * 00 - INTA
+ * 01 - INTB
+ * 10 - INTC
+ * 11 - INTD
+ */
+ u64 msi_data;
+
+ u64 reserved2;
+ };
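+
+ /* For illustration, the bus/device/function packed into req_id can
+ * be recovered with the masks above:
+ * bus = (req_id & MSIQ_REQID_BUS_MASK) >> MSIQ_REQID_BUS_SHIFT;
+ * device = (req_id & MSIQ_REQID_DEVICE_MASK) >> MSIQ_REQID_DEVICE_SHIFT;
+ * func = (req_id & MSIQ_REQID_FUNC_MASK) >> MSIQ_REQID_FUNC_SHIFT;
+ */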
+
+ static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long *head)
+ {
+ unsigned long err, limit;
+
+ err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
+ if (unlikely(err))
+ return -ENXIO;
+
+ limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+ if (unlikely(*head >= limit))
+ return -EFBIG;
+
+ return 0;
+ }
+
+ static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
+ unsigned long msiqid, unsigned long *head,
+ unsigned long *msi)
+ {
+ struct pci_sun4v_msiq_entry *ep;
+ unsigned long err, type;
+
+ /* Note: void pointer arithmetic; 'head' is a byte offset */
+ ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
+ (pbm->msiq_ent_count *
+ sizeof(struct pci_sun4v_msiq_entry))) +
+ *head);
+
+ if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
+ return 0;
+
+ type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
+ if (unlikely(type != MSIQ_TYPE_MSI32 &&
+ type != MSIQ_TYPE_MSI64))
+ return -EINVAL;
+
+ *msi = ep->msi_data;
+
+ err = pci_sun4v_msi_setstate(pbm->devhandle,
+ ep->msi_data /* msi_num */,
+ HV_MSISTATE_IDLE);
+ if (unlikely(err))
+ return -ENXIO;
+
+ /* Clear the entry. */
+ ep->version_type &= ~MSIQ_TYPE_MASK;
+
+ (*head) += sizeof(struct pci_sun4v_msiq_entry);
+ if (*head >=
+ (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
+ *head = 0;
+
+ return 1;
+ }
+
+ static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long head)
+ {
+ unsigned long err;
+
+ err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
+ if (unlikely(err))
+ return -EINVAL;
+
+ return 0;
+ }
+
+ static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+ unsigned long msi, int is_msi64)
+ {
+ if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
+ (is_msi64 ?
+ HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
+ return -ENXIO;
+ if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
+ return -ENXIO;
+ if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
+ return -ENXIO;
+ return 0;
+ }
+
+ static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+ {
+ unsigned long err, msiqid;
+
+ err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
+ if (err)
+ return -ENXIO;
+
+ pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);
+
+ return 0;
+ }
+
+ static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
+ {
+ unsigned long q_size, alloc_size, pages, order;
+ int i;
+
+ q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+ alloc_size = (pbm->msiq_num * q_size);
+ order = get_order(alloc_size);
+ pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+ if (pages == 0UL) {
+ printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+ order);
+ return -ENOMEM;
+ }
+ memset((char *)pages, 0, PAGE_SIZE << order);
+ pbm->msi_queues = (void *) pages;
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ unsigned long err, base = __pa(pages + (i * q_size));
+ unsigned long ret1, ret2;
+
+ err = pci_sun4v_msiq_conf(pbm->devhandle,
+ pbm->msiq_first + i,
+ base, pbm->msiq_ent_count);
+ if (err) {
+ printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
+ err);
+ goto h_error;
+ }
+
+ err = pci_sun4v_msiq_info(pbm->devhandle,
+ pbm->msiq_first + i,
+ &ret1, &ret2);
+ if (err) {
+ printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
+ err);
+ goto h_error;
+ }
+ if (ret1 != base || ret2 != pbm->msiq_ent_count) {
+ printk(KERN_ERR "MSI: Bogus qconf "
+ "expected[%lx:%x] got[%lx:%lx]\n",
+ base, pbm->msiq_ent_count,
+ ret1, ret2);
+ goto h_error;
+ }
+ }
+
+ return 0;
+
+ h_error:
+ free_pages(pages, order);
+ return -EINVAL;
+ }
+
+ static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
+ {
+ unsigned long q_size, alloc_size, pages, order;
+ int i;
+
+ for (i = 0; i < pbm->msiq_num; i++) {
+ unsigned long msiqid = pbm->msiq_first + i;
+
+ (void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
+ }
+
+ q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+ alloc_size = (pbm->msiq_num * q_size);
+ order = get_order(alloc_size);
+
+ pages = (unsigned long) pbm->msi_queues;
+
+ free_pages(pages, order);
+
+ pbm->msi_queues = NULL;
+ }
+
+ static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
+ unsigned long msiqid,
+ unsigned long devino)
+ {
+ unsigned int virt_irq = sun4v_build_irq(pbm->devhandle, devino);
+
+ if (!virt_irq)
+ return -ENOMEM;
+
+ if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
+ return -EINVAL;
+ if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
+ return -EINVAL;
+
+ return virt_irq;
+ }
+
+ static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
+ .get_head = pci_sun4v_get_head,
+ .dequeue_msi = pci_sun4v_dequeue_msi,
+ .set_head = pci_sun4v_set_head,
+ .msi_setup = pci_sun4v_msi_setup,
+ .msi_teardown = pci_sun4v_msi_teardown,
+ .msiq_alloc = pci_sun4v_msiq_alloc,
+ .msiq_free = pci_sun4v_msiq_free,
+ .msiq_build_irq = pci_sun4v_msiq_build_irq,
+ };
+
+ static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
+ {
+ sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
+ }
+ #else /* CONFIG_PCI_MSI */
+ static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
+ {
+ }
+ #endif /* !(CONFIG_PCI_MSI) */
+
+ static int __init pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
+ struct of_device *op, u32 devhandle)
+ {
+ struct device_node *dp = op->node;
+ int err;
+
+ pbm->numa_node = of_node_to_nid(dp);
+
+ pbm->pci_ops = &sun4v_pci_ops;
+ pbm->config_space_reg_bits = 12;
+
+ pbm->index = pci_num_pbms++;
+
+ pbm->op = op;
+
+ pbm->devhandle = devhandle;
+
+ pbm->name = dp->full_name;
+
+ printk("%s: SUN4V PCI Bus Module\n", pbm->name);
+ printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
+
+ pci_determine_mem_io_space(pbm);
+
+ pci_get_pbm_props(pbm);
+
+ err = pci_sun4v_iommu_init(pbm);
+ if (err)
+ return err;
+
+ pci_sun4v_msi_init(pbm);
+
+ pci_sun4v_scan_bus(pbm, &op->dev);
+
+ pbm->next = pci_pbm_root;
+ pci_pbm_root = pbm;
+
+ return 0;
+ }
+
+ static int __devinit pci_sun4v_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ const struct linux_prom64_registers *regs;
+ static int hvapi_negotiated = 0;
+ struct pci_pbm_info *pbm;
+ struct device_node *dp;
+ struct iommu *iommu;
+ u32 devhandle;
+ int i, err;
+
+ dp = op->node;
+
+ if (!hvapi_negotiated++) {
+ err = sun4v_hvapi_register(HV_GRP_PCI,
+ vpci_major,
+ &vpci_minor);
+
+ if (err) {
+ printk(KERN_ERR PFX "Could not register hvapi, "
+ "err=%d\n", err);
+ return err;
+ }
+ printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
+ vpci_major, vpci_minor);
+
+ dma_ops = &sun4v_dma_ops;
+ }
+
+ regs = of_get_property(dp, "reg", NULL);
+ err = -ENODEV;
+ if (!regs) {
+ printk(KERN_ERR PFX "Could not find config registers\n");
+ goto out_err;
+ }
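+ /* The devhandle is bits 59:32 of the first "reg" cell, masked
+ * down to 28 bits.
+ */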
+ devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
+
+ err = -ENOMEM;
+ if (!iommu_batch_initialized) {
+ for_each_possible_cpu(i) {
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+
+ if (!page)
+ goto out_err;
+
+ per_cpu(iommu_batch, i).pglist = (u64 *) page;
+ }
+ iommu_batch_initialized = 1;
+ }
+
+ pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+ if (!pbm) {
+ printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
+ goto out_err;
+ }
+
+ iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+ if (!iommu) {
+ printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
+ goto out_free_controller;
+ }
+
+ pbm->iommu = iommu;
+
+ err = pci_sun4v_pbm_init(pbm, op, devhandle);
+ if (err)
+ goto out_free_iommu;
+
+ dev_set_drvdata(&op->dev, pbm);
+
+ return 0;
+
+ out_free_iommu:
+ kfree(pbm->iommu);
+
+ out_free_controller:
+ kfree(pbm);
+
+ out_err:
+ return err;
+ }
+
+ static struct of_device_id __initdata pci_sun4v_match[] = {
+ {
+ .name = "pci",
+ .compatible = "SUNW,sun4v-pci",
+ },
+ {},
+ };
+
+ static struct of_platform_driver pci_sun4v_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = pci_sun4v_match,
+ .probe = pci_sun4v_probe,
+ };
+
+ static int __init pci_sun4v_init(void)
+ {
+ return of_register_driver(&pci_sun4v_driver, &of_bus_type);
+ }
+
+ subsys_initcall(pci_sun4v_init);
return 0;
}
+ static struct of_device_id __initdata pmc_match[] = {
+ {
+ .name = PMC_OBPNAME,
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, pmc_match);
+
+ static struct of_platform_driver pmc_driver = {
++ .owner = THIS_MODULE,
+ .name = "pmc",
+ .match_table = pmc_match,
+ .probe = pmc_probe,
+ };
+
+ static int __init pmc_init(void)
+ {
+ return of_register_driver(&pmc_driver, &of_bus_type);
+ }
+
/* This driver is not critical to the boot process
* and is easiest to ioremap when SBus is already
* initialized, so we install ourselves thusly:
--- /dev/null
+ /* power.c: Power management driver.
+ *
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/reboot.h>
+ #include <linux/of_device.h>
+
+ #include <asm/prom.h>
+ #include <asm/io.h>
+
+ static void __iomem *power_reg;
+
+ static irqreturn_t power_handler(int irq, void *dev_id)
+ {
+ orderly_poweroff(true);
+
+ /* FIXME: Check registers for status... */
+ return IRQ_HANDLED;
+ }
+
+ static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
+ {
+ if (irq == 0xffffffff)
+ return 0;
+ if (!of_find_property(dp, "button", NULL))
+ return 0;
+
+ return 1;
+ }
+
+ static int __devinit power_probe(struct of_device *op, const struct of_device_id *match)
+ {
+ struct resource *res = &op->resource[0];
+ unsigned int irq = op->irqs[0];
+
+ power_reg = of_ioremap(res, 0, 0x4, "power");
+
+ printk(KERN_INFO "%s: Control reg at %llx\n",
+ op->node->name, res->start);
+
+ if (has_button_interrupt(irq, op->node)) {
+ if (request_irq(irq,
+ power_handler, 0, "power", NULL) < 0)
+ printk(KERN_ERR "power: Cannot setup IRQ handler.\n");
+ }
+
+ return 0;
+ }
+
+ static struct of_device_id __initdata power_match[] = {
+ {
+ .name = "power",
+ },
+ {},
+ };
+
+ static struct of_platform_driver power_driver = {
++ .owner = THIS_MODULE,
+ .match_table = power_match,
+ .probe = power_probe,
+ .driver = {
+ .name = "power",
+ },
+ };
+
+ static int __init power_init(void)
+ {
+ return of_register_driver(&power_driver, &of_platform_bus_type);
+ }
+
+ device_initcall(power_init);
--- /dev/null
+ /* arch/sparc64/kernel/process.c
+ *
+ * Copyright (C) 1995, 1996, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+ /*
+ * This file handles the architecture-dependent parts of process handling.
+ */
+
+ #include <stdarg.h>
+
+ #include <linux/errno.h>
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/fs.h>
+ #include <linux/smp.h>
+ #include <linux/stddef.h>
+ #include <linux/ptrace.h>
+ #include <linux/slab.h>
+ #include <linux/user.h>
+ #include <linux/delay.h>
+ #include <linux/compat.h>
+ #include <linux/tick.h>
+ #include <linux/init.h>
+ #include <linux/cpu.h>
+ #include <linux/elfcore.h>
+ #include <linux/sysrq.h>
++#include <linux/perfmon_kern.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+ #include <asm/page.h>
+ #include <asm/pgalloc.h>
+ #include <asm/pgtable.h>
+ #include <asm/processor.h>
+ #include <asm/pstate.h>
+ #include <asm/elf.h>
+ #include <asm/fpumacro.h>
+ #include <asm/head.h>
+ #include <asm/cpudata.h>
+ #include <asm/mmu_context.h>
+ #include <asm/unistd.h>
+ #include <asm/hypervisor.h>
+ #include <asm/syscalls.h>
+ #include <asm/irq_regs.h>
+ #include <asm/smp.h>
+
+ #include "kstack.h"
+
+ static void sparc64_yield(int cpu)
+ {
+ if (tlb_type != hypervisor)
+ return;
+
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+
+ while (!need_resched() && !cpu_is_offline(cpu)) {
+ unsigned long pstate;
+
+ /* Disable interrupts. */
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "andn %0, %1, %0\n\t"
+ "wrpr %0, %%g0, %%pstate"
+ : "=&r" (pstate)
+ : "i" (PSTATE_IE));
+
+ if (!need_resched() && !cpu_is_offline(cpu))
+ sun4v_cpu_yield();
+
+ /* Re-enable interrupts. */
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "or %0, %1, %0\n\t"
+ "wrpr %0, %%g0, %%pstate"
+ : "=&r" (pstate)
+ : "i" (PSTATE_IE));
+ }
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
+ }
+
+ /* The idle loop on sparc64. */
+ void cpu_idle(void)
+ {
+ int cpu = smp_processor_id();
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while (1) {
+ tick_nohz_stop_sched_tick(1);
+
+ while (!need_resched() && !cpu_is_offline(cpu))
+ sparc64_yield(cpu);
+
+ tick_nohz_restart_sched_tick();
+
+ preempt_enable_no_resched();
+
+ #ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_is_offline(cpu))
+ cpu_play_dead();
+ #endif
+
+ schedule();
+ preempt_disable();
+ }
+ }
+
+ #ifdef CONFIG_COMPAT
+ static void show_regwindow32(struct pt_regs *regs)
+ {
+ struct reg_window32 __user *rw;
+ struct reg_window32 r_w;
+ mm_segment_t old_fs;
+
+ __asm__ __volatile__ ("flushw");
+ rw = compat_ptr((unsigned)regs->u_regs[14]);
+ old_fs = get_fs();
+ set_fs (USER_DS);
+ if (copy_from_user (&r_w, rw, sizeof(r_w))) {
+ set_fs (old_fs);
+ return;
+ }
+
+ set_fs (old_fs);
+ printk("l0: %08x l1: %08x l2: %08x l3: %08x "
+ "l4: %08x l5: %08x l6: %08x l7: %08x\n",
+ r_w.locals[0], r_w.locals[1], r_w.locals[2], r_w.locals[3],
+ r_w.locals[4], r_w.locals[5], r_w.locals[6], r_w.locals[7]);
+ printk("i0: %08x i1: %08x i2: %08x i3: %08x "
+ "i4: %08x i5: %08x i6: %08x i7: %08x\n",
+ r_w.ins[0], r_w.ins[1], r_w.ins[2], r_w.ins[3],
+ r_w.ins[4], r_w.ins[5], r_w.ins[6], r_w.ins[7]);
+ }
+ #else
+ #define show_regwindow32(regs) do { } while (0)
+ #endif
+
+ static void show_regwindow(struct pt_regs *regs)
+ {
+ struct reg_window __user *rw;
+ struct reg_window *rwk;
+ struct reg_window r_w;
+ mm_segment_t old_fs;
+
+ if ((regs->tstate & TSTATE_PRIV) || !(test_thread_flag(TIF_32BIT))) {
+ __asm__ __volatile__ ("flushw");
+ rw = (struct reg_window __user *)
+ (regs->u_regs[14] + STACK_BIAS);
+ rwk = (struct reg_window *)
+ (regs->u_regs[14] + STACK_BIAS);
+ if (!(regs->tstate & TSTATE_PRIV)) {
+ old_fs = get_fs();
+ set_fs (USER_DS);
+ if (copy_from_user (&r_w, rw, sizeof(r_w))) {
+ set_fs (old_fs);
+ return;
+ }
+ rwk = &r_w;
+ set_fs (old_fs);
+ }
+ } else {
+ show_regwindow32(regs);
+ return;
+ }
+ printk("l0: %016lx l1: %016lx l2: %016lx l3: %016lx\n",
+ rwk->locals[0], rwk->locals[1], rwk->locals[2], rwk->locals[3]);
+ printk("l4: %016lx l5: %016lx l6: %016lx l7: %016lx\n",
+ rwk->locals[4], rwk->locals[5], rwk->locals[6], rwk->locals[7]);
+ printk("i0: %016lx i1: %016lx i2: %016lx i3: %016lx\n",
+ rwk->ins[0], rwk->ins[1], rwk->ins[2], rwk->ins[3]);
+ printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n",
+ rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]);
+ if (regs->tstate & TSTATE_PRIV)
+ printk("I7: <%pS>\n", (void *) rwk->ins[7]);
+ }
+
+ void show_regs(struct pt_regs *regs)
+ {
+ printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate,
+ regs->tpc, regs->tnpc, regs->y, print_tainted());
+ printk("TPC: <%pS>\n", (void *) regs->tpc);
+ printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n",
+ regs->u_regs[0], regs->u_regs[1], regs->u_regs[2],
+ regs->u_regs[3]);
+ printk("g4: %016lx g5: %016lx g6: %016lx g7: %016lx\n",
+ regs->u_regs[4], regs->u_regs[5], regs->u_regs[6],
+ regs->u_regs[7]);
+ printk("o0: %016lx o1: %016lx o2: %016lx o3: %016lx\n",
+ regs->u_regs[8], regs->u_regs[9], regs->u_regs[10],
+ regs->u_regs[11]);
+ printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n",
+ regs->u_regs[12], regs->u_regs[13], regs->u_regs[14],
+ regs->u_regs[15]);
+ printk("RPC: <%pS>\n", (void *) regs->u_regs[15]);
+ show_regwindow(regs);
+ }
+
+ struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
+ static DEFINE_SPINLOCK(global_reg_snapshot_lock);
+
+ static void __global_reg_self(struct thread_info *tp, struct pt_regs *regs,
+ int this_cpu)
+ {
+ flushw_all();
+
+ global_reg_snapshot[this_cpu].tstate = regs->tstate;
+ global_reg_snapshot[this_cpu].tpc = regs->tpc;
+ global_reg_snapshot[this_cpu].tnpc = regs->tnpc;
+ global_reg_snapshot[this_cpu].o7 = regs->u_regs[UREG_I7];
+
+ if (regs->tstate & TSTATE_PRIV) {
+ struct reg_window *rw;
+
+ rw = (struct reg_window *)
+ (regs->u_regs[UREG_FP] + STACK_BIAS);
+ if (kstack_valid(tp, (unsigned long) rw)) {
+ global_reg_snapshot[this_cpu].i7 = rw->ins[7];
+ rw = (struct reg_window *)
+ (rw->ins[6] + STACK_BIAS);
+ if (kstack_valid(tp, (unsigned long) rw))
+ global_reg_snapshot[this_cpu].rpc = rw->ins[7];
+ }
+ } else {
+ global_reg_snapshot[this_cpu].i7 = 0;
+ global_reg_snapshot[this_cpu].rpc = 0;
+ }
+ global_reg_snapshot[this_cpu].thread = tp;
+ }
+
+ /* In order to avoid hangs we do not try to synchronize with the
+ * global register dump client cpus. The last store they make is to
+ * the thread pointer, so do a short poll waiting for that to become
+ * non-NULL.
+ */
+ static void __global_reg_poll(struct global_reg_snapshot *gp)
+ {
+ int limit = 0;
+
+ while (!gp->thread && ++limit < 100) {
+ barrier();
+ udelay(1);
+ }
+ }
+
+ void __trigger_all_cpu_backtrace(void)
+ {
+ struct thread_info *tp = current_thread_info();
+ struct pt_regs *regs = get_irq_regs();
+ unsigned long flags;
+ int this_cpu, cpu;
+
+ if (!regs)
+ regs = tp->kregs;
+
+ spin_lock_irqsave(&global_reg_snapshot_lock, flags);
+
+ memset(global_reg_snapshot, 0, sizeof(global_reg_snapshot));
+
+ this_cpu = raw_smp_processor_id();
+
+ __global_reg_self(tp, regs, this_cpu);
+
+ smp_fetch_global_regs();
+
+ for_each_online_cpu(cpu) {
+ struct global_reg_snapshot *gp = &global_reg_snapshot[cpu];
+
+ __global_reg_poll(gp);
+
+ tp = gp->thread;
+ printk("%c CPU[%3d]: TSTATE[%016lx] TPC[%016lx] TNPC[%016lx] TASK[%s:%d]\n",
+ (cpu == this_cpu ? '*' : ' '), cpu,
+ gp->tstate, gp->tpc, gp->tnpc,
+ ((tp && tp->task) ? tp->task->comm : "NULL"),
+ ((tp && tp->task) ? tp->task->pid : -1));
+
+ if (gp->tstate & TSTATE_PRIV) {
+ printk(" TPC[%pS] O7[%pS] I7[%pS] RPC[%pS]\n",
+ (void *) gp->tpc,
+ (void *) gp->o7,
+ (void *) gp->i7,
+ (void *) gp->rpc);
+ } else {
+ printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
+ gp->tpc, gp->o7, gp->i7, gp->rpc);
+ }
+ }
+
+ memset(global_reg_snapshot, 0, sizeof(global_reg_snapshot));
+
+ spin_unlock_irqrestore(&global_reg_snapshot_lock, flags);
+ }
+
+ #ifdef CONFIG_MAGIC_SYSRQ
+
+ static void sysrq_handle_globreg(int key, struct tty_struct *tty)
+ {
+ __trigger_all_cpu_backtrace();
+ }
+
+ static struct sysrq_key_op sparc_globalreg_op = {
+ .handler = sysrq_handle_globreg,
+ .help_msg = "Globalregs",
+ .action_msg = "Show Global CPU Regs",
+ };
+
+ static int __init sparc_globreg_init(void)
+ {
+ return register_sysrq_key('y', &sparc_globalreg_op);
+ }
+
+ core_initcall(sparc_globreg_init);
+
+ #endif
+
+ unsigned long thread_saved_pc(struct task_struct *tsk)
+ {
+ struct thread_info *ti = task_thread_info(tsk);
+ unsigned long ret = 0xdeadbeefUL;
+
+ if (ti && ti->ksp) {
+ unsigned long *sp;
+ sp = (unsigned long *)(ti->ksp + STACK_BIAS);
+ if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
+ sp[14]) {
+ unsigned long *fp;
+ fp = (unsigned long *)(sp[14] + STACK_BIAS);
+ if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
+ ret = fp[15];
+ }
+ }
+ return ret;
+ }
+
+ /* Free current thread data structures etc.. */
+ void exit_thread(void)
+ {
+ struct thread_info *t = current_thread_info();
+
+ if (t->utraps) {
+ if (t->utraps[0] < 2)
+ kfree (t->utraps);
+ else
+ t->utraps[0]--;
+ }
+
- if (test_and_clear_thread_flag(TIF_PERFCTR)) {
- t->user_cntd0 = t->user_cntd1 = NULL;
- t->pcr_reg = 0;
- write_pcr(0);
- }
++ pfm_exit_thread();
+ }
+
+ void flush_thread(void)
+ {
+ struct thread_info *t = current_thread_info();
+ struct mm_struct *mm;
+
+ if (test_ti_thread_flag(t, TIF_ABI_PENDING)) {
+ clear_ti_thread_flag(t, TIF_ABI_PENDING);
+ if (test_ti_thread_flag(t, TIF_32BIT))
+ clear_ti_thread_flag(t, TIF_32BIT);
+ else
+ set_ti_thread_flag(t, TIF_32BIT);
+ }
+
+ mm = t->task->mm;
+ if (mm)
+ tsb_context_switch(mm);
+
+ set_thread_wsaved(0);
+
- /* Turn off performance counters if on. */
- if (test_and_clear_thread_flag(TIF_PERFCTR)) {
- t->user_cntd0 = t->user_cntd1 = NULL;
- t->pcr_reg = 0;
- write_pcr(0);
- }
-
+ /* Clear FPU register state. */
+ t->fpsaved[0] = 0;
+
+ if (get_thread_current_ds() != ASI_AIUS)
+ set_fs(USER_DS);
+ }
+
+ /* It's a bit more tricky when 64-bit tasks are involved... */
+ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
+ {
+ unsigned long fp, distance, rval;
+
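+ /* A 64-bit task's %sp and %fp are biased by STACK_BIAS (2047 on
+ * sparc64); add the bias back to obtain real addresses before
+ * dereferencing. ins[6] is the caller's saved frame pointer.
+ */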
+ if (!(test_thread_flag(TIF_32BIT))) {
+ csp += STACK_BIAS;
+ psp += STACK_BIAS;
+ __get_user(fp, &(((struct reg_window __user *)psp)->ins[6]));
+ fp += STACK_BIAS;
+ } else
+ __get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
+
+ /* Now 8-byte align the stack as this is mandatory in the
+ * Sparc ABI due to how register windows work. This hides
+ * the restriction from thread libraries etc. -DaveM
+ */
+ csp &= ~7UL;
+
+ distance = fp - psp;
+ rval = (csp - distance);
+ if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
+ rval = 0;
+ else if (test_thread_flag(TIF_32BIT)) {
+ if (put_user(((u32)csp),
+ &(((struct reg_window32 __user *)rval)->ins[6])))
+ rval = 0;
+ } else {
+ if (put_user(((u64)csp - STACK_BIAS),
+ &(((struct reg_window __user *)rval)->ins[6])))
+ rval = 0;
+ else
+ rval = rval - STACK_BIAS;
+ }
+
+ return rval;
+ }
+
+ /* Standard stuff. */
+ static inline void shift_window_buffer(int first_win, int last_win,
+ struct thread_info *t)
+ {
+ int i;
+
+ for (i = first_win; i < last_win; i++) {
+ t->rwbuf_stkptrs[i] = t->rwbuf_stkptrs[i+1];
+ memcpy(&t->reg_window[i], &t->reg_window[i+1],
+ sizeof(struct reg_window));
+ }
+ }
+
+ void synchronize_user_stack(void)
+ {
+ struct thread_info *t = current_thread_info();
+ unsigned long window;
+
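+ /* Best-effort push of any register windows still buffered in
+ * thread_info out to the user stack; windows whose copy_to_user()
+ * fails simply stay buffered for a later attempt.
+ */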
+ flush_user_windows();
+ if ((window = get_thread_wsaved()) != 0) {
+ int winsize = sizeof(struct reg_window);
+ int bias = 0;
+
+ if (test_thread_flag(TIF_32BIT))
+ winsize = sizeof(struct reg_window32);
+ else
+ bias = STACK_BIAS;
+
+ window -= 1;
+ do {
+ unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
+ struct reg_window *rwin = &t->reg_window[window];
+
+ if (!copy_to_user((char __user *)sp, rwin, winsize)) {
+ shift_window_buffer(window, get_thread_wsaved() - 1, t);
+ set_thread_wsaved(get_thread_wsaved() - 1);
+ }
+ } while (window--);
+ }
+ }
+
+ static void stack_unaligned(unsigned long sp)
+ {
+ siginfo_t info;
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *) sp;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+ }
+
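+ /* Unlike synchronize_user_stack(), failure here is fatal: we cannot
+ * legally return to userspace while windows remain buffered, so the
+ * task is killed with SIGILL.
+ */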
+ void fault_in_user_windows(void)
+ {
+ struct thread_info *t = current_thread_info();
+ unsigned long window;
+ int winsize = sizeof(struct reg_window);
+ int bias = 0;
+
+ if (test_thread_flag(TIF_32BIT))
+ winsize = sizeof(struct reg_window32);
+ else
+ bias = STACK_BIAS;
+
+ flush_user_windows();
+ window = get_thread_wsaved();
+
+ if (likely(window != 0)) {
+ window -= 1;
+ do {
+ unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
+ struct reg_window *rwin = &t->reg_window[window];
+
+ if (unlikely(sp & 0x7UL))
+ stack_unaligned(sp);
+
+ if (unlikely(copy_to_user((char __user *)sp,
+ rwin, winsize)))
+ goto barf;
+ } while (window--);
+ }
+ set_thread_wsaved(0);
+ return;
+
+ barf:
+ set_thread_wsaved(window + 1);
+ do_exit(SIGILL);
+ }
+
+ asmlinkage long sparc_do_fork(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size)
+ {
+ int __user *parent_tid_ptr, *child_tid_ptr;
+ unsigned long orig_i1 = regs->u_regs[UREG_I1];
+ long ret;
+
+ #ifdef CONFIG_COMPAT
+ if (test_thread_flag(TIF_32BIT)) {
+ parent_tid_ptr = compat_ptr(regs->u_regs[UREG_I2]);
+ child_tid_ptr = compat_ptr(regs->u_regs[UREG_I4]);
+ } else
+ #endif
+ {
+ parent_tid_ptr = (int __user *) regs->u_regs[UREG_I2];
+ child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
+ }
+
+ ret = do_fork(clone_flags, stack_start,
+ regs, stack_size,
+ parent_tid_ptr, child_tid_ptr);
+
+ /* If we get an error and potentially restart the system
+ * call, we're screwed because copy_thread() clobbered
+ * the parent's %o1. So detect that case and restore it
+ * here.
+ */
+ if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
+ regs->u_regs[UREG_I1] = orig_i1;
+
+ return ret;
+ }
+
+ /* Copy a Sparc thread. The fork() return value conventions
+ * under SunOS are nothing short of bletcherous:
+ * Parent --> %o0 == child's pid, %o1 == 0
+ * Child --> %o0 == parent's pid, %o1 == 1
+ */
+ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
+ unsigned long unused,
+ struct task_struct *p, struct pt_regs *regs)
+ {
+ struct thread_info *t = task_thread_info(p);
+ struct sparc_stackf *parent_sf;
+ unsigned long child_stack_sz;
+ char *child_trap_frame;
+ int kernel_thread;
+
+ kernel_thread = (regs->tstate & TSTATE_PRIV) ? 1 : 0;
+ parent_sf = ((struct sparc_stackf *) regs) - 1;
+
+ /* Calculate offset to stack_frame & pt_regs */
+ child_stack_sz = ((STACKFRAME_SZ + TRACEREG_SZ) +
+ (kernel_thread ? STACKFRAME_SZ : 0));
+ child_trap_frame = (task_stack_page(p) +
+ (THREAD_SIZE - child_stack_sz));
+ memcpy(child_trap_frame, parent_sf, child_stack_sz);
+
+ t->flags = (t->flags & ~((0xffUL << TI_FLAG_CWP_SHIFT) |
+ (0xffUL << TI_FLAG_CURRENT_DS_SHIFT))) |
+ (((regs->tstate + 1) & TSTATE_CWP) << TI_FLAG_CWP_SHIFT);
+ t->new_child = 1;
+ t->ksp = ((unsigned long) child_trap_frame) - STACK_BIAS;
+ t->kregs = (struct pt_regs *) (child_trap_frame +
+ sizeof(struct sparc_stackf));
+ t->fpsaved[0] = 0;
+
+ if (kernel_thread) {
+ struct sparc_stackf *child_sf = (struct sparc_stackf *)
+ (child_trap_frame + (STACKFRAME_SZ + TRACEREG_SZ));
+
+ /* Zero terminate the stack backtrace. */
+ child_sf->fp = NULL;
+ t->kregs->u_regs[UREG_FP] =
+ ((unsigned long) child_sf) - STACK_BIAS;
+
- /* Special case, if we are spawning a kernel thread from
- * a userspace task (usermode helper, NFS or similar), we
- * must disable performance counters in the child because
- * the address space and protection realm are changing.
- */
- if (t->flags & _TIF_PERFCTR) {
- t->user_cntd0 = t->user_cntd1 = NULL;
- t->pcr_reg = 0;
- t->flags &= ~_TIF_PERFCTR;
- }
+ t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
+ t->kregs->u_regs[UREG_G6] = (unsigned long) t;
+ t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
+ } else {
+ if (t->flags & _TIF_32BIT) {
+ sp &= 0x00000000ffffffffUL;
+ regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+ }
+ t->kregs->u_regs[UREG_FP] = sp;
+ t->flags |= ((long)ASI_AIUS << TI_FLAG_CURRENT_DS_SHIFT);
+ if (sp != regs->u_regs[UREG_FP]) {
+ unsigned long csp;
+
+ csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
+ if (!csp)
+ return -EFAULT;
+ t->kregs->u_regs[UREG_FP] = csp;
+ }
+ if (t->utraps)
+ t->utraps[0]++;
+ }
+
+ /* Set the return value for the child. */
+ t->kregs->u_regs[UREG_I0] = current->pid;
+ t->kregs->u_regs[UREG_I1] = 1;
+
+ /* Set the second return value for the parent. */
+ regs->u_regs[UREG_I1] = 0;
+
+ if (clone_flags & CLONE_SETTLS)
+ t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
+
++ pfm_copy_thread(p);
++
+ return 0;
+ }
+
+ /*
+ * This is the mechanism for creating a new kernel thread.
+ *
+ * NOTE! Only a kernel-only process (i.e., the swapper or direct descendants
+ * who haven't done an "execve()") should use this: it will work within
+ * a system call from a "real" process, but the process memory space will
+ * not be freed until both the parent and the child have exited.
+ */
+ pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ {
+ long retval;
+
+ /* If the parent runs before fn(arg) is called by the child,
+ * the input registers of this function can be clobbered.
+ * So we stash 'fn' and 'arg' into global registers which
+ * will not be modified by the parent.
+ */
+ __asm__ __volatile__("mov %4, %%g2\n\t" /* Save FN into global */
+ "mov %5, %%g3\n\t" /* Save ARG into global */
+ "mov %1, %%g1\n\t" /* Clone syscall nr. */
+ "mov %2, %%o0\n\t" /* Clone flags. */
+ "mov 0, %%o1\n\t" /* usp arg == 0 */
+ "t 0x6d\n\t" /* Linux/Sparc clone(). */
+ "brz,a,pn %%o1, 1f\n\t" /* Parent, just return. */
+ " mov %%o0, %0\n\t"
+ "jmpl %%g2, %%o7\n\t" /* Call the function. */
+ " mov %%g3, %%o0\n\t" /* Set arg in delay. */
+ "mov %3, %%g1\n\t"
+ "t 0x6d\n\t" /* Linux/Sparc exit(). */
+ /* Notreached by child. */
+ "1:" :
+ "=r" (retval) :
+ "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
+ "i" (__NR_exit), "r" (fn), "r" (arg) :
+ "g1", "g2", "g3", "o0", "o1", "memory", "cc");
+ return retval;
+ }
+ EXPORT_SYMBOL(kernel_thread);
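+
+ /* Sketch of a typical call site (my_thread_fn is hypothetical):
+ *
+ *	static int my_thread_fn(void *arg) { return 0; }
+ *	...
+ *	pid_t pid = kernel_thread(my_thread_fn, NULL, CLONE_FS | CLONE_FILES);
+ */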
+
+ typedef struct {
+ union {
+ unsigned int pr_regs[32];
+ unsigned long pr_dregs[16];
+ } pr_fr;
+ unsigned int __unused;
+ unsigned int pr_fsr;
+ unsigned char pr_qcnt;
+ unsigned char pr_q_entrysize;
+ unsigned char pr_en;
+ unsigned int pr_q[64];
+ } elf_fpregset_t32;
+
+ /*
+ * fill in the fpu structure for a core dump.
+ */
+ int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs)
+ {
+ unsigned long *kfpregs = current_thread_info()->fpregs;
+ unsigned long fprs = current_thread_info()->fpsaved[0];
+
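+ /* FPRS_DL/FPRS_DU flag the lower/upper bank of 32 FP registers
+ * as dirty; FPRS_FEF indicates the saved FPU state is valid.
+ */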
+ if (test_thread_flag(TIF_32BIT)) {
+ elf_fpregset_t32 *fpregs32 = (elf_fpregset_t32 *)fpregs;
+
+ if (fprs & FPRS_DL)
+ memcpy(&fpregs32->pr_fr.pr_regs[0], kfpregs,
+ sizeof(unsigned int) * 32);
+ else
+ memset(&fpregs32->pr_fr.pr_regs[0], 0,
+ sizeof(unsigned int) * 32);
+ fpregs32->pr_qcnt = 0;
+ fpregs32->pr_q_entrysize = 8;
+ memset(&fpregs32->pr_q[0], 0,
+ (sizeof(unsigned int) * 64));
+ if (fprs & FPRS_FEF) {
+ fpregs32->pr_fsr = (unsigned int) current_thread_info()->xfsr[0];
+ fpregs32->pr_en = 1;
+ } else {
+ fpregs32->pr_fsr = 0;
+ fpregs32->pr_en = 0;
+ }
+ } else {
+ if (fprs & FPRS_DL)
+ memcpy(&fpregs->pr_regs[0], kfpregs,
+ sizeof(unsigned int) * 32);
+ else
+ memset(&fpregs->pr_regs[0], 0,
+ sizeof(unsigned int) * 32);
+ if (fprs & FPRS_DU)
+ memcpy(&fpregs->pr_regs[16], kfpregs+16,
+ sizeof(unsigned int) * 32);
+ else
+ memset(&fpregs->pr_regs[16], 0,
+ sizeof(unsigned int) * 32);
+ if (fprs & FPRS_FEF) {
+ fpregs->pr_fsr = current_thread_info()->xfsr[0];
+ fpregs->pr_gsr = current_thread_info()->gsr[0];
+ } else {
+ fpregs->pr_fsr = fpregs->pr_gsr = 0;
+ }
+ fpregs->pr_fprs = fprs;
+ }
+ return 1;
+ }
+ EXPORT_SYMBOL(dump_fpu);
+
+ /*
+ * sparc_execve() executes a new program after the asm stub has set
+ * things up for us. This should basically do what I want it to.
+ */
+ asmlinkage int sparc_execve(struct pt_regs *regs)
+ {
+ int error, base = 0;
+ char *filename;
+
+ /* User register window flush is done by entry.S */
+
+ /* Check for indirect call. */
+ if (regs->u_regs[UREG_G1] == 0)
+ base = 1;
+
+ filename = getname((char __user *)regs->u_regs[base + UREG_I0]);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ error = do_execve(filename,
+ (char __user * __user *)
+ regs->u_regs[base + UREG_I1],
+ (char __user * __user *)
+ regs->u_regs[base + UREG_I2], regs);
+ putname(filename);
+ if (!error) {
+ fprs_write(0);
+ current_thread_info()->xfsr[0] = 0;
+ current_thread_info()->fpsaved[0] = 0;
+ regs->tstate &= ~TSTATE_PEF;
+ }
+ out:
+ return error;
+ }
+
+ unsigned long get_wchan(struct task_struct *task)
+ {
+ unsigned long pc, fp, bias = 0;
+ struct thread_info *tp;
+ struct reg_window *rw;
+ unsigned long ret = 0;
+ int count = 0;
+
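+ /* Walk at most 16 frames of the sleeping task's stack and report
+ * the first PC that is not inside the scheduler itself.
+ */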
+ if (!task || task == current ||
+ task->state == TASK_RUNNING)
+ goto out;
+
+ tp = task_thread_info(task);
+ bias = STACK_BIAS;
+ fp = task_thread_info(task)->ksp + bias;
+
+ do {
+ if (!kstack_valid(tp, fp))
+ break;
+ rw = (struct reg_window *) fp;
+ pc = rw->ins[7];
+ if (!in_sched_functions(pc)) {
+ ret = pc;
+ goto out;
+ }
+ fp = rw->ins[6] + bias;
+ } while (++count < 16);
+
+ out:
+ return ret;
+ }
--- /dev/null
+ /*
+ * rtrap.S: Preparing for return from trap on Sparc V9.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+
+ #include <asm/asi.h>
+ #include <asm/pstate.h>
+ #include <asm/ptrace.h>
+ #include <asm/spitfire.h>
+ #include <asm/head.h>
+ #include <asm/visasm.h>
+ #include <asm/processor.h>
+
+ #define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+ #define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+ #define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+
+ .text
+ .align 32
+ __handle_softirq:
+ call do_softirq
+ nop
+ ba,a,pt %xcc, __handle_softirq_continue
+ nop
+ __handle_preemption:
+ call schedule
+ wrpr %g0, RTRAP_PSTATE, %pstate
+ ba,pt %xcc, __handle_preemption_continue
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+
+ __handle_user_windows:
+ call fault_in_user_windows
+ wrpr %g0, RTRAP_PSTATE, %pstate
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ /* Redo sched+sig checks */
+ ldx [%g6 + TI_FLAGS], %l0
+ andcc %l0, _TIF_NEED_RESCHED, %g0
+
+ be,pt %xcc, 1f
+ nop
+ call schedule
+ wrpr %g0, RTRAP_PSTATE, %pstate
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ ldx [%g6 + TI_FLAGS], %l0
+
+ 1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
+ be,pt %xcc, __handle_user_windows_continue
+ nop
+ mov %l5, %o1
+ add %sp, PTREGS_OFF, %o0
+ mov %l0, %o2
+
+ call do_notify_resume
+ wrpr %g0, RTRAP_PSTATE, %pstate
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ /* Signal delivery can modify pt_regs tstate, so we must
+ * reload it.
+ */
+ ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+ sethi %hi(0xf << 20), %l4
+ and %l1, %l4, %l4
+ ba,pt %xcc, __handle_user_windows_continue
+
+ andn %l1, %l4, %l1
-__handle_perfctrs:
- call update_perfctrs
- wrpr %g0, RTRAP_PSTATE, %pstate
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
- ldub [%g6 + TI_WSAVED], %o2
- brz,pt %o2, 1f
- nop
- /* Redo userwin+sched+sig checks */
- call fault_in_user_windows
-
- wrpr %g0, RTRAP_PSTATE, %pstate
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
- ldx [%g6 + TI_FLAGS], %l0
- andcc %l0, _TIF_NEED_RESCHED, %g0
- be,pt %xcc, 1f
-
- nop
- call schedule
- wrpr %g0, RTRAP_PSTATE, %pstate
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
- ldx [%g6 + TI_FLAGS], %l0
-1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
-
- be,pt %xcc, __handle_perfctrs_continue
- sethi %hi(TSTATE_PEF), %o0
- mov %l5, %o1
- add %sp, PTREGS_OFF, %o0
- mov %l0, %o2
- call do_notify_resume
-
- wrpr %g0, RTRAP_PSTATE, %pstate
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
- /* Signal delivery can modify pt_regs tstate, so we must
- * reload it.
- */
- ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
- sethi %hi(0xf << 20), %l4
- and %l1, %l4, %l4
- andn %l1, %l4, %l1
- ba,pt %xcc, __handle_perfctrs_continue
-
- sethi %hi(TSTATE_PEF), %o0
+ __handle_userfpu:
+ rd %fprs, %l5
+ andcc %l5, FPRS_FEF, %g0
+ sethi %hi(TSTATE_PEF), %o0
+ be,a,pn %icc, __handle_userfpu_continue
+ andn %l1, %o0, %l1
- ba,a,pt %xcc, __handle_userfpu_continue
++ ba,pt %xcc, __handle_userfpu_continue
++ nop
+
+ __handle_signal:
+ mov %l5, %o1
+ add %sp, PTREGS_OFF, %o0
+ mov %l0, %o2
+ call do_notify_resume
+ wrpr %g0, RTRAP_PSTATE, %pstate
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+
+ /* Signal delivery can modify pt_regs tstate, so we must
+ * reload it.
+ */
+ ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+ sethi %hi(0xf << 20), %l4
+ and %l1, %l4, %l4
+ ba,pt %xcc, __handle_signal_continue
+ andn %l1, %l4, %l1
+
+ /* When returning from a NMI (%pil==15) interrupt we want to
+ * avoid running softirqs, doing IRQ tracing, preempting, etc.
+ */
+ .globl rtrap_nmi
+ rtrap_nmi: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+ sethi %hi(0xf << 20), %l4
+ and %l1, %l4, %l4
+ andn %l1, %l4, %l1
+ srl %l4, 20, %l4
+ ba,pt %xcc, rtrap_no_irq_enable
+ wrpr %l4, %pil
+
+ .align 64
+ .globl rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall
+ rtrap_irq:
+ rtrap:
+ #ifndef CONFIG_SMP
+ sethi %hi(per_cpu____cpu_data), %l0
+ lduw [%l0 + %lo(per_cpu____cpu_data)], %l1
+ #else
+ sethi %hi(per_cpu____cpu_data), %l0
+ or %l0, %lo(per_cpu____cpu_data), %l0
+ lduw [%l0 + %g5], %l1
+ #endif
+ cmp %l1, 0
+
+ /* mm/ultra.S:xcall_report_regs KNOWS about this load. */
+ bne,pn %icc, __handle_softirq
+ ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+ __handle_softirq_continue:
+ rtrap_xcall:
+ sethi %hi(0xf << 20), %l4
+ and %l1, %l4, %l4
+ andn %l1, %l4, %l1
+ srl %l4, 20, %l4
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ brnz,pn %l4, rtrap_no_irq_enable
+ nop
+ call trace_hardirqs_on
+ nop
+ wrpr %l4, %pil
+ #endif
+ rtrap_no_irq_enable:
+ andcc %l1, TSTATE_PRIV, %l3
+ bne,pn %icc, to_kernel
+ nop
+
+ /* We must hold IRQs off and atomically test schedule+signal
+ * state, then hold them off all the way back to userspace.
+ * If we are returning to kernel, none of this matters. Note
+ * that we are disabling interrupts via PSTATE_IE, not using
+ * %pil.
+ *
+ * If we do not do this, there is a window where we would do
+ * the tests, later the signal/resched event arrives but we do
+ * not process it since we are still in kernel mode. It would
+ * take until the next local IRQ before the signal/resched
+ * event would be handled.
+ *
+ * This also means that if we have to deal with user windows, we
+ * have to redo all of these sched+signal checks with IRQs
+ * disabled.
+ */
+ to_user: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ wrpr 0, %pil
+ __handle_preemption_continue:
+ ldx [%g6 + TI_FLAGS], %l0
+ sethi %hi(_TIF_USER_WORK_MASK), %o0
+ or %o0, %lo(_TIF_USER_WORK_MASK), %o0
+ andcc %l0, %o0, %g0
+ sethi %hi(TSTATE_PEF), %o0
+ be,pt %xcc, user_nowork
+ andcc %l1, %o0, %g0
+ andcc %l0, _TIF_NEED_RESCHED, %g0
+ bne,pn %xcc, __handle_preemption
+ andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
+ bne,pn %xcc, __handle_signal
+ __handle_signal_continue:
+ ldub [%g6 + TI_WSAVED], %o2
+ brnz,pn %o2, __handle_user_windows
+ nop
+ __handle_user_windows_continue:
- ldx [%g6 + TI_FLAGS], %l5
- andcc %l5, _TIF_PERFCTR, %g0
+ sethi %hi(TSTATE_PEF), %o0
- bne,pn %xcc, __handle_perfctrs
-__handle_perfctrs_continue:
- andcc %l1, %o0, %g0
++ andcc %l1, %o0, %g0
+
+ /* This fpdepth clear is necessary for non-syscall rtraps only */
+ user_nowork:
+ bne,pn %xcc, __handle_userfpu
+ stb %g0, [%g6 + TI_FPDEPTH]
+ __handle_userfpu_continue:
+
+ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ ldx [%sp + PTREGS_OFF + PT_V9_G2], %g2
+
+ ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
+ ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
+ ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+ brz,pt %l3, 1f
+ mov %g6, %l2
+
+ /* Must do this before thread reg is clobbered below. */
+ LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2)
+ 1:
+ ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
+ ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
+
+ /* Normal globals are restored, go to trap globals. */
+ 661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
+ nop
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ SET_GL(1)
+ .previous
+
+ mov %l2, %g6
+
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
+ ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
+
+ ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2
+ ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3
+ ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4
+ ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5
+ ldx [%sp + PTREGS_OFF + PT_V9_I6], %i6
+ ldx [%sp + PTREGS_OFF + PT_V9_I7], %i7
+ ldx [%sp + PTREGS_OFF + PT_V9_TPC], %l2
+ ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %o2
+
+ ld [%sp + PTREGS_OFF + PT_V9_Y], %o3
+ wr %o3, %g0, %y
+ wrpr %l4, 0x0, %pil
+ wrpr %g0, 0x1, %tl
+ andn %l1, TSTATE_SYSCALL, %l1
+ wrpr %l1, %g0, %tstate
+ wrpr %l2, %g0, %tpc
+ wrpr %o2, %g0, %tnpc
+
+ brnz,pn %l3, kern_rtt
+ mov PRIMARY_CONTEXT, %l7
+
+ 661: ldxa [%l7 + %l7] ASI_DMMU, %l0
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%l7 + %l7] ASI_MMU, %l0
+ .previous
+
+ sethi %hi(sparc64_kern_pri_nuc_bits), %l1
+ ldx [%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1
+ or %l0, %l1, %l0
+
+ 661: stxa %l0, [%l7] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %l0, [%l7] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %l7
+ flush %l7
+ rdpr %wstate, %l1
+ rdpr %otherwin, %l2
+ srl %l1, 3, %l1
+
+ wrpr %l2, %g0, %canrestore
+ wrpr %l1, %g0, %wstate
+ brnz,pt %l2, user_rtt_restore
+ wrpr %g0, %g0, %otherwin
+
+ ldx [%g6 + TI_FLAGS], %g3
+ wr %g0, ASI_AIUP, %asi
+ rdpr %cwp, %g1
+ andcc %g3, _TIF_32BIT, %g0
+ sub %g1, 1, %g1
+ bne,pt %xcc, user_rtt_fill_32bit
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+ user_rtt_fill_fixup:
+ rdpr %cwp, %g1
+ add %g1, 1, %g1
+ wrpr %g1, 0x0, %cwp
+
+ rdpr %wstate, %g2
+ sll %g2, 3, %g2
+ wrpr %g2, 0x0, %wstate
+
+ /* We know %canrestore and %otherwin are both zero. */
+
+ sethi %hi(sparc64_kern_pri_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+ mov PRIMARY_CONTEXT, %g1
+
+ 661: stxa %g2, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %g1
+ flush %g1
+
+ or %g4, FAULT_CODE_WINFIXUP, %g4
+ stb %g4, [%g6 + TI_FAULT_CODE]
+ stx %g5, [%g6 + TI_FAULT_ADDR]
+
+ mov %g6, %l1
+ wrpr %g0, 0x0, %tl
+
+ 661: nop
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(0)
+ .previous
+
+ wrpr %g0, RTRAP_PSTATE, %pstate
+
+ mov %l1, %g6
+ ldx [%g6 + TI_TASK], %g4
+ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+ call do_sparc64_fault
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+
+ user_rtt_pre_restore:
+ add %g1, 1, %g1
+ wrpr %g1, 0x0, %cwp
+
+ user_rtt_restore:
+ restore
+ rdpr %canrestore, %g1
+ wrpr %g1, 0x0, %cleanwin
+ retry
+ nop
+
+ kern_rtt: rdpr %canrestore, %g1
+ brz,pn %g1, kern_rtt_fill
+ nop
+ kern_rtt_restore:
+ stw %g0, [%sp + PTREGS_OFF + PT_V9_MAGIC]
+ restore
+ retry
+
+ to_kernel:
+ #ifdef CONFIG_PREEMPT
+ ldsw [%g6 + TI_PRE_COUNT], %l5
+ brnz %l5, kern_fpucheck
+ ldx [%g6 + TI_FLAGS], %l5
+ andcc %l5, _TIF_NEED_RESCHED, %g0
+ be,pt %xcc, kern_fpucheck
+ nop
+ cmp %l4, 0
+ bne,pn %xcc, kern_fpucheck
+ sethi %hi(PREEMPT_ACTIVE), %l6
+ stw %l6, [%g6 + TI_PRE_COUNT]
+ call schedule
+ nop
+ ba,pt %xcc, rtrap
+ stw %g0, [%g6 + TI_PRE_COUNT]
+ #endif
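+ /* Restore any lazily-saved FPU state for this trap depth (per the
+ * saved FPRS_FEF/FPRS_DL/FPRS_DU bits) before completing the return.
+ */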
+ kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5
+ brz,pt %l5, rt_continue
+ srl %l5, 1, %o0
+ add %g6, TI_FPSAVED, %l6
+ ldub [%l6 + %o0], %l2
+ sub %l5, 2, %l5
+
+ add %g6, TI_GSR, %o1
+ andcc %l2, (FPRS_FEF|FPRS_DU), %g0
+ be,pt %icc, 2f
+ and %l2, FPRS_DL, %l6
+ andcc %l2, FPRS_FEF, %g0
+ be,pn %icc, 5f
+ sll %o0, 3, %o5
+ rd %fprs, %g1
+
+ wr %g1, FPRS_FEF, %fprs
+ ldx [%o1 + %o5], %g1
+ add %g6, TI_XFSR, %o1
+ sll %o0, 8, %o2
+ add %g6, TI_FPREGS, %o3
+ brz,pn %l6, 1f
+ add %g6, TI_FPREGS+0x40, %o4
+
+ membar #Sync
+ ldda [%o3 + %o2] ASI_BLK_P, %f0
+ ldda [%o4 + %o2] ASI_BLK_P, %f16
+ membar #Sync
+ 1: andcc %l2, FPRS_DU, %g0
+ be,pn %icc, 1f
+ wr %g1, 0, %gsr
+ add %o2, 0x80, %o2
+ membar #Sync
+ ldda [%o3 + %o2] ASI_BLK_P, %f32
+ ldda [%o4 + %o2] ASI_BLK_P, %f48
+ 1: membar #Sync
+ ldx [%o1 + %o5], %fsr
+ 2: stb %l5, [%g6 + TI_FPDEPTH]
+ ba,pt %xcc, rt_continue
+ nop
+ 5: wr %g0, FPRS_FEF, %fprs
+ sll %o0, 8, %o2
+
+ add %g6, TI_FPREGS+0x80, %o3
+ add %g6, TI_FPREGS+0xc0, %o4
+ membar #Sync
+ ldda [%o3 + %o2] ASI_BLK_P, %f32
+ ldda [%o4 + %o2] ASI_BLK_P, %f48
+ membar #Sync
+ wr %g0, FPRS_DU, %fprs
+ ba,pt %xcc, rt_continue
+ stb %l5, [%g6 + TI_FPDEPTH]
--- /dev/null
+ /*
+ * linux/arch/sparc64/kernel/setup.c
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+ #include <linux/errno.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/stddef.h>
+ #include <linux/unistd.h>
+ #include <linux/ptrace.h>
+ #include <linux/slab.h>
+ #include <asm/smp.h>
+ #include <linux/user.h>
+ #include <linux/screen_info.h>
+ #include <linux/delay.h>
+ #include <linux/fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/syscalls.h>
+ #include <linux/kdev_t.h>
+ #include <linux/major.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
+ #include <linux/inet.h>
+ #include <linux/console.h>
+ #include <linux/root_dev.h>
+ #include <linux/interrupt.h>
+ #include <linux/cpu.h>
+ #include <linux/initrd.h>
+
+ #include <asm/system.h>
+ #include <asm/io.h>
+ #include <asm/processor.h>
+ #include <asm/oplib.h>
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/idprom.h>
+ #include <asm/head.h>
+ #include <asm/starfire.h>
+ #include <asm/mmu_context.h>
+ #include <asm/timer.h>
+ #include <asm/sections.h>
+ #include <asm/setup.h>
+ #include <asm/mmu.h>
+ #include <asm/ns87303.h>
+
+ #ifdef CONFIG_IP_PNP
+ #include <net/ipconfig.h>
+ #endif
+
+ #include "entry.h"
+ #include "kernel.h"
+
+ /* Used to synchronize accesses to the NatSemi SUPER I/O chip
+ * configuration operations defined in asm/ns87303.h.
+ */
+ DEFINE_SPINLOCK(ns87303_lock);
+ EXPORT_SYMBOL(ns87303_lock);
+
+ struct screen_info screen_info = {
+ 0, 0, /* orig-x, orig-y */
+ 0, /* unused */
+ 0, /* orig-video-page */
+ 0, /* orig-video-mode */
+ 128, /* orig-video-cols */
+ 0, 0, 0, /* unused, ega_bx, unused */
+ 54, /* orig-video-lines */
+ 0, /* orig-video-isVGA */
+ 16 /* orig-video-points */
+ };
+
+ static void
+ prom_console_write(struct console *con, const char *s, unsigned n)
+ {
+ prom_write(s, n);
+ }
+
+ /* Exported for mm/init.c:paging_init. */
+ unsigned long cmdline_memory_size = 0;
+
+ static struct console prom_early_console = {
+ .name = "earlyprom",
+ .write = prom_console_write,
+ .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
+ .index = -1,
+ };
+
+ /*
+ * Process kernel command line switches that are specific to the
+ * SPARC or that require special low-level processing.
+ */
+ static void __init process_switch(char c)
+ {
+ switch (c) {
+ case 'd':
+ case 's':
+ break;
+ case 'h':
+ prom_printf("boot_flags_init: Halt!\n");
+ prom_halt();
+ break;
+ case 'p':
+ /* Just ignore, this behavior is now the default. */
+ break;
+ case 'P':
+ /* Force UltraSPARC-III P-Cache on. */
+ if (tlb_type != cheetah) {
+ printk("BOOT: Ignoring P-Cache force option.\n");
+ break;
+ }
+ cheetah_pcache_forced_on = 1;
+ add_taint(TAINT_MACHINE_CHECK);
+ cheetah_enable_pcache();
+ break;
+
+ default:
+ printk("Unknown boot switch (-%c)\n", c);
+ break;
+ }
+ }
+
+ static void __init boot_flags_init(char *commands)
+ {
+ while (*commands) {
+ /* Move to the start of the next "argument". */
+ while (*commands && *commands == ' ')
+ commands++;
+
+ /* Process any command switches, otherwise skip it. */
+ if (*commands == '\0')
+ break;
+ if (*commands == '-') {
+ commands++;
+ while (*commands && *commands != ' ')
+ process_switch(*commands++);
+ continue;
+ }
+ if (!strncmp(commands, "mem=", 4)) {
+ /*
+ * "mem=XXX[kKmM]" overrides the PROM-reported
+ * memory size.
+ */
+ cmdline_memory_size = simple_strtoul(commands + 4,
+ &commands, 0);
+ if (*commands == 'K' || *commands == 'k') {
+ cmdline_memory_size <<= 10;
+ commands++;
+ } else if (*commands == 'M' || *commands == 'm') {
+ cmdline_memory_size <<= 20;
+ commands++;
+ }
+ }
+ while (*commands && *commands != ' ')
+ commands++;
+ }
+ }
+
+ extern unsigned short root_flags;
+ extern unsigned short root_dev;
+ extern unsigned short ram_flags;
+ #define RAMDISK_IMAGE_START_MASK 0x07FF
+ #define RAMDISK_PROMPT_FLAG 0x8000
+ #define RAMDISK_LOAD_FLAG 0x4000
+
+ extern int root_mountflags;
+
+ char reboot_command[COMMAND_LINE_SIZE];
+
+ static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
+
+ void __init per_cpu_patch(void)
+ {
+ struct cpuid_patch_entry *p;
+ unsigned long ver;
+ int is_jbus;
+
+ if (tlb_type == spitfire && !this_is_starfire)
+ return;
+
+ is_jbus = 0;
+ if (tlb_type != hypervisor) {
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ is_jbus = ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID);
+ }
+
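+ /* Rewrite each four-instruction patch site with the variant for the
+ * detected cpu type, flushing the I-cache after every word stored.
+ */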
+ p = &__cpuid_patch;
+ while (p < &__cpuid_patch_end) {
+ unsigned long addr = p->addr;
+ unsigned int *insns;
+
+ switch (tlb_type) {
+ case spitfire:
+ insns = &p->starfire[0];
+ break;
+ case cheetah:
+ case cheetah_plus:
+ if (is_jbus)
+ insns = &p->cheetah_jbus[0];
+ else
+ insns = &p->cheetah_safari[0];
+ break;
+ case hypervisor:
+ insns = &p->sun4v[0];
+ break;
+ default:
+ prom_printf("Unknown cpu type, halting.\n");
+ prom_halt();
+ }
+
+ *(unsigned int *) (addr + 0) = insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ *(unsigned int *) (addr + 8) = insns[2];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 8));
+
+ *(unsigned int *) (addr + 12) = insns[3];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 12));
+
+ p++;
+ }
+ }
+
+ void __init sun4v_patch(void)
+ {
+ extern void sun4v_hvapi_init(void);
+ struct sun4v_1insn_patch_entry *p1;
+ struct sun4v_2insn_patch_entry *p2;
+
+ if (tlb_type != hypervisor)
+ return;
+
+ p1 = &__sun4v_1insn_patch;
+ while (p1 < &__sun4v_1insn_patch_end) {
+ unsigned long addr = p1->addr;
+
+ *(unsigned int *) (addr + 0) = p1->insn;
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ p1++;
+ }
+
+ p2 = &__sun4v_2insn_patch;
+ while (p2 < &__sun4v_2insn_patch_end) {
+ unsigned long addr = p2->addr;
+
+ *(unsigned int *) (addr + 0) = p2->insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = p2->insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ p2++;
+ }
+
+ sun4v_hvapi_init();
+ }
+
+ #ifdef CONFIG_SMP
+ void __init boot_cpu_id_too_large(int cpu)
+ {
+ prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+ cpu, NR_CPUS);
+ prom_halt();
+ }
+ #endif
+
+ void __init setup_arch(char **cmdline_p)
+ {
+ /* Initialize PROM console and command line. */
+ *cmdline_p = prom_getbootargs();
+ strcpy(boot_command_line, *cmdline_p);
+ parse_early_param();
+
+ boot_flags_init(*cmdline_p);
+ register_console(&prom_early_console);
+
+ if (tlb_type == hypervisor)
+ printk("ARCH: SUN4V\n");
+ else
+ printk("ARCH: SUN4U\n");
+
+ #ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+ #elif defined(CONFIG_PROM_CONSOLE)
+ conswitchp = &prom_con;
+ #endif
+
+ idprom_init();
+
+ if (!root_flags)
+ root_mountflags &= ~MS_RDONLY;
+ ROOT_DEV = old_decode_dev(root_dev);
+ #ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
+ #endif
+
+ task_thread_info(&init_task)->kregs = &fake_swapper_regs;
+
+ #ifdef CONFIG_IP_PNP
+ if (!ic_set_manually) {
+ int chosen = prom_finddevice("/chosen");
+ u32 cl, sv, gw;
+
+ cl = prom_getintdefault(chosen, "client-ip", 0);
+ sv = prom_getintdefault(chosen, "server-ip", 0);
+ gw = prom_getintdefault(chosen, "gateway-ip", 0);
+ if (cl && sv) {
+ ic_myaddr = cl;
+ ic_servaddr = sv;
+ if (gw)
+ ic_gateway = gw;
+ #if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP)
+ ic_proto_enabled = 0;
+ #endif
+ }
+ }
+ #endif
+
+ /* Get boot processor trap_block[] setup. */
+ init_cur_cpu_trap(current_thread_info());
+
+ paging_init();
+ }
+
+ /* BUFFER is PAGE_SIZE bytes long. */
+
+ extern void smp_info(struct seq_file *);
+ extern void smp_bogo(struct seq_file *);
+ extern void mmu_info(struct seq_file *);
+
+ unsigned int dcache_parity_tl1_occurred;
+ unsigned int icache_parity_tl1_occurred;
+
+ int ncpus_probed;
+
+ static int show_cpuinfo(struct seq_file *m, void *__unused)
+ {
+ seq_printf(m,
+ "cpu\t\t: %s\n"
+ "fpu\t\t: %s\n"
++ "pmu\t\t: %s\n"
+ "prom\t\t: %s\n"
+ "type\t\t: %s\n"
+ "ncpus probed\t: %d\n"
+ "ncpus active\t: %d\n"
+ "D$ parity tl1\t: %u\n"
+ "I$ parity tl1\t: %u\n"
+ #ifndef CONFIG_SMP
+ "Cpu0ClkTck\t: %016lx\n"
+ #endif
+ ,
+ sparc_cpu_type,
+ sparc_fpu_type,
++ sparc_pmu_type,
+ prom_version,
+ ((tlb_type == hypervisor) ?
+ "sun4v" :
+ "sun4u"),
+ ncpus_probed,
+ num_online_cpus(),
+ dcache_parity_tl1_occurred,
+ icache_parity_tl1_occurred
+ #ifndef CONFIG_SMP
+ , cpu_data(0).clock_tick
+ #endif
+ );
+ #ifdef CONFIG_SMP
+ smp_bogo(m);
+ #endif
+ mmu_info(m);
+ #ifdef CONFIG_SMP
+ smp_info(m);
+ #endif
+ return 0;
+ }
+
+ static void *c_start(struct seq_file *m, loff_t *pos)
+ {
+ /* The pointer we are returning is arbitrary,
+ * it just has to be non-NULL and not IS_ERR
+ * in the success case.
+ */
+ return *pos == 0 ? &c_start : NULL;
+ }
+
+ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+ {
+ ++*pos;
+ return c_start(m, pos);
+ }
+
+ static void c_stop(struct seq_file *m, void *v)
+ {
+ }
+
+ const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+ };
+
+ extern int stop_a_enabled;
+
+ void sun_do_break(void)
+ {
+ if (!stop_a_enabled)
+ return;
+
+ prom_printf("\n");
+ flush_user_windows();
+
+ prom_cmdline();
+ }
+ EXPORT_SYMBOL(sun_do_break);
+
+ int stop_a_enabled = 1;
+ EXPORT_SYMBOL(stop_a_enabled);
--- /dev/null
+ /*
+ * arch/sparc64/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+ #ifdef CONFIG_COMPAT
+ #include <linux/compat.h> /* for compat_old_sigset_t */
+ #endif
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/signal.h>
+ #include <linux/errno.h>
+ #include <linux/wait.h>
+ #include <linux/ptrace.h>
+ #include <linux/tracehook.h>
+ #include <linux/unistd.h>
+ #include <linux/mm.h>
+ #include <linux/tty.h>
+ #include <linux/binfmts.h>
+ #include <linux/bitops.h>
++#include <linux/perfmon_kern.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/ptrace.h>
+ #include <asm/pgtable.h>
+ #include <asm/fpumacro.h>
+ #include <asm/uctx.h>
+ #include <asm/siginfo.h>
+ #include <asm/visasm.h>
+
+ #include "entry.h"
+ #include "systbls.h"
+
+ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+ /* {set, get}context() needed for 64-bit SparcLinux userland. */
+ asmlinkage void sparc64_set_context(struct pt_regs *regs)
+ {
+ struct ucontext __user *ucp = (struct ucontext __user *)
+ regs->u_regs[UREG_I0];
+ mc_gregset_t __user *grp;
+ unsigned long pc, npc, tstate;
+ unsigned long fp, i7;
+ unsigned char fenab;
+ int err;
+
+ flush_user_windows();
+ if (get_thread_wsaved() ||
+ (((unsigned long)ucp) & (sizeof(unsigned long)-1)) ||
+ (!__access_ok(ucp, sizeof(*ucp))))
+ goto do_sigsegv;
+ grp = &ucp->uc_mcontext.mc_gregs;
+ err = __get_user(pc, &((*grp)[MC_PC]));
+ err |= __get_user(npc, &((*grp)[MC_NPC]));
+ if (err || ((pc | npc) & 3))
+ goto do_sigsegv;
+ if (regs->u_regs[UREG_I1]) {
+ sigset_t set;
+
+ if (_NSIG_WORDS == 1) {
+ if (__get_user(set.sig[0], &ucp->uc_sigmask.sig[0]))
+ goto do_sigsegv;
+ } else {
+ if (__copy_from_user(&set, &ucp->uc_sigmask, sizeof(sigset_t)))
+ goto do_sigsegv;
+ }
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(¤t->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(¤t->sighand->siglock);
+ }
+ if (test_thread_flag(TIF_32BIT)) {
+ pc &= 0xffffffff;
+ npc &= 0xffffffff;
+ }
+ regs->tpc = pc;
+ regs->tnpc = npc;
+ err |= __get_user(regs->y, &((*grp)[MC_Y]));
+ err |= __get_user(tstate, &((*grp)[MC_TSTATE]));
+ regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC);
+ regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
+ err |= __get_user(regs->u_regs[UREG_G1], (&(*grp)[MC_G1]));
+ err |= __get_user(regs->u_regs[UREG_G2], (&(*grp)[MC_G2]));
+ err |= __get_user(regs->u_regs[UREG_G3], (&(*grp)[MC_G3]));
+ err |= __get_user(regs->u_regs[UREG_G4], (&(*grp)[MC_G4]));
+ err |= __get_user(regs->u_regs[UREG_G5], (&(*grp)[MC_G5]));
+ err |= __get_user(regs->u_regs[UREG_G6], (&(*grp)[MC_G6]));
+
+ /* Skip %g7 as that's the thread register in userspace. */
+
+ err |= __get_user(regs->u_regs[UREG_I0], (&(*grp)[MC_O0]));
+ err |= __get_user(regs->u_regs[UREG_I1], (&(*grp)[MC_O1]));
+ err |= __get_user(regs->u_regs[UREG_I2], (&(*grp)[MC_O2]));
+ err |= __get_user(regs->u_regs[UREG_I3], (&(*grp)[MC_O3]));
+ err |= __get_user(regs->u_regs[UREG_I4], (&(*grp)[MC_O4]));
+ err |= __get_user(regs->u_regs[UREG_I5], (&(*grp)[MC_O5]));
+ err |= __get_user(regs->u_regs[UREG_I6], (&(*grp)[MC_O6]));
+ err |= __get_user(regs->u_regs[UREG_I7], (&(*grp)[MC_O7]));
+
+ err |= __get_user(fp, &(ucp->uc_mcontext.mc_fp));
+ err |= __get_user(i7, &(ucp->uc_mcontext.mc_i7));
+ err |= __put_user(fp,
+ (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6])));
+ err |= __put_user(i7,
+ (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7])));
+
+ err |= __get_user(fenab, &(ucp->uc_mcontext.mc_fpregs.mcfpu_enab));
+ if (fenab) {
+ unsigned long *fpregs = current_thread_info()->fpregs;
+ unsigned long fprs;
+
+ fprs_write(0);
+ err |= __get_user(fprs, &(ucp->uc_mcontext.mc_fpregs.mcfpu_fprs));
+ if (fprs & FPRS_DL)
+ err |= copy_from_user(fpregs,
+ &(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs),
+ (sizeof(unsigned int) * 32));
+ if (fprs & FPRS_DU)
+ err |= copy_from_user(fpregs+16,
+ ((unsigned long __user *)&(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs))+16,
+ (sizeof(unsigned int) * 32));
+ err |= __get_user(current_thread_info()->xfsr[0],
+ &(ucp->uc_mcontext.mc_fpregs.mcfpu_fsr));
+ err |= __get_user(current_thread_info()->gsr[0],
+ &(ucp->uc_mcontext.mc_fpregs.mcfpu_gsr));
+ regs->tstate &= ~TSTATE_PEF;
+ }
+ if (err)
+ goto do_sigsegv;
+
+ return;
+ do_sigsegv:
+ force_sig(SIGSEGV, current);
+ }
+
+ asmlinkage void sparc64_get_context(struct pt_regs *regs)
+ {
+ struct ucontext __user *ucp = (struct ucontext __user *)
+ regs->u_regs[UREG_I0];
+ mc_gregset_t __user *grp;
+ mcontext_t __user *mcp;
+ unsigned long fp, i7;
+ unsigned char fenab;
+ int err;
+
+ synchronize_user_stack();
+ if (get_thread_wsaved() || clear_user(ucp, sizeof(*ucp)))
+ goto do_sigsegv;
+
+ #if 1
+ fenab = 0; /* IMO get_context is like any other system call, thus modifies FPU state -jj */
+ #else
+ fenab = (current_thread_info()->fpsaved[0] & FPRS_FEF);
+ #endif
+
+ mcp = &ucp->uc_mcontext;
+ grp = &mcp->mc_gregs;
+
+ /* Skip over the trap instruction, first. */
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc = (regs->tnpc & 0xffffffff);
+ regs->tnpc = (regs->tnpc + 4) & 0xffffffff;
+ } else {
+ regs->tpc = regs->tnpc;
+ regs->tnpc += 4;
+ }
+ err = 0;
+ if (_NSIG_WORDS == 1)
+ err |= __put_user(current->blocked.sig[0],
+ (unsigned long __user *)&ucp->uc_sigmask);
+ else
+ err |= __copy_to_user(&ucp->uc_sigmask, &current->blocked,
+ sizeof(sigset_t));
+
+ err |= __put_user(regs->tstate, &((*grp)[MC_TSTATE]));
+ err |= __put_user(regs->tpc, &((*grp)[MC_PC]));
+ err |= __put_user(regs->tnpc, &((*grp)[MC_NPC]));
+ err |= __put_user(regs->y, &((*grp)[MC_Y]));
+ err |= __put_user(regs->u_regs[UREG_G1], &((*grp)[MC_G1]));
+ err |= __put_user(regs->u_regs[UREG_G2], &((*grp)[MC_G2]));
+ err |= __put_user(regs->u_regs[UREG_G3], &((*grp)[MC_G3]));
+ err |= __put_user(regs->u_regs[UREG_G4], &((*grp)[MC_G4]));
+ err |= __put_user(regs->u_regs[UREG_G5], &((*grp)[MC_G5]));
+ err |= __put_user(regs->u_regs[UREG_G6], &((*grp)[MC_G6]));
+ err |= __put_user(regs->u_regs[UREG_G7], &((*grp)[MC_G7]));
+ err |= __put_user(regs->u_regs[UREG_I0], &((*grp)[MC_O0]));
+ err |= __put_user(regs->u_regs[UREG_I1], &((*grp)[MC_O1]));
+ err |= __put_user(regs->u_regs[UREG_I2], &((*grp)[MC_O2]));
+ err |= __put_user(regs->u_regs[UREG_I3], &((*grp)[MC_O3]));
+ err |= __put_user(regs->u_regs[UREG_I4], &((*grp)[MC_O4]));
+ err |= __put_user(regs->u_regs[UREG_I5], &((*grp)[MC_O5]));
+ err |= __put_user(regs->u_regs[UREG_I6], &((*grp)[MC_O6]));
+ err |= __put_user(regs->u_regs[UREG_I7], &((*grp)[MC_O7]));
+
+ err |= __get_user(fp,
+ (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6])));
+ err |= __get_user(i7,
+ (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7])));
+ err |= __put_user(fp, &(mcp->mc_fp));
+ err |= __put_user(i7, &(mcp->mc_i7));
+
+ err |= __put_user(fenab, &(mcp->mc_fpregs.mcfpu_enab));
+ if (fenab) {
+ unsigned long *fpregs = current_thread_info()->fpregs;
+ unsigned long fprs;
+
+ fprs = current_thread_info()->fpsaved[0];
+ if (fprs & FPRS_DL)
+ err |= copy_to_user(&(mcp->mc_fpregs.mcfpu_fregs), fpregs,
+ (sizeof(unsigned int) * 32));
+ if (fprs & FPRS_DU)
+ err |= copy_to_user(
+ ((unsigned long __user *)&(mcp->mc_fpregs.mcfpu_fregs))+16, fpregs+16,
+ (sizeof(unsigned int) * 32));
+ err |= __put_user(current_thread_info()->xfsr[0], &(mcp->mc_fpregs.mcfpu_fsr));
+ err |= __put_user(current_thread_info()->gsr[0], &(mcp->mc_fpregs.mcfpu_gsr));
+ err |= __put_user(fprs, &(mcp->mc_fpregs.mcfpu_fprs));
+ }
+ if (err)
+ goto do_sigsegv;
+
+ return;
+ do_sigsegv:
+ force_sig(SIGSEGV, current);
+ }
+
+ struct rt_signal_frame {
+ struct sparc_stackf ss;
+ siginfo_t info;
+ struct pt_regs regs;
+ __siginfo_fpu_t __user *fpu_save;
+ stack_t stack;
+ sigset_t mask;
+ __siginfo_fpu_t fpu_state;
+ };
+
+ static long _sigpause_common(old_sigset_t set)
+ {
+ set &= _BLOCKABLE;
+ spin_lock_irq(¤t->sighand->siglock);
+ current->saved_sigmask = current->blocked;
+ siginitset(¤t->blocked, set);
+ recalc_sigpending();
+ spin_unlock_irq(¤t->sighand->siglock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+
+ set_restore_sigmask();
+
+ return -ERESTARTNOHAND;
+ }
+
+ asmlinkage long sys_sigpause(unsigned int set)
+ {
+ return _sigpause_common(set);
+ }
+
+ asmlinkage long sys_sigsuspend(old_sigset_t set)
+ {
+ return _sigpause_common(set);
+ }
+
+ static inline int
+ restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ unsigned long *fpregs = current_thread_info()->fpregs;
+ unsigned long fprs;
+ int err;
+
+ err = __get_user(fprs, &fpu->si_fprs);
+ fprs_write(0);
+ regs->tstate &= ~TSTATE_PEF;
+ if (fprs & FPRS_DL)
+ err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+ (sizeof(unsigned int) * 32));
+ if (fprs & FPRS_DU)
+ err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+ (sizeof(unsigned int) * 32));
+ err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+ err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+ current_thread_info()->fpsaved[0] |= fprs;
+ return err;
+ }
+
+ void do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_signal_frame __user *sf;
+ unsigned long tpc, tnpc, tstate;
+ __siginfo_fpu_t __user *fpu_save;
+ sigset_t set;
+ int err;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ synchronize_user_stack();
+ sf = (struct rt_signal_frame __user *)
+ (regs->u_regs[UREG_FP] + STACK_BIAS);
+
+ /* 1. Make sure we are not getting garbage from the user */
+ if (((unsigned long) sf) & 3)
+ goto segv;
+
+ err = get_user(tpc, &sf->regs.tpc);
+ err |= __get_user(tnpc, &sf->regs.tnpc);
+ if (test_thread_flag(TIF_32BIT)) {
+ tpc &= 0xffffffff;
+ tnpc &= 0xffffffff;
+ }
+ err |= ((tpc | tnpc) & 3);
+
+ /* 2. Restore the state */
+ err |= __get_user(regs->y, &sf->regs.y);
+ err |= __get_user(tstate, &sf->regs.tstate);
+ err |= copy_from_user(regs->u_regs, sf->regs.u_regs, sizeof(regs->u_regs));
+
+ /* User can only change condition codes and %asi in %tstate. */
+ regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC);
+ regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
+
+ err |= __get_user(fpu_save, &sf->fpu_save);
+ if (fpu_save)
+ err |= restore_fpu_state(regs, &sf->fpu_state);
+
+ err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
+ err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
+
+ if (err)
+ goto segv;
+
+ regs->tpc = tpc;
+ regs->tnpc = tnpc;
+
+ /* Prevent syscall restart. */
+ pt_regs_clear_syscall(regs);
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(¤t->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(¤t->sighand->siglock);
+ return;
+ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+ /* Checks if the fp is valid */
+ static int invalid_frame_pointer(void __user *fp, int fplen)
+ {
+ if (((unsigned long) fp) & 7)
+ return 1;
+ return 0;
+ }
+
+ static inline int
+ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ unsigned long *fpregs = current_thread_info()->fpregs;
+ unsigned long fprs;
+ int err = 0;
+
+ fprs = current_thread_info()->fpsaved[0];
+ if (fprs & FPRS_DL)
+ err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+ (sizeof(unsigned int) * 32));
+ if (fprs & FPRS_DU)
+ err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+ (sizeof(unsigned int) * 32));
+ err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+ err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+ err |= __put_user(fprs, &fpu->si_fprs);
+
+ return err;
+ }
+
+ static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+
+ /*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+ if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+ return (void __user *) -1L;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ /* Always align the stack frame. This handles two cases. First,
+ * sigaltstack need not be mindful of platform specific stack
+ * alignment. Second, if we took this signal because the stack
+ * is not aligned properly, we'd like to take the signal cleanly
+ * and report that.
+ */
+ sp &= ~7UL;
+
+ return (void __user *)(sp - framesize);
+ }
+
+ static inline void
+ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
+ int signo, sigset_t *oldset, siginfo_t *info)
+ {
+ struct rt_signal_frame __user *sf;
+ int sigframe_size, err;
+
+ /* 1. Make sure everything is clean */
+ synchronize_user_stack();
+ save_and_clear_fpu();
+
+ sigframe_size = sizeof(struct rt_signal_frame);
+ if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
+ sigframe_size -= sizeof(__siginfo_fpu_t);
+
+ sf = (struct rt_signal_frame __user *)
+ get_sigframe(ka, regs, sigframe_size);
+
+ if (invalid_frame_pointer(sf, sigframe_size))
+ goto sigill;
+
+ if (get_thread_wsaved() != 0)
+ goto sigill;
+
+ /* 2. Save the current process state */
+ err = copy_to_user(&sf->regs, regs, sizeof (*regs));
+
+ if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
+ err |= save_fpu_state(regs, &sf->fpu_state);
+ err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+ } else {
+ err |= __put_user(0, &sf->fpu_save);
+ }
+
+ /* Setup sigaltstack */
+ err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
+
+ err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
+
+ err |= copy_in_user((u64 __user *)sf,
+ (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
+ sizeof(struct reg_window));
+
+ if (info)
+ err |= copy_siginfo_to_user(&sf->info, info);
+ else {
+ err |= __put_user(signo, &sf->info.si_signo);
+ err |= __put_user(SI_NOINFO, &sf->info.si_code);
+ }
+ if (err)
+ goto sigsegv;
+
+ /* 3. signal handler back-trampoline and parameters */
+ regs->u_regs[UREG_FP] = ((unsigned long) sf) - STACK_BIAS;
+ regs->u_regs[UREG_I0] = signo;
+ regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+
+ /* The sigcontext is passed in this way because of how it
+ * is defined in GLIBC's /usr/include/bits/sigcontext.h
+ * for sparc64. It includes the 128 bytes of siginfo_t.
+ */
+ regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
+
+ /* 4. signal handler */
+ regs->tpc = (unsigned long) ka->sa.sa_handler;
+ regs->tnpc = (regs->tpc + 4);
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ /* 5. return to kernel instructions */
+ regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer;
+ return;
+
+ sigill:
+ do_exit(SIGILL);
+ sigsegv:
+ force_sigsegv(signo, current);
+ }
+
+ static inline void handle_signal(unsigned long signr, struct k_sigaction *ka,
+ siginfo_t *info,
+ sigset_t *oldset, struct pt_regs *regs)
+ {
+ setup_rt_frame(ka, regs, signr, oldset,
+ (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL);
+ spin_lock_irq(¤t->sighand->siglock);
+ sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NOMASK))
+ sigaddset(¤t->blocked,signr);
+ recalc_sigpending();
+ spin_unlock_irq(¤t->sighand->siglock);
+ }
+
+ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
+ struct sigaction *sa)
+ {
+ switch (regs->u_regs[UREG_I0]) {
+ case ERESTART_RESTARTBLOCK:
+ case ERESTARTNOHAND:
+ no_system_call_restart:
+ regs->u_regs[UREG_I0] = EINTR;
+ regs->tstate |= (TSTATE_ICARRY|TSTATE_XCARRY);
+ break;
+ case ERESTARTSYS:
+ if (!(sa->sa_flags & SA_RESTART))
+ goto no_system_call_restart;
+ /* fallthrough */
+ case ERESTARTNOINTR:
+ regs->u_regs[UREG_I0] = orig_i0;
+ regs->tpc -= 4;
+ regs->tnpc -= 4;
+ }
+ }
+
+ /* Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init, even with SIGKILL, even by
+ * mistake.
+ */
+ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
+ {
+ struct k_sigaction ka;
+ int restart_syscall;
+ sigset_t *oldset;
+ siginfo_t info;
+ int signr;
+
+ if (pt_regs_is_syscall(regs) &&
+ (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+ restart_syscall = 1;
+ } else
+ restart_syscall = 0;
+
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+ oldset = ¤t->saved_sigmask;
+ else
+ oldset = ¤t->blocked;
+
+ #ifdef CONFIG_COMPAT
+ if (test_thread_flag(TIF_32BIT)) {
+ extern void do_signal32(sigset_t *, struct pt_regs *,
+ int restart_syscall,
+ unsigned long orig_i0);
+ do_signal32(oldset, regs, restart_syscall, orig_i0);
+ return;
+ }
+ #endif
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+
+ /* If the debugger messes with the program counter, it clears
+ * the software "in syscall" bit, directing us to not perform
+ * a syscall restart.
+ */
+ if (restart_syscall && !pt_regs_is_syscall(regs))
+ restart_syscall = 0;
+
+ if (signr > 0) {
+ if (restart_syscall)
+ syscall_restart(orig_i0, regs, &ka.sa);
+ handle_signal(signr, &ka, &info, oldset, regs);
+
+ /* A signal was successfully delivered; the saved
+ * sigmask will have been stored in the signal frame,
+ * and will be restored by sigreturn, so we can simply
+ * clear the TS_RESTORE_SIGMASK flag.
+ */
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+
+ tracehook_signal_handler(signr, &info, &ka, regs, 0);
+ return;
+ }
+ if (restart_syscall &&
+ (regs->u_regs[UREG_I0] == ERESTARTNOHAND ||
+ regs->u_regs[UREG_I0] == ERESTARTSYS ||
+ regs->u_regs[UREG_I0] == ERESTARTNOINTR)) {
+ /* replay the system call when we are done */
+ regs->u_regs[UREG_I0] = orig_i0;
+ regs->tpc -= 4;
+ regs->tnpc -= 4;
+ }
+ if (restart_syscall &&
+ regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) {
+ regs->u_regs[UREG_G1] = __NR_restart_syscall;
+ regs->tpc -= 4;
+ regs->tnpc -= 4;
+ }
+
+ /* If there's no signal to deliver, we just put the saved sigmask
+ * back
+ */
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+ sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL);
+ }
+ }
+
+ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
+ {
++ if (thread_info_flags & _TIF_PERFMON_WORK)
++ pfm_handle_work(regs);
++
+ if (thread_info_flags & _TIF_SIGPENDING)
+ do_signal(regs, orig_i0);
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+ }
--- /dev/null
+ /* linux/arch/sparc64/kernel/sys_sparc.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/sparc
+ * platform.
+ */
+
+ #include <linux/errno.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/fs.h>
+ #include <linux/file.h>
+ #include <linux/mm.h>
+ #include <linux/sem.h>
+ #include <linux/msg.h>
+ #include <linux/shm.h>
+ #include <linux/stat.h>
+ #include <linux/mman.h>
+ #include <linux/utsname.h>
+ #include <linux/smp.h>
+ #include <linux/slab.h>
+ #include <linux/syscalls.h>
+ #include <linux/ipc.h>
+ #include <linux/personality.h>
+ #include <linux/random.h>
+ #include <linux/module.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/utrap.h>
-#include <asm/perfctr.h>
+ #include <asm/unistd.h>
+
+ #include "entry.h"
+ #include "systbls.h"
+
+ /* #define DEBUG_UNIMP_SYSCALL */
+
+ asmlinkage unsigned long sys_getpagesize(void)
+ {
+ return PAGE_SIZE;
+ }
+
+ #define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
+ #define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
+
+ /* Does addr --> addr+len fall within 4GB of the VA-space hole or
+ * overflow past the end of the 64-bit address space?
+ */
+ static inline int invalid_64bit_range(unsigned long addr, unsigned long len)
+ {
+ unsigned long va_exclude_start, va_exclude_end;
+
+ va_exclude_start = VA_EXCLUDE_START;
+ va_exclude_end = VA_EXCLUDE_END;
+
+ if (unlikely(len >= va_exclude_start))
+ return 1;
+
+ if (unlikely((addr + len) < addr))
+ return 1;
+
+ if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) ||
+ ((addr + len) >= va_exclude_start &&
+ (addr + len) < va_exclude_end)))
+ return 1;
+
+ return 0;
+ }
+
+ /* Does start,end straddle the VA-space hole? */
+ static inline int straddles_64bit_va_hole(unsigned long start, unsigned long end)
+ {
+ unsigned long va_exclude_start, va_exclude_end;
+
+ va_exclude_start = VA_EXCLUDE_START;
+ va_exclude_end = VA_EXCLUDE_END;
+
+ if (likely(start < va_exclude_start && end < va_exclude_start))
+ return 0;
+
+ if (likely(start >= va_exclude_end && end >= va_exclude_end))
+ return 0;
+
+ return 1;
+ }
+
+ /* These functions differ from the default implementations in
+ * mm/mmap.c in two ways:
+ *
+ * 1) For file backed MAP_SHARED mmap()'s we D-cache color align,
+ * for fixed such mappings we just validate what the user gave us.
+ * 2) For 64-bit tasks we avoid mapping anything within 4GB of
+ * the spitfire/niagara VA-hole.
+ */
+
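+ /* SHMLBA spans all D-cache colours; 'off' picks the colour implied
+ * by the file offset, so shared mappings of the same page land on
+ * the same colour.
+ */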
+ static inline unsigned long COLOUR_ALIGN(unsigned long addr,
+ unsigned long pgoff)
+ {
+ unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1);
+ unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
+
+ return base + off;
+ }
+
+ static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
+ unsigned long pgoff)
+ {
+ unsigned long base = addr & ~(SHMLBA-1);
+ unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
+
+ if (base + off <= addr)
+ return base + off;
+ return base - off;
+ }
+
+ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct * vma;
+ unsigned long task_size = TASK_SIZE;
+ unsigned long start_addr;
+ int do_color_align;
+
+ if (flags & MAP_FIXED) {
+ /* We do not accept a shared mapping if it would violate
+ * cache aliasing constraints.
+ */
+ if ((flags & MAP_SHARED) &&
+ ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (test_thread_flag(TIF_32BIT))
+ task_size = STACK_TOP32;
+ if (unlikely(len > task_size || len >= VA_EXCLUDE_START))
+ return -ENOMEM;
+
+ do_color_align = 0;
+ if (filp || (flags & MAP_SHARED))
+ do_color_align = 1;
+
+ if (addr) {
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ if (len > mm->cached_hole_size) {
+ start_addr = addr = mm->free_area_cache;
+ } else {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ }
+
+ task_size -= len;
+
+ full_search:
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (addr < VA_EXCLUDE_START &&
+ (addr + len) >= VA_EXCLUDE_START) {
+ addr = VA_EXCLUDE_END;
+ vma = find_vma(mm, VA_EXCLUDE_END);
+ }
+ if (unlikely(task_size < addr)) {
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+ if (likely(!vma || addr + len <= vma->vm_start)) {
+ /*
+ * Remember the place where we stopped the search:
+ */
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ addr = vma->vm_end;
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ }
+ }
+
+ unsigned long
+ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+ {
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long task_size = STACK_TOP32;
+ unsigned long addr = addr0;
+ int do_color_align;
+
+ /* This should only ever run for 32-bit processes. */
+ BUG_ON(!test_thread_flag(TIF_32BIT));
+
+ if (flags & MAP_FIXED) {
+ /* We do not accept a shared mapping if it would violate
+ * cache aliasing constraints.
+ */
+ if ((flags & MAP_SHARED) &&
+ ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (unlikely(len > task_size))
+ return -ENOMEM;
+
+ do_color_align = 0;
+ if (filp || (flags & MAP_SHARED))
+ do_color_align = 1;
+
+ /* requesting a specific address */
+ if (addr) {
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ /* check if free_area_cache is useful for us */
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+ if (do_color_align) {
+ unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+
+ addr = base + len;
+ }
+
+ /* make sure it can fit in the remaining address space */
+ if (likely(addr > len)) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+ }
+
+ if (unlikely(mm->mmap_base < len))
+ goto bottomup;
+
+ addr = mm->mmap_base-len;
+ if (do_color_align)
+ addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+
+ do {
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (likely(!vma || addr+len <= vma->vm_start)) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr);
+ }
+
+ /* remember the largest hole we saw so far */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start-len;
+ if (do_color_align)
+ addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+ } while (likely(len < vma->vm_start));
+
+ bottomup:
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ mm->cached_hole_size = ~0UL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+ /*
+ * Restore the topdown base:
+ */
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+
+ return addr;
+ }
+
+ /* Try to align the mapping as much as possible. */
+ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ unsigned long align_goal, addr = -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ /* Ok, don't mess with it. */
+ return get_unmapped_area(NULL, orig_addr, len, pgoff, flags);
+ }
+ flags &= ~MAP_SHARED;
+
+ align_goal = PAGE_SIZE;
+ if (len >= (4UL * 1024 * 1024))
+ align_goal = (4UL * 1024 * 1024);
+ else if (len >= (512UL * 1024))
+ align_goal = (512UL * 1024);
+ else if (len >= (64UL * 1024))
+ align_goal = (64UL * 1024);
+
+ do {
+ addr = get_unmapped_area(NULL, orig_addr, len + (align_goal - PAGE_SIZE), pgoff, flags);
+ if (!(addr & ~PAGE_MASK)) {
+ addr = (addr + (align_goal - 1UL)) & ~(align_goal - 1UL);
+ break;
+ }
+
+ if (align_goal == (4UL * 1024 * 1024))
+ align_goal = (512UL * 1024);
+ else if (align_goal == (512UL * 1024))
+ align_goal = (64UL * 1024);
+ else
+ align_goal = PAGE_SIZE;
+ } while ((addr & ~PAGE_MASK) && align_goal > PAGE_SIZE);
+
+ /* Mapping is smaller than 64K or larger areas could not
+ * be obtained.
+ */
+ if (addr & ~PAGE_MASK)
+ addr = get_unmapped_area(NULL, orig_addr, len, pgoff, flags);
+
+ return addr;
+ }
+ EXPORT_SYMBOL(get_fb_unmapped_area);
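
The ladder in get_fb_unmapped_area() over-allocates by (align_goal - PAGE_SIZE) so that a start address aligned to the goal must exist inside whatever range comes back, rounds up to it, and falls back to smaller goals when the padded request cannot be placed. A standalone sketch under those rules; fake_get_area() is a hypothetical stand-in for get_unmapped_area() and 8 KB pages are assumed:

    #include <stdio.h>

    #define PAGE_SIZE 8192UL                /* assumption: 8 KB pages */
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* Hypothetical allocator: pretend the arena starts at a fixed,
     * page-aligned but otherwise arbitrary address. */
    static unsigned long fake_get_area(unsigned long len)
    {
            (void)len;
            return 0x70002000UL;
    }

    static unsigned long align_ladder(unsigned long len)
    {
            const unsigned long goals[] = {
                    4UL * 1024 * 1024, 512UL * 1024, 64UL * 1024,
            };
            unsigned long addr;

            for (int i = 0; i < 3; i++) {
                    if (len < goals[i])
                            continue;       /* skip goals larger than the mapping */
                    /* Over-allocate so an aligned start must exist inside. */
                    addr = fake_get_area(len + goals[i] - PAGE_SIZE);
                    if (!(addr & ~PAGE_MASK))
                            return (addr + goals[i] - 1) & ~(goals[i] - 1);
            }
            return fake_get_area(len);      /* plain page alignment only */
    }

    int main(void)
    {
            printf("%#lx\n", align_ladder(1UL << 20)); /* 1 MB -> 512K goal */
            return 0;
    }
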
+
+ /* Essentially the same as PowerPC... */
+ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+ unsigned long random_factor = 0UL;
+
+ if (current->flags & PF_RANDOMIZE) {
+ random_factor = get_random_int();
+ if (test_thread_flag(TIF_32BIT))
+ random_factor &= ((1 * 1024 * 1024) - 1);
+ else
+ random_factor = ((random_factor << PAGE_SHIFT) &
+ 0xffffffffUL);
+ }
+
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (!test_thread_flag(TIF_32BIT) ||
+ (current->personality & ADDR_COMPAT_LAYOUT) ||
+ current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY ||
+ sysctl_legacy_va_layout) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ /* We know it's 32-bit */
+ unsigned long task_size = STACK_TOP32;
+ unsigned long gap;
+
+ gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+ if (gap < 128 * 1024 * 1024)
+ gap = 128 * 1024 * 1024;
+ if (gap > (task_size / 6 * 5))
+ gap = (task_size / 6 * 5);
+
+ mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+ }
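
For the top-down layout the stack gap is the stack rlimit clamped to [128 MB, 5/6 of the 32-bit task size], and mmap_base sits that far below the top of the address space, less the randomisation. A worked restatement; STACK_TOP32 = 0xf0000000 and 8 KB pages are assumptions for the example:

    #include <stdio.h>

    #define STACK_TOP32 0xf0000000UL                /* assumed value */
    #define PAGE_ALIGN(x) (((x) + 8191UL) & ~8191UL) /* 8 KB pages assumed */

    static unsigned long mmap_base(unsigned long stack_rlim,
                                   unsigned long random_factor)
    {
            unsigned long gap = stack_rlim;

            if (gap < 128UL * 1024 * 1024)
                    gap = 128UL * 1024 * 1024;      /* at least 128 MB of stack */
            if (gap > STACK_TOP32 / 6 * 5)
                    gap = STACK_TOP32 / 6 * 5;      /* at most 5/6 of task size */

            return PAGE_ALIGN(STACK_TOP32 - gap - random_factor);
    }

    int main(void)
    {
            /* An 8 MB rlimit clamps up to 128 MB below the task top. */
            printf("%#lx\n", mmap_base(8UL * 1024 * 1024, 0));
            return 0;
    }
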
+
+ SYSCALL_DEFINE1(sparc_brk, unsigned long, brk)
+ {
+ /* People could try to be nasty and use ta 0x6d in 32bit programs */
+ if (test_thread_flag(TIF_32BIT) && brk >= STACK_TOP32)
+ return current->mm->brk;
+
+ if (unlikely(straddles_64bit_va_hole(current->mm->brk, brk)))
+ return current->mm->brk;
+
+ return sys_brk(brk);
+ }
+
+ /*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ */
+ SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
+ {
+ int fd[2];
+ int error;
+
+ error = do_pipe_flags(fd, 0);
+ if (error)
+ goto out;
+ regs->u_regs[UREG_I1] = fd[1];
+ error = fd[0];
+ out:
+ return error;
+ }
+
+ /*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+
+ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
+ unsigned long, third, void __user *, ptr, long, fifth)
+ {
+ long err;
+
+ /* No need for backward compatibility. We can start fresh... */
+ if (call <= SEMCTL) {
+ switch (call) {
+ case SEMOP:
+ err = sys_semtimedop(first, ptr,
+ (unsigned)second, NULL);
+ goto out;
+ case SEMTIMEDOP:
+ err = sys_semtimedop(first, ptr, (unsigned)second,
+ (const struct timespec __user *)
+ (unsigned long) fifth);
+ goto out;
+ case SEMGET:
+ err = sys_semget(first, (int)second, (int)third);
+ goto out;
+ case SEMCTL: {
+ err = sys_semctl(first, second,
+ (int)third | IPC_64,
+ (union semun) ptr);
+ goto out;
+ }
+ default:
+ err = -ENOSYS;
+ goto out;
+ };
+ }
+ if (call <= MSGCTL) {
+ switch (call) {
+ case MSGSND:
+ err = sys_msgsnd(first, ptr, (size_t)second,
+ (int)third);
+ goto out;
+ case MSGRCV:
+ err = sys_msgrcv(first, ptr, (size_t)second, fifth,
+ (int)third);
+ goto out;
+ case MSGGET:
+ err = sys_msgget((key_t)first, (int)second);
+ goto out;
+ case MSGCTL:
+ err = sys_msgctl(first, (int)second | IPC_64, ptr);
+ goto out;
+ default:
+ err = -ENOSYS;
+ goto out;
+ };
+ }
+ if (call <= SHMCTL) {
+ switch (call) {
+ case SHMAT: {
+ ulong raddr;
+ err = do_shmat(first, ptr, (int)second, &raddr);
+ if (!err) {
+ if (put_user(raddr,
+ (ulong __user *) third))
+ err = -EFAULT;
+ }
+ goto out;
+ }
+ case SHMDT:
+ err = sys_shmdt(ptr);
+ goto out;
+ case SHMGET:
+ err = sys_shmget(first, (size_t)second, (int)third);
+ goto out;
+ case SHMCTL:
+ err = sys_shmctl(first, (int)second | IPC_64, ptr);
+ goto out;
+ default:
+ err = -ENOSYS;
+ goto out;
+ };
+ } else {
+ err = -ENOSYS;
+ }
+ out:
+ return err;
+ }
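
For reference, a hedged userspace sketch of exercising the SHMAT branch above directly through the multiplexer: the attach address comes back through the pointer passed as the third argument. SYS_ipc and the SHMAT code 21 are assumptions (not every architecture routes SysV IPC this way); portable code simply calls shmat():

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            int id = shmget(IPC_PRIVATE, 8192, IPC_CREAT | 0600);
            unsigned long raddr = 0;

            /* args: (call, first, second, third, ptr, fifth);
             * 21 == SHMAT is an assumed constant from linux/ipc.h */
            if (id < 0 || syscall(SYS_ipc, 21, id, 0, (long)&raddr,
                                  NULL, 0L) < 0) {
                    perror("ipc");
                    return 1;
            }
            printf("attached at %#lx\n", raddr);
            return shmdt((void *)raddr);
    }
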
+
+ SYSCALL_DEFINE1(sparc64_newuname, struct new_utsname __user *, name)
+ {
+ int ret = sys_newuname(name);
+
+ if (current->personality == PER_LINUX32 && !ret) {
+ ret = (copy_to_user(name->machine, "sparc\0\0", 8)
+ ? -EFAULT : 0);
+ }
+ return ret;
+ }
+
+ SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality)
+ {
+ int ret;
+
+ if (current->personality == PER_LINUX32 &&
+ personality == PER_LINUX)
+ personality = PER_LINUX32;
+ ret = sys_personality(personality);
+ if (ret == PER_LINUX32)
+ ret = PER_LINUX;
+
+ return ret;
+ }
+
+ int sparc_mmap_check(unsigned long addr, unsigned long len)
+ {
+ if (test_thread_flag(TIF_32BIT)) {
+ if (len >= STACK_TOP32)
+ return -EINVAL;
+
+ if (addr > STACK_TOP32 - len)
+ return -EINVAL;
+ } else {
+ if (len >= VA_EXCLUDE_START)
+ return -EINVAL;
+
+ if (invalid_64bit_range(addr, len))
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /* Linux version of mmap */
+ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags, unsigned long, fd,
+ unsigned long, off)
+ {
+ struct file * file = NULL;
+ unsigned long retval = -EBADF;
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ file = fget(fd);
+ if (!file)
+ goto out;
+ }
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+ len = PAGE_ALIGN(len);
+
+ down_write(&current->mm->mmap_sem);
+ retval = do_mmap(file, addr, len, prot, flags, off);
+ up_write(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+ out:
+ return retval;
+ }
+
+ SYSCALL_DEFINE2(64_munmap, unsigned long, addr, size_t, len)
+ {
+ long ret;
+
+ if (invalid_64bit_range(addr, len))
+ return -EINVAL;
+
+ down_write(&current->mm->mmap_sem);
+ ret = do_munmap(current->mm, addr, len);
+ up_write(&current->mm->mmap_sem);
+ return ret;
+ }
+
+ extern unsigned long do_mremap(unsigned long addr,
+ unsigned long old_len, unsigned long new_len,
+ unsigned long flags, unsigned long new_addr);
+
+ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len,
+ unsigned long, new_len, unsigned long, flags,
+ unsigned long, new_addr)
+ {
+ unsigned long ret = -EINVAL;
+
+ if (test_thread_flag(TIF_32BIT))
+ goto out;
+ if (unlikely(new_len >= VA_EXCLUDE_START))
+ goto out;
+ if (unlikely(sparc_mmap_check(addr, old_len)))
+ goto out;
+ if (unlikely(sparc_mmap_check(new_addr, new_len)))
+ goto out;
+
+ down_write(&current->mm->mmap_sem);
+ ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+ up_write(&current->mm->mmap_sem);
+ out:
+ return ret;
+ }
+
+ /* we come to here via sys_nis_syscall so it can setup the regs argument */
+ asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs)
+ {
+ static int count;
+
+ /* Don't make the system unusable if someone gets stuck */
+ if (count++ > 5)
+ return -ENOSYS;
+
+ printk ("Unimplemented SPARC system call %ld\n",regs->u_regs[1]);
+ #ifdef DEBUG_UNIMP_SYSCALL
+ show_regs (regs);
+ #endif
+
+ return -ENOSYS;
+ }
+
+ /* #define DEBUG_SPARC_BREAKPOINT */
+
+ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
+ {
+ siginfo_t info;
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ #ifdef DEBUG_SPARC_BREAKPOINT
+ printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
+ #endif
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_BRKPT;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ force_sig_info(SIGTRAP, &info, current);
+ #ifdef DEBUG_SPARC_BREAKPOINT
+ printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
+ #endif
+ }
+
+ extern void check_pending(int signum);
+
+ SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
+ {
+ int nlen, err;
+
+ if (len < 0)
+ return -EINVAL;
+
+ down_read(&uts_sem);
+
+ nlen = strlen(utsname()->domainname) + 1;
+ err = -EINVAL;
+ if (nlen > len)
+ goto out;
+
+ err = -EFAULT;
+ if (!copy_to_user(name, utsname()->domainname, nlen))
+ err = 0;
+
+ out:
+ up_read(&uts_sem);
+ return err;
+ }
+
+ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
+ utrap_handler_t, new_p, utrap_handler_t, new_d,
+ utrap_handler_t __user *, old_p,
+ utrap_handler_t __user *, old_d)
+ {
+ if (type < UT_INSTRUCTION_EXCEPTION || type > UT_TRAP_INSTRUCTION_31)
+ return -EINVAL;
+ if (new_p == (utrap_handler_t)(long)UTH_NOCHANGE) {
+ if (old_p) {
+ if (!current_thread_info()->utraps) {
+ if (put_user(NULL, old_p))
+ return -EFAULT;
+ } else {
+ if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
+ return -EFAULT;
+ }
+ }
+ if (old_d) {
+ if (put_user(NULL, old_d))
+ return -EFAULT;
+ }
+ return 0;
+ }
+ if (!current_thread_info()->utraps) {
+ current_thread_info()->utraps =
+ kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL);
+ if (!current_thread_info()->utraps)
+ return -ENOMEM;
+ current_thread_info()->utraps[0] = 1;
+ } else {
+ if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p &&
+ current_thread_info()->utraps[0] > 1) {
+ unsigned long *p = current_thread_info()->utraps;
+
+ current_thread_info()->utraps =
+ kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long),
+ GFP_KERNEL);
+ if (!current_thread_info()->utraps) {
+ current_thread_info()->utraps = p;
+ return -ENOMEM;
+ }
+ p[0]--;
+ current_thread_info()->utraps[0] = 1;
+ memcpy(current_thread_info()->utraps+1, p+1,
+ UT_TRAP_INSTRUCTION_31*sizeof(long));
+ }
+ }
+ if (old_p) {
+ if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
+ return -EFAULT;
+ }
+ if (old_d) {
+ if (put_user(NULL, old_d))
+ return -EFAULT;
+ }
+ current_thread_info()->utraps[type] = (long)new_p;
+
+ return 0;
+ }
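
Note that utraps[0] doubles as a reference count: a thread whose table is shared (count above 1) clones it before installing a different handler, so siblings never observe the change. A standalone sketch of that copy-on-write step, with hypothetical names:

    #include <stdlib.h>
    #include <string.h>

    #define NSLOTS 32  /* assumed size: slot 0 = refcount, 1..31 = handlers */

    static long *utrap_write(long *tab, int type, long handler)
    {
            if (tab[0] > 1 && tab[type] != handler) {
                    long *copy = malloc(NSLOTS * sizeof(long));

                    if (!copy)
                            return NULL;    /* out of memory: old table untouched */
                    tab[0]--;               /* release our share of the old table */
                    copy[0] = 1;            /* the clone has a single owner */
                    memcpy(copy + 1, tab + 1, (NSLOTS - 1) * sizeof(long));
                    tab = copy;
            }
            tab[type] = handler;
            return tab;
    }

    int main(void)
    {
            long shared[NSLOTS] = { 2 };    /* refcount 2: shared with a sibling */
            long *mine = utrap_write(shared, 5, 0xdead);

            /* The sibling's table is untouched; ours holds the new handler
             * (the clone is leaked here; the kernel frees it at thread exit). */
            return !(mine != shared && shared[5] == 0 && mine[5] == 0xdead);
    }
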
+
+ asmlinkage long sparc_memory_ordering(unsigned long model,
+ struct pt_regs *regs)
+ {
+ if (model >= 3)
+ return -EINVAL;
+ regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14);
+ return 0;
+ }
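
The model < 3 check admits the three SPARC V9 memory models, TSO (0), PSO (1) and RMO (2), and (model << 14) places the value in the two-bit TSTATE.MM field. A minimal restatement, taking the field position from the shift in the code:

    #include <stdio.h>

    enum sparc_mm_model { MM_TSO = 0, MM_PSO = 1, MM_RMO = 2 };

    static unsigned long set_mm(unsigned long tstate, enum sparc_mm_model m)
    {
            const unsigned long TSTATE_MM = 3UL << 14; /* two-bit field, shift 14 */

            return (tstate & ~TSTATE_MM) | ((unsigned long)m << 14);
    }

    int main(void)
    {
            printf("%#lx\n", set_mm(0xffffUL, MM_RMO)); /* field becomes 2 */
            return 0;
    }
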
+
+ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
+ struct sigaction __user *, oact, void __user *, restorer,
+ size_t, sigsetsize)
+ {
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (act) {
+ new_ka.ka_restorer = restorer;
+ if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (copy_to_user(oact, &old_ka.sa, sizeof(*oact)))
+ return -EFAULT;
+ }
+
+ return ret;
+ }
+
-/* Invoked by rtrap code to update performance counters in
- * user space.
- */
-asmlinkage void update_perfctrs(void)
-{
- unsigned long pic, tmp;
-
- read_pic(pic);
- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
- __put_user(tmp, current_thread_info()->user_cntd0);
- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
- __put_user(tmp, current_thread_info()->user_cntd1);
- reset_pic();
-}
-
+ SYSCALL_DEFINE4(perfctr, int, opcode, unsigned long, arg0,
+ unsigned long, arg1, unsigned long, arg2)
+ {
- int err = 0;
-
- switch(opcode) {
- case PERFCTR_ON:
- current_thread_info()->pcr_reg = arg2;
- current_thread_info()->user_cntd0 = (u64 __user *) arg0;
- current_thread_info()->user_cntd1 = (u64 __user *) arg1;
- current_thread_info()->kernel_cntd0 =
- current_thread_info()->kernel_cntd1 = 0;
- write_pcr(arg2);
- reset_pic();
- set_thread_flag(TIF_PERFCTR);
- break;
-
- case PERFCTR_OFF:
- err = -EINVAL;
- if (test_thread_flag(TIF_PERFCTR)) {
- current_thread_info()->user_cntd0 =
- current_thread_info()->user_cntd1 = NULL;
- current_thread_info()->pcr_reg = 0;
- write_pcr(0);
- clear_thread_flag(TIF_PERFCTR);
- err = 0;
- }
- break;
-
- case PERFCTR_READ: {
- unsigned long pic, tmp;
-
- if (!test_thread_flag(TIF_PERFCTR)) {
- err = -EINVAL;
- break;
- }
- read_pic(pic);
- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
- err |= __put_user(tmp, current_thread_info()->user_cntd0);
- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
- err |= __put_user(tmp, current_thread_info()->user_cntd1);
- reset_pic();
- break;
- }
-
- case PERFCTR_CLRPIC:
- if (!test_thread_flag(TIF_PERFCTR)) {
- err = -EINVAL;
- break;
- }
- current_thread_info()->kernel_cntd0 =
- current_thread_info()->kernel_cntd1 = 0;
- reset_pic();
- break;
-
- case PERFCTR_SETPCR: {
- u64 __user *user_pcr = (u64 __user *)arg0;
-
- if (!test_thread_flag(TIF_PERFCTR)) {
- err = -EINVAL;
- break;
- }
- err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
- write_pcr(current_thread_info()->pcr_reg);
- current_thread_info()->kernel_cntd0 =
- current_thread_info()->kernel_cntd1 = 0;
- reset_pic();
- break;
- }
-
- case PERFCTR_GETPCR: {
- u64 __user *user_pcr = (u64 __user *)arg0;
-
- if (!test_thread_flag(TIF_PERFCTR)) {
- err = -EINVAL;
- break;
- }
- err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
- break;
- }
-
- default:
- err = -EINVAL;
- break;
- };
- return err;
++ /* Superseded by perfmon2 */
++ return -ENOSYS;
+ }
+
+ /*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+ int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+ {
+ long __res;
+ register long __g1 __asm__ ("g1") = __NR_execve;
+ register long __o0 __asm__ ("o0") = (long)(filename);
+ register long __o1 __asm__ ("o1") = (long)(argv);
+ register long __o2 __asm__ ("o2") = (long)(envp);
+ asm volatile ("t 0x6d\n\t"
+ "sub %%g0, %%o0, %0\n\t"
+ "movcc %%xcc, %%o0, %0\n\t"
+ : "=r" (__res), "=&r" (__o0)
+ : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
+ : "cc");
+ return __res;
+ }
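
The two instructions after the trap fold sparc's condition-code error convention into the usual negative-errno one: a failed call comes back with the xcc carry set and the positive errno in %o0, so the result is pre-negated and then overwritten with the raw value only on the carry-clear (success) path. The same logic in C, with the carry flag stood in by a boolean:

    /* "sub %g0, %o0, %0" precomputes -o0; "movcc %xcc, %o0, %0"
     * keeps the raw o0 only when the carry is clear (success). */
    static long decode_syscall_result(unsigned long o0, int carry_set)
    {
            long res = -(long)o0;           /* assume failure: negate errno */

            if (!carry_set)                 /* movcc path: success */
                    res = (long)o0;
            return res;
    }

    int main(void)
    {
            /* e.g. a failed open(): carry set, %o0 = ENOENT (2) */
            return decode_syscall_result(2, 1) == -2 ? 0 : 1;
    }
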
--- /dev/null
+ /* SunOS's execv() call only specifies the argv argument; the
+ * environment settings are the same as the calling process's.
+ */
+ sys_execve:
+ sethi %hi(sparc_execve), %g1
+ ba,pt %xcc, execve_merge
+ or %g1, %lo(sparc_execve), %g1
+
+ #ifdef CONFIG_COMPAT
+ sunos_execv:
+ stx %g0, [%sp + PTREGS_OFF + PT_V9_I2]
+ sys32_execve:
+ sethi %hi(sparc32_execve), %g1
+ or %g1, %lo(sparc32_execve), %g1
+ #endif
+
+ execve_merge:
+ flushw
+ jmpl %g1, %g0
+ add %sp, PTREGS_OFF, %o0
+
+ .align 32
+ sys_sparc_pipe:
+ ba,pt %xcc, sys_sparc_pipe_real
+ add %sp, PTREGS_OFF, %o0
+ sys_nis_syscall:
+ ba,pt %xcc, c_sys_nis_syscall
+ add %sp, PTREGS_OFF, %o0
+ sys_memory_ordering:
+ ba,pt %xcc, sparc_memory_ordering
+ add %sp, PTREGS_OFF, %o1
+ sys_sigaltstack:
+ ba,pt %xcc, do_sigaltstack
+ add %i6, STACK_BIAS, %o2
+ #ifdef CONFIG_COMPAT
+ sys32_sigstack:
+ ba,pt %xcc, do_sys32_sigstack
+ mov %i6, %o2
+ sys32_sigaltstack:
+ ba,pt %xcc, do_sys32_sigaltstack
+ mov %i6, %o2
+ #endif
+ .align 32
+ #ifdef CONFIG_COMPAT
+ sys32_sigreturn:
+ add %sp, PTREGS_OFF, %o0
+ call do_sigreturn32
+ add %o7, 1f-.-4, %o7
+ nop
+ #endif
+ sys_rt_sigreturn:
+ add %sp, PTREGS_OFF, %o0
+ call do_rt_sigreturn
+ add %o7, 1f-.-4, %o7
+ nop
+ #ifdef CONFIG_COMPAT
+ sys32_rt_sigreturn:
+ add %sp, PTREGS_OFF, %o0
+ call do_rt_sigreturn32
+ add %o7, 1f-.-4, %o7
+ nop
+ #endif
+ .align 32
+ 1: ldx [%g6 + TI_FLAGS], %l5
+ andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
+ be,pt %icc, rtrap
+ nop
+ call syscall_trace_leave
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+
+ /* This is how fork() was meant to be done, 8 instruction entry.
+ *
+ * I questioned the following code briefly; let me clear things
+ * up so you don't have to reason through it like I did.
+ *
+ * Know the fork_kpsr etc. we use in the sparc32 port? We don't
+ * need it here because the only piece of window state we copy to
+ * the child is the CWP register. Even if the parent sleeps,
+ * we are safe because we stuck it into pt_regs of the parent
+ * so it will not change.
+ *
+ * XXX This raises the question, whether we can do the same on
+ * XXX sparc32 to get rid of fork_kpsr _and_ fork_kwim. The
+ * XXX answer is yes. We stick fork_kpsr in UREG_G0 and
+ * XXX fork_kwim in UREG_G1 (global registers are considered
+ * XXX volatile across a system call in the sparc ABI I think
+ * XXX if it isn't we can use regs->y instead, anyone who depends
+ * XXX upon the Y register being preserved across a fork deserves
+ * XXX to lose).
+ *
+ * In fact we should take advantage of that fact for other things
+ * during system calls...
+ */
+ .align 32
+ sys_vfork: /* Under Linux, vfork and fork are just special cases of clone. */
+ sethi %hi(0x4000 | 0x0100 | SIGCHLD), %o0
+ or %o0, %lo(0x4000 | 0x0100 | SIGCHLD), %o0
+ ba,pt %xcc, sys_clone
+ sys_fork:
+ clr %o1
+ mov SIGCHLD, %o0
+ sys_clone:
+ flushw
+ movrz %o1, %fp, %o1
+ mov 0, %o3
+ ba,pt %xcc, sparc_do_fork
+ add %sp, PTREGS_OFF, %o2
+
+ .globl ret_from_syscall
+ ret_from_syscall:
+ /* Clear current_thread_info()->new_child. */
+ stb %g0, [%g6 + TI_NEW_CHILD]
+ ldx [%g6 + TI_FLAGS], %l0
+ call schedule_tail
- mov %g7, %o0
- andcc %l0, _TIF_PERFCTR, %g0
- be,pt %icc, 1f
- nop
- ldx [%g6 + TI_PCR], %o7
- wr %g0, %o7, %pcr
-
- /* Blackbird errata workaround. See commentary in
- * smp.c:smp_percpu_timer_interrupt() for more
- * information.
- */
- ba,pt %xcc, 99f
- nop
-
- .align 64
-99: wr %g0, %g0, %pic
- rd %pic, %g0
-
-1: ba,pt %xcc, ret_sys_call
- ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
++ mov %g7, %o0
++ ba,pt %xcc, ret_sys_call
++ ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
+
+ .globl sparc_exit
+ .type sparc_exit,#function
+ sparc_exit:
+ rdpr %pstate, %g2
+ wrpr %g2, PSTATE_IE, %pstate
+ rdpr %otherwin, %g1
+ rdpr %cansave, %g3
+ add %g3, %g1, %g3
+ wrpr %g3, 0x0, %cansave
+ wrpr %g0, 0x0, %otherwin
+ wrpr %g2, 0x0, %pstate
+ ba,pt %xcc, sys_exit
+ stb %g0, [%g6 + TI_WSAVED]
+ .size sparc_exit,.-sparc_exit
+
+ linux_sparc_ni_syscall:
+ sethi %hi(sys_ni_syscall), %l7
+ ba,pt %xcc, 4f
+ or %l7, %lo(sys_ni_syscall), %l7
+
+ linux_syscall_trace32:
+ call syscall_trace_enter
+ add %sp, PTREGS_OFF, %o0
+ brnz,pn %o0, 3f
+ mov -ENOSYS, %o0
+ srl %i0, 0, %o0
+ srl %i4, 0, %o4
+ srl %i1, 0, %o1
+ srl %i2, 0, %o2
+ ba,pt %xcc, 2f
+ srl %i3, 0, %o3
+
+ linux_syscall_trace:
+ call syscall_trace_enter
+ add %sp, PTREGS_OFF, %o0
+ brnz,pn %o0, 3f
+ mov -ENOSYS, %o0
+ mov %i0, %o0
+ mov %i1, %o1
+ mov %i2, %o2
+ mov %i3, %o3
+ b,pt %xcc, 2f
+ mov %i4, %o4
+
+
+ /* Linux 32-bit system calls enter here... */
+ .align 32
+ .globl linux_sparc_syscall32
+ linux_sparc_syscall32:
+ /* Direct access to user regs, much faster. */
+ cmp %g1, NR_SYSCALLS ! IEU1 Group
+ bgeu,pn %xcc, linux_sparc_ni_syscall ! CTI
+ srl %i0, 0, %o0 ! IEU0
+ sll %g1, 2, %l4 ! IEU0 Group
+ srl %i4, 0, %o4 ! IEU1
+ lduw [%l7 + %l4], %l7 ! Load
+ srl %i1, 0, %o1 ! IEU0 Group
+ ldx [%g6 + TI_FLAGS], %l0 ! Load
+
+ srl %i5, 0, %o5 ! IEU1
+ srl %i2, 0, %o2 ! IEU0 Group
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
+ bne,pn %icc, linux_syscall_trace32 ! CTI
+ mov %i0, %l5 ! IEU1
+ call %l7 ! CTI Group brk forced
+ srl %i3, 0, %o3 ! IEU0
+ ba,a,pt %xcc, 3f
+
+ /* Linux native system calls enter here... */
+ .align 32
+ .globl linux_sparc_syscall
+ linux_sparc_syscall:
+ /* Direct access to user regs, much faster. */
+ cmp %g1, NR_SYSCALLS ! IEU1 Group
+ bgeu,pn %xcc, linux_sparc_ni_syscall ! CTI
+ mov %i0, %o0 ! IEU0
+ sll %g1, 2, %l4 ! IEU0 Group
+ mov %i1, %o1 ! IEU1
+ lduw [%l7 + %l4], %l7 ! Load
+ 4: mov %i2, %o2 ! IEU0 Group
+ ldx [%g6 + TI_FLAGS], %l0 ! Load
+
+ mov %i3, %o3 ! IEU1
+ mov %i4, %o4 ! IEU0 Group
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
+ bne,pn %icc, linux_syscall_trace ! CTI Group
+ mov %i0, %l5 ! IEU0
+ 2: call %l7 ! CTI Group brk forced
+ mov %i5, %o5 ! IEU0
+ nop
+
+ 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0]
+ ret_sys_call:
+ ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3
+ ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
+ sra %o0, 0, %o0
+ mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2
+ sllx %g2, 32, %g2
+
+ /* Check if force_successful_syscall_return()
+ * was invoked.
+ */
+ ldub [%g6 + TI_SYS_NOERROR], %l2
+ brnz,a,pn %l2, 80f
+ stb %g0, [%g6 + TI_SYS_NOERROR]
+
+ cmp %o0, -ERESTART_RESTARTBLOCK
+ bgeu,pn %xcc, 1f
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6
+ 80:
+ /* System call success, clear Carry condition code. */
+ andn %g3, %g2, %g3
+ stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE]
+ bne,pn %icc, linux_syscall_trace2
+ add %l1, 0x4, %l2 ! npc = npc+4
+ stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC]
+ ba,pt %xcc, rtrap
+ stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
+
+ 1:
+ /* System call failure, set Carry condition code.
+ * Also, get abs(errno) to return to the process.
+ */
+ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6
+ sub %g0, %o0, %o0
+ or %g3, %g2, %g3
+ stx %o0, [%sp + PTREGS_OFF + PT_V9_I0]
+ stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE]
+ bne,pn %icc, linux_syscall_trace2
+ add %l1, 0x4, %l2 ! npc = npc+4
+ stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC]
+
+ b,pt %xcc, rtrap
+ stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
+ linux_syscall_trace2:
+ call syscall_trace_leave
+ add %sp, PTREGS_OFF, %o0
+ stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC]
+ ba,pt %xcc, rtrap
+ stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
--- /dev/null
+ /* systbls.S: System call entry point tables for OS compatibility.
+ * The native Linux system call table lives here also.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * Based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+
+
+ .data
+ .align 4
+
+ /* First, the Linux native syscall table. */
+
+ .globl sys_call_table
+ sys_call_table:
+ /*0*/ .long sys_restart_syscall, sys_exit, sys_fork, sys_read, sys_write
+ /*5*/ .long sys_open, sys_close, sys_wait4, sys_creat, sys_link
+ /*10*/ .long sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod
+ /*15*/ .long sys_chmod, sys_lchown16, sparc_brk, sys_nis_syscall, sys_lseek
+ /*20*/ .long sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
+ /*25*/ .long sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause
+ /*30*/ .long sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice
+ /*35*/ .long sys_chown, sys_sync, sys_kill, sys_newstat, sys_sendfile
+ /*40*/ .long sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_getuid
+ /*45*/ .long sys_umount, sys_setgid16, sys_getgid16, sys_signal, sys_geteuid16
+ /*50*/ .long sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, sys_ioctl
+ /*55*/ .long sys_reboot, sys_mmap2, sys_symlink, sys_readlink, sys_execve
+ /*60*/ .long sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
+ /*65*/ .long sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_geteuid
+ /*70*/ .long sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
+ /*75*/ .long sys_madvise, sys_vhangup, sys_truncate64, sys_mincore, sys_getgroups16
+ /*80*/ .long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64
+ /*85*/ .long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
+ /*90*/ .long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid
+ /*95*/ .long sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+ /*100*/ .long sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
+ /*105*/ .long sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+ /*110*/ .long sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
+ /*115*/ .long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_nis_syscall, sys_getcwd
+ /*120*/ .long sys_readv, sys_writev, sys_settimeofday, sys_fchown16, sys_fchmod
+ /*125*/ .long sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
+ /*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall
+ /*135*/ .long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+ /*140*/ .long sys_sendfile64, sys_nis_syscall, sys_futex, sys_gettid, sys_getrlimit
+ /*145*/ .long sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
+ /*150*/ .long sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+ /*155*/ .long sys_fcntl64, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
+ /*160*/ .long sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
+ /*165*/ .long sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
+ /*170*/ .long sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents
+ /*175*/ .long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
+ /*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_ni_syscall
+ /*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname
+ /*190*/ .long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+ /*195*/ .long sys_epoll_wait, sys_ioprio_set, sys_getppid, sparc_sigaction, sys_sgetmask
+ /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir
+ /*205*/ .long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
+ /*210*/ .long sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
+ /*215*/ .long sys_ipc, sys_sigreturn, sys_clone, sys_ioprio_get, sys_adjtimex
+ /*220*/ .long sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
+ /*225*/ .long sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+ /*230*/ .long sys_select, sys_time, sys_splice, sys_stime, sys_statfs64
+ /* "We are the Knights of the Forest of Ni!!" */
+ /*235*/ .long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+ /*240*/ .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
+ /*245*/ .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
+ /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+ /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
+ /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
+ /*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
+ /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
+ /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
+ /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
+ /*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
+ /*290*/ .long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+ /*295*/ .long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
+ /*300*/ .long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
+ /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
+ /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
+ /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
++/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1
--- /dev/null
+ /* systbls.S: System call entry point tables for OS compatibility.
+ * The native Linux system call table lives here also.
+ *
+ * Copyright (C) 1995, 1996, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * Based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+
+
+ .text
+ .align 4
+
+ #ifdef CONFIG_COMPAT
+ /* First, the 32-bit Linux native syscall table. */
+
+ .globl sys_call_table32
+ sys_call_table32:
+ /*0*/ .word sys_restart_syscall, sys32_exit, sys_fork, sys_read, sys_write
+ /*5*/ .word sys32_open, sys_close, sys32_wait4, sys32_creat, sys_link
+ /*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys32_mknod
+ /*15*/ .word sys_chmod, sys_lchown16, sys_sparc_brk, sys32_perfctr, sys32_lseek
+ /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
+ /*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause
+ /*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice
+ .word sys_chown, sys_sync, sys32_kill, compat_sys_newstat, sys32_sendfile
+ /*40*/ .word compat_sys_newlstat, sys_dup, sys_sparc_pipe, compat_sys_times, sys_getuid
+ .word sys32_umount, sys_setgid16, sys_getgid16, sys32_signal, sys_geteuid16
+ /*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl
+ .word sys32_reboot, sys32_mmap2, sys_symlink, sys32_readlink, sys32_execve
+ /*60*/ .word sys32_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize
+ .word sys32_msync, sys_vfork, sys32_pread64, sys32_pwrite64, sys_geteuid
+ /*70*/ .word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
+ .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys_getgroups16
+ /*80*/ .word sys_setgroups16, sys_getpgrp, sys32_setgroups, sys32_setitimer, sys32_ftruncate64
+ .word sys32_swapon, sys32_getitimer, sys_setuid, sys32_sethostname, sys_setgid
+ /*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid
+ .word sys_fsync, sys32_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+ /*100*/ .word sys32_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending
+ .word compat_sys_rt_sigtimedwait, sys32_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+ /*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
+ .word sys32_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd
+ /*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
+ .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
+ /*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
+ .word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
+ /*140*/ .word sys32_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
+ .word compat_sys_setrlimit, sys_pivot_root, sys32_prctl, sys_pciconfig_read, sys_pciconfig_write
+ /*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+ .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
+ /*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys32_getdomainname, sys32_setdomainname, sys_nis_syscall
+ .word sys_quotactl, sys_set_tid_address, compat_sys_mount, sys_ustat, sys32_setxattr
+ /*170*/ .word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents
+ .word sys_setsid, sys_fchdir, sys32_fgetxattr, sys_listxattr, sys_llistxattr
+ /*180*/ .word sys32_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall
+ .word sys32_setpgid, sys32_fremovexattr, sys32_tkill, sys32_exit_group, sys_sparc64_newuname
+ /*190*/ .word sys32_init_module, sys_sparc64_personality, sys_remap_file_pages, sys32_epoll_create, sys32_epoll_ctl
+ .word sys32_epoll_wait, sys32_ioprio_set, sys_getppid, sys32_sigaction, sys_sgetmask
+ /*200*/ .word sys32_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir
+ .word sys32_readahead, sys32_socketcall, sys32_syslog, sys32_lookup_dcookie, sys32_fadvise64
+ /*210*/ .word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, compat_sys_sysinfo
+ .word compat_sys_ipc, sys32_sigreturn, sys_clone, sys32_ioprio_get, compat_sys_adjtimex
+ /*220*/ .word sys32_sigprocmask, sys_ni_syscall, sys32_delete_module, sys_ni_syscall, sys32_getpgid
+ .word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+ /*230*/ .word sys32_select, compat_sys_time, sys32_splice, compat_sys_stime, compat_sys_statfs64
+ .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
+ /*240*/ .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
+ .word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep
+ /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
+ .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
+ /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
+ .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
+ /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
+ .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
+ /*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
+ .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
+ /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+ .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare
+ /*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy
+ .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
+ /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
+ .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
++/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
+
+ #endif /* CONFIG_COMPAT */
+
+ /* Now the 64-bit native Linux syscall table. */
+
+ .align 4
+ .globl sys_call_table64, sys_call_table
+ sys_call_table64:
+ sys_call_table:
+ /*0*/ .word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
+ /*5*/ .word sys_open, sys_close, sys_wait4, sys_creat, sys_link
+ /*10*/ .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod
+ /*15*/ .word sys_chmod, sys_lchown, sys_sparc_brk, sys_perfctr, sys_lseek
+ /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid
+ /*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
+ /*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice
+ .word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64
+ /*40*/ .word sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_nis_syscall
+ .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid
+ /*50*/ .word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl
+ .word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve
+ /*60*/ .word sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
+ .word sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_nis_syscall
+ /*70*/ .word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys_64_munmap, sys_mprotect
+ .word sys_madvise, sys_vhangup, sys_nis_syscall, sys_mincore, sys_getgroups
+ /*80*/ .word sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_nis_syscall
+ .word sys_swapon, sys_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall
+ /*90*/ .word sys_dup2, sys_nis_syscall, sys_fcntl, sys_select, sys_nis_syscall
+ .word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
+ /*100*/ .word sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
+ .word sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+ /*110*/ .word sys_setresgid, sys_getresgid, sys_nis_syscall, sys_recvmsg, sys_sendmsg
+ .word sys_nis_syscall, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd
+ /*120*/ .word sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod
+ .word sys_recvfrom, sys_setreuid, sys_setregid, sys_rename, sys_truncate
+ /*130*/ .word sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown
+ .word sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+ /*140*/ .word sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit
+ .word sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
+ /*150*/ .word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+ .word sys_nis_syscall, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
+ /*160*/ .word sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_utrap_install
+ .word sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
+ /*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents
+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
+ /*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_ni_syscall
+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_sparc64_newuname
+ /*190*/ .word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+ .word sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_nis_syscall, sys_sgetmask
+ /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
+ .word sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
+ /*210*/ .word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
+ .word sys_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex
+ /*220*/ .word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
+ .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid
+ /*230*/ .word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64
+ .word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+ /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
+ .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
+ /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+ .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
+ /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
+ .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
+ /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
+ .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
+ /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
+ .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
+ /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+ .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
+ /*300*/ .word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
+ .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
+ /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
+ .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
++/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
--- /dev/null
+ /* linux/arch/sparc/kernel/time.c
+ *
+ * Copyright (C) 1995 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
+ *
+ * Chris Davis (cdavis@cois.on.ca) 03/27/1998
+ * Added support for the intersil on the sun4/4200
+ *
+ * Gleb Raiko (rajko@mech.math.msu.su) 08/18/1998
+ * Support for MicroSPARC-IIep, PCI CPU.
+ *
+ * This file handles the Sparc specific time handling details.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+ #include <linux/errno.h>
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/param.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
+ #include <linux/time.h>
+ #include <linux/rtc.h>
+ #include <linux/rtc/m48t59.h>
+ #include <linux/timex.h>
+ #include <linux/init.h>
+ #include <linux/pci.h>
+ #include <linux/ioport.h>
+ #include <linux/profile.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
+
+ #include <asm/oplib.h>
+ #include <asm/timer.h>
+ #include <asm/system.h>
+ #include <asm/irq.h>
+ #include <asm/io.h>
+ #include <asm/idprom.h>
+ #include <asm/machines.h>
+ #include <asm/page.h>
+ #include <asm/pcic.h>
+ #include <asm/irq_regs.h>
+
+ #include "irq.h"
+
+ DEFINE_SPINLOCK(rtc_lock);
+ EXPORT_SYMBOL(rtc_lock);
+
+ static int set_rtc_mmss(unsigned long);
+ static int sbus_do_settimeofday(struct timespec *tv);
+
+ unsigned long profile_pc(struct pt_regs *regs)
+ {
+ extern char __copy_user_begin[], __copy_user_end[];
+ extern char __atomic_begin[], __atomic_end[];
+ extern char __bzero_begin[], __bzero_end[];
+
+ unsigned long pc = regs->pc;
+
+ if (in_lock_functions(pc) ||
+ (pc >= (unsigned long) __copy_user_begin &&
+ pc < (unsigned long) __copy_user_end) ||
+ (pc >= (unsigned long) __atomic_begin &&
+ pc < (unsigned long) __atomic_end) ||
+ (pc >= (unsigned long) __bzero_begin &&
+ pc < (unsigned long) __bzero_end))
+ pc = regs->u_regs[UREG_RETPC];
+ return pc;
+ }
+
+ EXPORT_SYMBOL(profile_pc);
+
+ __volatile__ unsigned int *master_l10_counter;
+
+ /*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+
+ #define TICK_SIZE (tick_nsec / 1000)
+
+ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
+ {
+ /* last time the cmos clock got updated */
+ static long last_rtc_update;
+
+ #ifndef CONFIG_SMP
+ profile_tick(CPU_PROFILING);
+ #endif
+
+ /* Protect counter clear so that do_gettimeoffset works */
+ write_seqlock(&xtime_lock);
+
+ clear_clock_irq();
+
+ do_timer(1);
+
+ /* Determine when to update the Mostek clock. */
+ if (ntp_synced() &&
+ xtime.tv_sec > last_rtc_update + 660 &&
+ (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
+ (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
+ if (set_rtc_mmss(xtime.tv_sec) == 0)
+ last_rtc_update = xtime.tv_sec;
+ else
+ last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
+ }
+ write_sequnlock(&xtime_lock);
+
+ #ifndef CONFIG_SMP
+ update_process_times(user_mode(get_irq_regs()));
+ #endif
+ return IRQ_HANDLED;
+ }
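
The write-back predicate above fires at most every 11 minutes and only within half a tick of the middle of a second, so the RTC, which latches whole seconds, lands as close to the system time as it can; on failure the "- 600" adjustment retries roughly a minute later. Restated standalone, with an assumed HZ = 100 tick (the ntp_synced() gate is omitted here):

    #include <stdbool.h>

    static bool should_update_rtc(long now_sec, long now_usec,
                                  long last_update_sec)
    {
            const long tick_usec = 10000;   /* assumption: HZ = 100 */

            return now_sec > last_update_sec + 660 &&        /* >= 11 min apart */
                   now_usec >= 500000 - tick_usec / 2 &&     /* within half a */
                   now_usec <= 500000 + tick_usec / 2;       /* tick of :00.5 */
    }
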
+
+ static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct m48t59_plat_data *pdata = pdev->dev.platform_data;
+
+ return readb(pdata->ioaddr + ofs);
+ }
+
+ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct m48t59_plat_data *pdata = pdev->dev.platform_data;
+
+ writeb(val, pdata->ioaddr + ofs);
+ }
+
+ static struct m48t59_plat_data m48t59_data = {
+ .read_byte = mostek_read_byte,
+ .write_byte = mostek_write_byte,
+ };
+
+ /* resource is set at runtime */
+ static struct platform_device m48t59_rtc = {
+ .name = "rtc-m48t59",
+ .id = 0,
+ .num_resources = 1,
+ .dev = {
+ .platform_data = &m48t59_data,
+ },
+ };
+
+ static int __devinit clock_probe(struct of_device *op, const struct of_device_id *match)
+ {
+ struct device_node *dp = op->node;
+ const char *model = of_get_property(dp, "model", NULL);
+
+ if (!model)
+ return -ENODEV;
+
+ m48t59_rtc.resource = &op->resource[0];
+ if (!strcmp(model, "mk48t02")) {
+ /* Map the clock register io area read-only */
+ m48t59_data.ioaddr = of_ioremap(&op->resource[0], 0,
+ 2048, "rtc-m48t59");
+ m48t59_data.type = M48T59RTC_TYPE_M48T02;
+ } else if (!strcmp(model, "mk48t08")) {
+ m48t59_data.ioaddr = of_ioremap(&op->resource[0], 0,
+ 8192, "rtc-m48t59");
+ m48t59_data.type = M48T59RTC_TYPE_M48T08;
+ } else
+ return -ENODEV;
+
+ if (platform_device_register(&m48t59_rtc) < 0)
+ printk(KERN_ERR "Registering RTC device failed\n");
+
+ return 0;
+ }
+
+ static struct of_device_id __initdata clock_match[] = {
+ {
+ .name = "eeprom",
+ },
+ {},
+ };
+
+ static struct of_platform_driver clock_driver = {
++ .owner = THIS_MODULE,
+ .match_table = clock_match,
+ .probe = clock_probe,
+ .driver = {
+ .name = "rtc",
+ },
+ };
+
+
+ /* Probe for the mostek real time clock chip. */
+ static int __init clock_init(void)
+ {
+ return of_register_driver(&clock_driver, &of_platform_bus_type);
+ }
+
+ /* Must be after subsys_initcall() so that busses are probed. Must
+ * be before device_initcall() because things like the RTC driver
+ * need to see the clock registers.
+ */
+ fs_initcall(clock_init);
+
+ static void __init sbus_time_init(void)
+ {
+
+ BTFIXUPSET_CALL(bus_do_settimeofday, sbus_do_settimeofday, BTFIXUPCALL_NORM);
+ btfixup();
+
+ sparc_init_timers(timer_interrupt);
+
+ /* Now that OBP ticker has been silenced, it is safe to enable IRQ. */
+ local_irq_enable();
+ }
+
+ void __init time_init(void)
+ {
+ #ifdef CONFIG_PCI
+ extern void pci_time_init(void);
+ if (pcic_present()) {
+ pci_time_init();
+ return;
+ }
+ #endif
+ sbus_time_init();
+ }
+
+ static inline unsigned long do_gettimeoffset(void)
+ {
+ unsigned long val = *master_l10_counter;
+ unsigned long usec = (val >> 10) & 0x1fffff;
+
+ /* Limit hit? */
+ if (val & 0x80000000)
+ usec += 1000000 / HZ;
+
+ return usec;
+ }
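
The counter word packs microseconds-within-tick into bits 10..30 and flags a limit hit (a full tick elapsed with its interrupt still pending) in bit 31, in which case one tick's worth of microseconds is added on top. A decode sketch, assuming HZ = 100:

    #include <stdio.h>

    #define HZ 100  /* assumed tick rate for the example */

    static unsigned long l10_to_usec(unsigned int val)
    {
            unsigned long usec = (val >> 10) & 0x1fffff;

            if (val & 0x80000000u)          /* limit bit: pending tick */
                    usec += 1000000 / HZ;
            return usec;
    }

    int main(void)
    {
            /* 1234 us into the tick, limit hit: 1234 + 10000 */
            printf("%lu\n", l10_to_usec((1234u << 10) | 0x80000000u));
            return 0;
    }
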
+
+ /* Ok, my cute asm atomicity trick doesn't work anymore.
+ * There are just too many variables that need to be protected
+ * now (both members of xtime, et al.)
+ */
+ void do_gettimeofday(struct timeval *tv)
+ {
+ unsigned long flags;
+ unsigned long seq;
+ unsigned long usec, sec;
+ unsigned long max_ntp_tick = tick_usec - tickadj;
+
+ do {
+ seq = read_seqbegin_irqsave(&xtime_lock, flags);
+ usec = do_gettimeoffset();
+
+ /*
+ * If time_adjust is negative then NTP is slowing the clock
+ * so make sure not to go into next possible interval.
+ * Better to lose some accuracy than have time go backwards..
+ */
+ if (unlikely(time_adjust < 0))
+ usec = min(usec, max_ntp_tick);
+
+ sec = xtime.tv_sec;
+ usec += (xtime.tv_nsec / 1000);
+ } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ sec++;
+ }
+
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+ }
+
+ EXPORT_SYMBOL(do_gettimeofday);
+
+ int do_settimeofday(struct timespec *tv)
+ {
+ int ret;
+
+ write_seqlock_irq(&xtime_lock);
+ ret = bus_do_settimeofday(tv);
+ write_sequnlock_irq(&xtime_lock);
+ clock_was_set();
+ return ret;
+ }
+
+ EXPORT_SYMBOL(do_settimeofday);
+
+ static int sbus_do_settimeofday(struct timespec *tv)
+ {
+ time_t wtm_sec, sec = tv->tv_sec;
+ long wtm_nsec, nsec = tv->tv_nsec;
+
+ if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ /*
+ * This is revolting. We need to set "xtime" correctly. However, the
+ * value in this location is the value at the most recent update of
+ * wall time. Discover what correction gettimeofday() would have
+ * made, and then undo it!
+ */
+ nsec -= 1000 * do_gettimeoffset();
+
+ wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+ wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+ set_normalized_timespec(&xtime, sec, nsec);
+ set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+ ntp_clear();
+ return 0;
+ }
+
+ static int set_rtc_mmss(unsigned long secs)
+ {
+ struct rtc_device *rtc = rtc_class_open("rtc0");
+ int err = -1;
+
+ if (rtc) {
+ err = rtc_set_mmss(rtc, secs);
+ rtc_class_close(rtc);
+ }
+
+ return err;
+ }
--- /dev/null
+ /* time.c: UltraSparc timer and TOD clock support.
+ *
+ * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * Based largely on code which is:
+ *
+ * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
+ */
+
+ #include <linux/errno.h>
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/smp_lock.h>
+ #include <linux/kernel.h>
+ #include <linux/param.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
+ #include <linux/time.h>
+ #include <linux/timex.h>
+ #include <linux/init.h>
+ #include <linux/ioport.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/delay.h>
+ #include <linux/profile.h>
+ #include <linux/bcd.h>
+ #include <linux/jiffies.h>
+ #include <linux/cpufreq.h>
+ #include <linux/percpu.h>
+ #include <linux/miscdevice.h>
+ #include <linux/rtc.h>
+ #include <linux/rtc/m48t59.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/clockchips.h>
+ #include <linux/clocksource.h>
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
+
+ #include <asm/oplib.h>
+ #include <asm/timer.h>
+ #include <asm/irq.h>
+ #include <asm/io.h>
+ #include <asm/prom.h>
+ #include <asm/starfire.h>
+ #include <asm/smp.h>
+ #include <asm/sections.h>
+ #include <asm/cpudata.h>
+ #include <asm/uaccess.h>
+ #include <asm/irq_regs.h>
+
+ #include "entry.h"
+
+ DEFINE_SPINLOCK(rtc_lock);
+
+ #define TICK_PRIV_BIT (1UL << 63)
+ #define TICKCMP_IRQ_BIT (1UL << 63)
+
+ #ifdef CONFIG_SMP
+ unsigned long profile_pc(struct pt_regs *regs)
+ {
+ unsigned long pc = instruction_pointer(regs);
+
+ if (in_lock_functions(pc))
+ return regs->u_regs[UREG_RETPC];
+ return pc;
+ }
+ EXPORT_SYMBOL(profile_pc);
+ #endif
+
+ static void tick_disable_protection(void)
+ {
+ /* Set things up so user can access tick register for profiling
+ * purposes. Also workaround BB_ERRATA_1 by doing a dummy
+ * read back of %tick after writing it.
+ */
+ __asm__ __volatile__(
+ " ba,pt %%xcc, 1f\n"
+ " nop\n"
+ " .align 64\n"
+ "1: rd %%tick, %%g2\n"
+ " add %%g2, 6, %%g2\n"
+ " andn %%g2, %0, %%g2\n"
+ " wrpr %%g2, 0, %%tick\n"
+ " rdpr %%tick, %%g0"
+ : /* no outputs */
+ : "r" (TICK_PRIV_BIT)
+ : "g2");
+ }
+
+ static void tick_disable_irq(void)
+ {
+ __asm__ __volatile__(
+ " ba,pt %%xcc, 1f\n"
+ " nop\n"
+ " .align 64\n"
+ "1: wr %0, 0x0, %%tick_cmpr\n"
+ " rd %%tick_cmpr, %%g0"
+ : /* no outputs */
+ : "r" (TICKCMP_IRQ_BIT));
+ }
+
+ static void tick_init_tick(void)
+ {
+ tick_disable_protection();
+ tick_disable_irq();
+ }
+
+ static unsigned long long tick_get_tick(void)
+ {
+ unsigned long ret;
+
+ __asm__ __volatile__("rd %%tick, %0\n\t"
+ "mov %0, %0"
+ : "=r" (ret));
+
+ return ret & ~TICK_PRIV_BIT;
+ }
+
+ static int tick_add_compare(unsigned long adj)
+ {
+ unsigned long orig_tick, new_tick, new_compare;
+
+ __asm__ __volatile__("rd %%tick, %0"
+ : "=r" (orig_tick));
+
+ orig_tick &= ~TICKCMP_IRQ_BIT;
+
+ /* Workaround for Spitfire Errata (#54 I think??), I discovered
+ * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
+ * number 103640.
+ *
+ * On Blackbird writes to %tick_cmpr can fail, the
+ * workaround seems to be to execute the wr instruction
+ * at the start of an I-cache line, and perform a dummy
+ * read back from %tick_cmpr right after writing to it. -DaveM
+ */
+ __asm__ __volatile__("ba,pt %%xcc, 1f\n\t"
+ " add %1, %2, %0\n\t"
+ ".align 64\n"
+ "1:\n\t"
+ "wr %0, 0, %%tick_cmpr\n\t"
+ "rd %%tick_cmpr, %%g0\n\t"
+ : "=r" (new_compare)
+ : "r" (orig_tick), "r" (adj));
+
+ __asm__ __volatile__("rd %%tick, %0"
+ : "=r" (new_tick));
+ new_tick &= ~TICKCMP_IRQ_BIT;
+
+ return ((long)(new_tick - (orig_tick+adj))) > 0L;
+ }
+
+ static unsigned long tick_add_tick(unsigned long adj)
+ {
+ unsigned long new_tick;
+
+ /* Also need to handle Blackbird bug here too. */
+ __asm__ __volatile__("rd %%tick, %0\n\t"
+ "add %0, %1, %0\n\t"
+ "wrpr %0, 0, %%tick\n\t"
+ : "=&r" (new_tick)
+ : "r" (adj));
+
+ return new_tick;
+ }
+
+ static struct sparc64_tick_ops tick_operations __read_mostly = {
+ .name = "tick",
+ .init_tick = tick_init_tick,
+ .disable_irq = tick_disable_irq,
+ .get_tick = tick_get_tick,
+ .add_tick = tick_add_tick,
+ .add_compare = tick_add_compare,
+ .softint_mask = 1UL << 0,
+ };
+
+ struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations;
+ EXPORT_SYMBOL(tick_ops);
+
+ static void stick_disable_irq(void)
+ {
+ __asm__ __volatile__(
+ "wr %0, 0x0, %%asr25"
+ : /* no outputs */
+ : "r" (TICKCMP_IRQ_BIT));
+ }
+
+ static void stick_init_tick(void)
+ {
+ /* Writes to the %tick and %stick register are not
+ * allowed on sun4v. The Hypervisor controls that
+ * bit, per-strand.
+ */
+ if (tlb_type != hypervisor) {
+ tick_disable_protection();
+ tick_disable_irq();
+
+ /* Let the user get at STICK too. */
+ __asm__ __volatile__(
+ " rd %%asr24, %%g2\n"
+ " andn %%g2, %0, %%g2\n"
+ " wr %%g2, 0, %%asr24"
+ : /* no outputs */
+ : "r" (TICK_PRIV_BIT)
+ : "g1", "g2");
+ }
+
+ stick_disable_irq();
+ }
+
+ static unsigned long long stick_get_tick(void)
+ {
+ unsigned long ret;
+
+ __asm__ __volatile__("rd %%asr24, %0"
+ : "=r" (ret));
+
+ return ret & ~TICK_PRIV_BIT;
+ }
+
+ static unsigned long stick_add_tick(unsigned long adj)
+ {
+ unsigned long new_tick;
+
+ __asm__ __volatile__("rd %%asr24, %0\n\t"
+ "add %0, %1, %0\n\t"
+ "wr %0, 0, %%asr24\n\t"
+ : "=&r" (new_tick)
+ : "r" (adj));
+
+ return new_tick;
+ }
+
+ static int stick_add_compare(unsigned long adj)
+ {
+ unsigned long orig_tick, new_tick;
+
+ __asm__ __volatile__("rd %%asr24, %0"
+ : "=r" (orig_tick));
+ orig_tick &= ~TICKCMP_IRQ_BIT;
+
+ __asm__ __volatile__("wr %0, 0, %%asr25"
+ : /* no outputs */
+ : "r" (orig_tick + adj));
+
+ __asm__ __volatile__("rd %%asr24, %0"
+ : "=r" (new_tick));
+ new_tick &= ~TICKCMP_IRQ_BIT;
+
+ return ((long)(new_tick - (orig_tick+adj))) > 0L;
+ }
+
+ static struct sparc64_tick_ops stick_operations __read_mostly = {
+ .name = "stick",
+ .init_tick = stick_init_tick,
+ .disable_irq = stick_disable_irq,
+ .get_tick = stick_get_tick,
+ .add_tick = stick_add_tick,
+ .add_compare = stick_add_compare,
+ .softint_mask = 1UL << 16,
+ };
+
+ /* On Hummingbird the STICK/STICK_CMPR register is implemented
+ * in I/O space. Each is implemented as a pair of 64-bit
+ * registers: the first holds the low 32-bits of the value and
+ * the second holds the high 32-bits.
+ *
+ * Since STICK is constantly updating, we have to access it carefully.
+ *
+ * The sequence we use to read is:
+ * 1) read high
+ * 2) read low
+ * 3) read high again; if it rolled over, re-read both low and high.
+ *
+ * Writing STICK safely is also tricky:
+ * 1) write low to zero
+ * 2) write high
+ * 3) write low
+ */
+ #define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL
+ #define HBIRD_STICK_ADDR 0x1fe0000f070UL
+
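+ /* In C pseudocode, the lock-free read sequence implemented in
+ * assembly below is roughly (ldxa_phys() is a hypothetical
+ * shorthand for an ASI_PHYS_BYPASS_EC_E load, not a real helper):
+ *
+ *	do {
+ *		high  = ldxa_phys(HBIRD_STICK_ADDR + 0x8);
+ *		low   = ldxa_phys(HBIRD_STICK_ADDR);
+ *		high2 = ldxa_phys(HBIRD_STICK_ADDR + 0x8);
+ *	} while (high2 != high);
+ *	stick = (high2 << 32) | low;
+ */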
+ static unsigned long __hbird_read_stick(void)
+ {
+ unsigned long ret, tmp1, tmp2, tmp3;
+ unsigned long addr = HBIRD_STICK_ADDR+8;
+
+ __asm__ __volatile__("ldxa [%1] %5, %2\n"
+ "1:\n\t"
+ "sub %1, 0x8, %1\n\t"
+ "ldxa [%1] %5, %3\n\t"
+ "add %1, 0x8, %1\n\t"
+ "ldxa [%1] %5, %4\n\t"
+ "cmp %4, %2\n\t"
+ "bne,a,pn %%xcc, 1b\n\t"
+ " mov %4, %2\n\t"
+ "sllx %4, 32, %4\n\t"
+ "or %3, %4, %0\n\t"
+ : "=&r" (ret), "=&r" (addr),
+ "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3)
+ : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr));
+
+ return ret;
+ }
+
+ static void __hbird_write_stick(unsigned long val)
+ {
+ unsigned long low = (val & 0xffffffffUL);
+ unsigned long high = (val >> 32UL);
+ unsigned long addr = HBIRD_STICK_ADDR;
+
+ __asm__ __volatile__("stxa %%g0, [%0] %4\n\t"
+ "add %0, 0x8, %0\n\t"
+ "stxa %3, [%0] %4\n\t"
+ "sub %0, 0x8, %0\n\t"
+ "stxa %2, [%0] %4"
+ : "=&r" (addr)
+ : "0" (addr), "r" (low), "r" (high),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+
+ static void __hbird_write_compare(unsigned long val)
+ {
+ unsigned long low = (val & 0xffffffffUL);
+ unsigned long high = (val >> 32UL);
+ unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL;
+
+ __asm__ __volatile__("stxa %3, [%0] %4\n\t"
+ "sub %0, 0x8, %0\n\t"
+ "stxa %2, [%0] %4"
+ : "=&r" (addr)
+ : "0" (addr), "r" (low), "r" (high),
+ "i" (ASI_PHYS_BYPASS_EC_E));
+ }
+
+ static void hbtick_disable_irq(void)
+ {
+ __hbird_write_compare(TICKCMP_IRQ_BIT);
+ }
+
+ static void hbtick_init_tick(void)
+ {
+ tick_disable_protection();
+
+ /* XXX This seems to be necessary to 'jumpstart' Hummingbird
+ * XXX into actually sending STICK interrupts. I think because
+ * XXX of how we store %tick_cmpr in head.S this somehow resets the
+ * XXX {TICK + STICK} interrupt mux. -DaveM
+ */
+ __hbird_write_stick(__hbird_read_stick());
+
+ hbtick_disable_irq();
+ }
+
+ static unsigned long long hbtick_get_tick(void)
+ {
+ return __hbird_read_stick() & ~TICK_PRIV_BIT;
+ }
+
+ static unsigned long hbtick_add_tick(unsigned long adj)
+ {
+ unsigned long val;
+
+ val = __hbird_read_stick() + adj;
+ __hbird_write_stick(val);
+
+ return val;
+ }
+
+ static int hbtick_add_compare(unsigned long adj)
+ {
+ unsigned long val = __hbird_read_stick();
+ unsigned long val2;
+
+ val &= ~TICKCMP_IRQ_BIT;
+ val += adj;
+ __hbird_write_compare(val);
+
+ val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT;
+
+ return ((long)(val2 - val)) > 0L;
+ }
+
+ static struct sparc64_tick_ops hbtick_operations __read_mostly = {
+ .name = "hbtick",
+ .init_tick = hbtick_init_tick,
+ .disable_irq = hbtick_disable_irq,
+ .get_tick = hbtick_get_tick,
+ .add_tick = hbtick_add_tick,
+ .add_compare = hbtick_add_compare,
+ .softint_mask = 1UL << 0,
+ };
+
+ static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
+
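+ /* Hook called by the generic timekeeping code (periodically,
+ * while NTP is synchronized) to write the wall time back into the
+ * hardware RTC. rtc_set_mmss() only needs the time in seconds and
+ * lets simple drivers update just the minutes/seconds registers.
+ */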
+ int update_persistent_clock(struct timespec now)
+ {
+ struct rtc_device *rtc = rtc_class_open("rtc0");
+ int err = -1;
+
+ if (rtc) {
+ err = rtc_set_mmss(rtc, now.tv_sec);
+ rtc_class_close(rtc);
+ }
+
+ return err;
+ }
+
+ unsigned long cmos_regs;
+ EXPORT_SYMBOL(cmos_regs);
+
+ static struct resource rtc_cmos_resource;
+
+ static struct platform_device rtc_cmos_device = {
+ .name = "rtc_cmos",
+ .id = -1,
+ .resource = &rtc_cmos_resource,
+ .num_resources = 1,
+ };
+
+ static int __devinit rtc_probe(struct of_device *op, const struct of_device_id *match)
+ {
+ struct resource *r;
+
+ printk(KERN_INFO "%s: RTC regs at 0x%llx\n",
+ op->node->full_name, op->resource[0].start);
+
+ /* The CMOS RTC driver only accepts IORESOURCE_IO, so cons
+ * up a fake resource so that the probe works for all cases.
+ * When the RTC is behind an ISA bus it will have IORESOURCE_IO
+ * already, whereas when it's behind EBUS it will be IORESOURCE_MEM.
+ */
+
+ r = &rtc_cmos_resource;
+ r->flags = IORESOURCE_IO;
+ r->name = op->resource[0].name;
+ r->start = op->resource[0].start;
+ r->end = op->resource[0].end;
+
+ cmos_regs = op->resource[0].start;
+ return platform_device_register(&rtc_cmos_device);
+ }
+
+ static struct of_device_id __initdata rtc_match[] = {
+ {
+ .name = "rtc",
+ .compatible = "m5819",
+ },
+ {
+ .name = "rtc",
+ .compatible = "isa-m5819p",
+ },
+ {
+ .name = "rtc",
+ .compatible = "isa-m5823p",
+ },
+ {
+ .name = "rtc",
+ .compatible = "ds1287",
+ },
+ {},
+ };
+
+ static struct of_platform_driver rtc_driver = {
++ .owner = THIS_MODULE,
+ .match_table = rtc_match,
+ .probe = rtc_probe,
+ .driver = {
+ .name = "rtc",
+ },
+ };
+
+ static struct platform_device rtc_bq4802_device = {
+ .name = "rtc-bq4802",
+ .id = -1,
+ .num_resources = 1,
+ };
+
+ static int __devinit bq4802_probe(struct of_device *op, const struct of_device_id *match)
+ {
+ printk(KERN_INFO "%s: BQ4802 regs at 0x%llx\n",
+ op->node->full_name, op->resource[0].start);
+
+ rtc_bq4802_device.resource = &op->resource[0];
+ return platform_device_register(&rtc_bq4802_device);
+ }
+
+ static struct of_device_id __initdata bq4802_match[] = {
+ {
+ .name = "rtc",
+ .compatible = "bq4802",
+ },
+ {},
+ };
+
+ static struct of_platform_driver bq4802_driver = {
++ .owner = THIS_MODULE,
+ .match_table = bq4802_match,
+ .probe = bq4802_probe,
+ .driver = {
+ .name = "bq4802",
+ },
+ };
+
+ static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ void __iomem *regs = (void __iomem *) pdev->resource[0].start;
+
+ return readb(regs + ofs);
+ }
+
+ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ void __iomem *regs = (void __iomem *) pdev->resource[0].start;
+
+ writeb(val, regs + ofs);
+ }
+
+ static struct m48t59_plat_data m48t59_data = {
+ .read_byte = mostek_read_byte,
+ .write_byte = mostek_write_byte,
+ };
+
+ static struct platform_device m48t59_rtc = {
+ .name = "rtc-m48t59",
+ .id = 0,
+ .num_resources = 1,
+ .dev = {
+ .platform_data = &m48t59_data,
+ },
+ };
+
+ static int __devinit mostek_probe(struct of_device *op, const struct of_device_id *match)
+ {
+ struct device_node *dp = op->node;
+
+ /* On an Enterprise system there can be multiple mostek clocks.
+ * We should only match the one that is on the central FHC bus.
+ */
+ if (!strcmp(dp->parent->name, "fhc") &&
+ strcmp(dp->parent->parent->name, "central") != 0)
+ return -ENODEV;
+
+ printk(KERN_INFO "%s: Mostek regs at 0x%llx\n",
+ dp->full_name, op->resource[0].start);
+
+ m48t59_rtc.resource = &op->resource[0];
+ return platform_device_register(&m48t59_rtc);
+ }
+
+ static struct of_device_id __initdata mostek_match[] = {
+ {
+ .name = "eeprom",
+ },
+ {},
+ };
+
+ static struct of_platform_driver mostek_driver = {
++ .owner = THIS_MODULE,
+ .match_table = mostek_match,
+ .probe = mostek_probe,
+ .driver = {
+ .name = "mostek",
+ },
+ };
+
+ static struct platform_device rtc_sun4v_device = {
+ .name = "rtc-sun4v",
+ .id = -1,
+ };
+
+ static struct platform_device rtc_starfire_device = {
+ .name = "rtc-starfire",
+ .id = -1,
+ };
+
+ static int __init clock_init(void)
+ {
+ if (this_is_starfire)
+ return platform_device_register(&rtc_starfire_device);
+
+ if (tlb_type == hypervisor)
+ return platform_device_register(&rtc_sun4v_device);
+
+ (void) of_register_driver(&rtc_driver, &of_platform_bus_type);
+ (void) of_register_driver(&mostek_driver, &of_platform_bus_type);
+ (void) of_register_driver(&bq4802_driver, &of_platform_bus_type);
+
+ return 0;
+ }
+
+ /* Must be after subsys_initcall() so that busses are probed. Must
+ * be before device_initcall() because things like the RTC driver
+ * need to see the clock registers.
+ */
+ fs_initcall(clock_init);
+
+ /* This gets the master TICK_INT timer going. */
+ static unsigned long sparc64_init_timers(void)
+ {
+ struct device_node *dp;
+ unsigned long freq;
+
+ dp = of_find_node_by_path("/");
+ if (tlb_type == spitfire) {
+ unsigned long ver, manuf, impl;
+
+ __asm__ __volatile__ ("rdpr %%ver, %0"
+ : "=&r" (ver));
+ manuf = ((ver >> 48) & 0xffff);
+ impl = ((ver >> 32) & 0xffff);
+ if (manuf == 0x17 && impl == 0x13) {
+ /* Hummingbird, aka Ultra-IIe */
+ tick_ops = &hbtick_operations;
+ freq = of_getintprop_default(dp, "stick-frequency", 0);
+ } else {
+ tick_ops = &tick_operations;
+ freq = local_cpu_data().clock_tick;
+ }
+ } else {
+ tick_ops = &stick_operations;
+ freq = of_getintprop_default(dp, "stick-frequency", 0);
+ }
+
+ return freq;
+ }
+
+ struct freq_table {
+ unsigned long clock_tick_ref;
+ unsigned int ref_freq;
+ };
+ static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
+
+ unsigned long sparc64_get_clock_tick(unsigned int cpu)
+ {
+ struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+ if (ft->clock_tick_ref)
+ return ft->clock_tick_ref;
+ return cpu_data(cpu).clock_tick;
+ }
+ EXPORT_SYMBOL(sparc64_get_clock_tick);
+
+ #ifdef CONFIG_CPU_FREQ
+
+ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+ {
+ struct cpufreq_freqs *freq = data;
+ unsigned int cpu = freq->cpu;
+ struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+ if (!ft->ref_freq) {
+ ft->ref_freq = freq->old;
+ ft->clock_tick_ref = cpu_data(cpu).clock_tick;
+ }
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ cpu_data(cpu).clock_tick =
+ cpufreq_scale(ft->clock_tick_ref,
+ ft->ref_freq,
+ freq->new);
+ }
+
+ return 0;
+ }
+
+ static struct notifier_block sparc64_cpufreq_notifier_block = {
+ .notifier_call = sparc64_cpufreq_notifier
+ };
+
+ static int __init register_sparc64_cpufreq_notifier(void)
+ {
+ cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ return 0;
+ }
+
+ core_initcall(register_sparc64_cpufreq_notifier);
+
+ #endif /* CONFIG_CPU_FREQ */
+
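+ /* Program the next timer event `delta' ticks from now. If the
+ * compare value was already stale when written, return -ETIME so
+ * the clockevents core can retry with a larger delta.
+ */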
+ static int sparc64_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+ {
+ return tick_ops->add_compare(delta) ? -ETIME : 0;
+ }
+
+ static void sparc64_timer_setup(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+ {
+ switch (mode) {
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ break;
+
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ tick_ops->disable_irq();
+ break;
+
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_UNUSED:
+ WARN_ON(1);
+ break;
+ }
+ }
+
+ static struct clock_event_device sparc64_clockevent = {
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .set_mode = sparc64_timer_setup,
+ .set_next_event = sparc64_next_event,
+ .rating = 100,
+ .shift = 30,
+ .irq = -1,
+ };
+ static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
+
+ void timer_interrupt(int irq, struct pt_regs *regs)
+ {
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ unsigned long tick_mask = tick_ops->softint_mask;
+ int cpu = smp_processor_id();
+ struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
+
+ clear_softint(tick_mask);
+
+ irq_enter();
+
+ kstat_this_cpu.irqs[0]++;
+
+ if (unlikely(!evt->event_handler)) {
+ printk(KERN_WARNING
+ "Spurious SPARC64 timer interrupt on cpu %d\n", cpu);
+ } else
+ evt->event_handler(evt);
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+ }
+
+ void __devinit setup_sparc64_timer(void)
+ {
+ struct clock_event_device *sevt;
+ unsigned long pstate;
+
+ /* Guarantee that the following sequences execute
+ * uninterrupted.
+ */
+ __asm__ __volatile__("rdpr %%pstate, %0\n\t"
+ "wrpr %0, %1, %%pstate"
+ : "=r" (pstate)
+ : "i" (PSTATE_IE));
+
+ tick_ops->init_tick();
+
+ /* Restore PSTATE_IE. */
+ __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+ : /* no outputs */
+ : "r" (pstate));
+
+ sevt = &__get_cpu_var(sparc64_events);
+
+ memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
+ sevt->cpumask = cpumask_of(smp_processor_id());
+
+ clockevents_register_device(sevt);
+ }
+
+ #define SPARC64_NSEC_PER_CYC_SHIFT 10UL
+
+ static struct clocksource clocksource_tick = {
+ .rating = 100,
+ .mask = CLOCKSOURCE_MASK(64),
+ .shift = 16,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+
+ static void __init setup_clockevent_multiplier(unsigned long hz)
+ {
+ unsigned long mult, shift = 32;
+
+ while (1) {
+ mult = div_sc(hz, NSEC_PER_SEC, shift);
+ if (mult && (mult >> 32UL) == 0UL)
+ break;
+
+ shift--;
+ }
+
+ sparc64_clockevent.shift = shift;
+ sparc64_clockevent.mult = mult;
+ }
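+ /* The loop picks the largest shift for which
+ * mult = (hz << shift) / NSEC_PER_SEC still fits in 32 bits, which
+ * maximizes the precision of the ns-to-ticks conversion
+ * ticks = (ns * mult) >> shift. For example, with a hypothetical
+ * 1.5 GHz clock, shift == 32 would give mult = 1.5 * 2^32
+ * (overflow), so the loop settles on shift == 31 and
+ * mult == 0xc0000000.
+ */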
+
+ static unsigned long tb_ticks_per_usec __read_mostly;
+
+ void __delay(unsigned long loops)
+ {
+ unsigned long bclock, now;
+
+ bclock = tick_ops->get_tick();
+ do {
+ now = tick_ops->get_tick();
+ } while ((now-bclock) < loops);
+ }
+ EXPORT_SYMBOL(__delay);
+
+ void udelay(unsigned long usecs)
+ {
+ __delay(tb_ticks_per_usec * usecs);
+ }
+ EXPORT_SYMBOL(udelay);
+
+ void __init time_init(void)
+ {
+ unsigned long freq = sparc64_init_timers();
+
+ tb_ticks_per_usec = freq / USEC_PER_SEC;
+
+ timer_ticks_per_nsec_quotient =
+ clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
+
+ clocksource_tick.name = tick_ops->name;
+ clocksource_tick.mult =
+ clocksource_hz2mult(freq,
+ clocksource_tick.shift);
+ clocksource_tick.read = tick_ops->get_tick;
+
+ printk("clocksource: mult[%x] shift[%d]\n",
+ clocksource_tick.mult, clocksource_tick.shift);
+
+ clocksource_register(&clocksource_tick);
+
+ sparc64_clockevent.name = tick_ops->name;
+
+ setup_clockevent_multiplier(freq);
+
+ sparc64_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent);
+ sparc64_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &sparc64_clockevent);
+
+ printk("clockevent: mult[%lx] shift[%d]\n",
+ sparc64_clockevent.mult, sparc64_clockevent.shift);
+
+ setup_sparc64_timer();
+ }
+
+ unsigned long long sched_clock(void)
+ {
+ unsigned long ticks = tick_ops->get_tick();
+
+ return (ticks * timer_ticks_per_nsec_quotient)
+ >> SPARC64_NSEC_PER_CYC_SHIFT;
+ }
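+ /* The conversion above is 10-bit fixed point:
+ * timer_ticks_per_nsec_quotient is approximately
+ * (NSEC_PER_SEC << 10) / freq, so ns = (ticks * quot) >> 10.
+ * E.g. at freq == 1 GHz the quotient is 1024 and one tick maps to
+ * exactly one nanosecond.
+ */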
+
+ int __devinit read_current_timer(unsigned long *timer_val)
+ {
+ *timer_val = tick_ops->get_tick();
+ return 0;
+ }
--- /dev/null
+ /* arch/sparc64/kernel/traps.c
+ *
+ * Copyright (C) 1995,1997,2008,2009 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997,1999,2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+ /*
+ * I like traps on v9, :))))
+ */
+
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/linkage.h>
+ #include <linux/kernel.h>
+ #include <linux/signal.h>
+ #include <linux/smp.h>
+ #include <linux/mm.h>
+ #include <linux/init.h>
+ #include <linux/kdebug.h>
+
+ #include <asm/smp.h>
+ #include <asm/delay.h>
+ #include <asm/system.h>
+ #include <asm/ptrace.h>
+ #include <asm/oplib.h>
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/unistd.h>
+ #include <asm/uaccess.h>
+ #include <asm/fpumacro.h>
+ #include <asm/lsu.h>
+ #include <asm/dcu.h>
+ #include <asm/estate.h>
+ #include <asm/chafsr.h>
+ #include <asm/sfafsr.h>
+ #include <asm/psrcompat.h>
+ #include <asm/processor.h>
+ #include <asm/timer.h>
+ #include <asm/head.h>
+ #include <asm/prom.h>
+ #include <asm/memctrl.h>
+
+ #include "entry.h"
+ #include "kstack.h"
+
+ /* When an irrecoverable trap occurs at tl > 0, the trap entry
+ * code logs the trap state registers at every level in the trap
+ * stack. It is found at (pt_regs + sizeof(pt_regs)) and the layout
+ * is as follows:
+ */
+ struct tl1_traplog {
+ struct {
+ unsigned long tstate;
+ unsigned long tpc;
+ unsigned long tnpc;
+ unsigned long tt;
+ } trapstack[4];
+ unsigned long tl;
+ };
+
+ static void dump_tl1_traplog(struct tl1_traplog *p)
+ {
+ int i, limit;
+
+ printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, "
+ "dumping track stack.\n", p->tl);
+
+ limit = (tlb_type == hypervisor) ? 2 : 4;
+ for (i = 0; i < limit; i++) {
+ printk(KERN_EMERG
+ "TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] "
+ "TNPC[%016lx] TT[%lx]\n",
+ i + 1,
+ p->trapstack[i].tstate, p->trapstack[i].tpc,
+ p->trapstack[i].tnpc, p->trapstack[i].tt);
+ printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc);
+ }
+ }
+
+ void bad_trap(struct pt_regs *regs, long lvl)
+ {
+ char buffer[32];
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "bad trap", regs,
+ 0, lvl, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (lvl < 0x100) {
+ sprintf(buffer, "Bad hw trap %lx at tl0\n", lvl);
+ die_if_kernel(buffer, regs);
+ }
+
+ lvl -= 0x100;
+ if (regs->tstate & TSTATE_PRIV) {
+ sprintf(buffer, "Kernel bad sw trap %lx", lvl);
+ die_if_kernel(buffer, regs);
+ }
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_code = ILL_ILLTRP;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = lvl;
+ force_sig_info(SIGILL, &info, current);
+ }
+
+ void bad_trap_tl1(struct pt_regs *regs, long lvl)
+ {
+ char buffer[32];
+
+ if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs,
+ 0, lvl, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ sprintf(buffer, "Bad trap %lx at tl>0", lvl);
+ die_if_kernel(buffer, regs);
+ }
+
+ #ifdef CONFIG_DEBUG_BUGVERBOSE
+ void do_BUG(const char *file, int line)
+ {
+ bust_spinlocks(1);
+ printk("kernel BUG at %s:%d!\n", file, line);
+ }
+ EXPORT_SYMBOL(do_BUG);
+ #endif
+
+ static DEFINE_SPINLOCK(dimm_handler_lock);
+ static dimm_printer_t dimm_handler;
+
+ static int sprintf_dimm(int synd_code, unsigned long paddr, char *buf, int buflen)
+ {
+ unsigned long flags;
+ int ret = -ENODEV;
+
+ spin_lock_irqsave(&dimm_handler_lock, flags);
+ if (dimm_handler) {
+ ret = dimm_handler(synd_code, paddr, buf, buflen);
+ } else if (tlb_type == spitfire) {
+ if (prom_getunumber(synd_code, paddr, buf, buflen) == -1)
+ ret = -EINVAL;
+ else
+ ret = 0;
+ } else
+ ret = -ENODEV;
+ spin_unlock_irqrestore(&dimm_handler_lock, flags);
+
+ return ret;
+ }
+
+ int register_dimm_printer(dimm_printer_t func)
+ {
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&dimm_handler_lock, flags);
+ if (!dimm_handler)
+ dimm_handler = func;
+ else
+ ret = -EEXIST;
+ spin_unlock_irqrestore(&dimm_handler_lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(register_dimm_printer);
+
+ void unregister_dimm_printer(dimm_printer_t func)
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dimm_handler_lock, flags);
+ if (dimm_handler == func)
+ dimm_handler = NULL;
+ spin_unlock_irqrestore(&dimm_handler_lock, flags);
+ }
+ EXPORT_SYMBOL_GPL(unregister_dimm_printer);
+
+ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "instruction access exception", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ printk("spitfire_insn_access_exception: SFSR[%016lx] "
+ "SFAR[%016lx], going.\n", sfsr, sfar);
+ die_if_kernel("Iax", regs);
+ }
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+ }
+
+ void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+ {
+ if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ spitfire_insn_access_exception(regs, sfsr, sfar);
+ }
+
+ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+ {
+ unsigned short type = (type_ctx >> 16);
+ unsigned short ctx = (type_ctx & 0xffff);
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "instruction access exception", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ printk("sun4v_insn_access_exception: ADDR[%016lx] "
+ "CTX[%04x] TYPE[%04x], going.\n",
+ addr, ctx, type);
+ die_if_kernel("Iax", regs);
+ }
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+ }
+
+ void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+ {
+ if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ sun4v_insn_access_exception(regs, addr, type_ctx);
+ }
+
+ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "data access exception", regs,
+ 0, 0x30, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ /* Test if this comes from uaccess places. */
+ const struct exception_table_entry *entry;
+
+ entry = search_exception_tables(regs->tpc);
+ if (entry) {
+ /* Ouch, somebody is trying VM hole tricks on us... */
+ #ifdef DEBUG_EXCEPTIONS
+ printk("Exception: PC<%016lx> faddr<UNKNOWN>\n", regs->tpc);
+ printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+ regs->tpc, entry->fixup);
+ #endif
+ regs->tpc = entry->fixup;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+ /* Shit... */
+ printk("spitfire_data_access_exception: SFSR[%016lx] "
+ "SFAR[%016lx], going.\n", sfsr, sfar);
+ die_if_kernel("Dax", regs);
+ }
+
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *)sfar;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+ }
+
+ void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+ {
+ if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+ 0, 0x30, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ spitfire_data_access_exception(regs, sfsr, sfar);
+ }
+
+ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+ {
+ unsigned short type = (type_ctx >> 16);
+ unsigned short ctx = (type_ctx & 0xffff);
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "data access exception", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ /* Test if this comes from uaccess places. */
+ const struct exception_table_entry *entry;
+
+ entry = search_exception_tables(regs->tpc);
+ if (entry) {
+ /* Ouch, somebody is trying VM hole tricks on us... */
+ #ifdef DEBUG_EXCEPTIONS
+ printk("Exception: PC<%016lx> faddr<UNKNOWN>\n", regs->tpc);
+ printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+ regs->tpc, entry->fixup);
+ #endif
+ regs->tpc = entry->fixup;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+ printk("sun4v_data_access_exception: ADDR[%016lx] "
+ "CTX[%04x] TYPE[%04x], going.\n",
+ addr, ctx, type);
+ die_if_kernel("Dax", regs);
+ }
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+ }
+
+ void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+ {
+ if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ sun4v_data_access_exception(regs, addr, type_ctx);
+ }
+
+ #ifdef CONFIG_PCI
+ #include "pci_impl.h"
+ #endif
+
+ /* When access exceptions happen, the L1 caches must be cleaned
+ * of any corrupt lines and then re-enabled in the LSU.
+ */
+ static void spitfire_clean_and_reenable_l1_caches(void)
+ {
+ unsigned long va;
+
+ if (tlb_type != spitfire)
+ BUG();
+
+ /* Clean 'em. */
+ for (va = 0; va < (PAGE_SIZE << 1); va += 32) {
+ spitfire_put_icache_tag(va, 0x0);
+ spitfire_put_dcache_tag(va, 0x0);
+ }
+
+ /* Re-enable in LSU. */
+ __asm__ __volatile__("flush %%g6\n\t"
+ "membar #Sync\n\t"
+ "stxa %0, [%%g0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (LSU_CONTROL_IC | LSU_CONTROL_DC |
+ LSU_CONTROL_IM | LSU_CONTROL_DM),
+ "i" (ASI_LSU_CONTROL)
+ : "memory");
+ }
+
+ static void spitfire_enable_estate_errors(void)
+ {
+ __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (ESTATE_ERR_ALL),
+ "i" (ASI_ESTATE_ERROR_EN));
+ }
+
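+ /* Maps the low 8 syndrome bits of a UDB error register value to
+ * the syndrome codes expected by the unumber lookup; see
+ * spitfire_log_udb_syndrome() below, which indexes this table
+ * with (udb & 0xff) and feeds the result to sprintf_dimm().
+ */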
+ static char ecc_syndrome_table[] = {
+ 0x4c, 0x40, 0x41, 0x48, 0x42, 0x48, 0x48, 0x49,
+ 0x43, 0x48, 0x48, 0x49, 0x48, 0x49, 0x49, 0x4a,
+ 0x44, 0x48, 0x48, 0x20, 0x48, 0x39, 0x4b, 0x48,
+ 0x48, 0x25, 0x31, 0x48, 0x28, 0x48, 0x48, 0x2c,
+ 0x45, 0x48, 0x48, 0x21, 0x48, 0x3d, 0x04, 0x48,
+ 0x48, 0x4b, 0x35, 0x48, 0x2d, 0x48, 0x48, 0x29,
+ 0x48, 0x00, 0x01, 0x48, 0x0a, 0x48, 0x48, 0x4b,
+ 0x0f, 0x48, 0x48, 0x4b, 0x48, 0x49, 0x49, 0x48,
+ 0x46, 0x48, 0x48, 0x2a, 0x48, 0x3b, 0x27, 0x48,
+ 0x48, 0x4b, 0x33, 0x48, 0x22, 0x48, 0x48, 0x2e,
+ 0x48, 0x19, 0x1d, 0x48, 0x1b, 0x4a, 0x48, 0x4b,
+ 0x1f, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48,
+ 0x48, 0x4b, 0x24, 0x48, 0x07, 0x48, 0x48, 0x36,
+ 0x4b, 0x48, 0x48, 0x3e, 0x48, 0x30, 0x38, 0x48,
+ 0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x16, 0x48,
+ 0x48, 0x12, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b,
+ 0x47, 0x48, 0x48, 0x2f, 0x48, 0x3f, 0x4b, 0x48,
+ 0x48, 0x06, 0x37, 0x48, 0x23, 0x48, 0x48, 0x2b,
+ 0x48, 0x05, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x32,
+ 0x26, 0x48, 0x48, 0x3a, 0x48, 0x34, 0x3c, 0x48,
+ 0x48, 0x11, 0x15, 0x48, 0x13, 0x4a, 0x48, 0x4b,
+ 0x17, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48,
+ 0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x1e, 0x48,
+ 0x48, 0x1a, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b,
+ 0x48, 0x08, 0x0d, 0x48, 0x02, 0x48, 0x48, 0x49,
+ 0x03, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x4b, 0x48,
+ 0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x10, 0x48,
+ 0x48, 0x14, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b,
+ 0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x18, 0x48,
+ 0x48, 0x1c, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b,
+ 0x4a, 0x0c, 0x09, 0x48, 0x0e, 0x48, 0x48, 0x4b,
+ 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a
+ };
+
+ static char *syndrome_unknown = "<Unknown>";
+
+ static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit)
+ {
+ unsigned short scode;
+ char memmod_str[64], *p;
+
+ if (udbl & bit) {
+ scode = ecc_syndrome_table[udbl & 0xff];
+ if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0)
+ p = syndrome_unknown;
+ else
+ p = memmod_str;
+ printk(KERN_WARNING "CPU[%d]: UDBL Syndrome[%x] "
+ "Memory Module \"%s\"\n",
+ smp_processor_id(), scode, p);
+ }
+
+ if (udbh & bit) {
+ scode = ecc_syndrome_table[udbh & 0xff];
+ if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0)
+ p = syndrome_unknown;
+ else
+ p = memmod_str;
+ printk(KERN_WARNING "CPU[%d]: UDBH Syndrome[%x] "
+ "Memory Module \"%s\"\n",
+ smp_processor_id(), scode, p);
+ }
+ }
+
+ static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs)
+ {
+ printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
+ "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n",
+ smp_processor_id(), afsr, afar, udbl, udbh, tl1);
+
+ spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE);
+
+ /* We always log it, even if someone is listening for this
+ * trap.
+ */
+ notify_die(DIE_TRAP, "Correctable ECC Error", regs,
+ 0, TRAP_TYPE_CEE, SIGTRAP);
+
+ /* The Correctable ECC Error trap does not disable I/D caches. So
+ * we only have to restore the ESTATE Error Enable register.
+ */
+ spitfire_enable_estate_errors();
+ }
+
+ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
+ {
+ siginfo_t info;
+
+ printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
+ "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
+ smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
+
+ /* XXX add more human friendly logging of the error status
+ * XXX as is implemented for cheetah
+ */
+
+ spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE);
+
+ /* We always log it, even if someone is listening for this
+ * trap.
+ */
+ notify_die(DIE_TRAP, "Uncorrectable Error", regs,
+ 0, tt, SIGTRAP);
+
+ if (regs->tstate & TSTATE_PRIV) {
+ if (tl1)
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("UE", regs);
+ }
+
+ /* XXX need more intelligent processing here, such as is implemented
+ * XXX for cheetah errors, in fact if the E-cache still holds the
+ * XXX line with bad parity this will loop
+ */
+
+ spitfire_clean_and_reenable_l1_caches();
+ spitfire_enable_estate_errors();
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_OBJERR;
+ info.si_addr = (void *)0;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+ }
+
+ void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
+ {
+ unsigned long afsr, tt, udbh, udbl;
+ int tl1;
+
+ afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT;
+ tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT;
+ tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0;
+ udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT;
+ udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT;
+
+ #ifdef CONFIG_PCI
+ if (tt == TRAP_TYPE_DAE &&
+ pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+ spitfire_clean_and_reenable_l1_caches();
+ spitfire_enable_estate_errors();
+
+ pci_poke_faulted = 1;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+ #endif
+
+ if (afsr & SFAFSR_UE)
+ spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs);
+
+ if (tt == TRAP_TYPE_CEE) {
+ /* Handle the case where we took a CEE trap, but ACK'd
+ * only the UE state in the UDB error registers.
+ */
+ if (afsr & SFAFSR_UE) {
+ if (udbh & UDBE_CE) {
+ __asm__ __volatile__(
+ "stxa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (udbh & UDBE_CE),
+ "r" (0x0), "i" (ASI_UDB_ERROR_W));
+ }
+ if (udbl & UDBE_CE) {
+ __asm__ __volatile__(
+ "stxa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (udbl & UDBE_CE),
+ "r" (0x18), "i" (ASI_UDB_ERROR_W));
+ }
+ }
+
+ spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs);
+ }
+ }
+
+ int cheetah_pcache_forced_on;
+
+ void cheetah_enable_pcache(void)
+ {
+ unsigned long dcr;
+
+ printk("CHEETAH: Enabling P-Cache on cpu %d.\n",
+ smp_processor_id());
+
+ __asm__ __volatile__("ldxa [%%g0] %1, %0"
+ : "=r" (dcr)
+ : "i" (ASI_DCU_CONTROL_REG));
+ dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL);
+ __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (dcr), "i" (ASI_DCU_CONTROL_REG));
+ }
+
+ /* Cheetah error trap handling. */
+ static unsigned long ecache_flush_physbase;
+ static unsigned long ecache_flush_linesize;
+ static unsigned long ecache_flush_size;
+
+ /* This table is ordered in priority of errors and matches the
+ * AFAR overwrite policy as well.
+ */
+
+ struct afsr_error_table {
+ unsigned long mask;
+ const char *name;
+ };
+
+ static const char CHAFSR_PERR_msg[] =
+ "System interface protocol error";
+ static const char CHAFSR_IERR_msg[] =
+ "Internal processor error";
+ static const char CHAFSR_ISAP_msg[] =
+ "System request parity error on incoming addresss";
+ static const char CHAFSR_UCU_msg[] =
+ "Uncorrectable E-cache ECC error for ifetch/data";
+ static const char CHAFSR_UCC_msg[] =
+ "SW Correctable E-cache ECC error for ifetch/data";
+ static const char CHAFSR_UE_msg[] =
+ "Uncorrectable system bus data ECC error for read";
+ static const char CHAFSR_EDU_msg[] =
+ "Uncorrectable E-cache ECC error for stmerge/blkld";
+ static const char CHAFSR_EMU_msg[] =
+ "Uncorrectable system bus MTAG error";
+ static const char CHAFSR_WDU_msg[] =
+ "Uncorrectable E-cache ECC error for writeback";
+ static const char CHAFSR_CPU_msg[] =
+ "Uncorrectable ECC error for copyout";
+ static const char CHAFSR_CE_msg[] =
+ "HW corrected system bus data ECC error for read";
+ static const char CHAFSR_EDC_msg[] =
+ "HW corrected E-cache ECC error for stmerge/blkld";
+ static const char CHAFSR_EMC_msg[] =
+ "HW corrected system bus MTAG ECC error";
+ static const char CHAFSR_WDC_msg[] =
+ "HW corrected E-cache ECC error for writeback";
+ static const char CHAFSR_CPC_msg[] =
+ "HW corrected ECC error for copyout";
+ static const char CHAFSR_TO_msg[] =
+ "Unmapped error from system bus";
+ static const char CHAFSR_BERR_msg[] =
+ "Bus error response from system bus";
+ static const char CHAFSR_IVC_msg[] =
+ "HW corrected system bus data ECC error for ivec read";
+ static const char CHAFSR_IVU_msg[] =
+ "Uncorrectable system bus data ECC error for ivec read";
+ static struct afsr_error_table __cheetah_error_table[] = {
+ { CHAFSR_PERR, CHAFSR_PERR_msg },
+ { CHAFSR_IERR, CHAFSR_IERR_msg },
+ { CHAFSR_ISAP, CHAFSR_ISAP_msg },
+ { CHAFSR_UCU, CHAFSR_UCU_msg },
+ { CHAFSR_UCC, CHAFSR_UCC_msg },
+ { CHAFSR_UE, CHAFSR_UE_msg },
+ { CHAFSR_EDU, CHAFSR_EDU_msg },
+ { CHAFSR_EMU, CHAFSR_EMU_msg },
+ { CHAFSR_WDU, CHAFSR_WDU_msg },
+ { CHAFSR_CPU, CHAFSR_CPU_msg },
+ { CHAFSR_CE, CHAFSR_CE_msg },
+ { CHAFSR_EDC, CHAFSR_EDC_msg },
+ { CHAFSR_EMC, CHAFSR_EMC_msg },
+ { CHAFSR_WDC, CHAFSR_WDC_msg },
+ { CHAFSR_CPC, CHAFSR_CPC_msg },
+ { CHAFSR_TO, CHAFSR_TO_msg },
+ { CHAFSR_BERR, CHAFSR_BERR_msg },
+ /* These two do not update the AFAR. */
+ { CHAFSR_IVC, CHAFSR_IVC_msg },
+ { CHAFSR_IVU, CHAFSR_IVU_msg },
+ { 0, NULL },
+ };
+ static const char CHPAFSR_DTO_msg[] =
+ "System bus unmapped error for prefetch/storequeue-read";
+ static const char CHPAFSR_DBERR_msg[] =
+ "System bus error for prefetch/storequeue-read";
+ static const char CHPAFSR_THCE_msg[] =
+ "Hardware corrected E-cache Tag ECC error";
+ static const char CHPAFSR_TSCE_msg[] =
+ "SW handled correctable E-cache Tag ECC error";
+ static const char CHPAFSR_TUE_msg[] =
+ "Uncorrectable E-cache Tag ECC error";
+ static const char CHPAFSR_DUE_msg[] =
+ "System bus uncorrectable data ECC error due to prefetch/store-fill";
+ static struct afsr_error_table __cheetah_plus_error_table[] = {
+ { CHAFSR_PERR, CHAFSR_PERR_msg },
+ { CHAFSR_IERR, CHAFSR_IERR_msg },
+ { CHAFSR_ISAP, CHAFSR_ISAP_msg },
+ { CHAFSR_UCU, CHAFSR_UCU_msg },
+ { CHAFSR_UCC, CHAFSR_UCC_msg },
+ { CHAFSR_UE, CHAFSR_UE_msg },
+ { CHAFSR_EDU, CHAFSR_EDU_msg },
+ { CHAFSR_EMU, CHAFSR_EMU_msg },
+ { CHAFSR_WDU, CHAFSR_WDU_msg },
+ { CHAFSR_CPU, CHAFSR_CPU_msg },
+ { CHAFSR_CE, CHAFSR_CE_msg },
+ { CHAFSR_EDC, CHAFSR_EDC_msg },
+ { CHAFSR_EMC, CHAFSR_EMC_msg },
+ { CHAFSR_WDC, CHAFSR_WDC_msg },
+ { CHAFSR_CPC, CHAFSR_CPC_msg },
+ { CHAFSR_TO, CHAFSR_TO_msg },
+ { CHAFSR_BERR, CHAFSR_BERR_msg },
+ { CHPAFSR_DTO, CHPAFSR_DTO_msg },
+ { CHPAFSR_DBERR, CHPAFSR_DBERR_msg },
+ { CHPAFSR_THCE, CHPAFSR_THCE_msg },
+ { CHPAFSR_TSCE, CHPAFSR_TSCE_msg },
+ { CHPAFSR_TUE, CHPAFSR_TUE_msg },
+ { CHPAFSR_DUE, CHPAFSR_DUE_msg },
+ /* These two do not update the AFAR. */
+ { CHAFSR_IVC, CHAFSR_IVC_msg },
+ { CHAFSR_IVU, CHAFSR_IVU_msg },
+ { 0, NULL },
+ };
+ static const char JPAFSR_JETO_msg[] =
+ "System interface protocol error, hw timeout caused";
+ static const char JPAFSR_SCE_msg[] =
+ "Parity error on system snoop results";
+ static const char JPAFSR_JEIC_msg[] =
+ "System interface protocol error, illegal command detected";
+ static const char JPAFSR_JEIT_msg[] =
+ "System interface protocol error, illegal ADTYPE detected";
+ static const char JPAFSR_OM_msg[] =
+ "Out of range memory error has occurred";
+ static const char JPAFSR_ETP_msg[] =
+ "Parity error on L2 cache tag SRAM";
+ static const char JPAFSR_UMS_msg[] =
+ "Error due to unsupported store";
+ static const char JPAFSR_RUE_msg[] =
+ "Uncorrectable ECC error from remote cache/memory";
+ static const char JPAFSR_RCE_msg[] =
+ "Correctable ECC error from remote cache/memory";
+ static const char JPAFSR_BP_msg[] =
+ "JBUS parity error on returned read data";
+ static const char JPAFSR_WBP_msg[] =
+ "JBUS parity error on data for writeback or block store";
+ static const char JPAFSR_FRC_msg[] =
+ "Foreign read to DRAM incurring correctable ECC error";
+ static const char JPAFSR_FRU_msg[] =
+ "Foreign read to DRAM incurring uncorrectable ECC error";
+ static struct afsr_error_table __jalapeno_error_table[] = {
+ { JPAFSR_JETO, JPAFSR_JETO_msg },
+ { JPAFSR_SCE, JPAFSR_SCE_msg },
+ { JPAFSR_JEIC, JPAFSR_JEIC_msg },
+ { JPAFSR_JEIT, JPAFSR_JEIT_msg },
+ { CHAFSR_PERR, CHAFSR_PERR_msg },
+ { CHAFSR_IERR, CHAFSR_IERR_msg },
+ { CHAFSR_ISAP, CHAFSR_ISAP_msg },
+ { CHAFSR_UCU, CHAFSR_UCU_msg },
+ { CHAFSR_UCC, CHAFSR_UCC_msg },
+ { CHAFSR_UE, CHAFSR_UE_msg },
+ { CHAFSR_EDU, CHAFSR_EDU_msg },
+ { JPAFSR_OM, JPAFSR_OM_msg },
+ { CHAFSR_WDU, CHAFSR_WDU_msg },
+ { CHAFSR_CPU, CHAFSR_CPU_msg },
+ { CHAFSR_CE, CHAFSR_CE_msg },
+ { CHAFSR_EDC, CHAFSR_EDC_msg },
+ { JPAFSR_ETP, JPAFSR_ETP_msg },
+ { CHAFSR_WDC, CHAFSR_WDC_msg },
+ { CHAFSR_CPC, CHAFSR_CPC_msg },
+ { CHAFSR_TO, CHAFSR_TO_msg },
+ { CHAFSR_BERR, CHAFSR_BERR_msg },
+ { JPAFSR_UMS, JPAFSR_UMS_msg },
+ { JPAFSR_RUE, JPAFSR_RUE_msg },
+ { JPAFSR_RCE, JPAFSR_RCE_msg },
+ { JPAFSR_BP, JPAFSR_BP_msg },
+ { JPAFSR_WBP, JPAFSR_WBP_msg },
+ { JPAFSR_FRC, JPAFSR_FRC_msg },
+ { JPAFSR_FRU, JPAFSR_FRU_msg },
+ /* These two do not update the AFAR. */
+ { CHAFSR_IVU, CHAFSR_IVU_msg },
+ { 0, NULL },
+ };
+ static struct afsr_error_table *cheetah_error_table;
+ static unsigned long cheetah_afsr_errors;
+
+ struct cheetah_err_info *cheetah_error_log;
+
+ static inline struct cheetah_err_info *cheetah_get_error_log(unsigned long afsr)
+ {
+ struct cheetah_err_info *p;
+ int cpu = smp_processor_id();
+
+ if (!cheetah_error_log)
+ return NULL;
+
+ p = cheetah_error_log + (cpu * 2);
+ if ((afsr & CHAFSR_TL1) != 0UL)
+ p++;
+
+ return p;
+ }
+
+ extern unsigned int tl0_icpe[], tl1_icpe[];
+ extern unsigned int tl0_dcpe[], tl1_dcpe[];
+ extern unsigned int tl0_fecc[], tl1_fecc[];
+ extern unsigned int tl0_cee[], tl1_cee[];
+ extern unsigned int tl0_iae[], tl1_iae[];
+ extern unsigned int tl0_dae[], tl1_dae[];
+ extern unsigned int cheetah_plus_icpe_trap_vector[], cheetah_plus_icpe_trap_vector_tl1[];
+ extern unsigned int cheetah_plus_dcpe_trap_vector[], cheetah_plus_dcpe_trap_vector_tl1[];
+ extern unsigned int cheetah_fecc_trap_vector[], cheetah_fecc_trap_vector_tl1[];
+ extern unsigned int cheetah_cee_trap_vector[], cheetah_cee_trap_vector_tl1[];
+ extern unsigned int cheetah_deferred_trap_vector[], cheetah_deferred_trap_vector_tl1[];
+
+ void __init cheetah_ecache_flush_init(void)
+ {
+ unsigned long largest_size, smallest_linesize, order, ver;
+ int i, sz;
+
+ /* Scan all cpu device tree nodes, note two values:
+ * 1) largest E-cache size
+ * 2) smallest E-cache line size
+ */
+ largest_size = 0UL;
+ smallest_linesize = ~0UL;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ unsigned long val;
+
+ val = cpu_data(i).ecache_size;
+ if (!val)
+ continue;
+
+ if (val > largest_size)
+ largest_size = val;
+
+ val = cpu_data(i).ecache_line_size;
+ if (val < smallest_linesize)
+ smallest_linesize = val;
+ }
+
+ if (largest_size == 0UL || smallest_linesize == ~0UL) {
+ prom_printf("cheetah_ecache_flush_init: Cannot probe cpu E-cache "
+ "parameters.\n");
+ prom_halt();
+ }
+
+ ecache_flush_size = (2 * largest_size);
+ ecache_flush_linesize = smallest_linesize;
+
+ ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size);
+
+ if (ecache_flush_physbase == ~0UL) {
+ prom_printf("cheetah_ecache_flush_init: Cannot find %d byte "
+ "contiguous physical memory.\n",
+ ecache_flush_size);
+ prom_halt();
+ }
+
+ /* Now allocate error trap reporting scoreboard. */
+ sz = NR_CPUS * (2 * sizeof(struct cheetah_err_info));
+ for (order = 0; order < MAX_ORDER; order++) {
+ if ((PAGE_SIZE << order) >= sz)
+ break;
+ }
+ cheetah_error_log = (struct cheetah_err_info *)
+ __get_free_pages(GFP_KERNEL, order);
+ if (!cheetah_error_log) {
+ prom_printf("cheetah_ecache_flush_init: Failed to allocate "
+ "error logging scoreboard (%d bytes).\n", sz);
+ prom_halt();
+ }
+ memset(cheetah_error_log, 0, PAGE_SIZE << order);
+
+ /* Mark all AFSRs as invalid so that the trap handler will
+ * log new information there.
+ */
+ for (i = 0; i < 2 * NR_CPUS; i++)
+ cheetah_error_log[i].afsr = CHAFSR_INVALID;
+
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32) == __JALAPENO_ID ||
+ (ver >> 32) == __SERRANO_ID) {
+ cheetah_error_table = &__jalapeno_error_table[0];
+ cheetah_afsr_errors = JPAFSR_ERRORS;
+ } else if ((ver >> 32) == 0x003e0015) { /* Cheetah-Plus */
+ cheetah_error_table = &__cheetah_plus_error_table[0];
+ cheetah_afsr_errors = CHPAFSR_ERRORS;
+ } else {
+ cheetah_error_table = &__cheetah_error_table[0];
+ cheetah_afsr_errors = CHAFSR_ERRORS;
+ }
+
+ /* Now patch trap tables. */
+ memcpy(tl0_fecc, cheetah_fecc_trap_vector, (8 * 4));
+ memcpy(tl1_fecc, cheetah_fecc_trap_vector_tl1, (8 * 4));
+ memcpy(tl0_cee, cheetah_cee_trap_vector, (8 * 4));
+ memcpy(tl1_cee, cheetah_cee_trap_vector_tl1, (8 * 4));
+ memcpy(tl0_iae, cheetah_deferred_trap_vector, (8 * 4));
+ memcpy(tl1_iae, cheetah_deferred_trap_vector_tl1, (8 * 4));
+ memcpy(tl0_dae, cheetah_deferred_trap_vector, (8 * 4));
+ memcpy(tl1_dae, cheetah_deferred_trap_vector_tl1, (8 * 4));
+ if (tlb_type == cheetah_plus) {
+ memcpy(tl0_dcpe, cheetah_plus_dcpe_trap_vector, (8 * 4));
+ memcpy(tl1_dcpe, cheetah_plus_dcpe_trap_vector_tl1, (8 * 4));
+ memcpy(tl0_icpe, cheetah_plus_icpe_trap_vector, (8 * 4));
+ memcpy(tl1_icpe, cheetah_plus_icpe_trap_vector_tl1, (8 * 4));
+ }
+ flushi(PAGE_OFFSET);
+ }
+
+ static void cheetah_flush_ecache(void)
+ {
+ unsigned long flush_base = ecache_flush_physbase;
+ unsigned long flush_linesize = ecache_flush_linesize;
+ unsigned long flush_size = ecache_flush_size;
+
+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
+ " bne,pt %%xcc, 1b\n\t"
+ " ldxa [%2 + %0] %3, %%g0\n\t"
+ : "=&r" (flush_size)
+ : "0" (flush_size), "r" (flush_base),
+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
+ }
+
+ static void cheetah_flush_ecache_line(unsigned long physaddr)
+ {
+ unsigned long alias;
+
+ physaddr &= ~(8UL - 1UL);
+ physaddr = (ecache_flush_physbase +
+ (physaddr & ((ecache_flush_size>>1UL) - 1UL)));
+ alias = physaddr + (ecache_flush_size >> 1UL);
+ __asm__ __volatile__("ldxa [%0] %2, %%g0\n\t"
+ "ldxa [%1] %2, %%g0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (physaddr), "r" (alias),
+ "i" (ASI_PHYS_USE_EC));
+ }
+
+ /* Unfortunately, the diagnostic access to the I-cache tags we need to
+ * use to clear the thing interferes with I-cache coherency transactions.
+ *
+ * So we must only flush the I-cache when it is disabled.
+ */
+ static void __cheetah_flush_icache(void)
+ {
+ unsigned int icache_size, icache_line_size;
+ unsigned long addr;
+
+ icache_size = local_cpu_data().icache_size;
+ icache_line_size = local_cpu_data().icache_line_size;
+
+ /* Clear the valid bits in all the tags. */
+ for (addr = 0; addr < icache_size; addr += icache_line_size) {
+ __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (addr | (2 << 3)),
+ "i" (ASI_IC_TAG));
+ }
+ }
+
+ static void cheetah_flush_icache(void)
+ {
+ unsigned long dcu_save;
+
+ /* Save current DCU, disable I-cache. */
+ __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+ "or %0, %2, %%g1\n\t"
+ "stxa %%g1, [%%g0] %1\n\t"
+ "membar #Sync"
+ : "=r" (dcu_save)
+ : "i" (ASI_DCU_CONTROL_REG), "i" (DCU_IC)
+ : "g1");
+
+ __cheetah_flush_icache();
+
+ /* Restore DCU register */
+ __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (dcu_save), "i" (ASI_DCU_CONTROL_REG));
+ }
+
+ static void cheetah_flush_dcache(void)
+ {
+ unsigned int dcache_size, dcache_line_size;
+ unsigned long addr;
+
+ dcache_size = local_cpu_data().dcache_size;
+ dcache_line_size = local_cpu_data().dcache_line_size;
+
+ for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
+ __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (addr), "i" (ASI_DCACHE_TAG));
+ }
+ }
+
+ /* In order to make the even parity correct we must do two things.
+ * First, we clear DC_data_parity and set DC_utag to an appropriate value.
+ * Next, we clear out all 32-bytes of data for that line. Data of
+ * all-zero + tag parity value of zero == correct parity.
+ */
+ static void cheetah_plus_zap_dcache_parity(void)
+ {
+ unsigned int dcache_size, dcache_line_size;
+ unsigned long addr;
+
+ dcache_size = local_cpu_data().dcache_size;
+ dcache_line_size = local_cpu_data().dcache_line_size;
+
+ for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
+ unsigned long tag = (addr >> 14);
+ unsigned long line;
+
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stxa %0, [%1] %2\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (tag), "r" (addr),
+ "i" (ASI_DCACHE_UTAG));
+ for (line = addr; line < addr + dcache_line_size; line += 8)
+ __asm__ __volatile__("membar #Sync\n\t"
+ "stxa %%g0, [%0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (line),
+ "i" (ASI_DCACHE_DATA));
+ }
+ }
+
+ /* Conversion tables used to frob Cheetah AFSR syndrome values into
+ * something palatable to the memory controller driver get_unumber
+ * routine.
+ */
+ #define MT0 137
+ #define MT1 138
+ #define MT2 139
+ #define NONE 254
+ #define MTC0 140
+ #define MTC1 141
+ #define MTC2 142
+ #define MTC3 143
+ #define C0 128
+ #define C1 129
+ #define C2 130
+ #define C3 131
+ #define C4 132
+ #define C5 133
+ #define C6 134
+ #define C7 135
+ #define C8 136
+ #define M2 144
+ #define M3 145
+ #define M4 146
+ #define M 147
+ static unsigned char cheetah_ecc_syntab[] = {
+ /*00*/NONE, C0, C1, M2, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
+ /*01*/C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
+ /*02*/C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
+ /*03*/M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
+ /*04*/C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
+ /*05*/M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
+ /*06*/M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
+ /*07*/116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
+ /*08*/C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
+ /*09*/M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
+ /*0a*/M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
+ /*0b*/103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
+ /*0c*/M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
+ /*0d*/102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
+ /*0e*/98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
+ /*0f*/M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
+ /*10*/C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
+ /*11*/M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
+ /*12*/M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
+ /*13*/94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
+ /*14*/M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
+ /*15*/89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
+ /*16*/86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
+ /*17*/M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
+ /*18*/M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
+ /*19*/77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
+ /*1a*/74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
+ /*1b*/M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
+ /*1c*/80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
+ /*1d*/M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
+ /*1e*/M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
+ /*1f*/111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
+ };
+ static unsigned char cheetah_mtag_syntab[] = {
+ NONE, MTC0,
+ MTC1, NONE,
+ MTC2, NONE,
+ NONE, MT0,
+ MTC3, NONE,
+ NONE, MT1,
+ NONE, MT2,
+ NONE, NONE
+ };
+
+ /* Return the highest priority error condition mentioned. */
+ static inline unsigned long cheetah_get_hipri(unsigned long afsr)
+ {
+ unsigned long tmp = 0;
+ int i;
+
+ for (i = 0; cheetah_error_table[i].mask; i++) {
+ if ((tmp = (afsr & cheetah_error_table[i].mask)) != 0UL)
+ return tmp;
+ }
+ return tmp;
+ }
+
+ static const char *cheetah_get_string(unsigned long bit)
+ {
+ int i;
+
+ for (i = 0; cheetah_error_table[i].mask; i++) {
+ if ((bit & cheetah_error_table[i].mask) != 0UL)
+ return cheetah_error_table[i].name;
+ }
+ return "???";
+ }
+
+ static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *info,
+ unsigned long afsr, unsigned long afar, int recoverable)
+ {
+ unsigned long hipri;
+ char unum[256];
+
+ printk("%s" "ERROR(%d): Cheetah error trap taken afsr[%016lx] afar[%016lx] TL1(%d)\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ afsr, afar,
+ (afsr & CHAFSR_TL1) ? 1 : 0);
+ printk("%s" "ERROR(%d): TPC[%lx] TNPC[%lx] O7[%lx] TSTATE[%lx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate);
+ printk("%s" "ERROR(%d): ",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id());
+ printk("TPC<%pS>\n", (void *) regs->tpc);
+ printk("%s" "ERROR(%d): M_SYND(%lx), E_SYND(%lx)%s%s\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT,
+ (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT,
+ (afsr & CHAFSR_ME) ? ", Multiple Errors" : "",
+ (afsr & CHAFSR_PRIV) ? ", Privileged" : "");
+ hipri = cheetah_get_hipri(afsr);
+ printk("%s" "ERROR(%d): Highest priority error (%016lx) \"%s\"\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ hipri, cheetah_get_string(hipri));
+
+ /* Try to get unumber if relevant. */
+ #define ESYND_ERRORS (CHAFSR_IVC | CHAFSR_IVU | \
+ CHAFSR_CPC | CHAFSR_CPU | \
+ CHAFSR_UE | CHAFSR_CE | \
+ CHAFSR_EDC | CHAFSR_EDU | \
+ CHAFSR_UCC | CHAFSR_UCU | \
+ CHAFSR_WDU | CHAFSR_WDC)
+ #define MSYND_ERRORS (CHAFSR_EMC | CHAFSR_EMU)
+ if (afsr & ESYND_ERRORS) {
+ int syndrome;
+ int ret;
+
+ syndrome = (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT;
+ syndrome = cheetah_ecc_syntab[syndrome];
+ ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum));
+ if (ret != -1)
+ printk("%s" "ERROR(%d): AFAR E-syndrome [%s]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT),
+ smp_processor_id(), unum);
+ } else if (afsr & MSYND_ERRORS) {
+ int syndrome;
+ int ret;
+
+ syndrome = (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT;
+ syndrome = cheetah_mtag_syntab[syndrome];
+ ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum));
+ if (ret != -1)
+ printk("%s" "ERROR(%d): AFAR M-syndrome [%s]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT),
+ smp_processor_id(), unum);
+ }
+
+ /* Now dump the cache snapshots. */
+ printk("%s" "ERROR(%d): D-cache idx[%x] tag[%016llx] utag[%016llx] stag[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ (int) info->dcache_index,
+ info->dcache_tag,
+ info->dcache_utag,
+ info->dcache_stag);
+ printk("%s" "ERROR(%d): D-cache data0[%016llx] data1[%016llx] data2[%016llx] data3[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ info->dcache_data[0],
+ info->dcache_data[1],
+ info->dcache_data[2],
+ info->dcache_data[3]);
+ printk("%s" "ERROR(%d): I-cache idx[%x] tag[%016llx] utag[%016llx] stag[%016llx] "
+ "u[%016llx] l[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ (int) info->icache_index,
+ info->icache_tag,
+ info->icache_utag,
+ info->icache_stag,
+ info->icache_upper,
+ info->icache_lower);
+ printk("%s" "ERROR(%d): I-cache INSN0[%016llx] INSN1[%016llx] INSN2[%016llx] INSN3[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ info->icache_data[0],
+ info->icache_data[1],
+ info->icache_data[2],
+ info->icache_data[3]);
+ printk("%s" "ERROR(%d): I-cache INSN4[%016llx] INSN5[%016llx] INSN6[%016llx] INSN7[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ info->icache_data[4],
+ info->icache_data[5],
+ info->icache_data[6],
+ info->icache_data[7]);
+ printk("%s" "ERROR(%d): E-cache idx[%x] tag[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ (int) info->ecache_index, info->ecache_tag);
+ printk("%s" "ERROR(%d): E-cache data0[%016llx] data1[%016llx] data2[%016llx] data3[%016llx]\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+ info->ecache_data[0],
+ info->ecache_data[1],
+ info->ecache_data[2],
+ info->ecache_data[3]);
+
+ afsr = (afsr & ~hipri) & cheetah_afsr_errors;
+ while (afsr != 0UL) {
+ unsigned long bit = cheetah_get_hipri(afsr);
+
+ printk("%s" "ERROR: Multiple-error (%016lx) \"%s\"\n",
+ (recoverable ? KERN_WARNING : KERN_CRIT),
+ bit, cheetah_get_string(bit));
+
+ afsr &= ~bit;
+ }
+
+ if (!recoverable)
+ printk(KERN_CRIT "ERROR: This condition is not recoverable.\n");
+ }
+
+ static int cheetah_recheck_errors(struct cheetah_err_info *logp)
+ {
+ unsigned long afsr, afar;
+ int ret = 0;
+
+ __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+ : "=r" (afsr)
+ : "i" (ASI_AFSR));
+ if ((afsr & cheetah_afsr_errors) != 0) {
+ if (logp != NULL) {
+ __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+ : "=r" (afar)
+ : "i" (ASI_AFAR));
+ logp->afsr = afsr;
+ logp->afar = afar;
+ }
+ ret = 1;
+ }
+ __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+ "membar #Sync\n\t"
+ : : "r" (afsr), "i" (ASI_AFSR));
+
+ return ret;
+ }
+
+ void cheetah_fecc_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+ {
+ struct cheetah_err_info local_snapshot, *p;
+ int recoverable;
+
+ /* Flush E-cache */
+ cheetah_flush_ecache();
+
+ p = cheetah_get_error_log(afsr);
+ if (!p) {
+ prom_printf("ERROR: Early Fast-ECC error afsr[%016lx] afar[%016lx]\n",
+ afsr, afar);
+ prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+ smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+ prom_halt();
+ }
+
+ /* Grab snapshot of logged error. */
+ memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+ /* If the current trap snapshot does not match what the
+ * trap handler passed along into our args, big trouble.
+ * In such a case, mark the local copy as invalid.
+ *
+ * Else, it matches and we mark the afsr in the non-local
+ * copy as invalid so we may log new error traps there.
+ */
+ if (p->afsr != afsr || p->afar != afar)
+ local_snapshot.afsr = CHAFSR_INVALID;
+ else
+ p->afsr = CHAFSR_INVALID;
+
+ cheetah_flush_icache();
+ cheetah_flush_dcache();
+
+ /* Re-enable I-cache/D-cache */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_DCU_CONTROL_REG),
+ "i" (DCU_DC | DCU_IC)
+ : "g1");
+
+ /* Re-enable error reporting */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_ESTATE_ERROR_EN),
+ "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+ : "g1");
+
+ /* Decide if we can continue after handling this trap and
+ * logging the error.
+ */
+ recoverable = 1;
+ if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+ recoverable = 0;
+
+ /* Re-check AFSR/AFAR. What we are looking for here is whether a new
+ * error was logged while we had error reporting traps disabled.
+ */
+ if (cheetah_recheck_errors(&local_snapshot)) {
+ unsigned long new_afsr = local_snapshot.afsr;
+
+ /* If we got a new asynchronous error, die... */
+ if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
+ CHAFSR_WDU | CHAFSR_CPU |
+ CHAFSR_IVU | CHAFSR_UE |
+ CHAFSR_BERR | CHAFSR_TO))
+ recoverable = 0;
+ }
+
+ /* Log errors. */
+ cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+ if (!recoverable)
+ panic("Irrecoverable Fast-ECC error trap.\n");
+
+ /* Flush E-cache to kick the error trap handlers out. */
+ cheetah_flush_ecache();
+ }
+
+ /* Try to fix a correctable error by pushing the line out from
+ * the E-cache. Recheck error reporting registers to see if the
+ * problem is intermittent.
+ */
+ static int cheetah_fix_ce(unsigned long physaddr)
+ {
+ unsigned long orig_estate;
+ unsigned long alias1, alias2;
+ int ret;
+
+ /* Make sure correctable error traps are disabled. */
+ __asm__ __volatile__("ldxa [%%g0] %2, %0\n\t"
+ "andn %0, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %2\n\t"
+ "membar #Sync"
+ : "=&r" (orig_estate)
+ : "i" (ESTATE_ERROR_CEEN),
+ "i" (ASI_ESTATE_ERROR_EN)
+ : "g1");
+
+ /* We calculate alias addresses that will force the
+ * cache line in question out of the E-cache. Then
+ * we bring it back in with an atomic instruction so
+ * that we get it in some modified/exclusive state,
+ * then we displace it again to try and get proper ECC
+ * pushed back into the system.
+ */
+ physaddr &= ~(8UL - 1UL);
+ alias1 = (ecache_flush_physbase +
+ (physaddr & ((ecache_flush_size >> 1) - 1)));
+ alias2 = alias1 + (ecache_flush_size >> 1);
+ __asm__ __volatile__("ldxa [%0] %3, %%g0\n\t"
+ "ldxa [%1] %3, %%g0\n\t"
+ "casxa [%2] %3, %%g0, %%g0\n\t"
+ "ldxa [%0] %3, %%g0\n\t"
+ "ldxa [%1] %3, %%g0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (alias1), "r" (alias2),
+ "r" (physaddr), "i" (ASI_PHYS_USE_EC));
+
+ /* Did that trigger another error? */
+ if (cheetah_recheck_errors(NULL)) {
+ /* Try one more time. */
+ __asm__ __volatile__("ldxa [%0] %1, %%g0\n\t"
+ "membar #Sync"
+ : : "r" (physaddr), "i" (ASI_PHYS_USE_EC));
+ if (cheetah_recheck_errors(NULL))
+ ret = 2;
+ else
+ ret = 1;
+ } else {
+ /* No new error, intermittent problem. */
+ ret = 0;
+ }
+
+ /* Restore error enables. */
+ __asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+ "membar #Sync"
+ : : "r" (orig_estate), "i" (ASI_ESTATE_ERROR_EN));
+
+ return ret;
+ }
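
As a worked illustration of the alias arithmetic above (a sketch only, with a
hypothetical 4MB displacement-flush region; the real size comes from the probed
E-cache geometry): given ecache_flush_size = 0x400000 and physaddr = 0x12345678,

	half   = ecache_flush_size >> 1;	/* 0x200000 */
	alias1 = ecache_flush_physbase +
		 (physaddr & (half - 1));	/* offset 0x145678 */
	alias2 = alias1 + half;

The two aliases map to the same E-cache line, so loading them displaces the
suspect line; the casxa then pulls it back in a modified state before the
final loads displace it once more with freshly generated ECC.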
+
+ /* Return non-zero if PADDR is a valid physical memory address. */
+ static int cheetah_check_main_memory(unsigned long paddr)
+ {
+ unsigned long vaddr = PAGE_OFFSET + paddr;
+
+ if (vaddr > (unsigned long) high_memory)
+ return 0;
+
+ return kern_addr_valid(vaddr);
+ }
+
+ void cheetah_cee_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+ {
+ struct cheetah_err_info local_snapshot, *p;
+ int recoverable, is_memory;
+
+ p = cheetah_get_error_log(afsr);
+ if (!p) {
+ prom_printf("ERROR: Early CEE error afsr[%016lx] afar[%016lx]\n",
+ afsr, afar);
+ prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+ smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+ prom_halt();
+ }
+
+ /* Grab snapshot of logged error. */
+ memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+ /* If the current trap snapshot does not match what the
+ * trap handler passed along into our args, big trouble.
+ * In such a case, mark the local copy as invalid.
+ *
+ * Else, it matches and we mark the afsr in the non-local
+ * copy as invalid so we may log new error traps there.
+ */
+ if (p->afsr != afsr || p->afar != afar)
+ local_snapshot.afsr = CHAFSR_INVALID;
+ else
+ p->afsr = CHAFSR_INVALID;
+
+ is_memory = cheetah_check_main_memory(afar);
+
+ if (is_memory && (afsr & CHAFSR_CE) != 0UL) {
+ /* XXX Might want to log the results of this operation
+ * XXX somewhere... -DaveM
+ */
+ cheetah_fix_ce(afar);
+ }
+
+ {
+ int flush_all, flush_line;
+
+ flush_all = flush_line = 0;
+ if ((afsr & CHAFSR_EDC) != 0UL) {
+ if ((afsr & cheetah_afsr_errors) == CHAFSR_EDC)
+ flush_line = 1;
+ else
+ flush_all = 1;
+ } else if ((afsr & CHAFSR_CPC) != 0UL) {
+ if ((afsr & cheetah_afsr_errors) == CHAFSR_CPC)
+ flush_line = 1;
+ else
+ flush_all = 1;
+ }
+
+ /* Trap handler only disabled I-cache, flush it. */
+ cheetah_flush_icache();
+
+ /* Re-enable I-cache */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_DCU_CONTROL_REG),
+ "i" (DCU_IC)
+ : "g1");
+
+ if (flush_all)
+ cheetah_flush_ecache();
+ else if (flush_line)
+ cheetah_flush_ecache_line(afar);
+ }
+
+ /* Re-enable error reporting */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_ESTATE_ERROR_EN),
+ "i" (ESTATE_ERROR_CEEN)
+ : "g1");
+
+ /* Decide if we can continue after handling this trap and
+ * logging the error.
+ */
+ recoverable = 1;
+ if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+ recoverable = 0;
+
+ /* Re-check AFSR/AFAR */
+ (void) cheetah_recheck_errors(&local_snapshot);
+
+ /* Log errors. */
+ cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+ if (!recoverable)
+ panic("Irrecoverable Correctable-ECC error trap.\n");
+ }
+
+ void cheetah_deferred_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+ {
+ struct cheetah_err_info local_snapshot, *p;
+ int recoverable, is_memory;
+
+ #ifdef CONFIG_PCI
+ /* Check for the special PCI poke sequence. */
+ if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+ cheetah_flush_icache();
+ cheetah_flush_dcache();
+
+ /* Re-enable I-cache/D-cache */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_DCU_CONTROL_REG),
+ "i" (DCU_DC | DCU_IC)
+ : "g1");
+
+ /* Re-enable error reporting */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_ESTATE_ERROR_EN),
+ "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+ : "g1");
+
+ (void) cheetah_recheck_errors(NULL);
+
+ pci_poke_faulted = 1;
+ regs->tpc += 4;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+ #endif
+
+ p = cheetah_get_error_log(afsr);
+ if (!p) {
+ prom_printf("ERROR: Early deferred error afsr[%016lx] afar[%016lx]\n",
+ afsr, afar);
+ prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+ smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+ prom_halt();
+ }
+
+ /* Grab snapshot of logged error. */
+ memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+ /* If the current trap snapshot does not match what the
+ * trap handler passed along into our args, big trouble.
+ * In such a case, mark the local copy as invalid.
+ *
+ * Else, it matches and we mark the afsr in the non-local
+ * copy as invalid so we may log new error traps there.
+ */
+ if (p->afsr != afsr || p->afar != afar)
+ local_snapshot.afsr = CHAFSR_INVALID;
+ else
+ p->afsr = CHAFSR_INVALID;
+
+ is_memory = cheetah_check_main_memory(afar);
+
+ {
+ int flush_all, flush_line;
+
+ flush_all = flush_line = 0;
+ if ((afsr & CHAFSR_EDU) != 0UL) {
+ if ((afsr & cheetah_afsr_errors) == CHAFSR_EDU)
+ flush_line = 1;
+ else
+ flush_all = 1;
+ } else if ((afsr & CHAFSR_BERR) != 0UL) {
+ if ((afsr & cheetah_afsr_errors) == CHAFSR_BERR)
+ flush_line = 1;
+ else
+ flush_all = 1;
+ }
+
+ cheetah_flush_icache();
+ cheetah_flush_dcache();
+
+ /* Re-enable I/D caches */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_DCU_CONTROL_REG),
+ "i" (DCU_IC | DCU_DC)
+ : "g1");
+
+ if (flush_all)
+ cheetah_flush_ecache();
+ else if (flush_line)
+ cheetah_flush_ecache_line(afar);
+ }
+
+ /* Re-enable error reporting */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_ESTATE_ERROR_EN),
+ "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+ : "g1");
+
+ /* Decide if we can continue after handling this trap and
+ * logging the error.
+ */
+ recoverable = 1;
+ if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+ recoverable = 0;
+
+ /* Re-check AFSR/AFAR. What we are looking for here is whether a new
+ * error was logged while we had error reporting traps disabled.
+ */
+ if (cheetah_recheck_errors(&local_snapshot)) {
+ unsigned long new_afsr = local_snapshot.afsr;
+
+ /* If we got a new asynchronous error, die... */
+ if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
+ CHAFSR_WDU | CHAFSR_CPU |
+ CHAFSR_IVU | CHAFSR_UE |
+ CHAFSR_BERR | CHAFSR_TO))
+ recoverable = 0;
+ }
+
+ /* Log errors. */
+ cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+ /* "Recoverable" here means we try to yank the page from ever
+ * being newly used again. This depends upon a few things:
+ * 1) Must be main memory, and AFAR must be valid.
+ * 2) If we trapped from user, OK.
+ * 3) Else, if we trapped from kernel we must find exception
+ * table entry (ie. we have to have been accessing user
+ * space).
+ *
+ * If AFAR is not in main memory, or we trapped from kernel
+ * and cannot find an exception table entry, it is unacceptable
+ * to try and continue.
+ */
+ if (recoverable && is_memory) {
+ if ((regs->tstate & TSTATE_PRIV) == 0UL) {
+ /* OK, usermode access. */
+ recoverable = 1;
+ } else {
+ const struct exception_table_entry *entry;
+
+ entry = search_exception_tables(regs->tpc);
+ if (entry) {
+ /* OK, kernel access to userspace. */
+ recoverable = 1;
+
+ } else {
+ /* BAD, privileged state is corrupted. */
+ recoverable = 0;
+ }
+
+ if (recoverable) {
+ if (pfn_valid(afar >> PAGE_SHIFT))
+ get_page(pfn_to_page(afar >> PAGE_SHIFT));
+ else
+ recoverable = 0;
+
+ /* Only perform fixup if we still have a
+ * recoverable condition.
+ */
+ if (recoverable) {
+ regs->tpc = entry->fixup;
+ regs->tnpc = regs->tpc + 4;
+ }
+ }
+ }
+ } else {
+ recoverable = 0;
+ }
+
+ if (!recoverable)
+ panic("Irrecoverable deferred error trap.\n");
+ }
+
+ /* Handle a D/I cache parity error trap. TYPE is encoded as:
+ *
+ * Bit0: 0=dcache,1=icache
+ * Bit1: 0=recoverable,1=unrecoverable
+ *
+ * The hardware has disabled both the I-cache and D-cache in
+ * the %dcr register.
+ */
+ void cheetah_plus_parity_error(int type, struct pt_regs *regs)
+ {
+ if (type & 0x1)
+ __cheetah_flush_icache();
+ else
+ cheetah_plus_zap_dcache_parity();
+ cheetah_flush_dcache();
+
+ /* Re-enable I-cache/D-cache */
+ __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+ "or %%g1, %1, %%g1\n\t"
+ "stxa %%g1, [%%g0] %0\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "i" (ASI_DCU_CONTROL_REG),
+ "i" (DCU_DC | DCU_IC)
+ : "g1");
+
+ if (type & 0x2) {
+ printk(KERN_EMERG "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n",
+ smp_processor_id(),
+ (type & 0x1) ? 'I' : 'D',
+ regs->tpc);
+ printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc);
+ panic("Irrecoverable Cheetah+ parity error.");
+ }
+
+ printk(KERN_WARNING "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n",
+ smp_processor_id(),
+ (type & 0x1) ? 'I' : 'D',
+ regs->tpc);
+ printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc);
+ }
+
+ struct sun4v_error_entry {
+ u64 err_handle;
+ u64 err_stick;
+
+ u32 err_type;
+ #define SUN4V_ERR_TYPE_UNDEFINED 0
+ #define SUN4V_ERR_TYPE_UNCORRECTED_RES 1
+ #define SUN4V_ERR_TYPE_PRECISE_NONRES 2
+ #define SUN4V_ERR_TYPE_DEFERRED_NONRES 3
+ #define SUN4V_ERR_TYPE_WARNING_RES 4
+
+ u32 err_attrs;
+ #define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001
+ #define SUN4V_ERR_ATTRS_MEMORY 0x00000002
+ #define SUN4V_ERR_ATTRS_PIO 0x00000004
+ #define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008
+ #define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010
+ #define SUN4V_ERR_ATTRS_USER_MODE 0x01000000
+ #define SUN4V_ERR_ATTRS_PRIV_MODE 0x02000000
+ #define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000
+
+ u64 err_raddr;
+ u32 err_size;
+ u16 err_cpu;
+ u16 err_pad;
+ };
+
+ static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0);
+ static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0);
+
+ static const char *sun4v_err_type_to_str(u32 type)
+ {
+ switch (type) {
+ case SUN4V_ERR_TYPE_UNDEFINED:
+ return "undefined";
+ case SUN4V_ERR_TYPE_UNCORRECTED_RES:
+ return "uncorrected resumable";
+ case SUN4V_ERR_TYPE_PRECISE_NONRES:
+ return "precise nonresumable";
+ case SUN4V_ERR_TYPE_DEFERRED_NONRES:
+ return "deferred nonresumable";
+ case SUN4V_ERR_TYPE_WARNING_RES:
+ return "warning resumable";
+ default:
+ return "unknown";
+ 	}
+ }
+
+ static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt)
+ {
+ int cnt;
+
+ printk("%s: Reporting on cpu %d\n", pfx, cpu);
+ printk("%s: err_handle[%llx] err_stick[%llx] err_type[%08x:%s]\n",
+ pfx,
+ ent->err_handle, ent->err_stick,
+ ent->err_type,
+ sun4v_err_type_to_str(ent->err_type));
+ printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n",
+ pfx,
+ ent->err_attrs,
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ?
+ "processor" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ?
+ "memory" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ?
+ "pio" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ?
+ "integer-regs" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ?
+ "fpu-regs" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ?
+ "user" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ?
+ "privileged" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ?
+ "queue-full" : ""));
+ printk("%s: err_raddr[%016llx] err_size[%u] err_cpu[%u]\n",
+ pfx,
+ ent->err_raddr, ent->err_size, ent->err_cpu);
+
+ show_regs(regs);
+
+ if ((cnt = atomic_read(ocnt)) != 0) {
+ atomic_set(ocnt, 0);
+ wmb();
+ printk("%s: Queue overflowed %d times.\n",
+ pfx, cnt);
+ }
+ }
+
+ /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
+ * Log the event and clear the first word of the entry.
+ */
+ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset)
+ {
+ struct sun4v_error_entry *ent, local_copy;
+ struct trap_per_cpu *tb;
+ unsigned long paddr;
+ int cpu;
+
+ cpu = get_cpu();
+
+ tb = &trap_block[cpu];
+ paddr = tb->resum_kernel_buf_pa + offset;
+ ent = __va(paddr);
+
+ memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+ /* We have a local copy now, so release the entry. */
+ ent->err_handle = 0;
+ wmb();
+
+ put_cpu();
+
+ 	if (local_copy.err_type == SUN4V_ERR_TYPE_WARNING_RES) {
+ /* If err_type is 0x4, it's a powerdown request. Do
+ * not do the usual resumable error log because that
+ * makes it look like some abnormal error.
+ */
+ printk(KERN_INFO "Power down request...\n");
+ kill_cad_pid(SIGINT, 1);
+ return;
+ }
+
+ sun4v_log_error(regs, &local_copy, cpu,
+ KERN_ERR "RESUMABLE ERROR",
+ &sun4v_resum_oflow_cnt);
+ }
+
+ /* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+ void sun4v_resum_overflow(struct pt_regs *regs)
+ {
+ atomic_inc(&sun4v_resum_oflow_cnt);
+ }
+
+ /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
+ * Log the event, clear the first word of the entry, and die.
+ */
+ void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
+ {
+ struct sun4v_error_entry *ent, local_copy;
+ struct trap_per_cpu *tb;
+ unsigned long paddr;
+ int cpu;
+
+ cpu = get_cpu();
+
+ tb = &trap_block[cpu];
+ paddr = tb->nonresum_kernel_buf_pa + offset;
+ ent = __va(paddr);
+
+ memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+ /* We have a local copy now, so release the entry. */
+ ent->err_handle = 0;
+ wmb();
+
+ put_cpu();
+
+ #ifdef CONFIG_PCI
+ /* Check for the special PCI poke sequence. */
+ if (pci_poke_in_progress && pci_poke_cpu == cpu) {
+ pci_poke_faulted = 1;
+ regs->tpc += 4;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+ #endif
+
+ sun4v_log_error(regs, &local_copy, cpu,
+ KERN_EMERG "NON-RESUMABLE ERROR",
+ &sun4v_nonresum_oflow_cnt);
+
+ panic("Non-resumable error.");
+ }
+
+ /* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+ void sun4v_nonresum_overflow(struct pt_regs *regs)
+ {
+ /* XXX Actually even this can make not that much sense. Perhaps
+ * XXX we should just pull the plug and panic directly from here?
+ */
+ atomic_inc(&sun4v_nonresum_oflow_cnt);
+ }
+
+ unsigned long sun4v_err_itlb_vaddr;
+ unsigned long sun4v_err_itlb_ctx;
+ unsigned long sun4v_err_itlb_pte;
+ unsigned long sun4v_err_itlb_error;
+
+ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
+ {
+ if (tl > 1)
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+ printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc);
+ printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
+ printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n",
+ (void *) regs->u_regs[UREG_I7]);
+ printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] "
+ "pte[%lx] error[%lx]\n",
+ sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
+ sun4v_err_itlb_pte, sun4v_err_itlb_error);
+
+ prom_halt();
+ }
+
+ unsigned long sun4v_err_dtlb_vaddr;
+ unsigned long sun4v_err_dtlb_ctx;
+ unsigned long sun4v_err_dtlb_pte;
+ unsigned long sun4v_err_dtlb_error;
+
+ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
+ {
+ if (tl > 1)
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+ printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc);
+ printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
+ printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n",
+ (void *) regs->u_regs[UREG_I7]);
+ printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] "
+ "pte[%lx] error[%lx]\n",
+ sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
+ sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
+
+ prom_halt();
+ }
+
+ void hypervisor_tlbop_error(unsigned long err, unsigned long op)
+ {
+ printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n",
+ err, op);
+ }
+
+ void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op)
+ {
+ printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n",
+ err, op);
+ }
+
+ void do_fpe_common(struct pt_regs *regs)
+ {
+ if (regs->tstate & TSTATE_PRIV) {
+ regs->tpc = regs->tnpc;
+ regs->tnpc += 4;
+ } else {
+ unsigned long fsr = current_thread_info()->xfsr[0];
+ siginfo_t info;
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ info.si_code = __SI_FAULT;
+ if ((fsr & 0x1c000) == (1 << 14)) {
+ if (fsr & 0x10)
+ info.si_code = FPE_FLTINV;
+ else if (fsr & 0x08)
+ info.si_code = FPE_FLTOVF;
+ else if (fsr & 0x04)
+ info.si_code = FPE_FLTUND;
+ else if (fsr & 0x02)
+ info.si_code = FPE_FLTDIV;
+ else if (fsr & 0x01)
+ info.si_code = FPE_FLTRES;
+ }
+ force_sig_info(SIGFPE, &info, current);
+ }
+ }
+
+ void do_fpieee(struct pt_regs *regs)
+ {
+ if (notify_die(DIE_TRAP, "fpu exception ieee", regs,
+ 0, 0x24, SIGFPE) == NOTIFY_STOP)
+ return;
+
+ do_fpe_common(regs);
+ }
+
+ extern int do_mathemu(struct pt_regs *, struct fpustate *);
+
+ void do_fpother(struct pt_regs *regs)
+ {
+ struct fpustate *f = FPUSTATE;
+ int ret = 0;
+
+ if (notify_die(DIE_TRAP, "fpu exception other", regs,
+ 0, 0x25, SIGFPE) == NOTIFY_STOP)
+ return;
+
+ switch ((current_thread_info()->xfsr[0] & 0x1c000)) {
+ case (2 << 14): /* unfinished_FPop */
+ case (3 << 14): /* unimplemented_FPop */
+ ret = do_mathemu(regs, f);
+ break;
+ }
+ if (ret)
+ return;
+ do_fpe_common(regs);
+ }
+
+ void do_tof(struct pt_regs *regs)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs,
+ 0, 0x26, SIGEMT) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV)
+ die_if_kernel("Penguin overflow trap from kernel mode", regs);
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGEMT;
+ info.si_errno = 0;
+ info.si_code = EMT_TAGOVF;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ force_sig_info(SIGEMT, &info, current);
+ }
+
+ void do_div0(struct pt_regs *regs)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "integer division by zero", regs,
+ 0, 0x28, SIGFPE) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV)
+ die_if_kernel("TL0: Kernel divide by zero.", regs);
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_code = FPE_INTDIV;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ force_sig_info(SIGFPE, &info, current);
+ }
+
+ static void instruction_dump(unsigned int *pc)
+ {
+ int i;
+
+ if ((((unsigned long) pc) & 3))
+ return;
+
+ printk("Instruction DUMP:");
+ for (i = -3; i < 6; i++)
+ printk("%c%08x%c",i?' ':'<',pc[i],i?' ':'>');
+ printk("\n");
+ }
+
+ static void user_instruction_dump(unsigned int __user *pc)
+ {
+ int i;
+ unsigned int buf[9];
+
+ if ((((unsigned long) pc) & 3))
+ return;
+
+ if (copy_from_user(buf, pc - 3, sizeof(buf)))
+ return;
+
+ printk("Instruction DUMP:");
+ for (i = 0; i < 9; i++)
+ printk("%c%08x%c",i==3?' ':'<',buf[i],i==3?' ':'>');
+ printk("\n");
+ }
+
+ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
+ {
+ unsigned long fp, thread_base, ksp;
+ struct thread_info *tp;
+ int count = 0;
+
+ ksp = (unsigned long) _ksp;
+ if (!tsk)
+ tsk = current;
+ tp = task_thread_info(tsk);
+ if (ksp == 0UL) {
+ if (tsk == current)
+ asm("mov %%fp, %0" : "=r" (ksp));
+ else
+ ksp = tp->ksp;
+ }
+ if (tp == current_thread_info())
+ flushw_all();
+
+ fp = ksp + STACK_BIAS;
+ thread_base = (unsigned long) tp;
+
+ printk("Call Trace:\n");
+ do {
+ struct sparc_stackf *sf;
+ struct pt_regs *regs;
+ unsigned long pc;
+
+ if (!kstack_valid(tp, fp))
+ break;
+ sf = (struct sparc_stackf *) fp;
+ regs = (struct pt_regs *) (sf + 1);
+
+ if (kstack_is_trap_frame(tp, regs)) {
+ if (!(regs->tstate & TSTATE_PRIV))
+ break;
+ pc = regs->tpc;
+ fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+ } else {
+ pc = sf->callers_pc;
+ fp = (unsigned long)sf->fp + STACK_BIAS;
+ }
+
+ printk(" [%016lx] %pS\n", pc, (void *) pc);
+ } while (++count < 16);
+ }
+
+ void dump_stack(void)
+ {
+ show_stack(current, NULL);
+ }
+
+ EXPORT_SYMBOL(dump_stack);
+
+ static inline int is_kernel_stack(struct task_struct *task,
+ struct reg_window *rw)
+ {
+ unsigned long rw_addr = (unsigned long) rw;
+ unsigned long thread_base, thread_end;
+
+ if (rw_addr < PAGE_OFFSET) {
+ if (task != &init_task)
+ return 0;
+ }
+
+ thread_base = (unsigned long) task_stack_page(task);
+ thread_end = thread_base + sizeof(union thread_union);
+ if (rw_addr >= thread_base &&
+ rw_addr < thread_end &&
+ !(rw_addr & 0x7UL))
+ return 1;
+
+ return 0;
+ }
+
+ static inline struct reg_window *kernel_stack_up(struct reg_window *rw)
+ {
+ unsigned long fp = rw->ins[6];
+
+ if (!fp)
+ return NULL;
+
+ return (struct reg_window *) (fp + STACK_BIAS);
+ }
+
+ void die_if_kernel(char *str, struct pt_regs *regs)
+ {
+ static int die_counter;
+ int count = 0;
+
+ /* Amuse the user. */
+ printk(
+ " \\|/ ____ \\|/\n"
+ " \"@'/ .. \\`@\"\n"
+ " /_| \\__/ |_\\\n"
+ " \\__U_/\n");
+
+ printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
+ notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
+ __asm__ __volatile__("flushw");
+ show_regs(regs);
+ add_taint(TAINT_DIE);
+ if (regs->tstate & TSTATE_PRIV) {
+ struct reg_window *rw = (struct reg_window *)
+ (regs->u_regs[UREG_FP] + STACK_BIAS);
+
+ /* Stop the back trace when we hit userland or we
+ * find some badly aligned kernel stack.
+ */
+ while (rw &&
+ 	       count++ < 30 &&
+ is_kernel_stack(current, rw)) {
+ printk("Caller[%016lx]: %pS\n", rw->ins[7],
+ (void *) rw->ins[7]);
+
+ rw = kernel_stack_up(rw);
+ }
+ instruction_dump ((unsigned int *) regs->tpc);
+ } else {
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ user_instruction_dump ((unsigned int __user *) regs->tpc);
+ }
+ if (regs->tstate & TSTATE_PRIV)
+ do_exit(SIGKILL);
+ do_exit(SIGSEGV);
+ }
+ EXPORT_SYMBOL(die_if_kernel);
+
+ #define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19))
+ #define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19))
+
+ extern int handle_popc(u32 insn, struct pt_regs *regs);
+ extern int handle_ldf_stq(u32 insn, struct pt_regs *regs);
+
+ void do_illegal_instruction(struct pt_regs *regs)
+ {
+ unsigned long pc = regs->tpc;
+ unsigned long tstate = regs->tstate;
+ u32 insn;
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "illegal instruction", regs,
+ 0, 0x10, SIGILL) == NOTIFY_STOP)
+ return;
+
+ if (tstate & TSTATE_PRIV)
+ die_if_kernel("Kernel illegal instruction", regs);
+ if (test_thread_flag(TIF_32BIT))
+ pc = (u32)pc;
+ if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+ if ((insn & 0xc1ffc000) == 0x81700000) /* POPC */ {
+ if (handle_popc(insn, regs))
+ return;
+ } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ {
+ if (handle_ldf_stq(insn, regs))
+ return;
+ } else if (tlb_type == hypervisor) {
+ if ((insn & VIS_OPCODE_MASK) == VIS_OPCODE_VAL) {
+ if (!vis_emul(regs, insn))
+ return;
+ } else {
+ struct fpustate *f = FPUSTATE;
+
+ /* XXX maybe verify XFSR bits like
+ * XXX do_fpother() does?
+ */
+ if (do_mathemu(regs, f))
+ return;
+ }
+ }
+ }
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_code = ILL_ILLOPC;
+ info.si_addr = (void __user *)pc;
+ info.si_trapno = 0;
+ force_sig_info(SIGILL, &info, current);
+ }
+
+ extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn);
+
+ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "memory address unaligned", regs,
+ 0, 0x34, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
+ return;
+ }
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *)sfar;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+ }
+
+ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "memory address unaligned", regs,
+ 0, 0x34, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
+ return;
+ }
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+ }
+
+ void do_privop(struct pt_regs *regs)
+ {
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "privileged operation", regs,
+ 0, 0x11, SIGILL) == NOTIFY_STOP)
+ return;
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_code = ILL_PRVOPC;
+ info.si_addr = (void __user *)regs->tpc;
+ info.si_trapno = 0;
+ force_sig_info(SIGILL, &info, current);
+ }
+
+ void do_privact(struct pt_regs *regs)
+ {
+ do_privop(regs);
+ }
+
+ /* Trap level 1 stuff or other traps we should never see... */
+ void do_cee(struct pt_regs *regs)
+ {
+ die_if_kernel("TL0: Cache Error Exception", regs);
+ }
+
+ void do_cee_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Cache Error Exception", regs);
+ }
+
+ void do_dae_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Data Access Exception", regs);
+ }
+
+ void do_iae_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Instruction Access Exception", regs);
+ }
+
+ void do_div0_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: DIV0 Exception", regs);
+ }
+
+ void do_fpdis_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: FPU Disabled", regs);
+ }
+
+ void do_fpieee_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: FPU IEEE Exception", regs);
+ }
+
+ void do_fpother_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: FPU Other Exception", regs);
+ }
+
+ void do_ill_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Illegal Instruction Exception", regs);
+ }
+
+ void do_irq_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: IRQ Exception", regs);
+ }
+
+ void do_lddfmna_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: LDDF Exception", regs);
+ }
+
+ void do_stdfmna_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: STDF Exception", regs);
+ }
+
+ void do_paw(struct pt_regs *regs)
+ {
+ die_if_kernel("TL0: Phys Watchpoint Exception", regs);
+ }
+
+ void do_paw_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Phys Watchpoint Exception", regs);
+ }
+
+ void do_vaw(struct pt_regs *regs)
+ {
+ die_if_kernel("TL0: Virt Watchpoint Exception", regs);
+ }
+
+ void do_vaw_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Virt Watchpoint Exception", regs);
+ }
+
+ void do_tof_tl1(struct pt_regs *regs)
+ {
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ die_if_kernel("TL1: Tag Overflow Exception", regs);
+ }
+
+ void do_getpsr(struct pt_regs *regs)
+ {
+ regs->u_regs[UREG_I0] = tstate_to_psr(regs->tstate);
+ regs->tpc = regs->tnpc;
+ regs->tnpc += 4;
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ }
+
+ struct trap_per_cpu trap_block[NR_CPUS];
+
+ /* This can get invoked before sched_init() so play it super safe
+ * and use hard_smp_processor_id().
+ */
+ void notrace init_cur_cpu_trap(struct thread_info *t)
+ {
+ int cpu = hard_smp_processor_id();
+ struct trap_per_cpu *p = &trap_block[cpu];
+
+ p->thread = t;
+ p->pgd_paddr = 0;
+ }
+
+ extern void thread_info_offsets_are_bolixed_dave(void);
+ extern void trap_per_cpu_offsets_are_bolixed_dave(void);
+ extern void tsb_config_offsets_are_bolixed_dave(void);
+
+ /* Only invoked on boot processor. */
+ void __init trap_init(void)
+ {
- /* Compile time sanity check. */
- if (TI_TASK != offsetof(struct thread_info, task) ||
- TI_FLAGS != offsetof(struct thread_info, flags) ||
- TI_CPU != offsetof(struct thread_info, cpu) ||
- TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
- TI_KSP != offsetof(struct thread_info, ksp) ||
- TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
- TI_KREGS != offsetof(struct thread_info, kregs) ||
- TI_UTRAPS != offsetof(struct thread_info, utraps) ||
- TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
- TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
- TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
- TI_GSR != offsetof(struct thread_info, gsr) ||
- TI_XFSR != offsetof(struct thread_info, xfsr) ||
- TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
- TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
- TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
- TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
- TI_PCR != offsetof(struct thread_info, pcr_reg) ||
- TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
- TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
- TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
- TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
- TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
- TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
- TI_FPREGS != offsetof(struct thread_info, fpregs) ||
- (TI_FPREGS & (64 - 1)))
- thread_info_offsets_are_bolixed_dave();
-
- if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
- (TRAP_PER_CPU_PGD_PADDR !=
- offsetof(struct trap_per_cpu, pgd_paddr)) ||
- (TRAP_PER_CPU_CPU_MONDO_PA !=
- offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
- (TRAP_PER_CPU_DEV_MONDO_PA !=
- offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
- (TRAP_PER_CPU_RESUM_MONDO_PA !=
- offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
- (TRAP_PER_CPU_RESUM_KBUF_PA !=
- offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
- (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
- offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
- (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
- offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
- (TRAP_PER_CPU_FAULT_INFO !=
- offsetof(struct trap_per_cpu, fault_info)) ||
- (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
- offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
- (TRAP_PER_CPU_CPU_LIST_PA !=
- offsetof(struct trap_per_cpu, cpu_list_pa)) ||
- (TRAP_PER_CPU_TSB_HUGE !=
- offsetof(struct trap_per_cpu, tsb_huge)) ||
- (TRAP_PER_CPU_TSB_HUGE_TEMP !=
- offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
- (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
- offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
- (TRAP_PER_CPU_CPU_MONDO_QMASK !=
- offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
- (TRAP_PER_CPU_DEV_MONDO_QMASK !=
- offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
- (TRAP_PER_CPU_RESUM_QMASK !=
- offsetof(struct trap_per_cpu, resum_qmask)) ||
- (TRAP_PER_CPU_NONRESUM_QMASK !=
- offsetof(struct trap_per_cpu, nonresum_qmask)))
- trap_per_cpu_offsets_are_bolixed_dave();
-
- if ((TSB_CONFIG_TSB !=
- offsetof(struct tsb_config, tsb)) ||
- (TSB_CONFIG_RSS_LIMIT !=
- offsetof(struct tsb_config, tsb_rss_limit)) ||
- (TSB_CONFIG_NENTRIES !=
- offsetof(struct tsb_config, tsb_nentries)) ||
- (TSB_CONFIG_REG_VAL !=
- offsetof(struct tsb_config, tsb_reg_val)) ||
- (TSB_CONFIG_MAP_VADDR !=
- offsetof(struct tsb_config, tsb_map_vaddr)) ||
- (TSB_CONFIG_MAP_PTE !=
- offsetof(struct tsb_config, tsb_map_pte)))
- tsb_config_offsets_are_bolixed_dave();
++ BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task));
++ BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags));
++ BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu));
++ BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved));
++ BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp));
++ BUILD_BUG_ON(TI_FAULT_ADDR !=
++ offsetof(struct thread_info, fault_address));
++ BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs));
++ BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps));
++ BUILD_BUG_ON(TI_EXEC_DOMAIN !=
++ offsetof(struct thread_info, exec_domain));
++ BUILD_BUG_ON(TI_REG_WINDOW !=
++ offsetof(struct thread_info, reg_window));
++ BUILD_BUG_ON(TI_RWIN_SPTRS !=
++ offsetof(struct thread_info, rwbuf_stkptrs));
++ BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr));
++ BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr));
++ BUILD_BUG_ON(TI_PRE_COUNT !=
++ offsetof(struct thread_info, preempt_count));
++ BUILD_BUG_ON(TI_NEW_CHILD !=
++ offsetof(struct thread_info, new_child));
++ BUILD_BUG_ON(TI_SYS_NOERROR !=
++ offsetof(struct thread_info, syscall_noerror));
++ BUILD_BUG_ON(TI_RESTART_BLOCK !=
++ offsetof(struct thread_info, restart_block));
++ BUILD_BUG_ON(TI_KUNA_REGS !=
++ offsetof(struct thread_info, kern_una_regs));
++ BUILD_BUG_ON(TI_KUNA_INSN !=
++ offsetof(struct thread_info, kern_una_insn));
++ BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs));
++ BUILD_BUG_ON((TI_FPREGS & (64 - 1)));
++
++ BUILD_BUG_ON(TRAP_PER_CPU_THREAD !=
++ offsetof(struct trap_per_cpu, thread));
++ BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR !=
++ offsetof(struct trap_per_cpu, pgd_paddr));
++ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA !=
++ offsetof(struct trap_per_cpu, cpu_mondo_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA !=
++ offsetof(struct trap_per_cpu, dev_mondo_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA !=
++ offsetof(struct trap_per_cpu, resum_mondo_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA !=
++ offsetof(struct trap_per_cpu, resum_kernel_buf_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA !=
++ offsetof(struct trap_per_cpu, nonresum_mondo_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA !=
++ offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO !=
++ offsetof(struct trap_per_cpu, fault_info));
++ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
++ offsetof(struct trap_per_cpu, cpu_mondo_block_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA !=
++ offsetof(struct trap_per_cpu, cpu_list_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE !=
++ offsetof(struct trap_per_cpu, tsb_huge));
++ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP !=
++ offsetof(struct trap_per_cpu, tsb_huge_temp));
++	BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST_PA !=
++		     offsetof(struct trap_per_cpu, irq_worklist_pa));
++ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK !=
++ offsetof(struct trap_per_cpu, cpu_mondo_qmask));
++ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK !=
++ offsetof(struct trap_per_cpu, dev_mondo_qmask));
++ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK !=
++ offsetof(struct trap_per_cpu, resum_qmask));
++ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK !=
++ offsetof(struct trap_per_cpu, nonresum_qmask));
++
++ BUILD_BUG_ON(TSB_CONFIG_TSB !=
++ offsetof(struct tsb_config, tsb));
++ BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT !=
++ offsetof(struct tsb_config, tsb_rss_limit));
++ BUILD_BUG_ON(TSB_CONFIG_NENTRIES !=
++ offsetof(struct tsb_config, tsb_nentries));
++ BUILD_BUG_ON(TSB_CONFIG_REG_VAL !=
++ offsetof(struct tsb_config, tsb_reg_val));
++ BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR !=
++ offsetof(struct tsb_config, tsb_map_vaddr));
++ BUILD_BUG_ON(TSB_CONFIG_MAP_PTE !=
++ offsetof(struct tsb_config, tsb_map_pte));
+
+ /* Attach to the address space of init_task. On SMP we
+ * do this in smp.c:smp_callin for other cpus.
+ */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ }
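
For reference, the BUILD_BUG_ON() conversion above trades the old runtime
calls to the *_are_bolixed_dave() functions for a hard build failure. A
minimal sketch of the mechanism, as the macro was defined in
include/linux/kernel.h of this era: a condition that is true at compile time
produces a negative array size, which the compiler rejects.

	#define BUILD_BUG_ON(condition) \
		((void)sizeof(char[1 - 2*!!(condition)]))

	BUILD_BUG_ON(sizeof(long) != 8);	/* compiles away on sparc64 */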
--- /dev/null
--- /dev/null
++menu "Hardware Performance Monitoring support"
++config PERFMON
++ bool "Perfmon2 performance monitoring interface"
++ default n
++ help
++ Enables the perfmon2 interface to access the hardware
++ performance counters. See <http://perfmon2.sf.net/> for
++ more details.
++
++config PERFMON_DEBUG
++ bool "Perfmon debugging"
++ depends on PERFMON
++ default n
++ help
++ Enables perfmon debugging support
++
++config PERFMON_DEBUG_FS
++ bool "Enable perfmon statistics reporting via debugfs"
++ default y
++ depends on PERFMON && DEBUG_FS
++ help
++ Enable collection and reporting of perfmon timing statistics under
++ debugfs. This is used for debugging and performance analysis of the
++ subsystem. The debugfs filesystem must be mounted.
++
++endmenu
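
For illustration, a hypothetical .config fragment enabling the interface
together with the debugfs statistics reporting (symbol names as defined in
the menu above):

	CONFIG_PERFMON=y
	CONFIG_PERFMON_DEBUG_FS=y
	# CONFIG_PERFMON_DEBUG is not set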
--- /dev/null
--- /dev/null
++obj-$(CONFIG_PERFMON) += perfmon.o
--- /dev/null
--- /dev/null
++/* perfmon.c: sparc64 perfmon support
++ *
++ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/irq.h>
++#include <linux/perfmon_kern.h>
++
++#include <asm/system.h>
++#include <asm/spitfire.h>
++#include <asm/hypervisor.h>
++
++struct pcr_ops {
++ void (*write)(u64);
++ u64 (*read)(void);
++};
++
++static void direct_write_pcr(u64 val)
++{
++ write_pcr(val);
++}
++
++static u64 direct_read_pcr(void)
++{
++ u64 pcr;
++
++ read_pcr(pcr);
++
++ return pcr;
++}
++
++static struct pcr_ops direct_pcr_ops = {
++ .write = direct_write_pcr,
++ .read = direct_read_pcr,
++};
++
++/* The hypervisor call is needed because the hypervisor trace bit
++ * in the PCR is hyperprivileged and cannot be set by a direct
++ * write_pcr().
++ */
++static void n2_write_pcr(u64 val)
++{
++ unsigned long ret;
++
++ ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
++	if (ret != HV_EOK)
++ write_pcr(val);
++}
++
++static u64 n2_read_pcr(void)
++{
++ u64 pcr;
++
++ read_pcr(pcr);
++
++ return pcr;
++}
++
++static struct pcr_ops n2_pcr_ops = {
++ .write = n2_write_pcr,
++ .read = n2_read_pcr,
++};
++
++static struct pcr_ops *pcr_ops;
++
++void pfm_arch_write_pmc(struct pfm_context *ctx,
++ unsigned int cnum, u64 value)
++{
++ /*
++ * we only write to the actual register when monitoring is
++ * active (pfm_start was issued)
++ */
++ if (ctx && ctx->flags.started == 0)
++ return;
++
++ pcr_ops->write(value);
++}
++
++u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
++{
++ return pcr_ops->read();
++}
++
++/*
++ * collect pending overflowed PMDs. Called from pfm_ctxsw()
++ * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
++ * and set->npend_ovfls. Interrupts are masked
++ */
++static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
++{
++ unsigned int max = ctx->regs.max_intr_pmd;
++ u64 wmask = 1ULL << pfm_pmu_conf->counter_width;
++ u64 *intr_pmds = ctx->regs.intr_pmds;
++ u64 *used_mask = set->used_pmds;
++ u64 mask[PFM_PMD_BV];
++ unsigned int i;
++
++ bitmap_and(cast_ulp(mask),
++ cast_ulp(intr_pmds),
++ cast_ulp(used_mask),
++ max);
++
++ /*
++ * check all PMD that can generate interrupts
++ * (that includes counters)
++ */
++ for (i = 0; i < max; i++) {
++ if (test_bit(i, mask)) {
++ u64 new_val = pfm_arch_read_pmd(ctx, i);
++
++ PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
++ i, (unsigned long long)new_val,
++ (new_val&wmask) ? 1 : 0);
++
++ if (new_val & wmask) {
++ __set_bit(i, set->povfl_pmds);
++ set->npend_ovfls++;
++ }
++ }
++ }
++}
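
A minimal illustration of the overflow test above, assuming the counter_width
of 31 declared for this PMU further down: wmask is then bit 31, the first bit
past the implemented counter, so a wrapped counter is detected with a single
AND.

	u64 wmask  = 1ULL << 31;	/* counter_width = 31 */
	u64 before = 0x7fffffffULL;	/* counter at its maximum */
	u64 after  = before + 1;	/* wraps into bit 31 */
	/* (after & wmask) != 0, so the PMD is marked as overflowed */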
++
++static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ unsigned int i, max = ctx->regs.max_pmc;
++
++ /*
++	 * clear the enable bits; we assume every PMC register carries
++	 * enable bits
++ */
++ for (i = 0; i < max; i++) {
++ if (test_bit(i, set->used_pmcs))
++ pfm_arch_write_pmc(ctx, i, 0);
++ }
++
++ if (set->npend_ovfls)
++ return;
++
++ __pfm_get_ovfl_pmds(ctx, set);
++}
++
++/*
++ * Called from pfm_ctxsw(). Task is guaranteed to be current.
++ * Context is locked. Interrupts are masked. Monitoring is active.
++ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
++ *
++ * for per-thread:
++ * must stop monitoring for the task
++ *
++ * Return:
++ * non-zero : did not save PMDs (as part of stopping the PMU)
++ * 0 : saved PMDs (no need to save them in caller)
++ */
++int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
++{
++ /*
++ * disable lazy restore of PMC registers.
++ */
++ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
++
++ pfm_stop_active(task, ctx, ctx->active_set);
++
++ return 1;
++}
++
++/*
++ * Called from pfm_stop() and idle notifier
++ *
++ * Interrupts are masked. Context is locked. Set is the active set.
++ *
++ * For per-thread:
++ * task is not necessarily current. If not current task, then
++ * task is guaranteed stopped and off any cpu. Access to PMU
++ * is not guaranteed. Interrupts are masked. Context is locked.
++ * Set is the active set.
++ *
++ * For system-wide:
++ * task is current
++ *
++ * must disable active monitoring. ctx cannot be NULL
++ */
++void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
++{
++ /*
++ * no need to go through stop_save()
++ * if we are already stopped
++ */
++ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
++ return;
++
++ /*
++ * stop live registers and collect pending overflow
++ */
++ if (task == current)
++ pfm_stop_active(task, ctx, ctx->active_set);
++}
++
++/*
++ * Enable active monitoring. Called from pfm_start() and
++ * pfm_arch_unmask_monitoring().
++ *
++ * Interrupts are masked. Context is locked. Set is the active set.
++ *
++ * For per-thread:
++ * Task is not necessarily current. If not current task, then task
++ * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
++ *
++ * For system-wide:
++ * task is always current
++ *
++ * must enable active monitoring.
++ */
++void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
++{
++ struct pfm_event_set *set;
++ unsigned int max_pmc = ctx->regs.max_pmc;
++ unsigned int i;
++
++ if (task != current)
++ return;
++
++ set = ctx->active_set;
++ for (i = 0; i < max_pmc; i++) {
++ if (test_bit(i, set->used_pmcs))
++ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
++ }
++}
++
++/*
++ * function called from pfm_switch_sets(), pfm_context_load_thread(),
++ * pfm_context_load_sys() and pfm_ctxsw().
++ * context is locked. Interrupts are masked. set cannot be NULL.
++ * Access to the PMU is guaranteed.
++ *
++ * function must restore all PMD registers from set.
++ */
++void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
++{
++ unsigned int max_pmd = ctx->regs.max_pmd;
++ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
++ u64 *impl_pmds = ctx->regs.pmds;
++ unsigned int i;
++
++ /*
++ * must restore all pmds to avoid leaking
++ * information to user.
++ */
++ for (i = 0; i < max_pmd; i++) {
++ u64 val;
++
++ if (test_bit(i, impl_pmds) == 0)
++ continue;
++
++ val = set->pmds[i].value;
++
++ /*
++ * set upper bits for counter to ensure
++ * overflow will trigger
++ */
++ val &= ovfl_mask;
++
++ pfm_arch_write_pmd(ctx, i, val);
++ }
++}
++
++/*
++ * function called from pfm_switch_sets(), pfm_context_load_thread(),
++ * pfm_context_load_sys(), pfm_ctxsw().
++ * Context is locked. Interrupts are masked. set cannot be NULL.
++ * Access to the PMU is guaranteed.
++ *
++ * function must restore all PMC registers from set, if needed.
++ */
++void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
++{
++ unsigned int max_pmc = ctx->regs.max_pmc;
++ u64 *impl_pmcs = ctx->regs.pmcs;
++ unsigned int i;
++
++ /* If we're masked or stopped we don't need to bother restoring
++ * the PMCs now.
++ */
++ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
++ return;
++
++ /*
++ * restore all pmcs
++ */
++ for (i = 0; i < max_pmc; i++)
++ if (test_bit(i, impl_pmcs))
++ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
++}
++
++char *pfm_arch_get_pmu_module_name(void)
++{
++ return NULL;
++}
++
++void perfmon_interrupt(struct pt_regs *regs)
++{
++ pfm_interrupt_handler(instruction_pointer(regs), regs);
++}
++
++static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = {
++ PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0),
++};
++
++static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = {
++ PMD_D(PFM_REG_C, "PIC0", 0),
++ PMD_D(PFM_REG_C, "PIC1", 0),
++};
++
++static int pfm_sparc64_probe(void)
++{
++ return 0;
++}
++
++static struct pfm_pmu_config pmu_sparc64_pmu_conf = {
++ .counter_width = 31,
++ .pmd_desc = pfm_sparc64_pmd_desc,
++ .num_pmd_entries = 2,
++ .pmc_desc = pfm_sparc64_pmc_desc,
++ .num_pmc_entries = 1,
++ .probe_pmu = pfm_sparc64_probe,
++ .flags = PFM_PMU_BUILTIN_FLAG,
++ .owner = THIS_MODULE,
++};
++
++static unsigned long perf_hsvc_group;
++static unsigned long perf_hsvc_major;
++static unsigned long perf_hsvc_minor;
++
++static int __init register_perf_hsvc(void)
++{
++ if (tlb_type == hypervisor) {
++ switch (sun4v_chip_type) {
++		case SUN4V_CHIP_NIAGARA1:
++			perf_hsvc_group = HV_GRP_NIAG_PERF;
++			break;
++
++ case SUN4V_CHIP_NIAGARA2:
++ perf_hsvc_group = HV_GRP_N2_CPU;
++ break;
++
++ default:
++ return -ENODEV;
++ }
++
++ perf_hsvc_major = 1;
++ perf_hsvc_minor = 0;
++ if (sun4v_hvapi_register(perf_hsvc_group,
++ perf_hsvc_major,
++ &perf_hsvc_minor)) {
++ printk("perfmon: Could not register N2 hvapi.\n");
++ return -ENODEV;
++ }
++ }
++ return 0;
++}
++
++static void unregister_perf_hsvc(void)
++{
++ if (tlb_type != hypervisor)
++ return;
++ sun4v_hvapi_unregister(perf_hsvc_group);
++}
++
++static int __init pfm_sparc64_pmu_init(void)
++{
++ u64 mask;
++ int err;
++
++ err = register_perf_hsvc();
++ if (err)
++ return err;
++
++ if (tlb_type == hypervisor &&
++ sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
++ pcr_ops = &n2_pcr_ops;
++ else
++ pcr_ops = &direct_pcr_ops;
++
++ if (!strcmp(sparc_pmu_type, "ultra12"))
++ mask = (0xf << 11) | (0xf << 4) | 0x7;
++ else if (!strcmp(sparc_pmu_type, "ultra3") ||
++ !strcmp(sparc_pmu_type, "ultra3i") ||
++ !strcmp(sparc_pmu_type, "ultra3+") ||
++ !strcmp(sparc_pmu_type, "ultra4+"))
++ mask = (0x3f << 11) | (0x3f << 4) | 0x7;
++ else if (!strcmp(sparc_pmu_type, "niagara2"))
++ mask = ((1UL << 63) | (1UL << 62) |
++ (1UL << 31) | (0xfUL << 27) | (0xffUL << 19) |
++ (1UL << 18) | (0xfUL << 14) | (0xff << 6) |
++ (0x3UL << 4) | 0x7UL);
++ else if (!strcmp(sparc_pmu_type, "niagara"))
++ mask = ((1UL << 9) | (1UL << 8) |
++ (0x7UL << 4) | 0x7UL);
++ else {
++ err = -ENODEV;
++ goto out_err;
++ }
++
++ pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type;
++ pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask;
++
++ return pfm_pmu_register(&pmu_sparc64_pmu_conf);
++
++out_err:
++ unregister_perf_hsvc();
++ return err;
++}
++
++static void __exit pfm_sparc64_pmu_exit(void)
++{
++ unregister_perf_hsvc();
++ return pfm_pmu_unregister(&pmu_sparc64_pmu_conf);
++}
++
++module_init(pfm_sparc64_pmu_init);
++module_exit(pfm_sparc64_pmu_exit);
as R-8610-(G).
If you don't have one of these chips, you should say N here.
- config SCHED_NO_NO_OMIT_FRAME_POINTER
+ config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
- depends on X86_32 && !STACK_UNWIND
- depends on X86
++ depends on X86 && !STACK_UNWIND
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
If unsure, say Y.
- if !XEN
+ config CMDLINE_BOOL
+ bool "Built-in kernel command line"
+ default n
+ help
+ Allow for specifying boot arguments to the kernel at
+ build time. On some systems (e.g. embedded ones), it is
+ necessary or convenient to provide some or all of the
+ kernel boot arguments with the kernel itself (that is,
+ to not rely on the boot loader to provide them.)
+
+ To compile command line arguments into the kernel,
+	  set this option to 'Y', then fill in the boot arguments
+	  in CONFIG_CMDLINE.
+
+ Systems with fully functional boot loaders (i.e. non-embedded)
+ should leave this option set to 'N'.
+
+ config CMDLINE
+ string "Built-in kernel command string"
+ depends on CMDLINE_BOOL
+ default ""
+ help
+ Enter arguments here that should be compiled into the kernel
+ image and used at boot time. If the boot loader provides a
+ command line at boot time, it is appended to this string to
+	  form the full kernel command line when the system boots.
+
+ However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+ change this behavior.
+
+ In most cases, the command line (whether built-in or provided
+ by the boot loader) should specify the device for the root
+ file system.
+
+ config CMDLINE_OVERRIDE
+ bool "Built-in command line overrides boot loader arguments"
+ default n
+ depends on CMDLINE_BOOL
+ help
+ Set this option to 'Y' to have the kernel ignore the boot loader
+ command line, and use ONLY the built-in command line.
+
+ This is used to work around broken boot loaders. This should
+ be set to 'N' under normal conditions.
+
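
For illustration, a hypothetical .config fragment that bakes a serial console
and root device into the image while still letting the boot loader append to
it (the argument values are examples, not defaults):

	CONFIG_CMDLINE_BOOL=y
	CONFIG_CMDLINE="console=ttyS0,115200 root=/dev/sda1 ro"
	# CONFIG_CMDLINE_OVERRIDE is not set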
+source "arch/x86/perfmon/Kconfig"
- endif
+
endmenu
config ARCH_ENABLE_MEMORY_HOTPLUG
If unsure, say N.
-endmenu
+config KDB
+ bool "Built-in Kernel Debugger support"
- depends on DEBUG_KERNEL && !XEN
++ depends on DEBUG_KERNEL
+ select KALLSYMS
+ select KALLSYMS_ALL
+ help
+ This option provides a built-in kernel debugger. The built-in
+ kernel debugger contains commands which allow memory to be examined,
+ instructions to be disassembled and breakpoints to be set. For details,
+ see Documentation/kdb/kdb.mm and the manual pages kdb_bt, kdb_ss, etc.
+ Kdb can also be used via the serial port. Set up the system to
+ have a serial console (see Documentation/serial-console.txt).
+ The key sequence <escape>KDB on the serial port will cause the
+ kernel debugger to be entered with input from the serial port and
+ output to the serial console. If unsure, say N.
+
+config KDB_MODULES
+ tristate "KDB modules"
+ depends on KDB
+ help
+ KDB can be extended by adding your own modules, in directory
+ kdb/modules. This option selects the way that these modules should
+ be compiled, as free standing modules (select M) or built into the
+ kernel (select Y). If unsure say M.
+
+config KDB_OFF
+ bool "KDB off by default"
+ depends on KDB
+ help
+ Normally kdb is activated by default, as long as CONFIG_KDB is set.
+ If you want to ship a kernel with kdb support but only have kdb
+ turned on when the user requests it then select this option. When
+ compiled with CONFIG_KDB_OFF, kdb ignores all events unless you boot
+	  with kdb=on or echo "1" > /proc/sys/kernel/kdb. This option also
+	  works in reverse: if kdb is normally activated, you can boot with
+	  kdb=off or echo "0" > /proc/sys/kernel/kdb to deactivate it. If
+ unsure, say N.
+
+config KDB_CONTINUE_CATASTROPHIC
+ int "KDB continues after catastrophic errors"
+ depends on KDB
+ default "0"
+ help
+ This integer controls the behaviour of kdb when the kernel gets a
+ catastrophic error, i.e. for a panic, oops, NMI or other watchdog
+ tripping. CONFIG_KDB_CONTINUE_CATASTROPHIC interacts with
+ /proc/sys/kernel/kdb and CONFIG_LKCD_DUMP (if your kernel has the
+ LKCD patch).
+ When KDB is active (/proc/sys/kernel/kdb == 1) and a catastrophic
+ error occurs, nothing extra happens until you type 'go'.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default). The first time
+ you type 'go', kdb warns you. The second time you type 'go', KDB
+ tries to continue - no guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 1. KDB tries to continue - no
+ guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. If your kernel has the LKCD
+ patch and LKCD is configured to take a dump then KDB forces a dump.
+ Whether or not a dump is taken, KDB forces a reboot.
+ When KDB is not active (/proc/sys/kernel/kdb == 0) and a catastrophic
+ error occurs, the following steps are automatic, no human
+ intervention is required.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default) or 1. KDB attempts
+ to continue - no guarantees that the kernel is still usable.
+ CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. If your kernel has the LKCD
+ patch and LKCD is configured to take a dump then KDB automatically
+ forces a dump. Whether or not a dump is taken, KDB forces a
+ reboot.
+ If you are not sure, say 0. Read Documentation/kdb/dump.txt before
+ setting to 2.
+
+config KDB_USB
+ bool "Support for USB Keyboard in KDB (OHCI and/or EHCI only)"
+ depends on KDB && (USB_OHCI_HCD || USB_EHCI_HCD)
+ help
+ If you want to use kdb from USB keyboards then say Y here. If you
+ say N then kdb can only be used from a PC (AT) keyboard or a serial
+ console.
+
+config KDB_KDUMP
+ bool "Support for Kdump in KDB"
+ depends on KDB
+ select KEXEC
+	default n
+ help
+	  If you want to capture a Kdump kernel vmcore from KDB, say Y here.
+	  If unsure, say N.
+endmenu
--- /dev/null
+ include include/asm-generic/Kbuild.asm
+
+ header-y += boot.h
+ header-y += bootparam.h
+ header-y += debugreg.h
+ header-y += ldt.h
+ header-y += msr-index.h
+ header-y += prctl.h
+ header-y += ptrace-abi.h
+ header-y += sigcontext32.h
+ header-y += ucontext.h
++header-y += perfmon.h
+ header-y += processor-flags.h
+
+ unifdef-y += e820.h
+ unifdef-y += ist.h
+ unifdef-y += mce.h
+ unifdef-y += msr.h
+ unifdef-y += mtrr.h
+ unifdef-y += posix_types_32.h
+ unifdef-y += posix_types_64.h
+ unifdef-y += unistd_32.h
+ unifdef-y += unistd_64.h
+ unifdef-y += vm86.h
+ unifdef-y += vsyscall.h
--- /dev/null
--- /dev/null
++#ifdef CONFIG_X86_32
++# include "ansidecl_32.h"
++#else
++# include "ansidecl_64.h"
++#endif
--- /dev/null
--- /dev/null
++/* ANSI and traditional C compatibility macros
++ Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001
++ Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++This program is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 2 of the License, or
++(at your option) any later version.
++
++This program is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with this program; if not, write to the Free Software
++Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
++
++/* Extracted from binutils 2.16.91.0.2 (OpenSUSE 10.0) and modified for kdb use.
++ * Any trailing whitespace was removed and #ifdef/ifndef __KERNEL__ added as
++ * required.
++ * Keith Owens <kaos@sgi.com> 15 May 2006
++ */
++
++/* ANSI and traditional C compatibility macros
++
++ ANSI C is assumed if __STDC__ is #defined.
++
++ Macro ANSI C definition Traditional C definition
++ ----- ---- - ---------- ----------- - ----------
++ ANSI_PROTOTYPES 1 not defined
++ PTR `void *' `char *'
++ PTRCONST `void *const' `char *'
++ LONG_DOUBLE `long double' `double'
++ const not defined `'
++ volatile not defined `'
++ signed not defined `'
++ VA_START(ap, var) va_start(ap, var) va_start(ap)
++
++ Note that it is safe to write "void foo();" indicating a function
++ with no return value, in all K+R compilers we have been able to test.
++
++ For declaring functions with prototypes, we also provide these:
++
++ PARAMS ((prototype))
++ -- for functions which take a fixed number of arguments. Use this
++ when declaring the function. When defining the function, write a
++ K+R style argument list. For example:
++
++ char *strcpy PARAMS ((char *dest, char *source));
++ ...
++ char *
++ strcpy (dest, source)
++ char *dest;
++ char *source;
++ { ... }
++
++
++ VPARAMS ((prototype, ...))
++ -- for functions which take a variable number of arguments. Use
++ PARAMS to declare the function, VPARAMS to define it. For example:
++
++ int printf PARAMS ((const char *format, ...));
++ ...
++ int
++ printf VPARAMS ((const char *format, ...))
++ {
++ ...
++ }
++
++ For writing functions which take variable numbers of arguments, we
++ also provide the VA_OPEN, VA_CLOSE, and VA_FIXEDARG macros. These
++ hide the differences between K+R <varargs.h> and C89 <stdarg.h> more
++ thoroughly than the simple VA_START() macro mentioned above.
++
++ VA_OPEN and VA_CLOSE are used *instead of* va_start and va_end.
++ Immediately after VA_OPEN, put a sequence of VA_FIXEDARG calls
++ corresponding to the list of fixed arguments. Then use va_arg
++ normally to get the variable arguments, or pass your va_list object
++ around. You do not declare the va_list yourself; VA_OPEN does it
++ for you.
++
++ Here is a complete example:
++
++ int
++ printf VPARAMS ((const char *format, ...))
++ {
++ int result;
++
++ VA_OPEN (ap, format);
++ VA_FIXEDARG (ap, const char *, format);
++
++ result = vfprintf (stdout, format, ap);
++ VA_CLOSE (ap);
++
++ return result;
++ }
++
++
++ You can declare variables either before or after the VA_OPEN,
++ VA_FIXEDARG sequence. Also, VA_OPEN and VA_CLOSE are the beginning
++ and end of a block. They must appear at the same nesting level,
++ and any variables declared after VA_OPEN go out of scope at
++ VA_CLOSE. Unfortunately, with a K+R compiler, that includes the
++ argument list. You can have multiple instances of VA_OPEN/VA_CLOSE
++ pairs in a single function in case you need to traverse the
++ argument list more than once.
++
++ For ease of writing code which uses GCC extensions but needs to be
++ portable to other compilers, we provide the GCC_VERSION macro that
++ simplifies testing __GNUC__ and __GNUC_MINOR__ together, and various
++ wrappers around __attribute__. Also, __extension__ will be #defined
++ to nothing if it doesn't work. See below.
++
++ This header also defines a lot of obsolete macros:
++ CONST, VOLATILE, SIGNED, PROTO, EXFUN, DEFUN, DEFUN_VOID,
++ AND, DOTS, NOARGS. Don't use them. */
++
++#ifndef _ANSIDECL_H
++#define _ANSIDECL_H 1
++
++/* Every source file includes this file,
++ so they will all get the switch for lint. */
++/* LINTLIBRARY */
++
++/* Using MACRO(x,y) in cpp #if conditionals does not work with some
++ older preprocessors. Thus we can't define something like this:
++
++#define HAVE_GCC_VERSION(MAJOR, MINOR) \
++ (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
++
++and then test "#if HAVE_GCC_VERSION(2,7)".
++
++So instead we use the macro below and test it against specific values. */
++
++/* This macro simplifies testing whether we are using gcc, and if it
++ is of a particular minimum version. (Both major & minor numbers are
++ significant.) This macro will evaluate to 0 if we are not using
++ gcc at all. */
++#ifndef GCC_VERSION
++#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__)
++#endif /* GCC_VERSION */
++
++#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) || (defined(__alpha) && defined(__cplusplus))
++/* All known AIX compilers implement these things (but don't always
++ define __STDC__). The RISC/OS MIPS compiler defines these things
++ in SVR4 mode, but does not define __STDC__. */
++/* eraxxon@alumni.rice.edu: The Compaq C++ compiler, unlike many other
++ C++ compilers, does not define __STDC__, though it acts as if this
++ was so. (Verified versions: 5.7, 6.2, 6.3, 6.5) */
++
++#define ANSI_PROTOTYPES 1
++#define PTR void *
++#define PTRCONST void *const
++#define LONG_DOUBLE long double
++
++/* PARAMS is often defined elsewhere (e.g. by libintl.h), so wrap it in
++ a #ifndef. */
++#ifndef PARAMS
++#define PARAMS(ARGS) ARGS
++#endif
++
++#define VPARAMS(ARGS) ARGS
++#define VA_START(VA_LIST, VAR) va_start(VA_LIST, VAR)
++
++/* variadic function helper macros */
++/* "struct Qdmy" swallows the semicolon after VA_OPEN/VA_FIXEDARG's
++ use without inhibiting further decls and without declaring an
++ actual variable. */
++#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP, VAR); { struct Qdmy
++#define VA_CLOSE(AP) } va_end(AP); }
++#define VA_FIXEDARG(AP, T, N) struct Qdmy
++
++#undef const
++#undef volatile
++#undef signed
++
++#ifdef __KERNEL__
++#ifndef __STDC_VERSION__
++#define __STDC_VERSION__ 0
++#endif
++#endif /* __KERNEL__ */
++
++/* inline requires special treatment; it's in C99, and GCC >=2.7 supports
++ it too, but it's not in C89. */
++#undef inline
++#if __STDC_VERSION__ > 199901L
++/* it's a keyword */
++#else
++# if GCC_VERSION >= 2007
++# define inline __inline__ /* __inline__ prevents -pedantic warnings */
++# else
++# define inline /* nothing */
++# endif
++#endif
++
++/* These are obsolete. Do not use. */
++#ifndef IN_GCC
++#define CONST const
++#define VOLATILE volatile
++#define SIGNED signed
++
++#define PROTO(type, name, arglist) type name arglist
++#define EXFUN(name, proto) name proto
++#define DEFUN(name, arglist, args) name(args)
++#define DEFUN_VOID(name) name(void)
++#define AND ,
++#define DOTS , ...
++#define NOARGS void
++#endif /* ! IN_GCC */
++
++#else /* Not ANSI C. */
++
++#undef ANSI_PROTOTYPES
++#define PTR char *
++#define PTRCONST PTR
++#define LONG_DOUBLE double
++
++#define PARAMS(args) ()
++#define VPARAMS(args) (va_alist) va_dcl
++#define VA_START(va_list, var) va_start(va_list)
++
++#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP); { struct Qdmy
++#define VA_CLOSE(AP) } va_end(AP); }
++#define VA_FIXEDARG(AP, TYPE, NAME) TYPE NAME = va_arg(AP, TYPE)
++
++/* some systems define these in header files for non-ansi mode */
++#undef const
++#undef volatile
++#undef signed
++#undef inline
++#define const
++#define volatile
++#define signed
++#define inline
++
++#ifndef IN_GCC
++#define CONST
++#define VOLATILE
++#define SIGNED
++
++#define PROTO(type, name, arglist) type name ()
++#define EXFUN(name, proto) name()
++#define DEFUN(name, arglist, args) name arglist args;
++#define DEFUN_VOID(name) name()
++#define AND ;
++#define DOTS
++#define NOARGS
++#endif /* ! IN_GCC */
++
++#endif /* ANSI C. */
++
++/* Define macros for some gcc attributes. This permits us to use the
++ macros freely, and know that they will come into play for the
++ version of gcc in which they are supported. */
++
++#if (GCC_VERSION < 2007)
++# define __attribute__(x)
++#endif
++
++/* Attribute __malloc__ on functions was valid as of gcc 2.96. */
++#ifndef ATTRIBUTE_MALLOC
++# if (GCC_VERSION >= 2096)
++# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__))
++# else
++# define ATTRIBUTE_MALLOC
++# endif /* GNUC >= 2.96 */
++#endif /* ATTRIBUTE_MALLOC */
++
++/* Attributes on labels were valid as of gcc 2.93. */
++#ifndef ATTRIBUTE_UNUSED_LABEL
++# if (!defined (__cplusplus) && GCC_VERSION >= 2093)
++# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED
++# else
++# define ATTRIBUTE_UNUSED_LABEL
++# endif /* !__cplusplus && GNUC >= 2.93 */
++#endif /* ATTRIBUTE_UNUSED_LABEL */
++
++#ifndef ATTRIBUTE_UNUSED
++#define ATTRIBUTE_UNUSED __attribute__ ((__unused__))
++#endif /* ATTRIBUTE_UNUSED */
++
++/* Before GCC 3.4, the C++ frontend couldn't parse attributes placed after the
++ identifier name. */
++#if ! defined(__cplusplus) || (GCC_VERSION >= 3004)
++# define ARG_UNUSED(NAME) NAME ATTRIBUTE_UNUSED
++#else /* !__cplusplus || GNUC >= 3.4 */
++# define ARG_UNUSED(NAME) NAME
++#endif /* !__cplusplus || GNUC >= 3.4 */
++
++#ifndef ATTRIBUTE_NORETURN
++#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
++#endif /* ATTRIBUTE_NORETURN */
++
++/* Attribute `nonnull' was valid as of gcc 3.3. */
++#ifndef ATTRIBUTE_NONNULL
++# if (GCC_VERSION >= 3003)
++# define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ (m)))
++# else
++# define ATTRIBUTE_NONNULL(m)
++# endif /* GNUC >= 3.3 */
++#endif /* ATTRIBUTE_NONNULL */
++
++/* Attribute `pure' was valid as of gcc 3.0. */
++#ifndef ATTRIBUTE_PURE
++# if (GCC_VERSION >= 3000)
++# define ATTRIBUTE_PURE __attribute__ ((__pure__))
++# else
++# define ATTRIBUTE_PURE
++# endif /* GNUC >= 3.0 */
++#endif /* ATTRIBUTE_PURE */
++
++/* Use ATTRIBUTE_PRINTF when the format specifier must not be NULL.
++ This was the case for the `printf' format attribute by itself
++ before GCC 3.3, but as of 3.3 we need to add the `nonnull'
++ attribute to retain this behavior. */
++#ifndef ATTRIBUTE_PRINTF
++#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n))) ATTRIBUTE_NONNULL(m)
++#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2)
++#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3)
++#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4)
++#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5)
++#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6)
++#endif /* ATTRIBUTE_PRINTF */
++
++/* Use ATTRIBUTE_FPTR_PRINTF when the format attribute is to be set on
++ a function pointer. Format attributes were allowed on function
++ pointers as of gcc 3.1. */
++#ifndef ATTRIBUTE_FPTR_PRINTF
++# if (GCC_VERSION >= 3001)
++# define ATTRIBUTE_FPTR_PRINTF(m, n) ATTRIBUTE_PRINTF(m, n)
++# else
++# define ATTRIBUTE_FPTR_PRINTF(m, n)
++# endif /* GNUC >= 3.1 */
++# define ATTRIBUTE_FPTR_PRINTF_1 ATTRIBUTE_FPTR_PRINTF(1, 2)
++# define ATTRIBUTE_FPTR_PRINTF_2 ATTRIBUTE_FPTR_PRINTF(2, 3)
++# define ATTRIBUTE_FPTR_PRINTF_3 ATTRIBUTE_FPTR_PRINTF(3, 4)
++# define ATTRIBUTE_FPTR_PRINTF_4 ATTRIBUTE_FPTR_PRINTF(4, 5)
++# define ATTRIBUTE_FPTR_PRINTF_5 ATTRIBUTE_FPTR_PRINTF(5, 6)
++#endif /* ATTRIBUTE_FPTR_PRINTF */
++
++/* Use ATTRIBUTE_NULL_PRINTF when the format specifier may be NULL. A
++ NULL format specifier was allowed as of gcc 3.3. */
++#ifndef ATTRIBUTE_NULL_PRINTF
++# if (GCC_VERSION >= 3003)
++# define ATTRIBUTE_NULL_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n)))
++# else
++# define ATTRIBUTE_NULL_PRINTF(m, n)
++# endif /* GNUC >= 3.3 */
++# define ATTRIBUTE_NULL_PRINTF_1 ATTRIBUTE_NULL_PRINTF(1, 2)
++# define ATTRIBUTE_NULL_PRINTF_2 ATTRIBUTE_NULL_PRINTF(2, 3)
++# define ATTRIBUTE_NULL_PRINTF_3 ATTRIBUTE_NULL_PRINTF(3, 4)
++# define ATTRIBUTE_NULL_PRINTF_4 ATTRIBUTE_NULL_PRINTF(4, 5)
++# define ATTRIBUTE_NULL_PRINTF_5 ATTRIBUTE_NULL_PRINTF(5, 6)
++#endif /* ATTRIBUTE_NULL_PRINTF */
++
++/* Attribute `sentinel' was valid as of gcc 3.5. */
++#ifndef ATTRIBUTE_SENTINEL
++# if (GCC_VERSION >= 3005)
++# define ATTRIBUTE_SENTINEL __attribute__ ((__sentinel__))
++# else
++# define ATTRIBUTE_SENTINEL
++# endif /* GNUC >= 3.5 */
++#endif /* ATTRIBUTE_SENTINEL */
++
++
++#ifndef ATTRIBUTE_ALIGNED_ALIGNOF
++# if (GCC_VERSION >= 3000)
++# define ATTRIBUTE_ALIGNED_ALIGNOF(m) __attribute__ ((__aligned__ (__alignof__ (m))))
++# else
++# define ATTRIBUTE_ALIGNED_ALIGNOF(m)
++# endif /* GNUC >= 3.0 */
++#endif /* ATTRIBUTE_ALIGNED_ALIGNOF */
++
++/* We use __extension__ in some places to suppress -pedantic warnings
++ about GCC extensions. This feature didn't work properly before
++ gcc 2.8. */
++#if GCC_VERSION < 2008
++#define __extension__
++#endif
++
++#endif /* ansidecl.h */
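
As a usage sketch (hypothetical function names, assuming an ANSI
compiler for the prototype lines; the <asm/ansidecl.h> path is the
kernel-side wrapper added above), code built against this header
typically combines PARAMS with the attribute macros:

	#include <asm/ansidecl.h>

	/* Prototype degrades gracefully to "()" on a K+R compiler. */
	extern char *kdb_strdup PARAMS ((const char *s));

	/* gcc checks the varargs against the format string in arg 1. */
	extern int kdb_printf (const char *format, ...) ATTRIBUTE_PRINTF_1;

	/* Never returns; lets gcc suppress "control reaches end" warnings. */
	extern void kdb_fatal (const char *msg) ATTRIBUTE_NORETURN;

	/* Portably silence "unused parameter" warnings. */
	static int kdb_handler (int ARG_UNUSED (flags)) { return 0; }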
--- /dev/null
--- /dev/null
++/* ANSI and traditional C compatibility macros
++ Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001
++ Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++This program is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 2 of the License, or
++(at your option) any later version.
++
++This program is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with this program; if not, write to the Free Software
++Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
++
++/* Extracted from binutils 2.16.91.0.2 (OpenSUSE 10.0) and modified for kdb use.
++ * Any trailing whitespace was removed and #ifdef/ifndef __KERNEL__ added as
++ * required.
++ * Keith Owens <kaos@sgi.com> 15 May 2006
++ */
++
++/* ANSI and traditional C compatibility macros
++
++ ANSI C is assumed if __STDC__ is #defined.
++
++ Macro ANSI C definition Traditional C definition
++ ----- ---- - ---------- ----------- - ----------
++ ANSI_PROTOTYPES 1 not defined
++ PTR `void *' `char *'
++ PTRCONST `void *const' `char *'
++ LONG_DOUBLE `long double' `double'
++ const not defined `'
++ volatile not defined `'
++ signed not defined `'
++ VA_START(ap, var) va_start(ap, var) va_start(ap)
++
++ Note that it is safe to write "void foo();" indicating a function
++ with no return value, in all K+R compilers we have been able to test.
++
++ For declaring functions with prototypes, we also provide these:
++
++ PARAMS ((prototype))
++ -- for functions which take a fixed number of arguments. Use this
++ when declaring the function. When defining the function, write a
++ K+R style argument list. For example:
++
++ char *strcpy PARAMS ((char *dest, char *source));
++ ...
++ char *
++ strcpy (dest, source)
++ char *dest;
++ char *source;
++ { ... }
++
++
++ VPARAMS ((prototype, ...))
++ -- for functions which take a variable number of arguments. Use
++ PARAMS to declare the function, VPARAMS to define it. For example:
++
++ int printf PARAMS ((const char *format, ...));
++ ...
++ int
++ printf VPARAMS ((const char *format, ...))
++ {
++ ...
++ }
++
++ For writing functions which take variable numbers of arguments, we
++ also provide the VA_OPEN, VA_CLOSE, and VA_FIXEDARG macros. These
++ hide the differences between K+R <varargs.h> and C89 <stdarg.h> more
++ thoroughly than the simple VA_START() macro mentioned above.
++
++ VA_OPEN and VA_CLOSE are used *instead of* va_start and va_end.
++ Immediately after VA_OPEN, put a sequence of VA_FIXEDARG calls
++ corresponding to the list of fixed arguments. Then use va_arg
++ normally to get the variable arguments, or pass your va_list object
++ around. You do not declare the va_list yourself; VA_OPEN does it
++ for you.
++
++ Here is a complete example:
++
++ int
++ printf VPARAMS ((const char *format, ...))
++ {
++ int result;
++
++ VA_OPEN (ap, format);
++ VA_FIXEDARG (ap, const char *, format);
++
++ result = vfprintf (stdout, format, ap);
++ VA_CLOSE (ap);
++
++ return result;
++ }
++
++
++ You can declare variables either before or after the VA_OPEN,
++ VA_FIXEDARG sequence. Also, VA_OPEN and VA_CLOSE are the beginning
++ and end of a block. They must appear at the same nesting level,
++ and any variables declared after VA_OPEN go out of scope at
++ VA_CLOSE. Unfortunately, with a K+R compiler, that includes the
++ argument list. You can have multiple instances of VA_OPEN/VA_CLOSE
++ pairs in a single function in case you need to traverse the
++ argument list more than once.
++
++ For ease of writing code which uses GCC extensions but needs to be
++ portable to other compilers, we provide the GCC_VERSION macro that
++ simplifies testing __GNUC__ and __GNUC_MINOR__ together, and various
++ wrappers around __attribute__. Also, __extension__ will be #defined
++ to nothing if it doesn't work. See below.
++
++ This header also defines a lot of obsolete macros:
++ CONST, VOLATILE, SIGNED, PROTO, EXFUN, DEFUN, DEFUN_VOID,
++ AND, DOTS, NOARGS. Don't use them. */
++
++#ifndef _ANSIDECL_H
++#define _ANSIDECL_H 1
++
++/* Every source file includes this file,
++ so they will all get the switch for lint. */
++/* LINTLIBRARY */
++
++/* Using MACRO(x,y) in cpp #if conditionals does not work with some
++ older preprocessors. Thus we can't define something like this:
++
++#define HAVE_GCC_VERSION(MAJOR, MINOR) \
++ (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
++
++and then test "#if HAVE_GCC_VERSION(2,7)".
++
++So instead we use the macro below and test it against specific values. */
++
++/* This macro simplifies testing whether we are using gcc, and if it
++ is of a particular minimum version. (Both major & minor numbers are
++ significant.) This macro will evaluate to 0 if we are not using
++ gcc at all. */
++#ifndef GCC_VERSION
++#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__)
++#endif /* GCC_VERSION */
++
++#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) || (defined(__alpha) && defined(__cplusplus))
++/* All known AIX compilers implement these things (but don't always
++ define __STDC__). The RISC/OS MIPS compiler defines these things
++ in SVR4 mode, but does not define __STDC__. */
++/* eraxxon@alumni.rice.edu: The Compaq C++ compiler, unlike many other
++ C++ compilers, does not define __STDC__, though it acts as if this
++ was so. (Verified versions: 5.7, 6.2, 6.3, 6.5) */
++
++#define ANSI_PROTOTYPES 1
++#define PTR void *
++#define PTRCONST void *const
++#define LONG_DOUBLE long double
++
++/* PARAMS is often defined elsewhere (e.g. by libintl.h), so wrap it in
++ a #ifndef. */
++#ifndef PARAMS
++#define PARAMS(ARGS) ARGS
++#endif
++
++#define VPARAMS(ARGS) ARGS
++#define VA_START(VA_LIST, VAR) va_start(VA_LIST, VAR)
++
++/* variadic function helper macros */
++/* "struct Qdmy" swallows the semicolon after VA_OPEN/VA_FIXEDARG's
++ use without inhibiting further decls and without declaring an
++ actual variable. */
++#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP, VAR); { struct Qdmy
++#define VA_CLOSE(AP) } va_end(AP); }
++#define VA_FIXEDARG(AP, T, N) struct Qdmy
++
++#undef const
++#undef volatile
++#undef signed
++
++#ifdef __KERNEL__
++#ifndef __STDC_VERSION__
++#define __STDC_VERSION__ 0
++#endif
++#endif /* __KERNEL__ */
++
++/* inline requires special treatment; it's in C99, and GCC >=2.7 supports
++ it too, but it's not in C89. */
++#undef inline
++#if __STDC_VERSION__ > 199901L
++/* it's a keyword */
++#else
++# if GCC_VERSION >= 2007
++# define inline __inline__ /* __inline__ prevents -pedantic warnings */
++# else
++# define inline /* nothing */
++# endif
++#endif
++
++/* These are obsolete. Do not use. */
++#ifndef IN_GCC
++#define CONST const
++#define VOLATILE volatile
++#define SIGNED signed
++
++#define PROTO(type, name, arglist) type name arglist
++#define EXFUN(name, proto) name proto
++#define DEFUN(name, arglist, args) name(args)
++#define DEFUN_VOID(name) name(void)
++#define AND ,
++#define DOTS , ...
++#define NOARGS void
++#endif /* ! IN_GCC */
++
++#else /* Not ANSI C. */
++
++#undef ANSI_PROTOTYPES
++#define PTR char *
++#define PTRCONST PTR
++#define LONG_DOUBLE double
++
++#define PARAMS(args) ()
++#define VPARAMS(args) (va_alist) va_dcl
++#define VA_START(va_list, var) va_start(va_list)
++
++#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP); { struct Qdmy
++#define VA_CLOSE(AP) } va_end(AP); }
++#define VA_FIXEDARG(AP, TYPE, NAME) TYPE NAME = va_arg(AP, TYPE)
++
++/* some systems define these in header files for non-ansi mode */
++#undef const
++#undef volatile
++#undef signed
++#undef inline
++#define const
++#define volatile
++#define signed
++#define inline
++
++#ifndef IN_GCC
++#define CONST
++#define VOLATILE
++#define SIGNED
++
++#define PROTO(type, name, arglist) type name ()
++#define EXFUN(name, proto) name()
++#define DEFUN(name, arglist, args) name arglist args;
++#define DEFUN_VOID(name) name()
++#define AND ;
++#define DOTS
++#define NOARGS
++#endif /* ! IN_GCC */
++
++#endif /* ANSI C. */
++
++/* Define macros for some gcc attributes. This permits us to use the
++ macros freely, and know that they will come into play for the
++ version of gcc in which they are supported. */
++
++#if (GCC_VERSION < 2007)
++# define __attribute__(x)
++#endif
++
++/* Attribute __malloc__ on functions was valid as of gcc 2.96. */
++#ifndef ATTRIBUTE_MALLOC
++# if (GCC_VERSION >= 2096)
++# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__))
++# else
++# define ATTRIBUTE_MALLOC
++# endif /* GNUC >= 2.96 */
++#endif /* ATTRIBUTE_MALLOC */
++
++/* Attributes on labels were valid as of gcc 2.93. */
++#ifndef ATTRIBUTE_UNUSED_LABEL
++# if (!defined (__cplusplus) && GCC_VERSION >= 2093)
++# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED
++# else
++# define ATTRIBUTE_UNUSED_LABEL
++# endif /* !__cplusplus && GNUC >= 2.93 */
++#endif /* ATTRIBUTE_UNUSED_LABEL */
++
++#ifndef ATTRIBUTE_UNUSED
++#define ATTRIBUTE_UNUSED __attribute__ ((__unused__))
++#endif /* ATTRIBUTE_UNUSED */
++
++/* Before GCC 3.4, the C++ frontend couldn't parse attributes placed after the
++ identifier name. */
++#if ! defined(__cplusplus) || (GCC_VERSION >= 3004)
++# define ARG_UNUSED(NAME) NAME ATTRIBUTE_UNUSED
++#else /* !__cplusplus || GNUC >= 3.4 */
++# define ARG_UNUSED(NAME) NAME
++#endif /* !__cplusplus || GNUC >= 3.4 */
++
++#ifndef ATTRIBUTE_NORETURN
++#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
++#endif /* ATTRIBUTE_NORETURN */
++
++/* Attribute `nonnull' was valid as of gcc 3.3. */
++#ifndef ATTRIBUTE_NONNULL
++# if (GCC_VERSION >= 3003)
++# define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ (m)))
++# else
++# define ATTRIBUTE_NONNULL(m)
++# endif /* GNUC >= 3.3 */
++#endif /* ATTRIBUTE_NONNULL */
++
++/* Attribute `pure' was valid as of gcc 3.0. */
++#ifndef ATTRIBUTE_PURE
++# if (GCC_VERSION >= 3000)
++# define ATTRIBUTE_PURE __attribute__ ((__pure__))
++# else
++# define ATTRIBUTE_PURE
++# endif /* GNUC >= 3.0 */
++#endif /* ATTRIBUTE_PURE */
++
++/* Use ATTRIBUTE_PRINTF when the format specifier must not be NULL.
++ This was the case for the `printf' format attribute by itself
++ before GCC 3.3, but as of 3.3 we need to add the `nonnull'
++ attribute to retain this behavior. */
++#ifndef ATTRIBUTE_PRINTF
++#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n))) ATTRIBUTE_NONNULL(m)
++#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2)
++#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3)
++#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4)
++#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5)
++#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6)
++#endif /* ATTRIBUTE_PRINTF */
++
++/* Use ATTRIBUTE_FPTR_PRINTF when the format attribute is to be set on
++ a function pointer. Format attributes were allowed on function
++ pointers as of gcc 3.1. */
++#ifndef ATTRIBUTE_FPTR_PRINTF
++# if (GCC_VERSION >= 3001)
++# define ATTRIBUTE_FPTR_PRINTF(m, n) ATTRIBUTE_PRINTF(m, n)
++# else
++# define ATTRIBUTE_FPTR_PRINTF(m, n)
++# endif /* GNUC >= 3.1 */
++# define ATTRIBUTE_FPTR_PRINTF_1 ATTRIBUTE_FPTR_PRINTF(1, 2)
++# define ATTRIBUTE_FPTR_PRINTF_2 ATTRIBUTE_FPTR_PRINTF(2, 3)
++# define ATTRIBUTE_FPTR_PRINTF_3 ATTRIBUTE_FPTR_PRINTF(3, 4)
++# define ATTRIBUTE_FPTR_PRINTF_4 ATTRIBUTE_FPTR_PRINTF(4, 5)
++# define ATTRIBUTE_FPTR_PRINTF_5 ATTRIBUTE_FPTR_PRINTF(5, 6)
++#endif /* ATTRIBUTE_FPTR_PRINTF */
++
++/* Use ATTRIBUTE_NULL_PRINTF when the format specifier may be NULL. A
++ NULL format specifier was allowed as of gcc 3.3. */
++#ifndef ATTRIBUTE_NULL_PRINTF
++# if (GCC_VERSION >= 3003)
++# define ATTRIBUTE_NULL_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n)))
++# else
++# define ATTRIBUTE_NULL_PRINTF(m, n)
++# endif /* GNUC >= 3.3 */
++# define ATTRIBUTE_NULL_PRINTF_1 ATTRIBUTE_NULL_PRINTF(1, 2)
++# define ATTRIBUTE_NULL_PRINTF_2 ATTRIBUTE_NULL_PRINTF(2, 3)
++# define ATTRIBUTE_NULL_PRINTF_3 ATTRIBUTE_NULL_PRINTF(3, 4)
++# define ATTRIBUTE_NULL_PRINTF_4 ATTRIBUTE_NULL_PRINTF(4, 5)
++# define ATTRIBUTE_NULL_PRINTF_5 ATTRIBUTE_NULL_PRINTF(5, 6)
++#endif /* ATTRIBUTE_NULL_PRINTF */
++
++/* Attribute `sentinel' was valid as of gcc 3.5. */
++#ifndef ATTRIBUTE_SENTINEL
++# if (GCC_VERSION >= 3005)
++# define ATTRIBUTE_SENTINEL __attribute__ ((__sentinel__))
++# else
++# define ATTRIBUTE_SENTINEL
++# endif /* GNUC >= 3.5 */
++#endif /* ATTRIBUTE_SENTINEL */
++
++
++#ifndef ATTRIBUTE_ALIGNED_ALIGNOF
++# if (GCC_VERSION >= 3000)
++# define ATTRIBUTE_ALIGNED_ALIGNOF(m) __attribute__ ((__aligned__ (__alignof__ (m))))
++# else
++# define ATTRIBUTE_ALIGNED_ALIGNOF(m)
++# endif /* GNUC >= 3.0 */
++#endif /* ATTRIBUTE_ALIGNED_ALIGNOF */
++
++/* We use __extension__ in some places to suppress -pedantic warnings
++ about GCC extensions. This feature didn't work properly before
++ gcc 2.8. */
++#if GCC_VERSION < 2008
++#define __extension__
++#endif
++
++#endif /* ansidecl.h */
--- /dev/null
--- /dev/null
++#ifdef CONFIG_X86_32
++# include "bfd_32.h"
++#else
++# include "bfd_64.h"
++#endif
--- /dev/null
--- /dev/null
++/* DO NOT EDIT! -*- buffer-read-only: t -*- This file is automatically
++ generated from "bfd-in.h", "init.c", "opncls.c", "libbfd.c",
++ "bfdio.c", "bfdwin.c", "section.c", "archures.c", "reloc.c",
++ "syms.c", "bfd.c", "archive.c", "corefile.c", "targets.c", "format.c",
++ "linker.c" and "simple.c".
++ Run "make headers" in your build bfd/ to regenerate. */
++
++/* Main header file for the bfd library -- portable access to object files.
++
++ Copyright 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
++ 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
++
++ Contributed by Cygnus Support.
++
++ This file is part of BFD, the Binary File Descriptor library.
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2 of the License, or
++ (at your option) any later version.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
++
++/* Extracted from binutils 2.16.91.0.2 (OpenSUSE 10.0) and modified for kdb use.
++ * Any trailing whitespace was removed and #ifdef/ifndef __KERNEL__ added as
++ * required.
++ * Keith Owens <kaos@sgi.com> 15 May 2006
++ */
++
++#ifndef __BFD_H_SEEN__
++#define __BFD_H_SEEN__
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#ifdef __KERNEL__
++#include <asm/ansidecl.h>
++#else /* __KERNEL__ */
++#include "ansidecl.h"
++#include "symcat.h"
++#endif /* __KERNEL__ */
++#if defined (__STDC__) || defined (ALMOST_STDC) || defined (HAVE_STRINGIZE)
++#ifndef SABER
++/* This hack is to avoid a problem with some strict ANSI C preprocessors.
++ The problem is, "32_" is not a valid preprocessing token, and we don't
++ want extra underscores (e.g., "nlm_32_"). The XCONCAT2 macro will
++ cause the inner CONCAT2 macros to be evaluated first, producing
++ still-valid pp-tokens. Then the final concatenation can be done. */
++#undef CONCAT4
++#define CONCAT4(a,b,c,d) XCONCAT2(CONCAT2(a,b),CONCAT2(c,d))
++#endif
++#endif
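++/* Illustration of the hack above: CONCAT4 (nlm, 32, _, swap) first
++   pastes nlm##32 -> nlm32 and _##swap -> _swap, then XCONCAT2 joins
++   them as nlm32_swap.  A naive a##b##c##d would form the intermediate
++   token "32_", which strict ANSI preprocessors may reject.  */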
++
++/* The word size used by BFD on the host. This may be 64 with a 32
++ bit target if the host is 64 bit, or if other 64 bit targets have
++ been selected with --enable-targets, or if --enable-64-bit-bfd. */
++#ifdef __KERNEL__
++#define BFD_ARCH_SIZE 32
++#else /* __KERNEL__ */
++#define BFD_ARCH_SIZE 64
++#endif /* __KERNEL__ */
++
++/* The word size of the default bfd target. */
++#define BFD_DEFAULT_TARGET_SIZE 32
++
++#define BFD_HOST_64BIT_LONG 0
++#define BFD_HOST_LONG_LONG 1
++#if 1
++#define BFD_HOST_64_BIT long long
++#define BFD_HOST_U_64_BIT unsigned long long
++typedef BFD_HOST_64_BIT bfd_int64_t;
++typedef BFD_HOST_U_64_BIT bfd_uint64_t;
++#endif
++
++#if BFD_ARCH_SIZE >= 64
++#define BFD64
++#endif
++
++#ifndef INLINE
++#if __GNUC__ >= 2
++#define INLINE __inline__
++#else
++#define INLINE
++#endif
++#endif
++
++/* Forward declaration. */
++typedef struct bfd bfd;
++
++/* Boolean type used in bfd. Too many systems define their own
++ versions of "boolean" for us to safely typedef a "boolean" of
++ our own. Using an enum for "bfd_boolean" has its own set of
++ problems, with strange looking casts required to avoid warnings
++ on some older compilers. Thus we just use an int.
++
++ General rule: Functions which are bfd_boolean return TRUE on
++ success and FALSE on failure (unless they're a predicate). */
++
++typedef int bfd_boolean;
++#undef FALSE
++#undef TRUE
++#define FALSE 0
++#define TRUE 1
++
++#ifdef BFD64
++
++#ifndef BFD_HOST_64_BIT
++ #error No 64 bit integer type available
++#endif /* ! defined (BFD_HOST_64_BIT) */
++
++typedef BFD_HOST_U_64_BIT bfd_vma;
++typedef BFD_HOST_64_BIT bfd_signed_vma;
++typedef BFD_HOST_U_64_BIT bfd_size_type;
++typedef BFD_HOST_U_64_BIT symvalue;
++
++#ifndef fprintf_vma
++#if BFD_HOST_64BIT_LONG
++#define sprintf_vma(s,x) sprintf (s, "%016lx", x)
++#define fprintf_vma(f,x) fprintf (f, "%016lx", x)
++#else
++#define _bfd_int64_low(x) ((unsigned long) (((x) & 0xffffffff)))
++#define _bfd_int64_high(x) ((unsigned long) (((x) >> 32) & 0xffffffff))
++#define fprintf_vma(s,x) \
++ fprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x))
++#define sprintf_vma(s,x) \
++ sprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x))
++#endif
++#endif
++
++#else /* not BFD64 */
++
++/* Represent a target address. Also used as a generic unsigned type
++ which is guaranteed to be big enough to hold any arithmetic types
++ we need to deal with. */
++typedef unsigned long bfd_vma;
++
++/* A generic signed type which is guaranteed to be big enough to hold any
++ arithmetic types we need to deal with. Can be assumed to be compatible
++ with bfd_vma in the same way that signed and unsigned ints are compatible
++ (as parameters, in assignment, etc). */
++typedef long bfd_signed_vma;
++
++typedef unsigned long symvalue;
++typedef unsigned long bfd_size_type;
++
++/* Print a bfd_vma x on stream s. */
++#define fprintf_vma(s,x) fprintf (s, "%08lx", x)
++#define sprintf_vma(s,x) sprintf (s, "%08lx", x)
++
++#endif /* not BFD64 */
++
++#define HALF_BFD_SIZE_TYPE \
++ (((bfd_size_type) 1) << (8 * sizeof (bfd_size_type) / 2))
++
++#ifndef BFD_HOST_64_BIT
++/* Fall back on a 32 bit type. The idea is to make these types always
++ available for function return types, but in the case that
++ BFD_HOST_64_BIT is undefined such a function should abort or
++ otherwise signal an error. */
++typedef bfd_signed_vma bfd_int64_t;
++typedef bfd_vma bfd_uint64_t;
++#endif
++
++/* An offset into a file. BFD always uses the largest possible offset
++ based on the build time availability of fseek, fseeko, or fseeko64. */
++typedef BFD_HOST_64_BIT file_ptr;
++typedef unsigned BFD_HOST_64_BIT ufile_ptr;
++
++extern void bfd_sprintf_vma (bfd *, char *, bfd_vma);
++extern void bfd_fprintf_vma (bfd *, void *, bfd_vma);
++
++#define printf_vma(x) fprintf_vma(stdout,x)
++#define bfd_printf_vma(abfd,x) bfd_fprintf_vma (abfd,stdout,x)
++
++typedef unsigned int flagword; /* 32 bits of flags */
++typedef unsigned char bfd_byte;
++\f
++/* File formats. */
++
++typedef enum bfd_format
++{
++ bfd_unknown = 0, /* File format is unknown. */
++ bfd_object, /* Linker/assembler/compiler output. */
++ bfd_archive, /* Object archive file. */
++ bfd_core, /* Core dump. */
++ bfd_type_end /* Marks the end; don't use it! */
++}
++bfd_format;
++
++/* Values that may appear in the flags field of a BFD. These also
++ appear in the object_flags field of the bfd_target structure, where
++ they indicate the set of flags used by that backend (not all flags
++ are meaningful for all object file formats) (FIXME: at the moment,
++   the object_flags values have mostly just been copied from one
++   backend to another, and are not necessarily correct). */
++
++/* No flags. */
++#define BFD_NO_FLAGS 0x00
++
++/* BFD contains relocation entries. */
++#define HAS_RELOC 0x01
++
++/* BFD is directly executable. */
++#define EXEC_P 0x02
++
++/* BFD has line number information (basically used for F_LNNO in a
++ COFF header). */
++#define HAS_LINENO 0x04
++
++/* BFD has debugging information. */
++#define HAS_DEBUG 0x08
++
++/* BFD has symbols. */
++#define HAS_SYMS 0x10
++
++/* BFD has local symbols (basically used for F_LSYMS in a COFF
++ header). */
++#define HAS_LOCALS 0x20
++
++/* BFD is a dynamic object. */
++#define DYNAMIC 0x40
++
++/* Text section is write protected (if D_PAGED is not set, this is
++ like an a.out NMAGIC file) (the linker sets this by default, but
++ clears it for -r or -N). */
++#define WP_TEXT 0x80
++
++/* BFD is dynamically paged (this is like an a.out ZMAGIC file) (the
++ linker sets this by default, but clears it for -r or -n or -N). */
++#define D_PAGED 0x100
++
++/* BFD is relaxable (this means that bfd_relax_section may be able to
++ do something) (sometimes bfd_relax_section can do something even if
++ this is not set). */
++#define BFD_IS_RELAXABLE 0x200
++
++/* This may be set before writing out a BFD to request using a
++ traditional format. For example, this is used to request that when
++ writing out an a.out object the symbols not be hashed to eliminate
++ duplicates. */
++#define BFD_TRADITIONAL_FORMAT 0x400
++
++/* This flag indicates that the BFD contents are actually cached in
++ memory. If this is set, iostream points to a bfd_in_memory struct. */
++#define BFD_IN_MEMORY 0x800
++
++/* The sections in this BFD specify a memory page. */
++#define HAS_LOAD_PAGE 0x1000
++
++/* This BFD has been created by the linker and doesn't correspond
++ to any input file. */
++#define BFD_LINKER_CREATED 0x2000
++\f
++/* Symbols and relocation. */
++
++/* A count of carsyms (canonical archive symbols). */
++typedef unsigned long symindex;
++
++/* How to perform a relocation. */
++typedef const struct reloc_howto_struct reloc_howto_type;
++
++#define BFD_NO_MORE_SYMBOLS ((symindex) ~0)
++
++/* General purpose part of a symbol X;
++ target specific parts are in libcoff.h, libaout.h, etc. */
++
++#define bfd_get_section(x) ((x)->section)
++#define bfd_get_output_section(x) ((x)->section->output_section)
++#define bfd_set_section(x,y) ((x)->section) = (y)
++#define bfd_asymbol_base(x) ((x)->section->vma)
++#define bfd_asymbol_value(x) (bfd_asymbol_base(x) + (x)->value)
++#define bfd_asymbol_name(x) ((x)->name)
++/*Perhaps future: #define bfd_asymbol_bfd(x) ((x)->section->owner)*/
++#define bfd_asymbol_bfd(x) ((x)->the_bfd)
++#define bfd_asymbol_flavour(x) (bfd_asymbol_bfd(x)->xvec->flavour)
++
++/* A canonical archive symbol. */
++/* This is a type pun with struct ranlib on purpose! */
++typedef struct carsym
++{
++ char *name;
++ file_ptr file_offset; /* Look here to find the file. */
++}
++carsym; /* To make these you call a carsymogen. */
++
++/* Used in generating armaps (archive tables of contents).
++ Perhaps just a forward definition would do? */
++struct orl /* Output ranlib. */
++{
++ char **name; /* Symbol name. */
++ union
++ {
++ file_ptr pos;
++ bfd *abfd;
++ } u; /* bfd* or file position. */
++ int namidx; /* Index into string table. */
++};
++\f
++/* Linenumber stuff. */
++typedef struct lineno_cache_entry
++{
++ unsigned int line_number; /* Linenumber from start of function. */
++ union
++ {
++ struct bfd_symbol *sym; /* Function name. */
++ bfd_vma offset; /* Offset into section. */
++ } u;
++}
++alent;
++\f
++/* Object and core file sections. */
++
++#define align_power(addr, align) \
++ (((addr) + ((bfd_vma) 1 << (align)) - 1) & ((bfd_vma) -1 << (align)))
++
++typedef struct bfd_section *sec_ptr;
++
++#define bfd_get_section_name(bfd, ptr) ((ptr)->name + 0)
++#define bfd_get_section_vma(bfd, ptr) ((ptr)->vma + 0)
++#define bfd_get_section_lma(bfd, ptr) ((ptr)->lma + 0)
++#define bfd_get_section_alignment(bfd, ptr) ((ptr)->alignment_power + 0)
++#define bfd_section_name(bfd, ptr) ((ptr)->name)
++#define bfd_section_size(bfd, ptr) ((ptr)->size)
++#define bfd_get_section_size(ptr) ((ptr)->size)
++#define bfd_section_vma(bfd, ptr) ((ptr)->vma)
++#define bfd_section_lma(bfd, ptr) ((ptr)->lma)
++#define bfd_section_alignment(bfd, ptr) ((ptr)->alignment_power)
++#define bfd_get_section_flags(bfd, ptr) ((ptr)->flags + 0)
++#define bfd_get_section_userdata(bfd, ptr) ((ptr)->userdata)
++
++#define bfd_is_com_section(ptr) (((ptr)->flags & SEC_IS_COMMON) != 0)
++
++#define bfd_set_section_vma(bfd, ptr, val) (((ptr)->vma = (ptr)->lma = (val)), ((ptr)->user_set_vma = TRUE), TRUE)
++#define bfd_set_section_alignment(bfd, ptr, val) (((ptr)->alignment_power = (val)),TRUE)
++#define bfd_set_section_userdata(bfd, ptr, val) (((ptr)->userdata = (val)),TRUE)
++/* Find the address one past the end of SEC. */
++#define bfd_get_section_limit(bfd, sec) \
++ (((sec)->rawsize ? (sec)->rawsize : (sec)->size) \
++ / bfd_octets_per_byte (bfd))
++
++typedef struct stat stat_type;
++\f
++typedef enum bfd_print_symbol
++{
++ bfd_print_symbol_name,
++ bfd_print_symbol_more,
++ bfd_print_symbol_all
++} bfd_print_symbol_type;
++
++/* Information about a symbol that nm needs. */
++
++typedef struct _symbol_info
++{
++ symvalue value;
++ char type;
++ const char *name; /* Symbol name. */
++ unsigned char stab_type; /* Stab type. */
++ char stab_other; /* Stab other. */
++ short stab_desc; /* Stab desc. */
++ const char *stab_name; /* String for stab type. */
++} symbol_info;
++
++/* Get the name of a stabs type code. */
++
++extern const char *bfd_get_stab_name (int);
++\f
++/* Hash table routines. There is no way to free up a hash table. */
++
++/* An element in the hash table. Most uses will actually use a larger
++ structure, and an instance of this will be the first field. */
++
++struct bfd_hash_entry
++{
++ /* Next entry for this hash code. */
++ struct bfd_hash_entry *next;
++ /* String being hashed. */
++ const char *string;
++ /* Hash code. This is the full hash code, not the index into the
++ table. */
++ unsigned long hash;
++};
++
++/* A hash table. */
++
++struct bfd_hash_table
++{
++ /* The hash array. */
++ struct bfd_hash_entry **table;
++ /* The number of slots in the hash table. */
++ unsigned int size;
++ /* A function used to create new elements in the hash table. The
++ first entry is itself a pointer to an element. When this
++ function is first invoked, this pointer will be NULL. However,
++ having the pointer permits a hierarchy of method functions to be
++ built each of which calls the function in the superclass. Thus
++ each function should be written to allocate a new block of memory
++ only if the argument is NULL. */
++ struct bfd_hash_entry *(*newfunc)
++ (struct bfd_hash_entry *, struct bfd_hash_table *, const char *);
++ /* An objalloc for this hash table. This is a struct objalloc *,
++ but we use void * to avoid requiring the inclusion of objalloc.h. */
++ void *memory;
++};
++
++/* Initialize a hash table. */
++extern bfd_boolean bfd_hash_table_init
++ (struct bfd_hash_table *,
++ struct bfd_hash_entry *(*) (struct bfd_hash_entry *,
++ struct bfd_hash_table *,
++ const char *));
++
++/* Initialize a hash table specifying a size. */
++extern bfd_boolean bfd_hash_table_init_n
++ (struct bfd_hash_table *,
++ struct bfd_hash_entry *(*) (struct bfd_hash_entry *,
++ struct bfd_hash_table *,
++ const char *),
++ unsigned int size);
++
++/* Free up a hash table. */
++extern void bfd_hash_table_free
++ (struct bfd_hash_table *);
++
++/* Look up a string in a hash table. If CREATE is TRUE, a new entry
++ will be created for this string if one does not already exist. The
++ COPY argument must be TRUE if this routine should copy the string
++ into newly allocated memory when adding an entry. */
++extern struct bfd_hash_entry *bfd_hash_lookup
++ (struct bfd_hash_table *, const char *, bfd_boolean create,
++ bfd_boolean copy);
++
++/* Replace an entry in a hash table. */
++extern void bfd_hash_replace
++ (struct bfd_hash_table *, struct bfd_hash_entry *old,
++ struct bfd_hash_entry *nw);
++
++/* Base method for creating a hash table entry. */
++extern struct bfd_hash_entry *bfd_hash_newfunc
++ (struct bfd_hash_entry *, struct bfd_hash_table *, const char *);
++
++/* Grab some space for a hash table entry. */
++extern void *bfd_hash_allocate
++ (struct bfd_hash_table *, unsigned int);
++
++/* Traverse a hash table in a random order, calling a function on each
++ element. If the function returns FALSE, the traversal stops. The
++ INFO argument is passed to the function. */
++extern void bfd_hash_traverse
++ (struct bfd_hash_table *,
++ bfd_boolean (*) (struct bfd_hash_entry *, void *),
++ void *info);
++
++/* Allows the default size of a hash table to be configured. New hash
++ tables allocated using bfd_hash_table_init will be created with
++ this size. */
++extern void bfd_hash_set_default_size (bfd_size_type);
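++
++/* A minimal derived-entry sketch of the newfunc contract described
++   above ("my_entry" and "my_newfunc" are illustrative names):
++
++       struct my_entry
++       {
++         struct bfd_hash_entry root;   /* must be the first field */
++         int refcount;
++       };
++
++       static struct bfd_hash_entry *
++       my_newfunc (struct bfd_hash_entry *entry,
++                   struct bfd_hash_table *table, const char *string)
++       {
++         /* Allocate only if no subclass has done so already.  */
++         if (entry == NULL)
++           entry = bfd_hash_allocate (table, sizeof (struct my_entry));
++         if (entry == NULL)
++           return NULL;
++         /* Let the base class initialise its part, then ours.  */
++         entry = bfd_hash_newfunc (entry, table, string);
++         if (entry != NULL)
++           ((struct my_entry *) entry)->refcount = 0;
++         return entry;
++       }
++
++   Then bfd_hash_table_init (&table, my_newfunc) registers it, and
++   bfd_hash_lookup (&table, "name", TRUE, TRUE) creates entries.  */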
++
++/* This structure is used to keep track of stabs in sections
++ information while linking. */
++
++struct stab_info
++{
++ /* A hash table used to hold stabs strings. */
++ struct bfd_strtab_hash *strings;
++ /* The header file hash table. */
++ struct bfd_hash_table includes;
++ /* The first .stabstr section. */
++ struct bfd_section *stabstr;
++};
++
++#define COFF_SWAP_TABLE (void *) &bfd_coff_std_swap_table
++
++/* User program access to BFD facilities. */
++
++/* Direct I/O routines, for programs which know more about the object
++ file than BFD does. Use higher level routines if possible. */
++
++extern bfd_size_type bfd_bread (void *, bfd_size_type, bfd *);
++extern bfd_size_type bfd_bwrite (const void *, bfd_size_type, bfd *);
++extern int bfd_seek (bfd *, file_ptr, int);
++extern file_ptr bfd_tell (bfd *);
++extern int bfd_flush (bfd *);
++extern int bfd_stat (bfd *, struct stat *);
++
++/* Deprecated old routines. */
++#if __GNUC__
++#define bfd_read(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_read", __FILE__, __LINE__, __FUNCTION__), \
++ bfd_bread ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#define bfd_write(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_write", __FILE__, __LINE__, __FUNCTION__), \
++ bfd_bwrite ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#else
++#define bfd_read(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_read", (const char *) 0, 0, (const char *) 0), \
++ bfd_bread ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#define bfd_write(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_write", (const char *) 0, 0, (const char *) 0),\
++ bfd_bwrite ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#endif
++extern void warn_deprecated (const char *, const char *, int, const char *);
++
++/* Cast from const char * to char * so that caller can assign to
++ a char * without a warning. */
++#define bfd_get_filename(abfd) ((char *) (abfd)->filename)
++#define bfd_get_cacheable(abfd) ((abfd)->cacheable)
++#define bfd_get_format(abfd) ((abfd)->format)
++#define bfd_get_target(abfd) ((abfd)->xvec->name)
++#define bfd_get_flavour(abfd) ((abfd)->xvec->flavour)
++#define bfd_family_coff(abfd) \
++ (bfd_get_flavour (abfd) == bfd_target_coff_flavour || \
++ bfd_get_flavour (abfd) == bfd_target_xcoff_flavour)
++#define bfd_big_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_BIG)
++#define bfd_little_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_LITTLE)
++#define bfd_header_big_endian(abfd) \
++ ((abfd)->xvec->header_byteorder == BFD_ENDIAN_BIG)
++#define bfd_header_little_endian(abfd) \
++ ((abfd)->xvec->header_byteorder == BFD_ENDIAN_LITTLE)
++#define bfd_get_file_flags(abfd) ((abfd)->flags)
++#define bfd_applicable_file_flags(abfd) ((abfd)->xvec->object_flags)
++#define bfd_applicable_section_flags(abfd) ((abfd)->xvec->section_flags)
++#define bfd_my_archive(abfd) ((abfd)->my_archive)
++#define bfd_has_map(abfd) ((abfd)->has_armap)
++
++#define bfd_valid_reloc_types(abfd) ((abfd)->xvec->valid_reloc_types)
++#define bfd_usrdata(abfd) ((abfd)->usrdata)
++
++#define bfd_get_start_address(abfd) ((abfd)->start_address)
++#define bfd_get_symcount(abfd) ((abfd)->symcount)
++#define bfd_get_outsymbols(abfd) ((abfd)->outsymbols)
++#define bfd_count_sections(abfd) ((abfd)->section_count)
++
++#define bfd_get_dynamic_symcount(abfd) ((abfd)->dynsymcount)
++
++#define bfd_get_symbol_leading_char(abfd) ((abfd)->xvec->symbol_leading_char)
++
++#define bfd_set_cacheable(abfd,bool) (((abfd)->cacheable = bool), TRUE)
++
++extern bfd_boolean bfd_cache_close
++ (bfd *abfd);
++/* NB: This declaration should match the autogenerated one in libbfd.h. */
++
++extern bfd_boolean bfd_cache_close_all (void);
++
++extern bfd_boolean bfd_record_phdr
++ (bfd *, unsigned long, bfd_boolean, flagword, bfd_boolean, bfd_vma,
++ bfd_boolean, bfd_boolean, unsigned int, struct bfd_section **);
++
++/* Byte swapping routines. */
++
++bfd_uint64_t bfd_getb64 (const void *);
++bfd_uint64_t bfd_getl64 (const void *);
++bfd_int64_t bfd_getb_signed_64 (const void *);
++bfd_int64_t bfd_getl_signed_64 (const void *);
++bfd_vma bfd_getb32 (const void *);
++bfd_vma bfd_getl32 (const void *);
++bfd_signed_vma bfd_getb_signed_32 (const void *);
++bfd_signed_vma bfd_getl_signed_32 (const void *);
++bfd_vma bfd_getb16 (const void *);
++bfd_vma bfd_getl16 (const void *);
++bfd_signed_vma bfd_getb_signed_16 (const void *);
++bfd_signed_vma bfd_getl_signed_16 (const void *);
++void bfd_putb64 (bfd_uint64_t, void *);
++void bfd_putl64 (bfd_uint64_t, void *);
++void bfd_putb32 (bfd_vma, void *);
++void bfd_putl32 (bfd_vma, void *);
++void bfd_putb16 (bfd_vma, void *);
++void bfd_putl16 (bfd_vma, void *);
++
++/* Byte swapping routines which take size and endianness as arguments. */
++
++bfd_uint64_t bfd_get_bits (const void *, int, bfd_boolean);
++void bfd_put_bits (bfd_uint64_t, void *, int, bfd_boolean);
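++
++/* Usage sketch: for unsigned char buf[4] = { 0x12, 0x34, 0x56, 0x78 },
++   bfd_getb32 (buf) yields 0x12345678 (big-endian read) while
++   bfd_getl32 (buf) yields 0x78563412 (little-endian read); the generic
++   bfd_get_bits (buf, 32, TRUE) matches the big-endian case, assuming
++   the final argument selects big-endian when TRUE.  */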
++
++extern bfd_boolean bfd_section_already_linked_table_init (void);
++extern void bfd_section_already_linked_table_free (void);
++\f
++/* Externally visible ECOFF routines. */
++
++#if defined(__STDC__) || defined(ALMOST_STDC)
++struct ecoff_debug_info;
++struct ecoff_debug_swap;
++struct ecoff_extr;
++struct bfd_symbol;
++struct bfd_link_info;
++struct bfd_link_hash_entry;
++struct bfd_elf_version_tree;
++#endif
++extern bfd_vma bfd_ecoff_get_gp_value
++ (bfd * abfd);
++extern bfd_boolean bfd_ecoff_set_gp_value
++ (bfd *abfd, bfd_vma gp_value);
++extern bfd_boolean bfd_ecoff_set_regmasks
++ (bfd *abfd, unsigned long gprmask, unsigned long fprmask,
++ unsigned long *cprmask);
++extern void *bfd_ecoff_debug_init
++ (bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, struct bfd_link_info *);
++extern void bfd_ecoff_debug_free
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_accumulate
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, bfd *input_bfd,
++ struct ecoff_debug_info *input_debug,
++ const struct ecoff_debug_swap *input_swap, struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_accumulate_other
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, bfd *input_bfd,
++ struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_externals
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, bfd_boolean relocatable,
++ bfd_boolean (*get_extr) (struct bfd_symbol *, struct ecoff_extr *),
++ void (*set_index) (struct bfd_symbol *, bfd_size_type));
++extern bfd_boolean bfd_ecoff_debug_one_external
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, const char *name,
++ struct ecoff_extr *esym);
++extern bfd_size_type bfd_ecoff_debug_size
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap);
++extern bfd_boolean bfd_ecoff_write_debug
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, file_ptr where);
++extern bfd_boolean bfd_ecoff_write_accumulated_debug
++ (void *handle, bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap,
++ struct bfd_link_info *info, file_ptr where);
++
++/* Externally visible ELF routines. */
++
++struct bfd_link_needed_list
++{
++ struct bfd_link_needed_list *next;
++ bfd *by;
++ const char *name;
++};
++
++enum dynamic_lib_link_class {
++ DYN_NORMAL = 0,
++ DYN_AS_NEEDED = 1,
++ DYN_DT_NEEDED = 2,
++ DYN_NO_ADD_NEEDED = 4,
++ DYN_NO_NEEDED = 8
++};
++
++extern bfd_boolean bfd_elf_record_link_assignment
++ (struct bfd_link_info *, const char *, bfd_boolean);
++extern struct bfd_link_needed_list *bfd_elf_get_needed_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_elf_get_bfd_needed_list
++ (bfd *, struct bfd_link_needed_list **);
++extern bfd_boolean bfd_elf_size_dynamic_sections
++ (bfd *, const char *, const char *, const char *, const char * const *,
++ struct bfd_link_info *, struct bfd_section **,
++ struct bfd_elf_version_tree *);
++extern bfd_boolean bfd_elf_size_dynsym_hash_dynstr
++ (bfd *, struct bfd_link_info *);
++extern void bfd_elf_set_dt_needed_name
++ (bfd *, const char *);
++extern const char *bfd_elf_get_dt_soname
++ (bfd *);
++extern void bfd_elf_set_dyn_lib_class
++ (bfd *, int);
++extern int bfd_elf_get_dyn_lib_class
++ (bfd *);
++extern struct bfd_link_needed_list *bfd_elf_get_runpath_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_elf_discard_info
++ (bfd *, struct bfd_link_info *);
++extern unsigned int _bfd_elf_default_action_discarded
++ (struct bfd_section *);
++
++/* Return an upper bound on the number of bytes required to store a
++ copy of ABFD's program header table entries. Return -1 if an error
++ occurs; bfd_get_error will return an appropriate code. */
++extern long bfd_get_elf_phdr_upper_bound
++ (bfd *abfd);
++
++/* Copy ABFD's program header table entries to *PHDRS. The entries
++ will be stored as an array of Elf_Internal_Phdr structures, as
++ defined in include/elf/internal.h. To find out how large the
++ buffer needs to be, call bfd_get_elf_phdr_upper_bound.
++
++ Return the number of program header table entries read, or -1 if an
++ error occurs; bfd_get_error will return an appropriate code. */
++extern int bfd_get_elf_phdrs
++ (bfd *abfd, void *phdrs);
++
++/* Create a new BFD as if by bfd_openr. Rather than opening a file,
++ reconstruct an ELF file by reading the segments out of remote memory
++ based on the ELF file header at EHDR_VMA and the ELF program headers it
++ points to. If not null, *LOADBASEP is filled in with the difference
++ between the VMAs from which the segments were read, and the VMAs the
++ file headers (and hence BFD's idea of each section's VMA) put them at.
++
++ The function TARGET_READ_MEMORY is called to copy LEN bytes from the
++ remote memory at target address VMA into the local buffer at MYADDR; it
++ should return zero on success or an `errno' code on failure. TEMPL must
++ be a BFD for an ELF target with the word size and byte order found in
++ the remote memory. */
++extern bfd *bfd_elf_bfd_from_remote_memory
++ (bfd *templ, bfd_vma ehdr_vma, bfd_vma *loadbasep,
++ int (*target_read_memory) (bfd_vma vma, bfd_byte *myaddr, int len));
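++
++/* A sketch of a conforming TARGET_READ_MEMORY callback, per the
++   contract above (read_target_ram is a hypothetical transport):
++
++       static int my_read (bfd_vma vma, bfd_byte *myaddr, int len)
++       {
++         return read_target_ram (vma, myaddr, len) == 0 ? 0 : EIO;
++       }
++
++   used as bfd_elf_bfd_from_remote_memory (templ, ehdr_vma,
++   &loadbase, my_read).  */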
++
++/* Return the arch_size field of an elf bfd, or -1 if not elf. */
++extern int bfd_get_arch_size
++ (bfd *);
++
++/* Return TRUE if address "naturally" sign extends, or -1 if not elf. */
++extern int bfd_get_sign_extend_vma
++ (bfd *);
++
++extern struct bfd_section *_bfd_elf_tls_setup
++ (bfd *, struct bfd_link_info *);
++
++extern void _bfd_elf_provide_symbol
++ (struct bfd_link_info *, const char *, bfd_vma, struct bfd_section *);
++
++extern void _bfd_elf_provide_section_bound_symbols
++ (struct bfd_link_info *, struct bfd_section *, const char *, const char *);
++
++extern void _bfd_elf_fix_excluded_sec_syms
++ (bfd *, struct bfd_link_info *);
++
++extern bfd_boolean bfd_m68k_elf32_create_embedded_relocs
++ (bfd *, struct bfd_link_info *, struct bfd_section *, struct bfd_section *,
++ char **);
++
++/* SunOS shared library support routines for the linker. */
++
++extern struct bfd_link_needed_list *bfd_sunos_get_needed_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_sunos_record_link_assignment
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_sunos_size_dynamic_sections
++ (bfd *, struct bfd_link_info *, struct bfd_section **,
++ struct bfd_section **, struct bfd_section **);
++
++/* Linux shared library support routines for the linker. */
++
++extern bfd_boolean bfd_i386linux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_m68klinux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_sparclinux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++
++/* mmap hacks */
++
++struct _bfd_window_internal;
++typedef struct _bfd_window_internal bfd_window_internal;
++
++typedef struct _bfd_window
++{
++ /* What the user asked for. */
++ void *data;
++ bfd_size_type size;
++ /* The actual window used by BFD. Small user-requested read-only
++ regions sharing a page may share a single window into the object
++ file. Read-write versions shouldn't until I've fixed things to
++ keep track of which portions have been claimed by the
++ application; don't want to give the same region back when the
++ application wants two writable copies! */
++ struct _bfd_window_internal *i;
++}
++bfd_window;
++
++extern void bfd_init_window
++ (bfd_window *);
++extern void bfd_free_window
++ (bfd_window *);
++extern bfd_boolean bfd_get_file_window
++ (bfd *, file_ptr, bfd_size_type, bfd_window *, bfd_boolean);
++
++/* XCOFF support routines for the linker. */
++
++extern bfd_boolean bfd_xcoff_link_record_set
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, bfd_size_type);
++extern bfd_boolean bfd_xcoff_import_symbol
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, bfd_vma,
++ const char *, const char *, const char *, unsigned int);
++extern bfd_boolean bfd_xcoff_export_symbol
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *);
++extern bfd_boolean bfd_xcoff_link_count_reloc
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_xcoff_record_link_assignment
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_xcoff_size_dynamic_sections
++ (bfd *, struct bfd_link_info *, const char *, const char *,
++ unsigned long, unsigned long, unsigned long, bfd_boolean,
++ int, bfd_boolean, bfd_boolean, struct bfd_section **, bfd_boolean);
++extern bfd_boolean bfd_xcoff_link_generate_rtinit
++ (bfd *, const char *, const char *, bfd_boolean);
++
++/* XCOFF support routines for ar. */
++extern bfd_boolean bfd_xcoff_ar_archive_set_magic
++ (bfd *, char *);
++
++/* Externally visible COFF routines. */
++
++#if defined(__STDC__) || defined(ALMOST_STDC)
++struct internal_syment;
++union internal_auxent;
++#endif
++
++extern bfd_boolean bfd_coff_get_syment
++ (bfd *, struct bfd_symbol *, struct internal_syment *);
++
++extern bfd_boolean bfd_coff_get_auxent
++ (bfd *, struct bfd_symbol *, int, union internal_auxent *);
++
++extern bfd_boolean bfd_coff_set_symbol_class
++ (bfd *, struct bfd_symbol *, unsigned int);
++
++extern bfd_boolean bfd_m68k_coff_create_embedded_relocs
++ (bfd *, struct bfd_link_info *, struct bfd_section *, struct bfd_section *, char **);
++
++/* ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_arm_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_arm_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++extern bfd_boolean bfd_arm_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++/* PE ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_arm_pe_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_arm_pe_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++extern bfd_boolean bfd_arm_pe_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++/* ELF ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_elf32_arm_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_elf32_arm_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++void bfd_elf32_arm_set_target_relocs
++ (struct bfd_link_info *, int, char *, int, int);
++
++extern bfd_boolean bfd_elf32_arm_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++extern bfd_boolean bfd_elf32_arm_add_glue_sections_to_bfd
++ (bfd *, struct bfd_link_info *);
++
++/* ELF ARM mapping symbol support.  */
++extern bfd_boolean bfd_is_arm_mapping_symbol_name
++ (const char * name);
++
++/* ARM Note section processing. */
++extern bfd_boolean bfd_arm_merge_machines
++ (bfd *, bfd *);
++
++extern bfd_boolean bfd_arm_update_notes
++ (bfd *, const char *);
++
++extern unsigned int bfd_arm_get_mach_from_notes
++ (bfd *, const char *);
++
++/* TI COFF load page support. */
++extern void bfd_ticoff_set_section_load_page
++ (struct bfd_section *, int);
++
++extern int bfd_ticoff_get_section_load_page
++ (struct bfd_section *);
++
++/* H8/300 functions. */
++extern bfd_vma bfd_h8300_pad_address
++ (bfd *, bfd_vma);
++
++/* IA64 Itanium code generation. Called from linker. */
++extern void bfd_elf32_ia64_after_parse
++ (int);
++
++extern void bfd_elf64_ia64_after_parse
++ (int);
++
++/* This structure is used for a comdat section, as in PE. A comdat
++ section is associated with a particular symbol. When the linker
++ sees a comdat section, it keeps only one of the sections with a
++ given name and associated with a given symbol. */
++
++struct coff_comdat_info
++{
++ /* The name of the symbol associated with a comdat section. */
++ const char *name;
++
++ /* The local symbol table index of the symbol associated with a
++ comdat section. This is only meaningful to the object file format
++ specific code; it is not an index into the list returned by
++ bfd_canonicalize_symtab. */
++ long symbol;
++};
++
++extern struct coff_comdat_info *bfd_coff_get_comdat_section
++ (bfd *, struct bfd_section *);
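++
++/* Sketch (hypothetical helper, not part of BFD): report whether SEC
++   is a comdat section using the accessor above.  Assumes <stdio.h>.  */
++#if 0
++static void
++show_comdat (bfd *abfd, struct bfd_section *sec)
++{
++  struct coff_comdat_info *ci = bfd_coff_get_comdat_section (abfd, sec);
++
++  if (ci != NULL)
++    printf ("%s is comdat, keyed on symbol %s\n", sec->name, ci->name);
++}
++#endif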
++
++/* Extracted from init.c. */
++void bfd_init (void);
++
++/* Extracted from opncls.c. */
++bfd *bfd_fopen (const char *filename, const char *target,
++ const char *mode, int fd);
++
++bfd *bfd_openr (const char *filename, const char *target);
++
++bfd *bfd_fdopenr (const char *filename, const char *target, int fd);
++
++bfd *bfd_openstreamr (const char *, const char *, void *);
++
++bfd *bfd_openr_iovec (const char *filename, const char *target,
++ void *(*open) (struct bfd *nbfd,
++ void *open_closure),
++ void *open_closure,
++ file_ptr (*pread) (struct bfd *nbfd,
++ void *stream,
++ void *buf,
++ file_ptr nbytes,
++ file_ptr offset),
++ int (*close) (struct bfd *nbfd,
++ void *stream));
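++
++/* Sketch of the callback triple bfd_openr_iovec expects, serving
++   reads from an in-memory buffer.  The mem_* names and struct are
++   hypothetical; assumes <string.h> for memcpy.  */
++#if 0
++struct mem_stream { const char *base; file_ptr size; };
++
++static void *
++mem_open (struct bfd *nbfd, void *open_closure)
++{
++  (void) nbfd;
++  return open_closure;          /* The closure is the stream itself.  */
++}
++
++static file_ptr
++mem_pread (struct bfd *nbfd, void *stream, void *buf,
++           file_ptr nbytes, file_ptr offset)
++{
++  struct mem_stream *m = (struct mem_stream *) stream;
++
++  (void) nbfd;
++  if (offset >= m->size)
++    return 0;
++  if (nbytes > m->size - offset)
++    nbytes = m->size - offset;
++  memcpy (buf, m->base + offset, (size_t) nbytes);
++  return nbytes;
++}
++
++static int
++mem_close (struct bfd *nbfd, void *stream)
++{
++  (void) nbfd;
++  (void) stream;
++  return 0;
++}
++
++/* Opened as: bfd_openr_iovec ("image", NULL, mem_open, &my_stream,
++                               mem_pread, mem_close);  */
++#endif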
++
++bfd *bfd_openw (const char *filename, const char *target);
++
++bfd_boolean bfd_close (bfd *abfd);
++
++bfd_boolean bfd_close_all_done (bfd *);
++
++bfd *bfd_create (const char *filename, bfd *templ);
++
++bfd_boolean bfd_make_writable (bfd *abfd);
++
++bfd_boolean bfd_make_readable (bfd *abfd);
++
++unsigned long bfd_calc_gnu_debuglink_crc32
++ (unsigned long crc, const unsigned char *buf, bfd_size_type len);
++
++char *bfd_follow_gnu_debuglink (bfd *abfd, const char *dir);
++
++struct bfd_section *bfd_create_gnu_debuglink_section
++ (bfd *abfd, const char *filename);
++
++bfd_boolean bfd_fill_in_gnu_debuglink_section
++ (bfd *abfd, struct bfd_section *sect, const char *filename);
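++
++/* Sketch: locate a separate debug file via the functions above.  The
++   search directory is only an example; the returned string is
++   malloc'd and owned by the caller.  Assumes <stdlib.h>.  */
++#if 0
++static void
++find_debug_file (bfd *abfd)
++{
++  char *debugfile = bfd_follow_gnu_debuglink (abfd, "/usr/lib/debug");
++
++  if (debugfile != NULL)
++    {
++      /* Open or record DEBUGFILE here.  */
++      free (debugfile);
++    }
++}
++#endif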
++
++/* Extracted from libbfd.c. */
++
++/* Byte swapping macros for user section data. */
++
++#define bfd_put_8(abfd, val, ptr) \
++ ((void) (*((unsigned char *) (ptr)) = (val) & 0xff))
++#define bfd_put_signed_8 \
++ bfd_put_8
++#define bfd_get_8(abfd, ptr) \
++ (*(unsigned char *) (ptr) & 0xff)
++#define bfd_get_signed_8(abfd, ptr) \
++ (((*(unsigned char *) (ptr) & 0xff) ^ 0x80) - 0x80)
++
++#define bfd_put_16(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx16, ((val),(ptr)))
++#define bfd_put_signed_16 \
++ bfd_put_16
++#define bfd_get_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx16, (ptr))
++#define bfd_get_signed_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_16, (ptr))
++
++#define bfd_put_32(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx32, ((val),(ptr)))
++#define bfd_put_signed_32 \
++ bfd_put_32
++#define bfd_get_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx32, (ptr))
++#define bfd_get_signed_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_32, (ptr))
++
++#define bfd_put_64(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx64, ((val), (ptr)))
++#define bfd_put_signed_64 \
++ bfd_put_64
++#define bfd_get_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx64, (ptr))
++#define bfd_get_signed_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_64, (ptr))
++
++#define bfd_get(bits, abfd, ptr) \
++ ((bits) == 8 ? (bfd_vma) bfd_get_8 (abfd, ptr) \
++ : (bits) == 16 ? bfd_get_16 (abfd, ptr) \
++ : (bits) == 32 ? bfd_get_32 (abfd, ptr) \
++ : (bits) == 64 ? bfd_get_64 (abfd, ptr) \
++ : (abort (), (bfd_vma) - 1))
++
++#define bfd_put(bits, abfd, val, ptr) \
++ ((bits) == 8 ? bfd_put_8 (abfd, val, ptr) \
++ : (bits) == 16 ? bfd_put_16 (abfd, val, ptr) \
++ : (bits) == 32 ? bfd_put_32 (abfd, val, ptr) \
++ : (bits) == 64 ? bfd_put_64 (abfd, val, ptr) \
++ : (abort (), (void) 0))
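++
++/* Sketch: the bfd_get/bfd_put macros above read and write section
++   data in ABFD's target byte order; PTR must address at least four
++   octets here.  The helper name is illustrative.  */
++#if 0
++static void
++adjust_word (bfd *abfd, bfd_byte *ptr)
++{
++  bfd_vma v = bfd_get_32 (abfd, ptr);   /* Read in target order.  */
++  bfd_put_32 (abfd, v + 4, ptr);        /* Write back, adjusted.  */
++}
++#endif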
++
++
++/* Byte swapping macros for file header data. */
++
++#define bfd_h_put_8(abfd, val, ptr) \
++ bfd_put_8 (abfd, val, ptr)
++#define bfd_h_put_signed_8(abfd, val, ptr) \
++ bfd_put_8 (abfd, val, ptr)
++#define bfd_h_get_8(abfd, ptr) \
++ bfd_get_8 (abfd, ptr)
++#define bfd_h_get_signed_8(abfd, ptr) \
++ bfd_get_signed_8 (abfd, ptr)
++
++#define bfd_h_put_16(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx16, (val, ptr))
++#define bfd_h_put_signed_16 \
++ bfd_h_put_16
++#define bfd_h_get_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx16, (ptr))
++#define bfd_h_get_signed_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_16, (ptr))
++
++#define bfd_h_put_32(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx32, (val, ptr))
++#define bfd_h_put_signed_32 \
++ bfd_h_put_32
++#define bfd_h_get_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx32, (ptr))
++#define bfd_h_get_signed_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_32, (ptr))
++
++#define bfd_h_put_64(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx64, (val, ptr))
++#define bfd_h_put_signed_64 \
++ bfd_h_put_64
++#define bfd_h_get_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx64, (ptr))
++#define bfd_h_get_signed_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_64, (ptr))
++
++/* Aliases for the above, which should eventually go away. */
++
++#define H_PUT_64 bfd_h_put_64
++#define H_PUT_32 bfd_h_put_32
++#define H_PUT_16 bfd_h_put_16
++#define H_PUT_8 bfd_h_put_8
++#define H_PUT_S64 bfd_h_put_signed_64
++#define H_PUT_S32 bfd_h_put_signed_32
++#define H_PUT_S16 bfd_h_put_signed_16
++#define H_PUT_S8 bfd_h_put_signed_8
++#define H_GET_64 bfd_h_get_64
++#define H_GET_32 bfd_h_get_32
++#define H_GET_16 bfd_h_get_16
++#define H_GET_8 bfd_h_get_8
++#define H_GET_S64 bfd_h_get_signed_64
++#define H_GET_S32 bfd_h_get_signed_32
++#define H_GET_S16 bfd_h_get_signed_16
++#define H_GET_S8 bfd_h_get_signed_8
++
++
++/* Extracted from bfdio.c. */
++long bfd_get_mtime (bfd *abfd);
++
++long bfd_get_size (bfd *abfd);
++
++/* Extracted from bfdwin.c. */
++/* Extracted from section.c. */
++typedef struct bfd_section
++{
++ /* The name of the section; the name isn't a copy, the pointer is
++ the same as that passed to bfd_make_section. */
++ const char *name;
++
++ /* A unique sequence number. */
++ int id;
++
++ /* Which section in the bfd; 0..n-1 as sections are created in a bfd. */
++ int index;
++
++ /* The next section in the list belonging to the BFD, or NULL. */
++ struct bfd_section *next;
++
++ /* The previous section in the list belonging to the BFD, or NULL. */
++ struct bfd_section *prev;
++
++ /* The field flags contains attributes of the section. Some
++ flags are read in from the object file, and some are
++ synthesized from other information. */
++ flagword flags;
++
++#define SEC_NO_FLAGS 0x000
++
++ /* Tells the OS to allocate space for this section when loading.
++ This is clear for a section containing debug information only. */
++#define SEC_ALLOC 0x001
++
++ /* Tells the OS to load the section from the file when loading.
++ This is clear for a .bss section. */
++#define SEC_LOAD 0x002
++
++ /* The section contains data still to be relocated, so there is
++ some relocation information too. */
++#define SEC_RELOC 0x004
++
++ /* A signal to the OS that the section contains read only data. */
++#define SEC_READONLY 0x008
++
++ /* The section contains code only. */
++#define SEC_CODE 0x010
++
++ /* The section contains data only. */
++#define SEC_DATA 0x020
++
++ /* The section will reside in ROM. */
++#define SEC_ROM 0x040
++
++ /* The section contains constructor information. This section
++ type is used by the linker to create lists of constructors and
++ destructors used by <<g++>>. When a back end sees a symbol
++ which should be used in a constructor list, it creates a new
++ section for the type of name (e.g., <<__CTOR_LIST__>>), attaches
++ the symbol to it, and builds a relocation. To build the lists
++ of constructors, all the linker has to do is catenate all the
++ sections called <<__CTOR_LIST__>> and relocate the data
++     contained within - exactly the operations it would perform on
++ standard data. */
++#define SEC_CONSTRUCTOR 0x080
++
++ /* The section has contents - a data section could be
++ <<SEC_ALLOC>> | <<SEC_HAS_CONTENTS>>; a debug section could be
++ <<SEC_HAS_CONTENTS>> */
++#define SEC_HAS_CONTENTS 0x100
++
++ /* An instruction to the linker to not output the section
++ even if it has information which would normally be written. */
++#define SEC_NEVER_LOAD 0x200
++
++ /* The section contains thread local data. */
++#define SEC_THREAD_LOCAL 0x400
++
++ /* The section has GOT references. This flag is only for the
++ linker, and is currently only used by the elf32-hppa back end.
++ It will be set if global offset table references were detected
++ in this section, which indicate to the linker that the section
++ contains PIC code, and must be handled specially when doing a
++ static link. */
++#define SEC_HAS_GOT_REF 0x800
++
++ /* The section contains common symbols (symbols may be defined
++ multiple times, the value of a symbol is the amount of
++ space it requires, and the largest symbol value is the one
++ used). Most targets have exactly one of these (which we
++ translate to bfd_com_section_ptr), but ECOFF has two. */
++#define SEC_IS_COMMON 0x1000
++
++ /* The section contains only debugging information. For
++ example, this is set for ELF .debug and .stab sections.
++ strip tests this flag to see if a section can be
++ discarded. */
++#define SEC_DEBUGGING 0x2000
++
++ /* The contents of this section are held in memory pointed to
++ by the contents field. This is checked by bfd_get_section_contents,
++ and the data is retrieved from memory if appropriate. */
++#define SEC_IN_MEMORY 0x4000
++
++ /* The contents of this section are to be excluded by the
++ linker for executable and shared objects unless those
++ objects are to be further relocated. */
++#define SEC_EXCLUDE 0x8000
++
++ /* The contents of this section are to be sorted based on the sum of
++ the symbol and addend values specified by the associated relocation
++ entries. Entries without associated relocation entries will be
++ appended to the end of the section in an unspecified order. */
++#define SEC_SORT_ENTRIES 0x10000
++
++ /* When linking, duplicate sections of the same name should be
++ discarded, rather than being combined into a single section as
++ is usually done. This is similar to how common symbols are
++ handled. See SEC_LINK_DUPLICATES below. */
++#define SEC_LINK_ONCE 0x20000
++
++ /* If SEC_LINK_ONCE is set, this bitfield describes how the linker
++ should handle duplicate sections. */
++#define SEC_LINK_DUPLICATES 0x40000
++
++ /* This value for SEC_LINK_DUPLICATES means that duplicate
++ sections with the same name should simply be discarded. */
++#define SEC_LINK_DUPLICATES_DISCARD 0x0
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if there are any duplicate sections, although
++ it should still only link one copy. */
++#define SEC_LINK_DUPLICATES_ONE_ONLY 0x80000
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if any duplicate sections are a different size. */
++#define SEC_LINK_DUPLICATES_SAME_SIZE 0x100000
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if any duplicate sections contain different
++ contents. */
++#define SEC_LINK_DUPLICATES_SAME_CONTENTS \
++ (SEC_LINK_DUPLICATES_ONE_ONLY | SEC_LINK_DUPLICATES_SAME_SIZE)
++
++ /* This section was created by the linker as part of dynamic
++ relocation or other arcane processing. It is skipped when
++ going through the first-pass output, trusting that someone
++ else up the line will take care of it later. */
++#define SEC_LINKER_CREATED 0x200000
++
++ /* This section should not be subject to garbage collection. */
++#define SEC_KEEP 0x400000
++
++ /* This section contains "short" data, and should be placed
++ "near" the GP. */
++#define SEC_SMALL_DATA 0x800000
++
++ /* Attempt to merge identical entities in the section.
++ Entity size is given in the entsize field. */
++#define SEC_MERGE 0x1000000
++
++ /* If given with SEC_MERGE, entities to merge are zero terminated
++ strings where entsize specifies character size instead of fixed
++ size entries. */
++#define SEC_STRINGS 0x2000000
++
++ /* This section contains data about section groups. */
++#define SEC_GROUP 0x4000000
++
++ /* The section is a COFF shared library section. This flag is
++ only for the linker. If this type of section appears in
++ the input file, the linker must copy it to the output file
++ without changing the vma or size. FIXME: Although this
++ was originally intended to be general, it really is COFF
++ specific (and the flag was renamed to indicate this). It
++ might be cleaner to have some more general mechanism to
++ allow the back end to control what the linker does with
++ sections. */
++#define SEC_COFF_SHARED_LIBRARY 0x10000000
++
++ /* This section contains data which may be shared with other
++ executables or shared objects. This is for COFF only. */
++#define SEC_COFF_SHARED 0x20000000
++
++ /* When a section with this flag is being linked, then if the size of
++ the input section is less than a page, it should not cross a page
++ boundary. If the size of the input section is one page or more,
++ it should be aligned on a page boundary. This is for TI
++ TMS320C54X only. */
++#define SEC_TIC54X_BLOCK 0x40000000
++
++ /* Conditionally link this section; do not link if there are no
++ references found to any symbol in the section. This is for TI
++ TMS320C54X only. */
++#define SEC_TIC54X_CLINK 0x80000000
++
++ /* End of section flags. */
++
++ /* Some internal packed boolean fields. */
++
++ /* See the vma field. */
++ unsigned int user_set_vma : 1;
++
++ /* A mark flag used by some of the linker backends. */
++ unsigned int linker_mark : 1;
++
++ /* Another mark flag used by some of the linker backends. Set for
++ output sections that have an input section. */
++ unsigned int linker_has_input : 1;
++
++ /* Mark flags used by some linker backends for garbage collection. */
++ unsigned int gc_mark : 1;
++ unsigned int gc_mark_from_eh : 1;
++
++ /* The following flags are used by the ELF linker. */
++
++ /* Mark sections which have been allocated to segments. */
++ unsigned int segment_mark : 1;
++
++ /* Type of sec_info information. */
++ unsigned int sec_info_type:3;
++#define ELF_INFO_TYPE_NONE 0
++#define ELF_INFO_TYPE_STABS 1
++#define ELF_INFO_TYPE_MERGE 2
++#define ELF_INFO_TYPE_EH_FRAME 3
++#define ELF_INFO_TYPE_JUST_SYMS 4
++
++ /* Nonzero if this section uses RELA relocations, rather than REL. */
++ unsigned int use_rela_p:1;
++
++ /* Bits used by various backends. The generic code doesn't touch
++ these fields. */
++
++ /* Nonzero if this section has TLS related relocations. */
++ unsigned int has_tls_reloc:1;
++
++ /* Nonzero if this section has a gp reloc. */
++ unsigned int has_gp_reloc:1;
++
++ /* Nonzero if this section needs the relax finalize pass. */
++ unsigned int need_finalize_relax:1;
++
++ /* Whether relocations have been processed. */
++ unsigned int reloc_done : 1;
++
++ /* End of internal packed boolean fields. */
++
++ /* The virtual memory address of the section - where it will be
++ at run time. The symbols are relocated against this. The
++ user_set_vma flag is maintained by bfd; if it's not set, the
++ backend can assign addresses (for example, in <<a.out>>, where
++ the default address for <<.data>> is dependent on the specific
++ target and various flags). */
++ bfd_vma vma;
++
++ /* The load address of the section - where it would be in a
++ rom image; really only used for writing section header
++ information. */
++ bfd_vma lma;
++
++ /* The size of the section in octets, as it will be output.
++ Contains a value even if the section has no contents (e.g., the
++ size of <<.bss>>). */
++ bfd_size_type size;
++
++ /* For input sections, the original size on disk of the section, in
++ octets. This field is used by the linker relaxation code. It is
++ currently only set for sections where the linker relaxation scheme
++ doesn't cache altered section and reloc contents (stabs, eh_frame,
++ SEC_MERGE, some coff relaxing targets), and thus the original size
++ needs to be kept to read the section multiple times.
++ For output sections, rawsize holds the section size calculated on
++ a previous linker relaxation pass. */
++ bfd_size_type rawsize;
++
++ /* If this section is going to be output, then this value is the
++ offset in *bytes* into the output section of the first byte in the
++ input section (byte ==> smallest addressable unit on the
++ target). In most cases, if this was going to start at the
++ 100th octet (8-bit quantity) in the output section, this value
++ would be 100. However, if the target byte size is 16 bits
++ (bfd_octets_per_byte is "2"), this value would be 50. */
++ bfd_vma output_offset;
++
++ /* The output section through which to map on output. */
++ struct bfd_section *output_section;
++
++ /* The alignment requirement of the section, as an exponent of 2 -
++ e.g., 3 aligns to 2^3 (or 8). */
++ unsigned int alignment_power;
++
++ /* If an input section, a pointer to a vector of relocation
++ records for the data in this section. */
++ struct reloc_cache_entry *relocation;
++
++ /* If an output section, a pointer to a vector of pointers to
++ relocation records for the data in this section. */
++ struct reloc_cache_entry **orelocation;
++
++ /* The number of relocation records in one of the above. */
++ unsigned reloc_count;
++
++ /* Information below is back end specific - and not always used
++ or updated. */
++
++ /* File position of section data. */
++ file_ptr filepos;
++
++ /* File position of relocation info. */
++ file_ptr rel_filepos;
++
++ /* File position of line data. */
++ file_ptr line_filepos;
++
++ /* Pointer to data for applications. */
++ void *userdata;
++
++ /* If the SEC_IN_MEMORY flag is set, this points to the actual
++ contents. */
++ unsigned char *contents;
++
++ /* Attached line number information. */
++ alent *lineno;
++
++ /* Number of line number records. */
++ unsigned int lineno_count;
++
++ /* Entity size for merging purposes. */
++ unsigned int entsize;
++
++ /* Points to the kept section if this section is a link-once section,
++ and is discarded. */
++ struct bfd_section *kept_section;
++
++ /* When a section is being output, this value changes as more
++ linenumbers are written out. */
++ file_ptr moving_line_filepos;
++
++ /* What the section number is in the target world. */
++ int target_index;
++
++ void *used_by_bfd;
++
++ /* If this is a constructor section then here is a list of the
++ relocations created to relocate items within it. */
++ struct relent_chain *constructor_chain;
++
++ /* The BFD which owns the section. */
++ bfd *owner;
++
++ /* A symbol which points at this section only. */
++ struct bfd_symbol *symbol;
++ struct bfd_symbol **symbol_ptr_ptr;
++
++ /* Early in the link process, map_head and map_tail are used to build
++ a list of input sections attached to an output section. Later,
++ output sections use these fields for a list of bfd_link_order
++ structs. */
++ union {
++ struct bfd_link_order *link_order;
++ struct bfd_section *s;
++ } map_head, map_tail;
++} asection;
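++
++/* The section list is doubly linked through the next/prev fields
++   above; a typical traversal looks like this (sketch, assumes
++   <stdio.h>):  */
++#if 0
++static void
++show_sections (bfd *abfd)
++{
++  asection *s;
++
++  for (s = abfd->sections; s != NULL; s = s->next)
++    printf ("%s: vma 0x%lx, size %lu\n", s->name,
++            (unsigned long) s->vma, (unsigned long) s->size);
++}
++#endif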
++
++/* These sections are global, and are managed by BFD. The application
++ and target back end are not permitted to change the values in
++ these sections. New code should use the section_ptr macros rather
++ than referring directly to the const sections. The const sections
++ may eventually vanish. */
++#define BFD_ABS_SECTION_NAME "*ABS*"
++#define BFD_UND_SECTION_NAME "*UND*"
++#define BFD_COM_SECTION_NAME "*COM*"
++#define BFD_IND_SECTION_NAME "*IND*"
++
++/* The absolute section. */
++extern asection bfd_abs_section;
++#define bfd_abs_section_ptr ((asection *) &bfd_abs_section)
++#define bfd_is_abs_section(sec) ((sec) == bfd_abs_section_ptr)
++/* Pointer to the undefined section. */
++extern asection bfd_und_section;
++#define bfd_und_section_ptr ((asection *) &bfd_und_section)
++#define bfd_is_und_section(sec) ((sec) == bfd_und_section_ptr)
++/* Pointer to the common section. */
++extern asection bfd_com_section;
++#define bfd_com_section_ptr ((asection *) &bfd_com_section)
++/* Pointer to the indirect section. */
++extern asection bfd_ind_section;
++#define bfd_ind_section_ptr ((asection *) &bfd_ind_section)
++#define bfd_is_ind_section(sec) ((sec) == bfd_ind_section_ptr)
++
++#define bfd_is_const_section(SEC) \
++ ( ((SEC) == bfd_abs_section_ptr) \
++ || ((SEC) == bfd_und_section_ptr) \
++ || ((SEC) == bfd_com_section_ptr) \
++ || ((SEC) == bfd_ind_section_ptr))
++
++extern const struct bfd_symbol * const bfd_abs_symbol;
++extern const struct bfd_symbol * const bfd_com_symbol;
++extern const struct bfd_symbol * const bfd_und_symbol;
++extern const struct bfd_symbol * const bfd_ind_symbol;
++
++/* Macros to handle insertion and deletion of a bfd's sections. These
++   only handle the list pointers, i.e. they do not adjust section_count,
++ target_index etc. */
++#define bfd_section_list_remove(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ asection *_next = _s->next; \
++ asection *_prev = _s->prev; \
++ if (_prev) \
++ _prev->next = _next; \
++ else \
++ (ABFD)->sections = _next; \
++ if (_next) \
++ _next->prev = _prev; \
++ else \
++ (ABFD)->section_last = _prev; \
++ } \
++ while (0)
++#define bfd_section_list_append(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ bfd *_abfd = ABFD; \
++ _s->next = NULL; \
++ if (_abfd->section_last) \
++ { \
++ _s->prev = _abfd->section_last; \
++ _abfd->section_last->next = _s; \
++ } \
++ else \
++ { \
++ _s->prev = NULL; \
++ _abfd->sections = _s; \
++ } \
++ _abfd->section_last = _s; \
++ } \
++ while (0)
++#define bfd_section_list_prepend(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ bfd *_abfd = ABFD; \
++ _s->prev = NULL; \
++ if (_abfd->sections) \
++ { \
++ _s->next = _abfd->sections; \
++ _abfd->sections->prev = _s; \
++ } \
++ else \
++ { \
++ _s->next = NULL; \
++ _abfd->section_last = _s; \
++ } \
++ _abfd->sections = _s; \
++ } \
++ while (0)
++#define bfd_section_list_insert_after(ABFD, A, S) \
++ do \
++ { \
++ asection *_a = A; \
++ asection *_s = S; \
++ asection *_next = _a->next; \
++ _s->next = _next; \
++ _s->prev = _a; \
++ _a->next = _s; \
++ if (_next) \
++ _next->prev = _s; \
++ else \
++ (ABFD)->section_last = _s; \
++ } \
++ while (0)
++#define bfd_section_list_insert_before(ABFD, B, S) \
++ do \
++ { \
++ asection *_b = B; \
++ asection *_s = S; \
++ asection *_prev = _b->prev; \
++ _s->prev = _prev; \
++ _s->next = _b; \
++ _b->prev = _s; \
++ if (_prev) \
++ _prev->next = _s; \
++ else \
++ (ABFD)->sections = _s; \
++ } \
++ while (0)
++#define bfd_section_removed_from_list(ABFD, S) \
++ ((S)->next == NULL ? (ABFD)->section_last != (S) : (S)->next->prev != (S))
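++
++/* Sketch: the macros above compose naturally, e.g. moving S to the
++   end of ABFD's section list (remember they do not adjust
++   section_count or target_index).  */
++#if 0
++static void
++move_section_to_end (bfd *abfd, asection *s)
++{
++  bfd_section_list_remove (abfd, s);
++  bfd_section_list_append (abfd, s);
++}
++#endif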
++
++void bfd_section_list_clear (bfd *);
++
++asection *bfd_get_section_by_name (bfd *abfd, const char *name);
++
++asection *bfd_get_section_by_name_if
++ (bfd *abfd,
++ const char *name,
++ bfd_boolean (*func) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
++
++char *bfd_get_unique_section_name
++ (bfd *abfd, const char *templat, int *count);
++
++asection *bfd_make_section_old_way (bfd *abfd, const char *name);
++
++asection *bfd_make_section_anyway_with_flags
++ (bfd *abfd, const char *name, flagword flags);
++
++asection *bfd_make_section_anyway (bfd *abfd, const char *name);
++
++asection *bfd_make_section_with_flags
++ (bfd *, const char *name, flagword flags);
++
++asection *bfd_make_section (bfd *, const char *name);
++
++bfd_boolean bfd_set_section_flags
++ (bfd *abfd, asection *sec, flagword flags);
++
++void bfd_map_over_sections
++ (bfd *abfd,
++ void (*func) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
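++
++/* Sketch of the callback style bfd_map_over_sections expects: sum the
++   sizes of all sections.  An equivalent open-coded loop would walk
++   abfd->sections directly.  */
++#if 0
++static void
++add_size (bfd *abfd, asection *sect, void *obj)
++{
++  (void) abfd;
++  *(bfd_size_type *) obj += sect->size;
++}
++
++/* bfd_size_type total = 0;
++   bfd_map_over_sections (abfd, add_size, &total);  */
++#endif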
++
++asection *bfd_sections_find_if
++ (bfd *abfd,
++ bfd_boolean (*operation) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
++
++bfd_boolean bfd_set_section_size
++ (bfd *abfd, asection *sec, bfd_size_type val);
++
++bfd_boolean bfd_set_section_contents
++ (bfd *abfd, asection *section, const void *data,
++ file_ptr offset, bfd_size_type count);
++
++bfd_boolean bfd_get_section_contents
++ (bfd *abfd, asection *section, void *location, file_ptr offset,
++ bfd_size_type count);
++
++bfd_boolean bfd_malloc_and_get_section
++ (bfd *abfd, asection *section, bfd_byte **buf);
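++
++/* Sketch: read a whole section into a freshly allocated buffer with
++   the function above.  On failure any partially allocated buffer is
++   freed; assumes <stdlib.h>.  */
++#if 0
++static bfd_byte *
++read_section (bfd *abfd, asection *sec)
++{
++  bfd_byte *buf = NULL;
++
++  if (!bfd_malloc_and_get_section (abfd, sec, &buf))
++    {
++      free (buf);
++      return NULL;
++    }
++  return buf;
++}
++#endif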
++
++bfd_boolean bfd_copy_private_section_data
++ (bfd *ibfd, asection *isec, bfd *obfd, asection *osec);
++
++#define bfd_copy_private_section_data(ibfd, isection, obfd, osection) \
++ BFD_SEND (obfd, _bfd_copy_private_section_data, \
++ (ibfd, isection, obfd, osection))
++bfd_boolean bfd_generic_is_group_section (bfd *, const asection *sec);
++
++bfd_boolean bfd_generic_discard_group (bfd *abfd, asection *group);
++
++/* Extracted from archures.c. */
++enum bfd_architecture
++{
++ bfd_arch_unknown, /* File arch not known. */
++ bfd_arch_obscure, /* Arch known, not one of these. */
++ bfd_arch_m68k, /* Motorola 68xxx */
++#define bfd_mach_m68000 1
++#define bfd_mach_m68008 2
++#define bfd_mach_m68010 3
++#define bfd_mach_m68020 4
++#define bfd_mach_m68030 5
++#define bfd_mach_m68040 6
++#define bfd_mach_m68060 7
++#define bfd_mach_cpu32 8
++#define bfd_mach_mcf5200 9
++#define bfd_mach_mcf5206e 10
++#define bfd_mach_mcf5307 11
++#define bfd_mach_mcf5407 12
++#define bfd_mach_mcf528x 13
++#define bfd_mach_mcfv4e 14
++#define bfd_mach_mcf521x 15
++#define bfd_mach_mcf5249 16
++#define bfd_mach_mcf547x 17
++#define bfd_mach_mcf548x 18
++ bfd_arch_vax, /* DEC Vax */
++ bfd_arch_i960, /* Intel 960 */
++ /* The order of the following is important.
++       A lower number indicates a machine type that
++ only accepts a subset of the instructions
++ available to machines with higher numbers.
++ The exception is the "ca", which is
++ incompatible with all other machines except
++ "core". */
++
++#define bfd_mach_i960_core 1
++#define bfd_mach_i960_ka_sa 2
++#define bfd_mach_i960_kb_sb 3
++#define bfd_mach_i960_mc 4
++#define bfd_mach_i960_xa 5
++#define bfd_mach_i960_ca 6
++#define bfd_mach_i960_jx 7
++#define bfd_mach_i960_hx 8
++
++ bfd_arch_or32, /* OpenRISC 32 */
++
++ bfd_arch_a29k, /* AMD 29000 */
++ bfd_arch_sparc, /* SPARC */
++#define bfd_mach_sparc 1
++/* The difference between v8plus and v9 is that v9 is a true 64 bit env. */
++#define bfd_mach_sparc_sparclet 2
++#define bfd_mach_sparc_sparclite 3
++#define bfd_mach_sparc_v8plus 4
++#define bfd_mach_sparc_v8plusa 5 /* with ultrasparc add'ns. */
++#define bfd_mach_sparc_sparclite_le 6
++#define bfd_mach_sparc_v9 7
++#define bfd_mach_sparc_v9a 8 /* with ultrasparc add'ns. */
++#define bfd_mach_sparc_v8plusb 9 /* with cheetah add'ns. */
++#define bfd_mach_sparc_v9b 10 /* with cheetah add'ns. */
++/* Nonzero if MACH has the v9 instruction set. */
++#define bfd_mach_sparc_v9_p(mach) \
++ ((mach) >= bfd_mach_sparc_v8plus && (mach) <= bfd_mach_sparc_v9b \
++ && (mach) != bfd_mach_sparc_sparclite_le)
++/* Nonzero if MACH is a 64 bit sparc architecture. */
++#define bfd_mach_sparc_64bit_p(mach) \
++ ((mach) >= bfd_mach_sparc_v9 && (mach) != bfd_mach_sparc_v8plusb)
++ bfd_arch_mips, /* MIPS Rxxxx */
++#define bfd_mach_mips3000 3000
++#define bfd_mach_mips3900 3900
++#define bfd_mach_mips4000 4000
++#define bfd_mach_mips4010 4010
++#define bfd_mach_mips4100 4100
++#define bfd_mach_mips4111 4111
++#define bfd_mach_mips4120 4120
++#define bfd_mach_mips4300 4300
++#define bfd_mach_mips4400 4400
++#define bfd_mach_mips4600 4600
++#define bfd_mach_mips4650 4650
++#define bfd_mach_mips5000 5000
++#define bfd_mach_mips5400 5400
++#define bfd_mach_mips5500 5500
++#define bfd_mach_mips6000 6000
++#define bfd_mach_mips7000 7000
++#define bfd_mach_mips8000 8000
++#define bfd_mach_mips9000 9000
++#define bfd_mach_mips10000 10000
++#define bfd_mach_mips12000 12000
++#define bfd_mach_mips16 16
++#define bfd_mach_mips5 5
++#define bfd_mach_mips_sb1 12310201 /* octal 'SB', 01 */
++#define bfd_mach_mipsisa32 32
++#define bfd_mach_mipsisa32r2 33
++#define bfd_mach_mipsisa64 64
++#define bfd_mach_mipsisa64r2 65
++ bfd_arch_i386, /* Intel 386 */
++#define bfd_mach_i386_i386 1
++#define bfd_mach_i386_i8086 2
++#define bfd_mach_i386_i386_intel_syntax 3
++#define bfd_mach_x86_64 64
++#define bfd_mach_x86_64_intel_syntax 65
++ bfd_arch_we32k, /* AT&T WE32xxx */
++ bfd_arch_tahoe, /* CCI/Harris Tahoe */
++ bfd_arch_i860, /* Intel 860 */
++ bfd_arch_i370, /* IBM 360/370 Mainframes */
++ bfd_arch_romp, /* IBM ROMP PC/RT */
++ bfd_arch_alliant, /* Alliant */
++ bfd_arch_convex, /* Convex */
++ bfd_arch_m88k, /* Motorola 88xxx */
++ bfd_arch_m98k, /* Motorola 98xxx */
++ bfd_arch_pyramid, /* Pyramid Technology */
++ bfd_arch_h8300, /* Renesas H8/300 (formerly Hitachi H8/300) */
++#define bfd_mach_h8300 1
++#define bfd_mach_h8300h 2
++#define bfd_mach_h8300s 3
++#define bfd_mach_h8300hn 4
++#define bfd_mach_h8300sn 5
++#define bfd_mach_h8300sx 6
++#define bfd_mach_h8300sxn 7
++ bfd_arch_pdp11, /* DEC PDP-11 */
++ bfd_arch_powerpc, /* PowerPC */
++#define bfd_mach_ppc 32
++#define bfd_mach_ppc64 64
++#define bfd_mach_ppc_403 403
++#define bfd_mach_ppc_403gc 4030
++#define bfd_mach_ppc_505 505
++#define bfd_mach_ppc_601 601
++#define bfd_mach_ppc_602 602
++#define bfd_mach_ppc_603 603
++#define bfd_mach_ppc_ec603e 6031
++#define bfd_mach_ppc_604 604
++#define bfd_mach_ppc_620 620
++#define bfd_mach_ppc_630 630
++#define bfd_mach_ppc_750 750
++#define bfd_mach_ppc_860 860
++#define bfd_mach_ppc_a35 35
++#define bfd_mach_ppc_rs64ii 642
++#define bfd_mach_ppc_rs64iii 643
++#define bfd_mach_ppc_7400 7400
++#define bfd_mach_ppc_e500 500
++ bfd_arch_rs6000, /* IBM RS/6000 */
++#define bfd_mach_rs6k 6000
++#define bfd_mach_rs6k_rs1 6001
++#define bfd_mach_rs6k_rsc 6003
++#define bfd_mach_rs6k_rs2 6002
++ bfd_arch_hppa, /* HP PA RISC */
++#define bfd_mach_hppa10 10
++#define bfd_mach_hppa11 11
++#define bfd_mach_hppa20 20
++#define bfd_mach_hppa20w 25
++ bfd_arch_d10v, /* Mitsubishi D10V */
++#define bfd_mach_d10v 1
++#define bfd_mach_d10v_ts2 2
++#define bfd_mach_d10v_ts3 3
++ bfd_arch_d30v, /* Mitsubishi D30V */
++ bfd_arch_dlx, /* DLX */
++ bfd_arch_m68hc11, /* Motorola 68HC11 */
++ bfd_arch_m68hc12, /* Motorola 68HC12 */
++#define bfd_mach_m6812_default 0
++#define bfd_mach_m6812 1
++#define bfd_mach_m6812s 2
++ bfd_arch_z8k, /* Zilog Z8000 */
++#define bfd_mach_z8001 1
++#define bfd_mach_z8002 2
++ bfd_arch_h8500, /* Renesas H8/500 (formerly Hitachi H8/500) */
++ bfd_arch_sh, /* Renesas / SuperH SH (formerly Hitachi SH) */
++#define bfd_mach_sh 1
++#define bfd_mach_sh2 0x20
++#define bfd_mach_sh_dsp 0x2d
++#define bfd_mach_sh2a 0x2a
++#define bfd_mach_sh2a_nofpu 0x2b
++#define bfd_mach_sh2a_nofpu_or_sh4_nommu_nofpu 0x2a1
++#define bfd_mach_sh2a_nofpu_or_sh3_nommu 0x2a2
++#define bfd_mach_sh2a_or_sh4 0x2a3
++#define bfd_mach_sh2a_or_sh3e 0x2a4
++#define bfd_mach_sh2e 0x2e
++#define bfd_mach_sh3 0x30
++#define bfd_mach_sh3_nommu 0x31
++#define bfd_mach_sh3_dsp 0x3d
++#define bfd_mach_sh3e 0x3e
++#define bfd_mach_sh4 0x40
++#define bfd_mach_sh4_nofpu 0x41
++#define bfd_mach_sh4_nommu_nofpu 0x42
++#define bfd_mach_sh4a 0x4a
++#define bfd_mach_sh4a_nofpu 0x4b
++#define bfd_mach_sh4al_dsp 0x4d
++#define bfd_mach_sh5 0x50
++ bfd_arch_alpha, /* Dec Alpha */
++#define bfd_mach_alpha_ev4 0x10
++#define bfd_mach_alpha_ev5 0x20
++#define bfd_mach_alpha_ev6 0x30
++ bfd_arch_arm, /* Advanced Risc Machines ARM. */
++#define bfd_mach_arm_unknown 0
++#define bfd_mach_arm_2 1
++#define bfd_mach_arm_2a 2
++#define bfd_mach_arm_3 3
++#define bfd_mach_arm_3M 4
++#define bfd_mach_arm_4 5
++#define bfd_mach_arm_4T 6
++#define bfd_mach_arm_5 7
++#define bfd_mach_arm_5T 8
++#define bfd_mach_arm_5TE 9
++#define bfd_mach_arm_XScale 10
++#define bfd_mach_arm_ep9312 11
++#define bfd_mach_arm_iWMMXt 12
++ bfd_arch_ns32k, /* National Semiconductors ns32000 */
++ bfd_arch_w65, /* WDC 65816 */
++ bfd_arch_tic30, /* Texas Instruments TMS320C30 */
++ bfd_arch_tic4x, /* Texas Instruments TMS320C3X/4X */
++#define bfd_mach_tic3x 30
++#define bfd_mach_tic4x 40
++ bfd_arch_tic54x, /* Texas Instruments TMS320C54X */
++ bfd_arch_tic80, /* TI TMS320c80 (MVP) */
++ bfd_arch_v850, /* NEC V850 */
++#define bfd_mach_v850 1
++#define bfd_mach_v850e 'E'
++#define bfd_mach_v850e1 '1'
++ bfd_arch_arc, /* ARC Cores */
++#define bfd_mach_arc_5 5
++#define bfd_mach_arc_6 6
++#define bfd_mach_arc_7 7
++#define bfd_mach_arc_8 8
++ bfd_arch_m32c, /* Renesas M16C/M32C. */
++#define bfd_mach_m16c 0x75
++#define bfd_mach_m32c 0x78
++ bfd_arch_m32r, /* Renesas M32R (formerly Mitsubishi M32R/D) */
++#define bfd_mach_m32r 1 /* For backwards compatibility. */
++#define bfd_mach_m32rx 'x'
++#define bfd_mach_m32r2 '2'
++ bfd_arch_mn10200, /* Matsushita MN10200 */
++ bfd_arch_mn10300, /* Matsushita MN10300 */
++#define bfd_mach_mn10300 300
++#define bfd_mach_am33 330
++#define bfd_mach_am33_2 332
++ bfd_arch_fr30,
++#define bfd_mach_fr30 0x46523330
++ bfd_arch_frv,
++#define bfd_mach_frv 1
++#define bfd_mach_frvsimple 2
++#define bfd_mach_fr300 300
++#define bfd_mach_fr400 400
++#define bfd_mach_fr450 450
++#define bfd_mach_frvtomcat 499 /* fr500 prototype */
++#define bfd_mach_fr500 500
++#define bfd_mach_fr550 550
++ bfd_arch_mcore,
++ bfd_arch_ia64, /* HP/Intel ia64 */
++#define bfd_mach_ia64_elf64 64
++#define bfd_mach_ia64_elf32 32
++ bfd_arch_ip2k, /* Ubicom IP2K microcontrollers. */
++#define bfd_mach_ip2022 1
++#define bfd_mach_ip2022ext 2
++ bfd_arch_iq2000, /* Vitesse IQ2000. */
++#define bfd_mach_iq2000 1
++#define bfd_mach_iq10 2
++ bfd_arch_ms1,
++#define bfd_mach_ms1 1
++#define bfd_mach_mrisc2 2
++ bfd_arch_pj,
++ bfd_arch_avr, /* Atmel AVR microcontrollers. */
++#define bfd_mach_avr1 1
++#define bfd_mach_avr2 2
++#define bfd_mach_avr3 3
++#define bfd_mach_avr4 4
++#define bfd_mach_avr5 5
++ bfd_arch_cr16c, /* National Semiconductor CompactRISC. */
++#define bfd_mach_cr16c 1
++ bfd_arch_crx, /* National Semiconductor CRX. */
++#define bfd_mach_crx 1
++ bfd_arch_cris, /* Axis CRIS */
++#define bfd_mach_cris_v0_v10 255
++#define bfd_mach_cris_v32 32
++#define bfd_mach_cris_v10_v32 1032
++ bfd_arch_s390, /* IBM s390 */
++#define bfd_mach_s390_31 31
++#define bfd_mach_s390_64 64
++ bfd_arch_openrisc, /* OpenRISC */
++ bfd_arch_mmix, /* Donald Knuth's educational processor. */
++ bfd_arch_xstormy16,
++#define bfd_mach_xstormy16 1
++ bfd_arch_msp430, /* Texas Instruments MSP430 architecture. */
++#define bfd_mach_msp11 11
++#define bfd_mach_msp110 110
++#define bfd_mach_msp12 12
++#define bfd_mach_msp13 13
++#define bfd_mach_msp14 14
++#define bfd_mach_msp15 15
++#define bfd_mach_msp16 16
++#define bfd_mach_msp31 31
++#define bfd_mach_msp32 32
++#define bfd_mach_msp33 33
++#define bfd_mach_msp41 41
++#define bfd_mach_msp42 42
++#define bfd_mach_msp43 43
++#define bfd_mach_msp44 44
++ bfd_arch_xtensa, /* Tensilica's Xtensa cores. */
++#define bfd_mach_xtensa 1
++ bfd_arch_maxq, /* Dallas MAXQ 10/20 */
++#define bfd_mach_maxq10 10
++#define bfd_mach_maxq20 20
++ bfd_arch_last
++ };
++
++typedef struct bfd_arch_info
++{
++ int bits_per_word;
++ int bits_per_address;
++ int bits_per_byte;
++ enum bfd_architecture arch;
++ unsigned long mach;
++ const char *arch_name;
++ const char *printable_name;
++ unsigned int section_align_power;
++ /* TRUE if this is the default machine for the architecture.
++ The default arch should be the first entry for an arch so that
++ all the entries for that arch can be accessed via <<next>>. */
++ bfd_boolean the_default;
++ const struct bfd_arch_info * (*compatible)
++ (const struct bfd_arch_info *a, const struct bfd_arch_info *b);
++
++ bfd_boolean (*scan) (const struct bfd_arch_info *, const char *);
++
++ const struct bfd_arch_info *next;
++}
++bfd_arch_info_type;
++
++const char *bfd_printable_name (bfd *abfd);
++
++const bfd_arch_info_type *bfd_scan_arch (const char *string);
++
++const char **bfd_arch_list (void);
++
++const bfd_arch_info_type *bfd_arch_get_compatible
++ (const bfd *abfd, const bfd *bbfd, bfd_boolean accept_unknowns);
++
++void bfd_set_arch_info (bfd *abfd, const bfd_arch_info_type *arg);
++
++enum bfd_architecture bfd_get_arch (bfd *abfd);
++
++unsigned long bfd_get_mach (bfd *abfd);
++
++unsigned int bfd_arch_bits_per_byte (bfd *abfd);
++
++unsigned int bfd_arch_bits_per_address (bfd *abfd);
++
++const bfd_arch_info_type *bfd_get_arch_info (bfd *abfd);
++
++const bfd_arch_info_type *bfd_lookup_arch
++ (enum bfd_architecture arch, unsigned long machine);
++
++const char *bfd_printable_arch_mach
++ (enum bfd_architecture arch, unsigned long machine);
++
++unsigned int bfd_octets_per_byte (bfd *abfd);
++
++unsigned int bfd_arch_mach_octets_per_byte
++ (enum bfd_architecture arch, unsigned long machine);
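++
++/* Sketch: typical use of the architecture queries above - print a
++   BFD's printable architecture/machine name and test two BFDs for
++   compatibility.  Assumes <stdio.h>.  */
++#if 0
++static void
++describe_arch (bfd *ibfd, bfd *obfd)
++{
++  const char *name
++    = bfd_printable_arch_mach (bfd_get_arch (ibfd), bfd_get_mach (ibfd));
++  const bfd_arch_info_type *common
++    = bfd_arch_get_compatible (ibfd, obfd, FALSE);
++
++  printf ("%s: %s\n", name,
++          common != NULL ? common->printable_name : "incompatible");
++}
++#endif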
++
++/* Extracted from reloc.c. */
++typedef enum bfd_reloc_status
++{
++ /* No errors detected. */
++ bfd_reloc_ok,
++
++ /* The relocation was performed, but there was an overflow. */
++ bfd_reloc_overflow,
++
++ /* The address to relocate was not within the section supplied. */
++ bfd_reloc_outofrange,
++
++ /* Used by special functions. */
++ bfd_reloc_continue,
++
++ /* Unsupported relocation size requested. */
++ bfd_reloc_notsupported,
++
++ /* Unused. */
++ bfd_reloc_other,
++
++ /* The symbol to relocate against was undefined. */
++ bfd_reloc_undefined,
++
++ /* The relocation was performed, but may not be ok - presently
++ generated only when linking i960 coff files with i960 b.out
++ symbols. If this type is returned, the error_message argument
++ to bfd_perform_relocation will be set. */
++ bfd_reloc_dangerous
++ }
++ bfd_reloc_status_type;
++
++
++typedef struct reloc_cache_entry
++{
++ /* A pointer into the canonical table of pointers. */
++ struct bfd_symbol **sym_ptr_ptr;
++
++ /* offset in section. */
++ bfd_size_type address;
++
++ /* addend for relocation value. */
++ bfd_vma addend;
++
++ /* Pointer to how to perform the required relocation. */
++ reloc_howto_type *howto;
++
++}
++arelent;
++
++enum complain_overflow
++{
++ /* Do not complain on overflow. */
++ complain_overflow_dont,
++
++ /* Complain if the bitfield overflows, whether it is considered
++ as signed or unsigned. */
++ complain_overflow_bitfield,
++
++ /* Complain if the value overflows when considered as signed
++ number. */
++ complain_overflow_signed,
++
++ /* Complain if the value overflows when considered as an
++ unsigned number. */
++ complain_overflow_unsigned
++};
++
++struct reloc_howto_struct
++{
++  /* The type field has mainly a documentary use - the back end can
++     do what it wants with it, though normally the back end's idea
++     of an external reloc number is stored in this field.  For
++     example, a PC relative word relocation in a coff environment
++     has the type 023 - because that's what the outside world calls
++     a R_PCRWORD reloc.  */
++ unsigned int type;
++
++ /* The value the final relocation is shifted right by. This drops
++ unwanted data from the relocation. */
++ unsigned int rightshift;
++
++ /* The size of the item to be relocated. This is *not* a
++ power-of-two measure. To get the number of bytes operated
++ on by a type of relocation, use bfd_get_reloc_size. */
++ int size;
++
++ /* The number of bits in the item to be relocated. This is used
++ when doing overflow checking. */
++ unsigned int bitsize;
++
++ /* Notes that the relocation is relative to the location in the
++ data section of the addend. The relocation function will
++ subtract from the relocation value the address of the location
++ being relocated. */
++ bfd_boolean pc_relative;
++
++ /* The bit position of the reloc value in the destination.
++ The relocated value is left shifted by this amount. */
++ unsigned int bitpos;
++
++ /* What type of overflow error should be checked for when
++ relocating. */
++ enum complain_overflow complain_on_overflow;
++
++ /* If this field is non null, then the supplied function is
++ called rather than the normal function. This allows really
++ strange relocation methods to be accommodated (e.g., i960 callj
++ instructions). */
++ bfd_reloc_status_type (*special_function)
++ (bfd *, arelent *, struct bfd_symbol *, void *, asection *,
++ bfd *, char **);
++
++ /* The textual name of the relocation type. */
++ char *name;
++
++ /* Some formats record a relocation addend in the section contents
++ rather than with the relocation. For ELF formats this is the
++ distinction between USE_REL and USE_RELA (though the code checks
++ for USE_REL == 1/0). The value of this field is TRUE if the
++ addend is recorded with the section contents; when performing a
++ partial link (ld -r) the section contents (the data) will be
++ modified. The value of this field is FALSE if addends are
++ recorded with the relocation (in arelent.addend); when performing
++ a partial link the relocation will be modified.
++ All relocations for all ELF USE_RELA targets should set this field
++ to FALSE (values of TRUE should be looked on with suspicion).
++ However, the converse is not true: not all relocations of all ELF
++ USE_REL targets set this field to TRUE. Why this is so is peculiar
++ to each particular target. For relocs that aren't used in partial
++ links (e.g. GOT stuff) it doesn't matter what this is set to. */
++ bfd_boolean partial_inplace;
++
++ /* src_mask selects the part of the instruction (or data) to be used
++ in the relocation sum. If the target relocations don't have an
++     addend in the reloc, e.g. ELF USE_REL, src_mask will normally equal
++     dst_mask to extract the addend from the section contents.  If
++     relocations do have an addend in the reloc, e.g. ELF USE_RELA, this
++ field should be zero. Non-zero values for ELF USE_RELA targets are
++ bogus as in those cases the value in the dst_mask part of the
++ section contents should be treated as garbage. */
++ bfd_vma src_mask;
++
++ /* dst_mask selects which parts of the instruction (or data) are
++ replaced with a relocated value. */
++ bfd_vma dst_mask;
++
++ /* When some formats create PC relative instructions, they leave
++ the value of the pc of the place being relocated in the offset
++ slot of the instruction, so that a PC relative relocation can
++ be made just by adding in an ordinary offset (e.g., sun3 a.out).
++ Some formats leave the displacement part of an instruction
++ empty (e.g., m88k bcs); this flag signals the fact. */
++ bfd_boolean pcrel_offset;
++};
++
++#define HOWTO(C, R, S, B, P, BI, O, SF, NAME, INPLACE, MASKSRC, MASKDST, PC) \
++ { (unsigned) C, R, S, B, P, BI, O, SF, NAME, INPLACE, MASKSRC, MASKDST, PC }
++#define NEWHOWTO(FUNCTION, NAME, SIZE, REL, IN) \
++ HOWTO (0, 0, SIZE, 0, REL, 0, complain_overflow_dont, FUNCTION, \
++ NAME, FALSE, 0, 0, IN)
++
++#define EMPTY_HOWTO(C) \
++ HOWTO ((C), 0, 0, 0, FALSE, 0, complain_overflow_dont, NULL, \
++ NULL, FALSE, 0, 0, FALSE)
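++
++/* Sketch of a backend reloc table entry built with HOWTO above: a
++   plain 32-bit absolute relocation.  The reloc name is made up; a
++   real backend would also supply a special_function where needed.  */
++#if 0
++static reloc_howto_type howto_example_32 =
++  HOWTO (0,                     /* type */
++         0,                     /* rightshift */
++         2,                     /* size (0 = byte, 1 = short, 2 = long) */
++         32,                    /* bitsize */
++         FALSE,                 /* pc_relative */
++         0,                     /* bitpos */
++         complain_overflow_bitfield, /* complain_on_overflow */
++         NULL,                  /* special_function */
++         "R_EXAMPLE_32",        /* name */
++         FALSE,                 /* partial_inplace */
++         0,                     /* src_mask */
++         0xffffffff,            /* dst_mask */
++         FALSE);                /* pcrel_offset */
++#endif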
++
++#define HOWTO_PREPARE(relocation, symbol) \
++ { \
++ if (symbol != NULL) \
++ { \
++ if (bfd_is_com_section (symbol->section)) \
++ { \
++ relocation = 0; \
++ } \
++ else \
++ { \
++ relocation = symbol->value; \
++ } \
++ } \
++ }
++
++unsigned int bfd_get_reloc_size (reloc_howto_type *);
++
++typedef struct relent_chain
++{
++ arelent relent;
++ struct relent_chain *next;
++}
++arelent_chain;
++
++bfd_reloc_status_type bfd_check_overflow
++ (enum complain_overflow how,
++ unsigned int bitsize,
++ unsigned int rightshift,
++ unsigned int addrsize,
++ bfd_vma relocation);
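++
++/* Sketch: use bfd_check_overflow above to test whether VALUE fits a
++   signed 16-bit relocation field before installing it.  */
++#if 0
++static bfd_boolean
++fits_signed_16 (bfd *abfd, bfd_vma value)
++{
++  return (bfd_check_overflow (complain_overflow_signed, 16, 0,
++                              bfd_arch_bits_per_address (abfd), value)
++          == bfd_reloc_ok) ? TRUE : FALSE;
++}
++#endif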
++
++bfd_reloc_status_type bfd_perform_relocation
++ (bfd *abfd,
++ arelent *reloc_entry,
++ void *data,
++ asection *input_section,
++ bfd *output_bfd,
++ char **error_message);
++
++bfd_reloc_status_type bfd_install_relocation
++ (bfd *abfd,
++ arelent *reloc_entry,
++ void *data, bfd_vma data_start,
++ asection *input_section,
++ char **error_message);
++
++enum bfd_reloc_code_real {
++ _dummy_first_bfd_reloc_code_real,
++
++
++/* Basic absolute relocations of N bits. */
++ BFD_RELOC_64,
++ BFD_RELOC_32,
++ BFD_RELOC_26,
++ BFD_RELOC_24,
++ BFD_RELOC_16,
++ BFD_RELOC_14,
++ BFD_RELOC_8,
++
++/* PC-relative relocations. Sometimes these are relative to the address
++of the relocation itself; sometimes they are relative to the start of
++the section containing the relocation. It depends on the specific target.
++
++The 24-bit relocation is used in some Intel 960 configurations. */
++ BFD_RELOC_64_PCREL,
++ BFD_RELOC_32_PCREL,
++ BFD_RELOC_24_PCREL,
++ BFD_RELOC_16_PCREL,
++ BFD_RELOC_12_PCREL,
++ BFD_RELOC_8_PCREL,
++
++/* Section relative relocations. Some targets need this for DWARF2. */
++ BFD_RELOC_32_SECREL,
++
++/* For ELF. */
++ BFD_RELOC_32_GOT_PCREL,
++ BFD_RELOC_16_GOT_PCREL,
++ BFD_RELOC_8_GOT_PCREL,
++ BFD_RELOC_32_GOTOFF,
++ BFD_RELOC_16_GOTOFF,
++ BFD_RELOC_LO16_GOTOFF,
++ BFD_RELOC_HI16_GOTOFF,
++ BFD_RELOC_HI16_S_GOTOFF,
++ BFD_RELOC_8_GOTOFF,
++ BFD_RELOC_64_PLT_PCREL,
++ BFD_RELOC_32_PLT_PCREL,
++ BFD_RELOC_24_PLT_PCREL,
++ BFD_RELOC_16_PLT_PCREL,
++ BFD_RELOC_8_PLT_PCREL,
++ BFD_RELOC_64_PLTOFF,
++ BFD_RELOC_32_PLTOFF,
++ BFD_RELOC_16_PLTOFF,
++ BFD_RELOC_LO16_PLTOFF,
++ BFD_RELOC_HI16_PLTOFF,
++ BFD_RELOC_HI16_S_PLTOFF,
++ BFD_RELOC_8_PLTOFF,
++
++/* Relocations used by 68K ELF. */
++ BFD_RELOC_68K_GLOB_DAT,
++ BFD_RELOC_68K_JMP_SLOT,
++ BFD_RELOC_68K_RELATIVE,
++
++/* Linkage-table relative. */
++ BFD_RELOC_32_BASEREL,
++ BFD_RELOC_16_BASEREL,
++ BFD_RELOC_LO16_BASEREL,
++ BFD_RELOC_HI16_BASEREL,
++ BFD_RELOC_HI16_S_BASEREL,
++ BFD_RELOC_8_BASEREL,
++ BFD_RELOC_RVA,
++
++/* Absolute 8-bit relocation, but used to form an address like 0xFFnn. */
++ BFD_RELOC_8_FFnn,
++
++/* These PC-relative relocations are stored as word displacements --
++i.e., byte displacements shifted right two bits. The 30-bit word
++displacement (<<32_PCREL_S2>> -- 32 bits, shifted 2) is used on the
++SPARC. (SPARC tools generally refer to this as <<WDISP30>>.) The
++signed 16-bit displacement is used on the MIPS, and the 23-bit
++displacement is used on the Alpha. */
++ BFD_RELOC_32_PCREL_S2,
++ BFD_RELOC_16_PCREL_S2,
++ BFD_RELOC_23_PCREL_S2,
++
++/* High 22 bits and low 10 bits of 32-bit value, placed into lower bits of
++the target word. These are used on the SPARC. */
++ BFD_RELOC_HI22,
++ BFD_RELOC_LO10,
++
++/* For systems that allocate a Global Pointer register, these are
++displacements off that register. These relocation types are
++handled specially, because the value the register will have is
++decided relatively late. */
++ BFD_RELOC_GPREL16,
++ BFD_RELOC_GPREL32,
++
++/* Reloc types used for i960/b.out. */
++ BFD_RELOC_I960_CALLJ,
++
++/* SPARC ELF relocations. There is probably some overlap with other
++relocation types already defined. */
++ BFD_RELOC_NONE,
++ BFD_RELOC_SPARC_WDISP22,
++ BFD_RELOC_SPARC22,
++ BFD_RELOC_SPARC13,
++ BFD_RELOC_SPARC_GOT10,
++ BFD_RELOC_SPARC_GOT13,
++ BFD_RELOC_SPARC_GOT22,
++ BFD_RELOC_SPARC_PC10,
++ BFD_RELOC_SPARC_PC22,
++ BFD_RELOC_SPARC_WPLT30,
++ BFD_RELOC_SPARC_COPY,
++ BFD_RELOC_SPARC_GLOB_DAT,
++ BFD_RELOC_SPARC_JMP_SLOT,
++ BFD_RELOC_SPARC_RELATIVE,
++ BFD_RELOC_SPARC_UA16,
++ BFD_RELOC_SPARC_UA32,
++ BFD_RELOC_SPARC_UA64,
++
++/* I think these are specific to SPARC a.out (e.g., Sun 4). */
++ BFD_RELOC_SPARC_BASE13,
++ BFD_RELOC_SPARC_BASE22,
++
++/* SPARC64 relocations */
++#define BFD_RELOC_SPARC_64 BFD_RELOC_64
++ BFD_RELOC_SPARC_10,
++ BFD_RELOC_SPARC_11,
++ BFD_RELOC_SPARC_OLO10,
++ BFD_RELOC_SPARC_HH22,
++ BFD_RELOC_SPARC_HM10,
++ BFD_RELOC_SPARC_LM22,
++ BFD_RELOC_SPARC_PC_HH22,
++ BFD_RELOC_SPARC_PC_HM10,
++ BFD_RELOC_SPARC_PC_LM22,
++ BFD_RELOC_SPARC_WDISP16,
++ BFD_RELOC_SPARC_WDISP19,
++ BFD_RELOC_SPARC_7,
++ BFD_RELOC_SPARC_6,
++ BFD_RELOC_SPARC_5,
++#define BFD_RELOC_SPARC_DISP64 BFD_RELOC_64_PCREL
++ BFD_RELOC_SPARC_PLT32,
++ BFD_RELOC_SPARC_PLT64,
++ BFD_RELOC_SPARC_HIX22,
++ BFD_RELOC_SPARC_LOX10,
++ BFD_RELOC_SPARC_H44,
++ BFD_RELOC_SPARC_M44,
++ BFD_RELOC_SPARC_L44,
++ BFD_RELOC_SPARC_REGISTER,
++
++/* SPARC little endian relocation */
++ BFD_RELOC_SPARC_REV32,
++
++/* SPARC TLS relocations */
++ BFD_RELOC_SPARC_TLS_GD_HI22,
++ BFD_RELOC_SPARC_TLS_GD_LO10,
++ BFD_RELOC_SPARC_TLS_GD_ADD,
++ BFD_RELOC_SPARC_TLS_GD_CALL,
++ BFD_RELOC_SPARC_TLS_LDM_HI22,
++ BFD_RELOC_SPARC_TLS_LDM_LO10,
++ BFD_RELOC_SPARC_TLS_LDM_ADD,
++ BFD_RELOC_SPARC_TLS_LDM_CALL,
++ BFD_RELOC_SPARC_TLS_LDO_HIX22,
++ BFD_RELOC_SPARC_TLS_LDO_LOX10,
++ BFD_RELOC_SPARC_TLS_LDO_ADD,
++ BFD_RELOC_SPARC_TLS_IE_HI22,
++ BFD_RELOC_SPARC_TLS_IE_LO10,
++ BFD_RELOC_SPARC_TLS_IE_LD,
++ BFD_RELOC_SPARC_TLS_IE_LDX,
++ BFD_RELOC_SPARC_TLS_IE_ADD,
++ BFD_RELOC_SPARC_TLS_LE_HIX22,
++ BFD_RELOC_SPARC_TLS_LE_LOX10,
++ BFD_RELOC_SPARC_TLS_DTPMOD32,
++ BFD_RELOC_SPARC_TLS_DTPMOD64,
++ BFD_RELOC_SPARC_TLS_DTPOFF32,
++ BFD_RELOC_SPARC_TLS_DTPOFF64,
++ BFD_RELOC_SPARC_TLS_TPOFF32,
++ BFD_RELOC_SPARC_TLS_TPOFF64,
++
++/* Alpha ECOFF and ELF relocations. Some of these treat the symbol or
++"addend" in some special way.
++For GPDISP_HI16 ("gpdisp") relocations, the symbol is ignored when
++writing; when reading, it will be the absolute section symbol. The
++addend is the displacement in bytes of the "lda" instruction from
++the "ldah" instruction (which is at the address of this reloc). */
++ BFD_RELOC_ALPHA_GPDISP_HI16,
++
++/* For GPDISP_LO16 ("ignore") relocations, the symbol is handled as
++with GPDISP_HI16 relocs. The addend is ignored when writing the
++relocations out, and is filled in with the file's GP value on
++reading, for convenience. */
++ BFD_RELOC_ALPHA_GPDISP_LO16,
++
++/* The ELF GPDISP relocation is exactly the same as the GPDISP_HI16
++relocation except that there is no accompanying GPDISP_LO16
++relocation. */
++ BFD_RELOC_ALPHA_GPDISP,
++
++/* The Alpha LITERAL/LITUSE relocs are produced by a symbol reference;
++the assembler turns it into a LDQ instruction to load the address of
++the symbol, and then fills in a register in the real instruction.
++
++The LITERAL reloc, at the LDQ instruction, refers to the .lita
++section symbol. The addend is ignored when writing, but is filled
++in with the file's GP value on reading, for convenience, as with the
++GPDISP_LO16 reloc.
++
++The ELF_LITERAL reloc is somewhere between 16_GOTOFF and GPDISP_LO16.
++It should refer to the symbol to be referenced, as with 16_GOTOFF,
++but it generates output not based on the position within the .got
++section, but relative to the GP value chosen for the file during the
++final link stage.
++
++The LITUSE reloc, on the instruction using the loaded address, gives
++information to the linker that it might be able to use to optimize
++away some literal section references. The symbol is ignored (read
++as the absolute section symbol), and the "addend" indicates the type
++of instruction using the register:
++1 - "memory" fmt insn
++2 - byte-manipulation (byte offset reg)
++3 - jsr (target of branch) */
++ BFD_RELOC_ALPHA_LITERAL,
++ BFD_RELOC_ALPHA_ELF_LITERAL,
++ BFD_RELOC_ALPHA_LITUSE,
++
++/* The HINT relocation indicates a value that should be filled into the
++"hint" field of a jmp/jsr/ret instruction, for possible branch-
++prediction logic which may be provided on some processors. */
++ BFD_RELOC_ALPHA_HINT,
++
++/* The LINKAGE relocation outputs a linkage pair in the object file,
++which is filled by the linker. */
++ BFD_RELOC_ALPHA_LINKAGE,
++
++/* The CODEADDR relocation outputs a STO_CA in the object file,
++which is filled by the linker. */
++ BFD_RELOC_ALPHA_CODEADDR,
++
++/* The GPREL_HI/LO relocations together form a 32-bit offset from the
++GP register. */
++ BFD_RELOC_ALPHA_GPREL_HI16,
++ BFD_RELOC_ALPHA_GPREL_LO16,
++
++/* Like BFD_RELOC_23_PCREL_S2, except that the source and target must
++share a common GP, and the target address is adjusted for
++STO_ALPHA_STD_GPLOAD. */
++ BFD_RELOC_ALPHA_BRSGP,
++
++/* Alpha thread-local storage relocations. */
++ BFD_RELOC_ALPHA_TLSGD,
++ BFD_RELOC_ALPHA_TLSLDM,
++ BFD_RELOC_ALPHA_DTPMOD64,
++ BFD_RELOC_ALPHA_GOTDTPREL16,
++ BFD_RELOC_ALPHA_DTPREL64,
++ BFD_RELOC_ALPHA_DTPREL_HI16,
++ BFD_RELOC_ALPHA_DTPREL_LO16,
++ BFD_RELOC_ALPHA_DTPREL16,
++ BFD_RELOC_ALPHA_GOTTPREL16,
++ BFD_RELOC_ALPHA_TPREL64,
++ BFD_RELOC_ALPHA_TPREL_HI16,
++ BFD_RELOC_ALPHA_TPREL_LO16,
++ BFD_RELOC_ALPHA_TPREL16,
++
++/* Bits 27..2 of the relocation address shifted right 2 bits;
++simple reloc otherwise. */
++ BFD_RELOC_MIPS_JMP,
++
++/* The MIPS16 jump instruction. */
++ BFD_RELOC_MIPS16_JMP,
++
++/* MIPS16 GP relative reloc. */
++ BFD_RELOC_MIPS16_GPREL,
++
++/* High 16 bits of 32-bit value; simple reloc. */
++ BFD_RELOC_HI16,
++
++/* High 16 bits of 32-bit value but the low 16 bits will be sign
++extended and added to form the final result. If the low 16
++bits form a negative number, we need to add one to the high value
++to compensate for the borrow when the low bits are added. */
++ BFD_RELOC_HI16_S,
++
++/* Low 16 bits. */
++ BFD_RELOC_LO16,
++
++/* High 16 bits of 32-bit pc-relative value */
++ BFD_RELOC_HI16_PCREL,
++
++/* High 16 bits of 32-bit pc-relative value, adjusted */
++ BFD_RELOC_HI16_S_PCREL,
++
++/* Low 16 bits of pc-relative value */
++ BFD_RELOC_LO16_PCREL,
++
++/* MIPS16 high 16 bits of 32-bit value. */
++ BFD_RELOC_MIPS16_HI16,
++
++/* MIPS16 high 16 bits of 32-bit value but the low 16 bits will be sign
++extended and added to form the final result. If the low 16
++bits form a negative number, we need to add one to the high value
++to compensate for the borrow when the low bits are added. */
++ BFD_RELOC_MIPS16_HI16_S,
++
++/* MIPS16 low 16 bits. */
++ BFD_RELOC_MIPS16_LO16,
++
++/* Relocation against a MIPS literal section. */
++ BFD_RELOC_MIPS_LITERAL,
++
++/* MIPS ELF relocations. */
++ BFD_RELOC_MIPS_GOT16,
++ BFD_RELOC_MIPS_CALL16,
++ BFD_RELOC_MIPS_GOT_HI16,
++ BFD_RELOC_MIPS_GOT_LO16,
++ BFD_RELOC_MIPS_CALL_HI16,
++ BFD_RELOC_MIPS_CALL_LO16,
++ BFD_RELOC_MIPS_SUB,
++ BFD_RELOC_MIPS_GOT_PAGE,
++ BFD_RELOC_MIPS_GOT_OFST,
++ BFD_RELOC_MIPS_GOT_DISP,
++ BFD_RELOC_MIPS_SHIFT5,
++ BFD_RELOC_MIPS_SHIFT6,
++ BFD_RELOC_MIPS_INSERT_A,
++ BFD_RELOC_MIPS_INSERT_B,
++ BFD_RELOC_MIPS_DELETE,
++ BFD_RELOC_MIPS_HIGHEST,
++ BFD_RELOC_MIPS_HIGHER,
++ BFD_RELOC_MIPS_SCN_DISP,
++ BFD_RELOC_MIPS_REL16,
++ BFD_RELOC_MIPS_RELGOT,
++ BFD_RELOC_MIPS_JALR,
++ BFD_RELOC_MIPS_TLS_DTPMOD32,
++ BFD_RELOC_MIPS_TLS_DTPREL32,
++ BFD_RELOC_MIPS_TLS_DTPMOD64,
++ BFD_RELOC_MIPS_TLS_DTPREL64,
++ BFD_RELOC_MIPS_TLS_GD,
++ BFD_RELOC_MIPS_TLS_LDM,
++ BFD_RELOC_MIPS_TLS_DTPREL_HI16,
++ BFD_RELOC_MIPS_TLS_DTPREL_LO16,
++ BFD_RELOC_MIPS_TLS_GOTTPREL,
++ BFD_RELOC_MIPS_TLS_TPREL32,
++ BFD_RELOC_MIPS_TLS_TPREL64,
++ BFD_RELOC_MIPS_TLS_TPREL_HI16,
++ BFD_RELOC_MIPS_TLS_TPREL_LO16,
++
++
++/* Fujitsu Frv Relocations. */
++ BFD_RELOC_FRV_LABEL16,
++ BFD_RELOC_FRV_LABEL24,
++ BFD_RELOC_FRV_LO16,
++ BFD_RELOC_FRV_HI16,
++ BFD_RELOC_FRV_GPREL12,
++ BFD_RELOC_FRV_GPRELU12,
++ BFD_RELOC_FRV_GPREL32,
++ BFD_RELOC_FRV_GPRELHI,
++ BFD_RELOC_FRV_GPRELLO,
++ BFD_RELOC_FRV_GOT12,
++ BFD_RELOC_FRV_GOTHI,
++ BFD_RELOC_FRV_GOTLO,
++ BFD_RELOC_FRV_FUNCDESC,
++ BFD_RELOC_FRV_FUNCDESC_GOT12,
++ BFD_RELOC_FRV_FUNCDESC_GOTHI,
++ BFD_RELOC_FRV_FUNCDESC_GOTLO,
++ BFD_RELOC_FRV_FUNCDESC_VALUE,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFF12,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFFHI,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFFLO,
++ BFD_RELOC_FRV_GOTOFF12,
++ BFD_RELOC_FRV_GOTOFFHI,
++ BFD_RELOC_FRV_GOTOFFLO,
++ BFD_RELOC_FRV_GETTLSOFF,
++ BFD_RELOC_FRV_TLSDESC_VALUE,
++ BFD_RELOC_FRV_GOTTLSDESC12,
++ BFD_RELOC_FRV_GOTTLSDESCHI,
++ BFD_RELOC_FRV_GOTTLSDESCLO,
++ BFD_RELOC_FRV_TLSMOFF12,
++ BFD_RELOC_FRV_TLSMOFFHI,
++ BFD_RELOC_FRV_TLSMOFFLO,
++ BFD_RELOC_FRV_GOTTLSOFF12,
++ BFD_RELOC_FRV_GOTTLSOFFHI,
++ BFD_RELOC_FRV_GOTTLSOFFLO,
++ BFD_RELOC_FRV_TLSOFF,
++ BFD_RELOC_FRV_TLSDESC_RELAX,
++ BFD_RELOC_FRV_GETTLSOFF_RELAX,
++ BFD_RELOC_FRV_TLSOFF_RELAX,
++ BFD_RELOC_FRV_TLSMOFF,
++
++
++/* This is a 24bit GOT-relative reloc for the mn10300. */
++ BFD_RELOC_MN10300_GOTOFF24,
++
++/* This is a 32bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT32,
++
++/* This is a 24bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT24,
++
++/* This is a 16bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT16,
++
++/* Copy symbol at runtime. */
++ BFD_RELOC_MN10300_COPY,
++
++/* Create GOT entry. */
++ BFD_RELOC_MN10300_GLOB_DAT,
++
++/* Create PLT entry. */
++ BFD_RELOC_MN10300_JMP_SLOT,
++
++/* Adjust by program base. */
++ BFD_RELOC_MN10300_RELATIVE,
++
++
++/* i386/elf relocations */
++ BFD_RELOC_386_GOT32,
++ BFD_RELOC_386_PLT32,
++ BFD_RELOC_386_COPY,
++ BFD_RELOC_386_GLOB_DAT,
++ BFD_RELOC_386_JUMP_SLOT,
++ BFD_RELOC_386_RELATIVE,
++ BFD_RELOC_386_GOTOFF,
++ BFD_RELOC_386_GOTPC,
++ BFD_RELOC_386_TLS_TPOFF,
++ BFD_RELOC_386_TLS_IE,
++ BFD_RELOC_386_TLS_GOTIE,
++ BFD_RELOC_386_TLS_LE,
++ BFD_RELOC_386_TLS_GD,
++ BFD_RELOC_386_TLS_LDM,
++ BFD_RELOC_386_TLS_LDO_32,
++ BFD_RELOC_386_TLS_IE_32,
++ BFD_RELOC_386_TLS_LE_32,
++ BFD_RELOC_386_TLS_DTPMOD32,
++ BFD_RELOC_386_TLS_DTPOFF32,
++ BFD_RELOC_386_TLS_TPOFF32,
++
++/* x86-64/elf relocations */
++ BFD_RELOC_X86_64_GOT32,
++ BFD_RELOC_X86_64_PLT32,
++ BFD_RELOC_X86_64_COPY,
++ BFD_RELOC_X86_64_GLOB_DAT,
++ BFD_RELOC_X86_64_JUMP_SLOT,
++ BFD_RELOC_X86_64_RELATIVE,
++ BFD_RELOC_X86_64_GOTPCREL,
++ BFD_RELOC_X86_64_32S,
++ BFD_RELOC_X86_64_DTPMOD64,
++ BFD_RELOC_X86_64_DTPOFF64,
++ BFD_RELOC_X86_64_TPOFF64,
++ BFD_RELOC_X86_64_TLSGD,
++ BFD_RELOC_X86_64_TLSLD,
++ BFD_RELOC_X86_64_DTPOFF32,
++ BFD_RELOC_X86_64_GOTTPOFF,
++ BFD_RELOC_X86_64_TPOFF32,
++ BFD_RELOC_X86_64_GOTOFF64,
++ BFD_RELOC_X86_64_GOTPC32,
++
++/* ns32k relocations */
++ BFD_RELOC_NS32K_IMM_8,
++ BFD_RELOC_NS32K_IMM_16,
++ BFD_RELOC_NS32K_IMM_32,
++ BFD_RELOC_NS32K_IMM_8_PCREL,
++ BFD_RELOC_NS32K_IMM_16_PCREL,
++ BFD_RELOC_NS32K_IMM_32_PCREL,
++ BFD_RELOC_NS32K_DISP_8,
++ BFD_RELOC_NS32K_DISP_16,
++ BFD_RELOC_NS32K_DISP_32,
++ BFD_RELOC_NS32K_DISP_8_PCREL,
++ BFD_RELOC_NS32K_DISP_16_PCREL,
++ BFD_RELOC_NS32K_DISP_32_PCREL,
++
++/* PDP11 relocations */
++ BFD_RELOC_PDP11_DISP_8_PCREL,
++ BFD_RELOC_PDP11_DISP_6_PCREL,
++
++/* Picojava relocs. Not all of these appear in object files. */
++ BFD_RELOC_PJ_CODE_HI16,
++ BFD_RELOC_PJ_CODE_LO16,
++ BFD_RELOC_PJ_CODE_DIR16,
++ BFD_RELOC_PJ_CODE_DIR32,
++ BFD_RELOC_PJ_CODE_REL16,
++ BFD_RELOC_PJ_CODE_REL32,
++
++/* Power(rs6000) and PowerPC relocations. */
++ BFD_RELOC_PPC_B26,
++ BFD_RELOC_PPC_BA26,
++ BFD_RELOC_PPC_TOC16,
++ BFD_RELOC_PPC_B16,
++ BFD_RELOC_PPC_B16_BRTAKEN,
++ BFD_RELOC_PPC_B16_BRNTAKEN,
++ BFD_RELOC_PPC_BA16,
++ BFD_RELOC_PPC_BA16_BRTAKEN,
++ BFD_RELOC_PPC_BA16_BRNTAKEN,
++ BFD_RELOC_PPC_COPY,
++ BFD_RELOC_PPC_GLOB_DAT,
++ BFD_RELOC_PPC_JMP_SLOT,
++ BFD_RELOC_PPC_RELATIVE,
++ BFD_RELOC_PPC_LOCAL24PC,
++ BFD_RELOC_PPC_EMB_NADDR32,
++ BFD_RELOC_PPC_EMB_NADDR16,
++ BFD_RELOC_PPC_EMB_NADDR16_LO,
++ BFD_RELOC_PPC_EMB_NADDR16_HI,
++ BFD_RELOC_PPC_EMB_NADDR16_HA,
++ BFD_RELOC_PPC_EMB_SDAI16,
++ BFD_RELOC_PPC_EMB_SDA2I16,
++ BFD_RELOC_PPC_EMB_SDA2REL,
++ BFD_RELOC_PPC_EMB_SDA21,
++ BFD_RELOC_PPC_EMB_MRKREF,
++ BFD_RELOC_PPC_EMB_RELSEC16,
++ BFD_RELOC_PPC_EMB_RELST_LO,
++ BFD_RELOC_PPC_EMB_RELST_HI,
++ BFD_RELOC_PPC_EMB_RELST_HA,
++ BFD_RELOC_PPC_EMB_BIT_FLD,
++ BFD_RELOC_PPC_EMB_RELSDA,
++ BFD_RELOC_PPC64_HIGHER,
++ BFD_RELOC_PPC64_HIGHER_S,
++ BFD_RELOC_PPC64_HIGHEST,
++ BFD_RELOC_PPC64_HIGHEST_S,
++ BFD_RELOC_PPC64_TOC16_LO,
++ BFD_RELOC_PPC64_TOC16_HI,
++ BFD_RELOC_PPC64_TOC16_HA,
++ BFD_RELOC_PPC64_TOC,
++ BFD_RELOC_PPC64_PLTGOT16,
++ BFD_RELOC_PPC64_PLTGOT16_LO,
++ BFD_RELOC_PPC64_PLTGOT16_HI,
++ BFD_RELOC_PPC64_PLTGOT16_HA,
++ BFD_RELOC_PPC64_ADDR16_DS,
++ BFD_RELOC_PPC64_ADDR16_LO_DS,
++ BFD_RELOC_PPC64_GOT16_DS,
++ BFD_RELOC_PPC64_GOT16_LO_DS,
++ BFD_RELOC_PPC64_PLT16_LO_DS,
++ BFD_RELOC_PPC64_SECTOFF_DS,
++ BFD_RELOC_PPC64_SECTOFF_LO_DS,
++ BFD_RELOC_PPC64_TOC16_DS,
++ BFD_RELOC_PPC64_TOC16_LO_DS,
++ BFD_RELOC_PPC64_PLTGOT16_DS,
++ BFD_RELOC_PPC64_PLTGOT16_LO_DS,
++
++/* PowerPC and PowerPC64 thread-local storage relocations. */
++ BFD_RELOC_PPC_TLS,
++ BFD_RELOC_PPC_DTPMOD,
++ BFD_RELOC_PPC_TPREL16,
++ BFD_RELOC_PPC_TPREL16_LO,
++ BFD_RELOC_PPC_TPREL16_HI,
++ BFD_RELOC_PPC_TPREL16_HA,
++ BFD_RELOC_PPC_TPREL,
++ BFD_RELOC_PPC_DTPREL16,
++ BFD_RELOC_PPC_DTPREL16_LO,
++ BFD_RELOC_PPC_DTPREL16_HI,
++ BFD_RELOC_PPC_DTPREL16_HA,
++ BFD_RELOC_PPC_DTPREL,
++ BFD_RELOC_PPC_GOT_TLSGD16,
++ BFD_RELOC_PPC_GOT_TLSGD16_LO,
++ BFD_RELOC_PPC_GOT_TLSGD16_HI,
++ BFD_RELOC_PPC_GOT_TLSGD16_HA,
++ BFD_RELOC_PPC_GOT_TLSLD16,
++ BFD_RELOC_PPC_GOT_TLSLD16_LO,
++ BFD_RELOC_PPC_GOT_TLSLD16_HI,
++ BFD_RELOC_PPC_GOT_TLSLD16_HA,
++ BFD_RELOC_PPC_GOT_TPREL16,
++ BFD_RELOC_PPC_GOT_TPREL16_LO,
++ BFD_RELOC_PPC_GOT_TPREL16_HI,
++ BFD_RELOC_PPC_GOT_TPREL16_HA,
++ BFD_RELOC_PPC_GOT_DTPREL16,
++ BFD_RELOC_PPC_GOT_DTPREL16_LO,
++ BFD_RELOC_PPC_GOT_DTPREL16_HI,
++ BFD_RELOC_PPC_GOT_DTPREL16_HA,
++ BFD_RELOC_PPC64_TPREL16_DS,
++ BFD_RELOC_PPC64_TPREL16_LO_DS,
++ BFD_RELOC_PPC64_TPREL16_HIGHER,
++ BFD_RELOC_PPC64_TPREL16_HIGHERA,
++ BFD_RELOC_PPC64_TPREL16_HIGHEST,
++ BFD_RELOC_PPC64_TPREL16_HIGHESTA,
++ BFD_RELOC_PPC64_DTPREL16_DS,
++ BFD_RELOC_PPC64_DTPREL16_LO_DS,
++ BFD_RELOC_PPC64_DTPREL16_HIGHER,
++ BFD_RELOC_PPC64_DTPREL16_HIGHERA,
++ BFD_RELOC_PPC64_DTPREL16_HIGHEST,
++ BFD_RELOC_PPC64_DTPREL16_HIGHESTA,
++
++/* IBM 370/390 relocations */
++ BFD_RELOC_I370_D12,
++
++/* The type of reloc used to build a constructor table - at the moment
++probably a 32 bit wide absolute relocation, but the target can choose.
++It generally does map to one of the other relocation types. */
++ BFD_RELOC_CTOR,
++
++/* ARM 26 bit pc-relative branch. The lowest two bits must be zero and are
++not stored in the instruction. */
++ BFD_RELOC_ARM_PCREL_BRANCH,
++
++/* ARM 26 bit pc-relative branch. The lowest bit must be zero and is
++not stored in the instruction. The 2nd lowest bit comes from a 1 bit
++field in the instruction. */
++ BFD_RELOC_ARM_PCREL_BLX,
++
++/* Thumb 22 bit pc-relative branch. The lowest bit must be zero and is
++not stored in the instruction. The 2nd lowest bit comes from a 1 bit
++field in the instruction. */
++ BFD_RELOC_THUMB_PCREL_BLX,
++
++/* Thumb 7-, 9-, 12-, 20-, 23-, and 25-bit pc-relative branches.
++The lowest bit must be zero and is not stored in the instruction.
++Note that the corresponding ELF R_ARM_THM_JUMPnn constant has an
++"nn" one smaller in all cases. Note further that BRANCH23
++corresponds to R_ARM_THM_CALL. */
++ BFD_RELOC_THUMB_PCREL_BRANCH7,
++ BFD_RELOC_THUMB_PCREL_BRANCH9,
++ BFD_RELOC_THUMB_PCREL_BRANCH12,
++ BFD_RELOC_THUMB_PCREL_BRANCH20,
++ BFD_RELOC_THUMB_PCREL_BRANCH23,
++ BFD_RELOC_THUMB_PCREL_BRANCH25,
++
++/* 12-bit immediate offset, used in ARM-format ldr and str instructions. */
++ BFD_RELOC_ARM_OFFSET_IMM,
++
++/* 5-bit immediate offset, used in Thumb-format ldr and str instructions. */
++ BFD_RELOC_ARM_THUMB_OFFSET,
++
++/* Pc-relative or absolute relocation depending on target. Used for
++entries in .init_array sections. */
++ BFD_RELOC_ARM_TARGET1,
++
++/* Read-only segment base relative address. */
++ BFD_RELOC_ARM_ROSEGREL32,
++
++/* Data segment base relative address. */
++ BFD_RELOC_ARM_SBREL32,
++
++/* This reloc is used for references to RTTI data from exception handling
++tables. The actual definition depends on the target. It may be a
++pc-relative or some form of GOT-indirect relocation. */
++ BFD_RELOC_ARM_TARGET2,
++
++/* 31-bit PC relative address. */
++ BFD_RELOC_ARM_PREL31,
++
++/* Relocations for setting up GOTs and PLTs for shared libraries. */
++ BFD_RELOC_ARM_JUMP_SLOT,
++ BFD_RELOC_ARM_GLOB_DAT,
++ BFD_RELOC_ARM_GOT32,
++ BFD_RELOC_ARM_PLT32,
++ BFD_RELOC_ARM_RELATIVE,
++ BFD_RELOC_ARM_GOTOFF,
++ BFD_RELOC_ARM_GOTPC,
++
++/* ARM thread-local storage relocations. */
++ BFD_RELOC_ARM_TLS_GD32,
++ BFD_RELOC_ARM_TLS_LDO32,
++ BFD_RELOC_ARM_TLS_LDM32,
++ BFD_RELOC_ARM_TLS_DTPOFF32,
++ BFD_RELOC_ARM_TLS_DTPMOD32,
++ BFD_RELOC_ARM_TLS_TPOFF32,
++ BFD_RELOC_ARM_TLS_IE32,
++ BFD_RELOC_ARM_TLS_LE32,
++
++/* These relocs are only used within the ARM assembler. They are not
++(at present) written to any object files. */
++ BFD_RELOC_ARM_IMMEDIATE,
++ BFD_RELOC_ARM_ADRL_IMMEDIATE,
++ BFD_RELOC_ARM_T32_IMMEDIATE,
++ BFD_RELOC_ARM_SHIFT_IMM,
++ BFD_RELOC_ARM_SMI,
++ BFD_RELOC_ARM_SWI,
++ BFD_RELOC_ARM_MULTI,
++ BFD_RELOC_ARM_CP_OFF_IMM,
++ BFD_RELOC_ARM_CP_OFF_IMM_S2,
++ BFD_RELOC_ARM_ADR_IMM,
++ BFD_RELOC_ARM_LDR_IMM,
++ BFD_RELOC_ARM_LITERAL,
++ BFD_RELOC_ARM_IN_POOL,
++ BFD_RELOC_ARM_OFFSET_IMM8,
++ BFD_RELOC_ARM_T32_OFFSET_U8,
++ BFD_RELOC_ARM_T32_OFFSET_IMM,
++ BFD_RELOC_ARM_HWLITERAL,
++ BFD_RELOC_ARM_THUMB_ADD,
++ BFD_RELOC_ARM_THUMB_IMM,
++ BFD_RELOC_ARM_THUMB_SHIFT,
++
++/* Renesas / SuperH SH relocs. Not all of these appear in object files. */
++ BFD_RELOC_SH_PCDISP8BY2,
++ BFD_RELOC_SH_PCDISP12BY2,
++ BFD_RELOC_SH_IMM3,
++ BFD_RELOC_SH_IMM3U,
++ BFD_RELOC_SH_DISP12,
++ BFD_RELOC_SH_DISP12BY2,
++ BFD_RELOC_SH_DISP12BY4,
++ BFD_RELOC_SH_DISP12BY8,
++ BFD_RELOC_SH_DISP20,
++ BFD_RELOC_SH_DISP20BY8,
++ BFD_RELOC_SH_IMM4,
++ BFD_RELOC_SH_IMM4BY2,
++ BFD_RELOC_SH_IMM4BY4,
++ BFD_RELOC_SH_IMM8,
++ BFD_RELOC_SH_IMM8BY2,
++ BFD_RELOC_SH_IMM8BY4,
++ BFD_RELOC_SH_PCRELIMM8BY2,
++ BFD_RELOC_SH_PCRELIMM8BY4,
++ BFD_RELOC_SH_SWITCH16,
++ BFD_RELOC_SH_SWITCH32,
++ BFD_RELOC_SH_USES,
++ BFD_RELOC_SH_COUNT,
++ BFD_RELOC_SH_ALIGN,
++ BFD_RELOC_SH_CODE,
++ BFD_RELOC_SH_DATA,
++ BFD_RELOC_SH_LABEL,
++ BFD_RELOC_SH_LOOP_START,
++ BFD_RELOC_SH_LOOP_END,
++ BFD_RELOC_SH_COPY,
++ BFD_RELOC_SH_GLOB_DAT,
++ BFD_RELOC_SH_JMP_SLOT,
++ BFD_RELOC_SH_RELATIVE,
++ BFD_RELOC_SH_GOTPC,
++ BFD_RELOC_SH_GOT_LOW16,
++ BFD_RELOC_SH_GOT_MEDLOW16,
++ BFD_RELOC_SH_GOT_MEDHI16,
++ BFD_RELOC_SH_GOT_HI16,
++ BFD_RELOC_SH_GOTPLT_LOW16,
++ BFD_RELOC_SH_GOTPLT_MEDLOW16,
++ BFD_RELOC_SH_GOTPLT_MEDHI16,
++ BFD_RELOC_SH_GOTPLT_HI16,
++ BFD_RELOC_SH_PLT_LOW16,
++ BFD_RELOC_SH_PLT_MEDLOW16,
++ BFD_RELOC_SH_PLT_MEDHI16,
++ BFD_RELOC_SH_PLT_HI16,
++ BFD_RELOC_SH_GOTOFF_LOW16,
++ BFD_RELOC_SH_GOTOFF_MEDLOW16,
++ BFD_RELOC_SH_GOTOFF_MEDHI16,
++ BFD_RELOC_SH_GOTOFF_HI16,
++ BFD_RELOC_SH_GOTPC_LOW16,
++ BFD_RELOC_SH_GOTPC_MEDLOW16,
++ BFD_RELOC_SH_GOTPC_MEDHI16,
++ BFD_RELOC_SH_GOTPC_HI16,
++ BFD_RELOC_SH_COPY64,
++ BFD_RELOC_SH_GLOB_DAT64,
++ BFD_RELOC_SH_JMP_SLOT64,
++ BFD_RELOC_SH_RELATIVE64,
++ BFD_RELOC_SH_GOT10BY4,
++ BFD_RELOC_SH_GOT10BY8,
++ BFD_RELOC_SH_GOTPLT10BY4,
++ BFD_RELOC_SH_GOTPLT10BY8,
++ BFD_RELOC_SH_GOTPLT32,
++ BFD_RELOC_SH_SHMEDIA_CODE,
++ BFD_RELOC_SH_IMMU5,
++ BFD_RELOC_SH_IMMS6,
++ BFD_RELOC_SH_IMMS6BY32,
++ BFD_RELOC_SH_IMMU6,
++ BFD_RELOC_SH_IMMS10,
++ BFD_RELOC_SH_IMMS10BY2,
++ BFD_RELOC_SH_IMMS10BY4,
++ BFD_RELOC_SH_IMMS10BY8,
++ BFD_RELOC_SH_IMMS16,
++ BFD_RELOC_SH_IMMU16,
++ BFD_RELOC_SH_IMM_LOW16,
++ BFD_RELOC_SH_IMM_LOW16_PCREL,
++ BFD_RELOC_SH_IMM_MEDLOW16,
++ BFD_RELOC_SH_IMM_MEDLOW16_PCREL,
++ BFD_RELOC_SH_IMM_MEDHI16,
++ BFD_RELOC_SH_IMM_MEDHI16_PCREL,
++ BFD_RELOC_SH_IMM_HI16,
++ BFD_RELOC_SH_IMM_HI16_PCREL,
++ BFD_RELOC_SH_PT_16,
++ BFD_RELOC_SH_TLS_GD_32,
++ BFD_RELOC_SH_TLS_LD_32,
++ BFD_RELOC_SH_TLS_LDO_32,
++ BFD_RELOC_SH_TLS_IE_32,
++ BFD_RELOC_SH_TLS_LE_32,
++ BFD_RELOC_SH_TLS_DTPMOD32,
++ BFD_RELOC_SH_TLS_DTPOFF32,
++ BFD_RELOC_SH_TLS_TPOFF32,
++
++/* ARC Cores relocs.
++ARC 22 bit pc-relative branch. The lowest two bits must be zero and are
++not stored in the instruction. The high 20 bits are installed in bits 26
++through 7 of the instruction. */
++ BFD_RELOC_ARC_B22_PCREL,
++
++/* ARC 26 bit absolute branch. The lowest two bits must be zero and are not
++stored in the instruction. The high 24 bits are installed in bits 23
++through 0. */
++ BFD_RELOC_ARC_B26,
++
++/* Mitsubishi D10V relocs.
++This is a 10-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_10_PCREL_R,
++
++/* Mitsubishi D10V relocs.
++This is a 10-bit reloc with the right 2 bits
++assumed to be 0. This is the same as the previous reloc
++except it is in the left container, i.e.,
++shifted left 15 bits. */
++ BFD_RELOC_D10V_10_PCREL_L,
++
++/* This is an 18-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_18,
++
++/* This is an 18-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_18_PCREL,
++
++/* Mitsubishi D30V relocs.
++This is a 6-bit absolute reloc. */
++ BFD_RELOC_D30V_6,
++
++/* This is a 6-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_9_PCREL,
++
++/* This is a 6-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_9_PCREL_R,
++
++/* This is a 12-bit absolute reloc with the
++right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_15,
++
++/* This is a 12-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_15_PCREL,
++
++/* This is a 12-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_15_PCREL_R,
++
++/* This is an 18-bit absolute reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_21,
++
++/* This is an 18-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_21_PCREL,
++
++/* This is an 18-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_21_PCREL_R,
++
++/* This is a 32-bit absolute reloc. */
++ BFD_RELOC_D30V_32,
++
++/* This is a 32-bit pc-relative reloc. */
++ BFD_RELOC_D30V_32_PCREL,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_HI16_S,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_LO16,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_JMP26,
++
++/* Renesas M16C/M32C Relocations. */
++ BFD_RELOC_M16C_8_PCREL8,
++ BFD_RELOC_M16C_16_PCREL8,
++ BFD_RELOC_M16C_8_PCREL16,
++ BFD_RELOC_M16C_8_ELABEL24,
++ BFD_RELOC_M16C_8_ABS16,
++ BFD_RELOC_M16C_16_ABS16,
++ BFD_RELOC_M16C_16_ABS24,
++ BFD_RELOC_M16C_16_ABS32,
++ BFD_RELOC_M16C_24_ABS16,
++ BFD_RELOC_M16C_24_ABS24,
++ BFD_RELOC_M16C_24_ABS32,
++ BFD_RELOC_M16C_32_ABS16,
++ BFD_RELOC_M16C_32_ABS24,
++ BFD_RELOC_M16C_32_ABS32,
++ BFD_RELOC_M16C_40_ABS16,
++ BFD_RELOC_M16C_40_ABS24,
++ BFD_RELOC_M16C_40_ABS32,
++
++/* Renesas M32R (formerly Mitsubishi M32R) relocs.
++This is a 24 bit absolute address. */
++ BFD_RELOC_M32R_24,
++
++/* This is a 10-bit pc-relative reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_10_PCREL,
++
++/* This is an 18-bit reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_18_PCREL,
++
++/* This is a 26-bit reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_26_PCREL,
++
++/* This is a 16-bit reloc containing the high 16 bits of an address
++used when the lower 16 bits are treated as unsigned. */
++ BFD_RELOC_M32R_HI16_ULO,
++
++/* This is a 16-bit reloc containing the high 16 bits of an address
++used when the lower 16 bits are treated as signed. */
++ BFD_RELOC_M32R_HI16_SLO,
++
++/* This is a 16-bit reloc containing the lower 16 bits of an address. */
++ BFD_RELOC_M32R_LO16,
++
++/* This is a 16-bit reloc containing the small data area offset for use in
++add3, load, and store instructions. */
++ BFD_RELOC_M32R_SDA16,
++
++/* For PIC. */
++ BFD_RELOC_M32R_GOT24,
++ BFD_RELOC_M32R_26_PLTREL,
++ BFD_RELOC_M32R_COPY,
++ BFD_RELOC_M32R_GLOB_DAT,
++ BFD_RELOC_M32R_JMP_SLOT,
++ BFD_RELOC_M32R_RELATIVE,
++ BFD_RELOC_M32R_GOTOFF,
++ BFD_RELOC_M32R_GOTOFF_HI_ULO,
++ BFD_RELOC_M32R_GOTOFF_HI_SLO,
++ BFD_RELOC_M32R_GOTOFF_LO,
++ BFD_RELOC_M32R_GOTPC24,
++ BFD_RELOC_M32R_GOT16_HI_ULO,
++ BFD_RELOC_M32R_GOT16_HI_SLO,
++ BFD_RELOC_M32R_GOT16_LO,
++ BFD_RELOC_M32R_GOTPC_HI_ULO,
++ BFD_RELOC_M32R_GOTPC_HI_SLO,
++ BFD_RELOC_M32R_GOTPC_LO,
++
++/* This is a 9-bit reloc */
++ BFD_RELOC_V850_9_PCREL,
++
++/* This is a 22-bit reloc */
++ BFD_RELOC_V850_22_PCREL,
++
++/* This is a 16 bit offset from the short data area pointer. */
++ BFD_RELOC_V850_SDA_16_16_OFFSET,
++
++/* This is a 16 bit offset (of which only 15 bits are used) from the
++short data area pointer. */
++ BFD_RELOC_V850_SDA_15_16_OFFSET,
++
++/* This is a 16 bit offset from the zero data area pointer. */
++ BFD_RELOC_V850_ZDA_16_16_OFFSET,
++
++/* This is a 16 bit offset (of which only 15 bits are used) from the
++zero data area pointer. */
++ BFD_RELOC_V850_ZDA_15_16_OFFSET,
++
++/* This is an 8 bit offset (of which only 6 bits are used) from the
++tiny data area pointer. */
++ BFD_RELOC_V850_TDA_6_8_OFFSET,
++
++/* This is an 8 bit offset (of which only 7 bits are used) from the tiny
++data area pointer. */
++ BFD_RELOC_V850_TDA_7_8_OFFSET,
++
++/* This is a 7 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_7_7_OFFSET,
++
++/* This is a 16 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_16_16_OFFSET,
++
++/* This is a 5 bit offset (of which only 4 bits are used) from the tiny
++data area pointer. */
++ BFD_RELOC_V850_TDA_4_5_OFFSET,
++
++/* This is a 4 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_4_4_OFFSET,
++
++/* This is a 16 bit offset from the short data area pointer, with the
++bits placed non-contiguously in the instruction. */
++ BFD_RELOC_V850_SDA_16_16_SPLIT_OFFSET,
++
++/* This is a 16 bit offset from the zero data area pointer, with the
++bits placed non-contiguously in the instruction. */
++ BFD_RELOC_V850_ZDA_16_16_SPLIT_OFFSET,
++
++/* This is a 6 bit offset from the call table base pointer. */
++ BFD_RELOC_V850_CALLT_6_7_OFFSET,
++
++/* This is a 16 bit offset from the call table base pointer. */
++ BFD_RELOC_V850_CALLT_16_16_OFFSET,
++
++/* Used for relaxing indirect function calls. */
++ BFD_RELOC_V850_LONGCALL,
++
++/* Used for relaxing indirect jumps. */
++ BFD_RELOC_V850_LONGJUMP,
++
++/* Used to maintain alignment whilst relaxing. */
++ BFD_RELOC_V850_ALIGN,
++
++/* This is a variation of BFD_RELOC_LO16 that can be used in v850e ld.bu
++instructions. */
++ BFD_RELOC_V850_LO16_SPLIT_OFFSET,
++
++/* This is a 32bit pcrel reloc for the mn10300, offset by two bytes in the
++instruction. */
++ BFD_RELOC_MN10300_32_PCREL,
++
++/* This is a 16bit pcrel reloc for the mn10300, offset by two bytes in the
++instruction. */
++ BFD_RELOC_MN10300_16_PCREL,
++
++/* This is an 8bit DP reloc for the tms320c30, where the most
++significant 8 bits of a 24 bit word are placed into the least
++significant 8 bits of the opcode. */
++ BFD_RELOC_TIC30_LDP,
++
++/* This is a 7bit reloc for the tms320c54x, where the least
++significant 7 bits of a 16 bit word are placed into the least
++significant 7 bits of the opcode. */
++ BFD_RELOC_TIC54X_PARTLS7,
++
++/* This is a 9bit DP reloc for the tms320c54x, where the most
++significant 9 bits of a 16 bit word are placed into the least
++significant 9 bits of the opcode. */
++ BFD_RELOC_TIC54X_PARTMS9,
++
++/* This is a 23-bit extended address reloc for the tms320c54x. */
++ BFD_RELOC_TIC54X_23,
++
++/* This is a 16-bit reloc for the tms320c54x, where the least
++significant 16 bits of a 23-bit extended address are placed into
++the opcode. */
++ BFD_RELOC_TIC54X_16_OF_23,
++
++/* This is a reloc for the tms320c54x, where the most
++significant 7 bits of a 23-bit extended address are placed into
++the opcode. */
++ BFD_RELOC_TIC54X_MS7_OF_23,
++
++/* This is a 48 bit reloc for the FR30 that stores 32 bits. */
++ BFD_RELOC_FR30_48,
++
++/* This is a 32 bit reloc for the FR30 that stores 20 bits split up into
++two sections. */
++ BFD_RELOC_FR30_20,
++
++/* This is a 16 bit reloc for the FR30 that stores a 6 bit word offset in
++4 bits. */
++ BFD_RELOC_FR30_6_IN_4,
++
++/* This is a 16 bit reloc for the FR30 that stores an 8 bit byte offset
++into 8 bits. */
++ BFD_RELOC_FR30_8_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 9 bit short offset
++into 8 bits. */
++ BFD_RELOC_FR30_9_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 10 bit word offset
++into 8 bits. */
++ BFD_RELOC_FR30_10_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 9 bit pc relative
++short offset into 8 bits. */
++ BFD_RELOC_FR30_9_PCREL,
++
++/* This is a 16 bit reloc for the FR30 that stores a 12 bit pc relative
++short offset into 11 bits. */
++ BFD_RELOC_FR30_12_PCREL,
++
++/* Motorola Mcore relocations. */
++ BFD_RELOC_MCORE_PCREL_IMM8BY4,
++ BFD_RELOC_MCORE_PCREL_IMM11BY2,
++ BFD_RELOC_MCORE_PCREL_IMM4BY2,
++ BFD_RELOC_MCORE_PCREL_32,
++ BFD_RELOC_MCORE_PCREL_JSR_IMM11BY2,
++ BFD_RELOC_MCORE_RVA,
++
++/* These are relocations for the GETA instruction. */
++ BFD_RELOC_MMIX_GETA,
++ BFD_RELOC_MMIX_GETA_1,
++ BFD_RELOC_MMIX_GETA_2,
++ BFD_RELOC_MMIX_GETA_3,
++
++/* These are relocations for a conditional branch instruction. */
++ BFD_RELOC_MMIX_CBRANCH,
++ BFD_RELOC_MMIX_CBRANCH_J,
++ BFD_RELOC_MMIX_CBRANCH_1,
++ BFD_RELOC_MMIX_CBRANCH_2,
++ BFD_RELOC_MMIX_CBRANCH_3,
++
++/* These are relocations for the PUSHJ instruction. */
++ BFD_RELOC_MMIX_PUSHJ,
++ BFD_RELOC_MMIX_PUSHJ_1,
++ BFD_RELOC_MMIX_PUSHJ_2,
++ BFD_RELOC_MMIX_PUSHJ_3,
++ BFD_RELOC_MMIX_PUSHJ_STUBBABLE,
++
++/* These are relocations for the JMP instruction. */
++ BFD_RELOC_MMIX_JMP,
++ BFD_RELOC_MMIX_JMP_1,
++ BFD_RELOC_MMIX_JMP_2,
++ BFD_RELOC_MMIX_JMP_3,
++
++/* This is a relocation for a relative address as in a GETA instruction or
++a branch. */
++ BFD_RELOC_MMIX_ADDR19,
++
++/* This is a relocation for a relative address as in a JMP instruction. */
++ BFD_RELOC_MMIX_ADDR27,
++
++/* This is a relocation for an instruction field that may be a general
++register or a value 0..255. */
++ BFD_RELOC_MMIX_REG_OR_BYTE,
++
++/* This is a relocation for an instruction field that may be a general
++register. */
++ BFD_RELOC_MMIX_REG,
++
++/* This is a relocation for two instruction fields holding a register and
++an offset; together they form the base-plus-offset address that the
++relocation name describes. */
++ BFD_RELOC_MMIX_BASE_PLUS_OFFSET,
++
++/* This relocation is an assertion that the expression is not allocated as
++a global register. It does not modify contents. */
++ BFD_RELOC_MMIX_LOCAL,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit pc relative
++short offset into 7 bits. */
++ BFD_RELOC_AVR_7_PCREL,
++
++/* This is a 16 bit reloc for the AVR that stores 13 bit pc relative
++short offset into 12 bits. */
++ BFD_RELOC_AVR_13_PCREL,
++
++/* This is a 16 bit reloc for the AVR that stores 17 bit value (usually
++program memory address) into 16 bits. */
++ BFD_RELOC_AVR_16_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (usually
++data memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_LO8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (high 8 bit
++of data memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HI8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (most high 8 bit
++of program memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HH8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(usually data memory address) into 8 bit immediate value of SUBI insn. */
++ BFD_RELOC_AVR_LO8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 8 bit of data memory address) into 8 bit immediate value of
++SUBI insn. */
++ BFD_RELOC_AVR_HI8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(most high 8 bit of program memory address) into 8 bit immediate value
++of LDI or SUBI insn. */
++ BFD_RELOC_AVR_HH8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (usually
++command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_LO8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (high 8 bit
++of command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HI8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (most high 8 bit
++of command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HH8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(usually command address) into 8 bit immediate value of SUBI insn. */
++ BFD_RELOC_AVR_LO8_LDI_PM_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 8 bit of 16 bit command address) into 8 bit immediate value
++of SUBI insn. */
++ BFD_RELOC_AVR_HI8_LDI_PM_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 6 bit of 22 bit command address) into 8 bit immediate
++value of SUBI insn. */
++ BFD_RELOC_AVR_HH8_LDI_PM_NEG,
++
++/* This is a 32 bit reloc for the AVR that stores 23 bit value
++into 22 bits. */
++ BFD_RELOC_AVR_CALL,
++
++/* This is a 16 bit reloc for the AVR that stores all the bits needed
++for absolute addressing with ldi, with the overflow check deferred to
++link time. */
++ BFD_RELOC_AVR_LDI,
++
++/* This is a 6 bit reloc for the AVR that stores the offset for ldd/std
++instructions. */
++ BFD_RELOC_AVR_6,
++
++/* This is a 6 bit reloc for the AVR that stores the offset for adiw/sbiw
++instructions. */
++ BFD_RELOC_AVR_6_ADIW,
++
++/* Direct 12 bit. */
++ BFD_RELOC_390_12,
++
++/* 12 bit GOT offset. */
++ BFD_RELOC_390_GOT12,
++
++/* 32 bit PC relative PLT address. */
++ BFD_RELOC_390_PLT32,
++
++/* Copy symbol at runtime. */
++ BFD_RELOC_390_COPY,
++
++/* Create GOT entry. */
++ BFD_RELOC_390_GLOB_DAT,
++
++/* Create PLT entry. */
++ BFD_RELOC_390_JMP_SLOT,
++
++/* Adjust by program base. */
++ BFD_RELOC_390_RELATIVE,
++
++/* 32 bit PC relative offset to GOT. */
++ BFD_RELOC_390_GOTPC,
++
++/* 16 bit GOT offset. */
++ BFD_RELOC_390_GOT16,
++
++/* PC relative 16 bit shifted by 1. */
++ BFD_RELOC_390_PC16DBL,
++
++/* 16 bit PC rel. PLT shifted by 1. */
++ BFD_RELOC_390_PLT16DBL,
++
++/* PC relative 32 bit shifted by 1. */
++ BFD_RELOC_390_PC32DBL,
++
++/* 32 bit PC rel. PLT shifted by 1. */
++ BFD_RELOC_390_PLT32DBL,
++
++/* 32 bit PC rel. GOT shifted by 1. */
++ BFD_RELOC_390_GOTPCDBL,
++
++/* 64 bit GOT offset. */
++ BFD_RELOC_390_GOT64,
++
++/* 64 bit PC relative PLT address. */
++ BFD_RELOC_390_PLT64,
++
++/* 32 bit rel. offset to GOT entry. */
++ BFD_RELOC_390_GOTENT,
++
++/* 64 bit offset to GOT. */
++ BFD_RELOC_390_GOTOFF64,
++
++/* 12-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT12,
++
++/* 16-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT16,
++
++/* 32-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT32,
++
++/* 64-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT64,
++
++/* 32-bit rel. offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLTENT,
++
++/* 16-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF16,
++
++/* 32-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF32,
++
++/* 64-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF64,
++
++/* s390 TLS relocations. */
++ BFD_RELOC_390_TLS_LOAD,
++ BFD_RELOC_390_TLS_GDCALL,
++ BFD_RELOC_390_TLS_LDCALL,
++ BFD_RELOC_390_TLS_GD32,
++ BFD_RELOC_390_TLS_GD64,
++ BFD_RELOC_390_TLS_GOTIE12,
++ BFD_RELOC_390_TLS_GOTIE32,
++ BFD_RELOC_390_TLS_GOTIE64,
++ BFD_RELOC_390_TLS_LDM32,
++ BFD_RELOC_390_TLS_LDM64,
++ BFD_RELOC_390_TLS_IE32,
++ BFD_RELOC_390_TLS_IE64,
++ BFD_RELOC_390_TLS_IEENT,
++ BFD_RELOC_390_TLS_LE32,
++ BFD_RELOC_390_TLS_LE64,
++ BFD_RELOC_390_TLS_LDO32,
++ BFD_RELOC_390_TLS_LDO64,
++ BFD_RELOC_390_TLS_DTPMOD,
++ BFD_RELOC_390_TLS_DTPOFF,
++ BFD_RELOC_390_TLS_TPOFF,
++
++/* Long displacement extension. */
++ BFD_RELOC_390_20,
++ BFD_RELOC_390_GOT20,
++ BFD_RELOC_390_GOTPLT20,
++ BFD_RELOC_390_TLS_GOTIE20,
++
++/* Scenix IP2K - 9-bit register number / data address */
++ BFD_RELOC_IP2K_FR9,
++
++/* Scenix IP2K - 4-bit register/data bank number */
++ BFD_RELOC_IP2K_BANK,
++
++/* Scenix IP2K - low 13 bits of instruction word address */
++ BFD_RELOC_IP2K_ADDR16CJP,
++
++/* Scenix IP2K - high 3 bits of instruction word address */
++ BFD_RELOC_IP2K_PAGE3,
++
++/* Scenix IP2K - ext/low/high 8 bits of data address */
++ BFD_RELOC_IP2K_LO8DATA,
++ BFD_RELOC_IP2K_HI8DATA,
++ BFD_RELOC_IP2K_EX8DATA,
++
++/* Scenix IP2K - low/high 8 bits of instruction word address */
++ BFD_RELOC_IP2K_LO8INSN,
++ BFD_RELOC_IP2K_HI8INSN,
++
++/* Scenix IP2K - even/odd PC modifier to modify snb pcl.0 */
++ BFD_RELOC_IP2K_PC_SKIP,
++
++/* Scenix IP2K - 16 bit word address in text section. */
++ BFD_RELOC_IP2K_TEXT,
++
++/* Scenix IP2K - 7-bit sp or dp offset */
++ BFD_RELOC_IP2K_FR_OFFSET,
++
++/* Scenix VPE4K coprocessor - data/insn-space addressing */
++ BFD_RELOC_VPE4KMATH_DATA,
++ BFD_RELOC_VPE4KMATH_INSN,
++
++/* These two relocations are used by the linker to determine which of
++the entries in a C++ virtual function table are actually used. When
++the --gc-sections option is given, the linker will zero out the entries
++that are not used, so that the code for those functions need not be
++included in the output.
++
++VTABLE_INHERIT is a zero-space relocation used to describe to the
++linker the inheritance tree of a C++ virtual function table. The
++relocation's symbol should be the parent class' vtable, and the
++relocation should be located at the child vtable.
++
++VTABLE_ENTRY is a zero-space relocation that describes the use of a
++virtual function table entry. The reloc's symbol should refer to the
++table of the class mentioned in the code. Off of that base, an offset
++describes the entry that is being used. For Rela hosts, this offset
++is stored in the reloc's addend. For Rel hosts, we are forced to put
++this offset in the reloc's section offset. */
++ BFD_RELOC_VTABLE_INHERIT,
++ BFD_RELOC_VTABLE_ENTRY,
++
++/* Intel IA64 Relocations. */
++ BFD_RELOC_IA64_IMM14,
++ BFD_RELOC_IA64_IMM22,
++ BFD_RELOC_IA64_IMM64,
++ BFD_RELOC_IA64_DIR32MSB,
++ BFD_RELOC_IA64_DIR32LSB,
++ BFD_RELOC_IA64_DIR64MSB,
++ BFD_RELOC_IA64_DIR64LSB,
++ BFD_RELOC_IA64_GPREL22,
++ BFD_RELOC_IA64_GPREL64I,
++ BFD_RELOC_IA64_GPREL32MSB,
++ BFD_RELOC_IA64_GPREL32LSB,
++ BFD_RELOC_IA64_GPREL64MSB,
++ BFD_RELOC_IA64_GPREL64LSB,
++ BFD_RELOC_IA64_LTOFF22,
++ BFD_RELOC_IA64_LTOFF64I,
++ BFD_RELOC_IA64_PLTOFF22,
++ BFD_RELOC_IA64_PLTOFF64I,
++ BFD_RELOC_IA64_PLTOFF64MSB,
++ BFD_RELOC_IA64_PLTOFF64LSB,
++ BFD_RELOC_IA64_FPTR64I,
++ BFD_RELOC_IA64_FPTR32MSB,
++ BFD_RELOC_IA64_FPTR32LSB,
++ BFD_RELOC_IA64_FPTR64MSB,
++ BFD_RELOC_IA64_FPTR64LSB,
++ BFD_RELOC_IA64_PCREL21B,
++ BFD_RELOC_IA64_PCREL21BI,
++ BFD_RELOC_IA64_PCREL21M,
++ BFD_RELOC_IA64_PCREL21F,
++ BFD_RELOC_IA64_PCREL22,
++ BFD_RELOC_IA64_PCREL60B,
++ BFD_RELOC_IA64_PCREL64I,
++ BFD_RELOC_IA64_PCREL32MSB,
++ BFD_RELOC_IA64_PCREL32LSB,
++ BFD_RELOC_IA64_PCREL64MSB,
++ BFD_RELOC_IA64_PCREL64LSB,
++ BFD_RELOC_IA64_LTOFF_FPTR22,
++ BFD_RELOC_IA64_LTOFF_FPTR64I,
++ BFD_RELOC_IA64_LTOFF_FPTR32MSB,
++ BFD_RELOC_IA64_LTOFF_FPTR32LSB,
++ BFD_RELOC_IA64_LTOFF_FPTR64MSB,
++ BFD_RELOC_IA64_LTOFF_FPTR64LSB,
++ BFD_RELOC_IA64_SEGREL32MSB,
++ BFD_RELOC_IA64_SEGREL32LSB,
++ BFD_RELOC_IA64_SEGREL64MSB,
++ BFD_RELOC_IA64_SEGREL64LSB,
++ BFD_RELOC_IA64_SECREL32MSB,
++ BFD_RELOC_IA64_SECREL32LSB,
++ BFD_RELOC_IA64_SECREL64MSB,
++ BFD_RELOC_IA64_SECREL64LSB,
++ BFD_RELOC_IA64_REL32MSB,
++ BFD_RELOC_IA64_REL32LSB,
++ BFD_RELOC_IA64_REL64MSB,
++ BFD_RELOC_IA64_REL64LSB,
++ BFD_RELOC_IA64_LTV32MSB,
++ BFD_RELOC_IA64_LTV32LSB,
++ BFD_RELOC_IA64_LTV64MSB,
++ BFD_RELOC_IA64_LTV64LSB,
++ BFD_RELOC_IA64_IPLTMSB,
++ BFD_RELOC_IA64_IPLTLSB,
++ BFD_RELOC_IA64_COPY,
++ BFD_RELOC_IA64_LTOFF22X,
++ BFD_RELOC_IA64_LDXMOV,
++ BFD_RELOC_IA64_TPREL14,
++ BFD_RELOC_IA64_TPREL22,
++ BFD_RELOC_IA64_TPREL64I,
++ BFD_RELOC_IA64_TPREL64MSB,
++ BFD_RELOC_IA64_TPREL64LSB,
++ BFD_RELOC_IA64_LTOFF_TPREL22,
++ BFD_RELOC_IA64_DTPMOD64MSB,
++ BFD_RELOC_IA64_DTPMOD64LSB,
++ BFD_RELOC_IA64_LTOFF_DTPMOD22,
++ BFD_RELOC_IA64_DTPREL14,
++ BFD_RELOC_IA64_DTPREL22,
++ BFD_RELOC_IA64_DTPREL64I,
++ BFD_RELOC_IA64_DTPREL32MSB,
++ BFD_RELOC_IA64_DTPREL32LSB,
++ BFD_RELOC_IA64_DTPREL64MSB,
++ BFD_RELOC_IA64_DTPREL64LSB,
++ BFD_RELOC_IA64_LTOFF_DTPREL22,
++
++/* Motorola 68HC11 reloc.
++This is the 8 bit high part of an absolute address. */
++ BFD_RELOC_M68HC11_HI8,
++
++/* Motorola 68HC11 reloc.
++This is the 8 bit low part of an absolute address. */
++ BFD_RELOC_M68HC11_LO8,
++
++/* Motorola 68HC11 reloc.
++These are the 3 bits of a value. */
++ BFD_RELOC_M68HC11_3B,
++
++/* Motorola 68HC11 reloc.
++This reloc marks the beginning of a jump/call instruction.
++It is used for linker relaxation to correctly identify beginning
++of instruction and change some branches to use PC-relative
++addressing mode. */
++ BFD_RELOC_M68HC11_RL_JUMP,
++
++/* Motorola 68HC11 reloc.
++This reloc marks a group of several instructions that gcc generates
++and for which the linker relaxation pass can modify and/or remove
++some of them. */
++ BFD_RELOC_M68HC11_RL_GROUP,
++
++/* Motorola 68HC11 reloc.
++This is the 16-bit lower part of an address. It is used by the 'call'
++instruction to specify the symbol address without any special
++transformation (due to the memory bank window). */
++ BFD_RELOC_M68HC11_LO16,
++
++/* Motorola 68HC11 reloc.
++This is an 8-bit reloc that specifies the page number of an address.
++It is used by the 'call' instruction to specify the page number of
++the symbol. */
++ BFD_RELOC_M68HC11_PAGE,
++
++/* Motorola 68HC11 reloc.
++This is a 24-bit reloc that represents the address with a 16-bit
++value and an 8-bit page number. The symbol address is transformed
++to follow the 16K memory bank of 68HC12 (seen as mapped in the window). */
++ BFD_RELOC_M68HC11_24,
++
++/* Motorola 68HC12 reloc.
++These are the 5 bits of a value. */
++ BFD_RELOC_M68HC12_5B,
++
++/* NS CR16C Relocations. */
++ BFD_RELOC_16C_NUM08,
++ BFD_RELOC_16C_NUM08_C,
++ BFD_RELOC_16C_NUM16,
++ BFD_RELOC_16C_NUM16_C,
++ BFD_RELOC_16C_NUM32,
++ BFD_RELOC_16C_NUM32_C,
++ BFD_RELOC_16C_DISP04,
++ BFD_RELOC_16C_DISP04_C,
++ BFD_RELOC_16C_DISP08,
++ BFD_RELOC_16C_DISP08_C,
++ BFD_RELOC_16C_DISP16,
++ BFD_RELOC_16C_DISP16_C,
++ BFD_RELOC_16C_DISP24,
++ BFD_RELOC_16C_DISP24_C,
++ BFD_RELOC_16C_DISP24a,
++ BFD_RELOC_16C_DISP24a_C,
++ BFD_RELOC_16C_REG04,
++ BFD_RELOC_16C_REG04_C,
++ BFD_RELOC_16C_REG04a,
++ BFD_RELOC_16C_REG04a_C,
++ BFD_RELOC_16C_REG14,
++ BFD_RELOC_16C_REG14_C,
++ BFD_RELOC_16C_REG16,
++ BFD_RELOC_16C_REG16_C,
++ BFD_RELOC_16C_REG20,
++ BFD_RELOC_16C_REG20_C,
++ BFD_RELOC_16C_ABS20,
++ BFD_RELOC_16C_ABS20_C,
++ BFD_RELOC_16C_ABS24,
++ BFD_RELOC_16C_ABS24_C,
++ BFD_RELOC_16C_IMM04,
++ BFD_RELOC_16C_IMM04_C,
++ BFD_RELOC_16C_IMM16,
++ BFD_RELOC_16C_IMM16_C,
++ BFD_RELOC_16C_IMM20,
++ BFD_RELOC_16C_IMM20_C,
++ BFD_RELOC_16C_IMM24,
++ BFD_RELOC_16C_IMM24_C,
++ BFD_RELOC_16C_IMM32,
++ BFD_RELOC_16C_IMM32_C,
++
++/* NS CRX Relocations. */
++ BFD_RELOC_CRX_REL4,
++ BFD_RELOC_CRX_REL8,
++ BFD_RELOC_CRX_REL8_CMP,
++ BFD_RELOC_CRX_REL16,
++ BFD_RELOC_CRX_REL24,
++ BFD_RELOC_CRX_REL32,
++ BFD_RELOC_CRX_REGREL12,
++ BFD_RELOC_CRX_REGREL22,
++ BFD_RELOC_CRX_REGREL28,
++ BFD_RELOC_CRX_REGREL32,
++ BFD_RELOC_CRX_ABS16,
++ BFD_RELOC_CRX_ABS32,
++ BFD_RELOC_CRX_NUM8,
++ BFD_RELOC_CRX_NUM16,
++ BFD_RELOC_CRX_NUM32,
++ BFD_RELOC_CRX_IMM16,
++ BFD_RELOC_CRX_IMM32,
++ BFD_RELOC_CRX_SWITCH8,
++ BFD_RELOC_CRX_SWITCH16,
++ BFD_RELOC_CRX_SWITCH32,
++
++/* These relocs are only used within the CRIS assembler. They are not
++(at present) written to any object files. */
++ BFD_RELOC_CRIS_BDISP8,
++ BFD_RELOC_CRIS_UNSIGNED_5,
++ BFD_RELOC_CRIS_SIGNED_6,
++ BFD_RELOC_CRIS_UNSIGNED_6,
++ BFD_RELOC_CRIS_SIGNED_8,
++ BFD_RELOC_CRIS_UNSIGNED_8,
++ BFD_RELOC_CRIS_SIGNED_16,
++ BFD_RELOC_CRIS_UNSIGNED_16,
++ BFD_RELOC_CRIS_LAPCQ_OFFSET,
++ BFD_RELOC_CRIS_UNSIGNED_4,
++
++/* Relocs used in ELF shared libraries for CRIS. */
++ BFD_RELOC_CRIS_COPY,
++ BFD_RELOC_CRIS_GLOB_DAT,
++ BFD_RELOC_CRIS_JUMP_SLOT,
++ BFD_RELOC_CRIS_RELATIVE,
++
++/* 32-bit offset to symbol-entry within GOT. */
++ BFD_RELOC_CRIS_32_GOT,
++
++/* 16-bit offset to symbol-entry within GOT. */
++ BFD_RELOC_CRIS_16_GOT,
++
++/* 32-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_CRIS_32_GOTPLT,
++
++/* 16-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_CRIS_16_GOTPLT,
++
++/* 32-bit offset to symbol, relative to GOT. */
++ BFD_RELOC_CRIS_32_GOTREL,
++
++/* 32-bit offset to symbol with PLT entry, relative to GOT. */
++ BFD_RELOC_CRIS_32_PLT_GOTREL,
++
++/* 32-bit offset to symbol with PLT entry, relative to this relocation. */
++ BFD_RELOC_CRIS_32_PLT_PCREL,
++
++/* Intel i860 Relocations. */
++ BFD_RELOC_860_COPY,
++ BFD_RELOC_860_GLOB_DAT,
++ BFD_RELOC_860_JUMP_SLOT,
++ BFD_RELOC_860_RELATIVE,
++ BFD_RELOC_860_PC26,
++ BFD_RELOC_860_PLT26,
++ BFD_RELOC_860_PC16,
++ BFD_RELOC_860_LOW0,
++ BFD_RELOC_860_SPLIT0,
++ BFD_RELOC_860_LOW1,
++ BFD_RELOC_860_SPLIT1,
++ BFD_RELOC_860_LOW2,
++ BFD_RELOC_860_SPLIT2,
++ BFD_RELOC_860_LOW3,
++ BFD_RELOC_860_LOGOT0,
++ BFD_RELOC_860_SPGOT0,
++ BFD_RELOC_860_LOGOT1,
++ BFD_RELOC_860_SPGOT1,
++ BFD_RELOC_860_LOGOTOFF0,
++ BFD_RELOC_860_SPGOTOFF0,
++ BFD_RELOC_860_LOGOTOFF1,
++ BFD_RELOC_860_SPGOTOFF1,
++ BFD_RELOC_860_LOGOTOFF2,
++ BFD_RELOC_860_LOGOTOFF3,
++ BFD_RELOC_860_LOPC,
++ BFD_RELOC_860_HIGHADJ,
++ BFD_RELOC_860_HAGOT,
++ BFD_RELOC_860_HAGOTOFF,
++ BFD_RELOC_860_HAPC,
++ BFD_RELOC_860_HIGH,
++ BFD_RELOC_860_HIGOT,
++ BFD_RELOC_860_HIGOTOFF,
++
++/* OpenRISC Relocations. */
++ BFD_RELOC_OPENRISC_ABS_26,
++ BFD_RELOC_OPENRISC_REL_26,
++
++/* H8 elf Relocations. */
++ BFD_RELOC_H8_DIR16A8,
++ BFD_RELOC_H8_DIR16R8,
++ BFD_RELOC_H8_DIR24A8,
++ BFD_RELOC_H8_DIR24R8,
++ BFD_RELOC_H8_DIR32A16,
++
++/* Sony Xstormy16 Relocations. */
++ BFD_RELOC_XSTORMY16_REL_12,
++ BFD_RELOC_XSTORMY16_12,
++ BFD_RELOC_XSTORMY16_24,
++ BFD_RELOC_XSTORMY16_FPTR16,
++
++/* Relocations used by VAX ELF. */
++ BFD_RELOC_VAX_GLOB_DAT,
++ BFD_RELOC_VAX_JMP_SLOT,
++ BFD_RELOC_VAX_RELATIVE,
++
++/* Morpho MS1 - 16 bit immediate relocation. */
++ BFD_RELOC_MS1_PC16,
++
++/* Morpho MS1 - Hi 16 bits of an address. */
++ BFD_RELOC_MS1_HI16,
++
++/* Morpho MS1 - Low 16 bits of an address. */
++ BFD_RELOC_MS1_LO16,
++
++/* Morpho MS1 - Used to tell the linker which vtable entries are used. */
++ BFD_RELOC_MS1_GNU_VTINHERIT,
++
++/* Morpho MS1 - Used to tell the linker which vtable entries are used. */
++ BFD_RELOC_MS1_GNU_VTENTRY,
++
++/* msp430 specific relocation codes */
++ BFD_RELOC_MSP430_10_PCREL,
++ BFD_RELOC_MSP430_16_PCREL,
++ BFD_RELOC_MSP430_16,
++ BFD_RELOC_MSP430_16_PCREL_BYTE,
++ BFD_RELOC_MSP430_16_BYTE,
++ BFD_RELOC_MSP430_2X_PCREL,
++ BFD_RELOC_MSP430_RL_PCREL,
++
++/* IQ2000 Relocations. */
++ BFD_RELOC_IQ2000_OFFSET_16,
++ BFD_RELOC_IQ2000_OFFSET_21,
++ BFD_RELOC_IQ2000_UHI16,
++
++/* Special Xtensa relocation used only by PLT entries in ELF shared
++objects to indicate that the runtime linker should set the value
++to one of its own internal functions or data structures. */
++ BFD_RELOC_XTENSA_RTLD,
++
++/* Xtensa relocations for ELF shared objects. */
++ BFD_RELOC_XTENSA_GLOB_DAT,
++ BFD_RELOC_XTENSA_JMP_SLOT,
++ BFD_RELOC_XTENSA_RELATIVE,
++
++/* Xtensa relocation used in ELF object files for symbols that may require
++PLT entries. Otherwise, this is just a generic 32-bit relocation. */
++ BFD_RELOC_XTENSA_PLT,
++
++/* Xtensa relocations to mark the difference of two local symbols.
++These are only needed to support linker relaxation and can be ignored
++when not relaxing. The field is set to the value of the difference
++assuming no relaxation. The relocation encodes the position of the
++first symbol so the linker can determine whether to adjust the field
++value. */
++ BFD_RELOC_XTENSA_DIFF8,
++ BFD_RELOC_XTENSA_DIFF16,
++ BFD_RELOC_XTENSA_DIFF32,
++
++/* Generic Xtensa relocations for instruction operands. Only the slot
++number is encoded in the relocation. The relocation applies to the
++last PC-relative immediate operand, or if there are no PC-relative
++immediates, to the last immediate operand. */
++ BFD_RELOC_XTENSA_SLOT0_OP,
++ BFD_RELOC_XTENSA_SLOT1_OP,
++ BFD_RELOC_XTENSA_SLOT2_OP,
++ BFD_RELOC_XTENSA_SLOT3_OP,
++ BFD_RELOC_XTENSA_SLOT4_OP,
++ BFD_RELOC_XTENSA_SLOT5_OP,
++ BFD_RELOC_XTENSA_SLOT6_OP,
++ BFD_RELOC_XTENSA_SLOT7_OP,
++ BFD_RELOC_XTENSA_SLOT8_OP,
++ BFD_RELOC_XTENSA_SLOT9_OP,
++ BFD_RELOC_XTENSA_SLOT10_OP,
++ BFD_RELOC_XTENSA_SLOT11_OP,
++ BFD_RELOC_XTENSA_SLOT12_OP,
++ BFD_RELOC_XTENSA_SLOT13_OP,
++ BFD_RELOC_XTENSA_SLOT14_OP,
++
++/* Alternate Xtensa relocations. Only the slot is encoded in the
++relocation. The meaning of these relocations is opcode-specific. */
++ BFD_RELOC_XTENSA_SLOT0_ALT,
++ BFD_RELOC_XTENSA_SLOT1_ALT,
++ BFD_RELOC_XTENSA_SLOT2_ALT,
++ BFD_RELOC_XTENSA_SLOT3_ALT,
++ BFD_RELOC_XTENSA_SLOT4_ALT,
++ BFD_RELOC_XTENSA_SLOT5_ALT,
++ BFD_RELOC_XTENSA_SLOT6_ALT,
++ BFD_RELOC_XTENSA_SLOT7_ALT,
++ BFD_RELOC_XTENSA_SLOT8_ALT,
++ BFD_RELOC_XTENSA_SLOT9_ALT,
++ BFD_RELOC_XTENSA_SLOT10_ALT,
++ BFD_RELOC_XTENSA_SLOT11_ALT,
++ BFD_RELOC_XTENSA_SLOT12_ALT,
++ BFD_RELOC_XTENSA_SLOT13_ALT,
++ BFD_RELOC_XTENSA_SLOT14_ALT,
++
++/* Xtensa relocations for backward compatibility. These have all been
++replaced by BFD_RELOC_XTENSA_SLOT0_OP. */
++ BFD_RELOC_XTENSA_OP0,
++ BFD_RELOC_XTENSA_OP1,
++ BFD_RELOC_XTENSA_OP2,
++
++/* Xtensa relocation to mark that the assembler expanded the
++instructions from an original target. The expansion size is
++encoded in the reloc size. */
++ BFD_RELOC_XTENSA_ASM_EXPAND,
++
++/* Xtensa relocation to mark that the linker should simplify
++assembler-expanded instructions. This is commonly used
++internally by the linker after analysis of a
++BFD_RELOC_XTENSA_ASM_EXPAND. */
++ BFD_RELOC_XTENSA_ASM_SIMPLIFY,
++ BFD_RELOC_UNUSED };
++typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
++reloc_howto_type *bfd_reloc_type_lookup
++ (bfd *abfd, bfd_reloc_code_real_type code);
++
++const char *bfd_get_reloc_code_name (bfd_reloc_code_real_type code);
++
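The two declarations above are the portable way in and out of this enum: bfd_reloc_type_lookup maps a generic code to the target's howto entry, and bfd_get_reloc_code_name names the code for diagnostics. A minimal sketch of that round trip, assuming `abfd` is an already-opened BFD and relying on the `name` field of reloc_howto_type declared earlier in this header:

    #include <stdio.h>
    #include <bfd.h>

    /* Print the generic code name and the target's howto name, or note
       that the target does not implement the relocation at all.  */
    static void describe_reloc (bfd *abfd, bfd_reloc_code_real_type code)
    {
      reloc_howto_type *howto = bfd_reloc_type_lookup (abfd, code);

      if (howto == NULL)
        printf ("%s: not supported by this target\n",
                bfd_get_reloc_code_name (code));
      else
        printf ("%s -> %s\n", bfd_get_reloc_code_name (code), howto->name);
    }
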
++/* Extracted from syms.c. */
++
++typedef struct bfd_symbol
++{
++ /* A pointer to the BFD which owns the symbol. This information
++ is necessary so that a back end can work out what additional
++ information (invisible to the application writer) is carried
++ with the symbol.
++
++ This field is *almost* redundant, since you can use section->owner
++ instead, except that some symbols point to the global sections
++ bfd_{abs,com,und}_section. This could be fixed by making
++ these globals be per-bfd (or per-target-flavor). FIXME. */
++ struct bfd *the_bfd; /* Use bfd_asymbol_bfd(sym) to access this field. */
++
++ /* The text of the symbol. The name is left alone, and not copied; the
++ application may not alter it. */
++ const char *name;
++
++ /* The value of the symbol. This really should be a union of a
++ numeric value with a pointer, since some flags indicate that
++ a pointer to another symbol is stored here. */
++ symvalue value;
++
++ /* Attributes of a symbol. */
++#define BSF_NO_FLAGS 0x00
++
++ /* The symbol has local scope; <<static>> in <<C>>. The value
++ is the offset into the section of the data. */
++#define BSF_LOCAL 0x01
++
++ /* The symbol has global scope; initialized data in <<C>>. The
++ value is the offset into the section of the data. */
++#define BSF_GLOBAL 0x02
++
++ /* The symbol has global scope and is exported. The value is
++ the offset into the section of the data. */
++#define BSF_EXPORT BSF_GLOBAL /* No real difference. */
++
++ /* A normal C symbol would be one of:
++ <<BSF_LOCAL>>, <<BSF_FORT_COMM>>, <<BSF_UNDEFINED>> or
++ <<BSF_GLOBAL>>. */
++
++ /* The symbol is a debugging record. The value has an arbitrary
++ meaning, unless BSF_DEBUGGING_RELOC is also set. */
++#define BSF_DEBUGGING 0x08
++
++ /* The symbol denotes a function entry point. Used in ELF,
++ perhaps others someday. */
++#define BSF_FUNCTION 0x10
++
++ /* Used by the linker. */
++#define BSF_KEEP 0x20
++#define BSF_KEEP_G 0x40
++
++ /* A weak global symbol, overridable without warnings by
++ a regular global symbol of the same name. */
++#define BSF_WEAK 0x80
++
++ /* This symbol was created to point to a section, e.g. ELF's
++ STT_SECTION symbols. */
++#define BSF_SECTION_SYM 0x100
++
++ /* The symbol used to be a common symbol, but now it is
++ allocated. */
++#define BSF_OLD_COMMON 0x200
++
++ /* The default value for common data. */
++#define BFD_FORT_COMM_DEFAULT_VALUE 0
++
++ /* In some files the type of a symbol sometimes alters its
++ location in an output file - i.e. in COFF an <<ISFCN>> symbol
++ which is also a <<C_EXT>> symbol appears where it was
++ declared and not at the end of a section. This bit is set
++ by the target BFD part to convey this information. */
++#define BSF_NOT_AT_END 0x400
++
++ /* Signal that the symbol is the label of a constructor section. */
++#define BSF_CONSTRUCTOR 0x800
++
++ /* Signal that the symbol is a warning symbol. The name is a
++ warning. The name of the next symbol is the one to warn about;
++ if a reference is made to a symbol with the same name as the next
++ symbol, a warning is issued by the linker. */
++#define BSF_WARNING 0x1000
++
++ /* Signal that the symbol is indirect. This symbol is an indirect
++ pointer to the symbol with the same name as the next symbol. */
++#define BSF_INDIRECT 0x2000
++
++ /* BSF_FILE marks symbols that contain a file name. This is used
++ for ELF STT_FILE symbols. */
++#define BSF_FILE 0x4000
++
++ /* Symbol is from dynamic linking information. */
++#define BSF_DYNAMIC 0x8000
++
++ /* The symbol denotes a data object. Used in ELF, and perhaps
++ others someday. */
++#define BSF_OBJECT 0x10000
++
++ /* This symbol is a debugging symbol. The value is the offset
++ into the section of the data. BSF_DEBUGGING should be set
++ as well. */
++#define BSF_DEBUGGING_RELOC 0x20000
++
++ /* This symbol is thread local. Used in ELF. */
++#define BSF_THREAD_LOCAL 0x40000
++
++ flagword flags;
++
++ /* A pointer to the section to which this symbol is
++ relative. This will always be non-NULL; there are special
++ sections for undefined and absolute symbols. */
++ struct bfd_section *section;
++
++ /* Back end special data. */
++ union
++ {
++ void *p;
++ bfd_vma i;
++ }
++ udata;
++}
++asymbol;
++
++#define bfd_get_symtab_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_symtab_upper_bound, (abfd))
++
++bfd_boolean bfd_is_local_label (bfd *abfd, asymbol *sym);
++
++bfd_boolean bfd_is_local_label_name (bfd *abfd, const char *name);
++
++#define bfd_is_local_label_name(abfd, name) \
++ BFD_SEND (abfd, _bfd_is_local_label_name, (abfd, name))
++
++bfd_boolean bfd_is_target_special_symbol (bfd *abfd, asymbol *sym);
++
++#define bfd_is_target_special_symbol(abfd, sym) \
++ BFD_SEND (abfd, _bfd_is_target_special_symbol, (abfd, sym))
++
++#define bfd_canonicalize_symtab(abfd, location) \
++ BFD_SEND (abfd, _bfd_canonicalize_symtab, (abfd, location))
++
++bfd_boolean bfd_set_symtab
++ (bfd *abfd, asymbol **location, unsigned int count);
++
++void bfd_print_symbol_vandf (bfd *abfd, void *file, asymbol *symbol);
++
++#define bfd_make_empty_symbol(abfd) \
++ BFD_SEND (abfd, _bfd_make_empty_symbol, (abfd))
++
++asymbol *_bfd_generic_make_empty_symbol (bfd *);
++
++#define bfd_make_debug_symbol(abfd,ptr,size) \
++ BFD_SEND (abfd, _bfd_make_debug_symbol, (abfd, ptr, size))
++
++int bfd_decode_symclass (asymbol *symbol);
++
++bfd_boolean bfd_is_undefined_symclass (int symclass);
++
++void bfd_symbol_info (asymbol *symbol, symbol_info *ret);
++
++bfd_boolean bfd_copy_private_symbol_data
++ (bfd *ibfd, asymbol *isym, bfd *obfd, asymbol *osym);
++
++#define bfd_copy_private_symbol_data(ibfd, isymbol, obfd, osymbol) \
++ BFD_SEND (obfd, _bfd_copy_private_symbol_data, \
++ (ibfd, isymbol, obfd, osymbol))
++
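The symbol-table macros above follow BFD's usual two-step pattern: size the buffer with bfd_get_symtab_upper_bound, then let bfd_canonicalize_symtab fill it and return the entry count. A minimal sketch, assuming `abfd` is an already-opened object BFD and using the standard bfd_asymbol_name accessor from elsewhere in this header:

    #include <stdio.h>
    #include <stdlib.h>
    #include <bfd.h>

    /* Print the name of every global symbol in an opened object BFD.  */
    static void list_globals (bfd *abfd)
    {
      long storage = bfd_get_symtab_upper_bound (abfd);
      if (storage <= 0)
        return;                       /* No symbols, or an error.  */

      asymbol **syms = malloc (storage);
      long count = bfd_canonicalize_symtab (abfd, syms);

      for (long i = 0; i < count; i++)
        if (syms[i]->flags & BSF_GLOBAL)
          printf ("%s\n", bfd_asymbol_name (syms[i]));

      free (syms);
    }
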
++/* Extracted from bfd.c. */
++struct bfd
++{
++ /* A unique identifier of the BFD. */
++ unsigned int id;
++
++ /* The filename the application opened the BFD with. */
++ const char *filename;
++
++ /* A pointer to the target jump table. */
++ const struct bfd_target *xvec;
++
++ /* The IOSTREAM, and corresponding IO vector that provide access
++ to the file backing the BFD. */
++ void *iostream;
++ const struct bfd_iovec *iovec;
++
++ /* Is the file descriptor being cached? That is, can it be closed as
++ needed, and re-opened when accessed later? */
++ bfd_boolean cacheable;
++
++ /* Marks whether there was a default target specified when the
++ BFD was opened. This is used to select which matching algorithm
++ to use to choose the back end. */
++ bfd_boolean target_defaulted;
++
++ /* The caching routines use these to maintain a
++ least-recently-used list of BFDs. */
++ struct bfd *lru_prev, *lru_next;
++
++ /* When a file is closed by the caching routines, BFD retains
++ state information on the file here... */
++ ufile_ptr where;
++
++ /* ... and here: (``once'' means at least once). */
++ bfd_boolean opened_once;
++
++ /* Set if we have a locally maintained mtime value, rather than
++ getting it from the file each time. */
++ bfd_boolean mtime_set;
++
++ /* File modified time, if mtime_set is TRUE. */
++ long mtime;
++
++ /* Reserved for an unimplemented file locking extension. */
++ int ifd;
++
++ /* The format which belongs to the BFD. (object, core, etc.) */
++ bfd_format format;
++
++ /* The direction with which the BFD was opened. */
++ enum bfd_direction
++ {
++ no_direction = 0,
++ read_direction = 1,
++ write_direction = 2,
++ both_direction = 3
++ }
++ direction;
++
++ /* Format-specific flags. */
++ flagword flags;
++
++ /* Currently my_archive is tested before adding origin to
++ anything. I believe this could always be an add of origin,
++ with origin set to 0 for non-archive files. */
++ ufile_ptr origin;
++
++ /* Remember when output has begun, to stop strange things
++ from happening. */
++ bfd_boolean output_has_begun;
++
++ /* A hash table for section names. */
++ struct bfd_hash_table section_htab;
++
++ /* Pointer to linked list of sections. */
++ struct bfd_section *sections;
++
++ /* The last section on the section list. */
++ struct bfd_section *section_last;
++
++ /* The number of sections. */
++ unsigned int section_count;
++
++ /* Stuff only useful for object files:
++ The start address. */
++ bfd_vma start_address;
++
++ /* Used for input and output. */
++ unsigned int symcount;
++
++ /* Symbol table for output BFD (with symcount entries). */
++ struct bfd_symbol **outsymbols;
++
++ /* Used for slurped dynamic symbol tables. */
++ unsigned int dynsymcount;
++
++ /* Pointer to structure which contains architecture information. */
++ const struct bfd_arch_info *arch_info;
++
++ /* Flag set if symbols from this BFD should not be exported. */
++ bfd_boolean no_export;
++
++ /* Stuff only useful for archives. */
++ void *arelt_data;
++ struct bfd *my_archive; /* The containing archive BFD. */
++ struct bfd *next; /* The next BFD in the archive. */
++ struct bfd *archive_head; /* The first BFD in the archive. */
++ bfd_boolean has_armap;
++
++ /* A chain of BFD structures involved in a link. */
++ struct bfd *link_next;
++
++ /* A field used by _bfd_generic_link_add_archive_symbols. This will
++ be used only for archive elements. */
++ int archive_pass;
++
++ /* Used by the back end to hold private data. */
++ union
++ {
++ struct aout_data_struct *aout_data;
++ struct artdata *aout_ar_data;
++ struct _oasys_data *oasys_obj_data;
++ struct _oasys_ar_data *oasys_ar_data;
++ struct coff_tdata *coff_obj_data;
++ struct pe_tdata *pe_obj_data;
++ struct xcoff_tdata *xcoff_obj_data;
++ struct ecoff_tdata *ecoff_obj_data;
++ struct ieee_data_struct *ieee_data;
++ struct ieee_ar_data_struct *ieee_ar_data;
++ struct srec_data_struct *srec_data;
++ struct ihex_data_struct *ihex_data;
++ struct tekhex_data_struct *tekhex_data;
++ struct elf_obj_tdata *elf_obj_data;
++ struct nlm_obj_tdata *nlm_obj_data;
++ struct bout_data_struct *bout_data;
++ struct mmo_data_struct *mmo_data;
++ struct sun_core_struct *sun_core_data;
++ struct sco5_core_struct *sco5_core_data;
++ struct trad_core_struct *trad_core_data;
++ struct som_data_struct *som_data;
++ struct hpux_core_struct *hpux_core_data;
++ struct hppabsd_core_struct *hppabsd_core_data;
++ struct sgi_core_struct *sgi_core_data;
++ struct lynx_core_struct *lynx_core_data;
++ struct osf_core_struct *osf_core_data;
++ struct cisco_core_struct *cisco_core_data;
++ struct versados_data_struct *versados_data;
++ struct netbsd_core_struct *netbsd_core_data;
++ struct mach_o_data_struct *mach_o_data;
++ struct mach_o_fat_data_struct *mach_o_fat_data;
++ struct bfd_pef_data_struct *pef_data;
++ struct bfd_pef_xlib_data_struct *pef_xlib_data;
++ struct bfd_sym_data_struct *sym_data;
++ void *any;
++ }
++ tdata;
++
++ /* Used by the application to hold private data. */
++ void *usrdata;
++
++ /* Where all the allocated stuff under this BFD goes. This is a
++ struct objalloc *, but we use void * to avoid requiring the inclusion
++ of objalloc.h. */
++ void *memory;
++};
++
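The `sections` and `section_last` fields above head a linked list; each bfd_section (declared earlier in this header) carries a `next` pointer, so walking an opened BFD's sections needs no helper. A minimal sketch:

    #include <stdio.h>
    #include <bfd.h>

    /* Print the name and start address (VMA) of every section.  */
    static void dump_sections (bfd *abfd)
    {
      for (asection *s = abfd->sections; s != NULL; s = s->next)
        printf ("%-20s vma 0x%lx\n", s->name, (unsigned long) s->vma);
    }
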
++typedef enum bfd_error
++{
++ bfd_error_no_error = 0,
++ bfd_error_system_call,
++ bfd_error_invalid_target,
++ bfd_error_wrong_format,
++ bfd_error_wrong_object_format,
++ bfd_error_invalid_operation,
++ bfd_error_no_memory,
++ bfd_error_no_symbols,
++ bfd_error_no_armap,
++ bfd_error_no_more_archived_files,
++ bfd_error_malformed_archive,
++ bfd_error_file_not_recognized,
++ bfd_error_file_ambiguously_recognized,
++ bfd_error_no_contents,
++ bfd_error_nonrepresentable_section,
++ bfd_error_no_debug_section,
++ bfd_error_bad_value,
++ bfd_error_file_truncated,
++ bfd_error_file_too_big,
++ bfd_error_invalid_error_code
++}
++bfd_error_type;
++
++bfd_error_type bfd_get_error (void);
++
++void bfd_set_error (bfd_error_type error_tag);
++
++const char *bfd_errmsg (bfd_error_type error_tag);
++
++void bfd_perror (const char *message);
++
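These entry points follow the errno model: a failing BFD call records a bfd_error_type that bfd_get_error retrieves and bfd_errmsg renders as text. A minimal sketch, using bfd_init, bfd_openr and bfd_check_format from elsewhere in the library (the file name is only an example):

    #include <stdio.h>
    #include <bfd.h>

    int main (void)
    {
      bfd_init ();

      /* NULL target: let BFD guess the format.  */
      bfd *abfd = bfd_openr ("a.out", NULL);
      if (abfd == NULL || !bfd_check_format (abfd, bfd_object))
        {
          bfd_perror ("a.out");           /* perror-style report ...  */
          fprintf (stderr, "error: %s\n", /* ... or fetch it directly.  */
                   bfd_errmsg (bfd_get_error ()));
          return 1;
        }

      bfd_close (abfd);
      return 0;
    }
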
++typedef void (*bfd_error_handler_type) (const char *, ...);
++
++bfd_error_handler_type bfd_set_error_handler (bfd_error_handler_type);
++
++void bfd_set_error_program_name (const char *);
++
++bfd_error_handler_type bfd_get_error_handler (void);
++
++long bfd_get_reloc_upper_bound (bfd *abfd, asection *sect);
++
++long bfd_canonicalize_reloc
++ (bfd *abfd, asection *sec, arelent **loc, asymbol **syms);
++
++void bfd_set_reloc
++ (bfd *abfd, asection *sec, arelent **rel, unsigned int count);
++
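Relocations are read with the same two-step sizing pattern as symbols; note that bfd_canonicalize_reloc also takes the canonical symbol table, so each arelent can point back at its symbol. A minimal sketch, assuming `abfd`, `sec` and `syms` were obtained as in the symbol example:

    #include <stdio.h>
    #include <stdlib.h>
    #include <bfd.h>

    /* Print the address, howto name and symbol of every reloc in SEC.  */
    static void dump_relocs (bfd *abfd, asection *sec, asymbol **syms)
    {
      long size = bfd_get_reloc_upper_bound (abfd, sec);
      if (size <= 0)
        return;                       /* No relocations, or an error.  */

      arelent **relpp = malloc (size);
      long count = bfd_canonicalize_reloc (abfd, sec, relpp, syms);

      for (long i = 0; i < count; i++)
        printf ("0x%lx %s %s\n",
                (unsigned long) relpp[i]->address,
                relpp[i]->howto->name,
                bfd_asymbol_name (*relpp[i]->sym_ptr_ptr));

      free (relpp);
    }
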
++bfd_boolean bfd_set_file_flags (bfd *abfd, flagword flags);
++
++int bfd_get_arch_size (bfd *abfd);
++
++int bfd_get_sign_extend_vma (bfd *abfd);
++
++bfd_boolean bfd_set_start_address (bfd *abfd, bfd_vma vma);
++
++unsigned int bfd_get_gp_size (bfd *abfd);
++
++void bfd_set_gp_size (bfd *abfd, unsigned int i);
++
++bfd_vma bfd_scan_vma (const char *string, const char **end, int base);
++
++bfd_boolean bfd_copy_private_header_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_copy_private_header_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_copy_private_header_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_copy_private_bfd_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_copy_private_bfd_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_copy_private_bfd_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_merge_private_bfd_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_merge_private_bfd_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_merge_private_bfd_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_set_private_flags (bfd *abfd, flagword flags);
++
++#define bfd_set_private_flags(abfd, flags) \
++ BFD_SEND (abfd, _bfd_set_private_flags, (abfd, flags))
++#define bfd_sizeof_headers(abfd, reloc) \
++ BFD_SEND (abfd, _bfd_sizeof_headers, (abfd, reloc))
++
++#define bfd_find_nearest_line(abfd, sec, syms, off, file, func, line) \
++ BFD_SEND (abfd, _bfd_find_nearest_line, \
++ (abfd, sec, syms, off, file, func, line))
++
++#define bfd_find_line(abfd, syms, sym, file, line) \
++ BFD_SEND (abfd, _bfd_find_line, \
++ (abfd, syms, sym, file, line))
++
++#define bfd_find_inliner_info(abfd, file, func, line) \
++ BFD_SEND (abfd, _bfd_find_inliner_info, \
++ (abfd, file, func, line))
++
++#define bfd_debug_info_start(abfd) \
++ BFD_SEND (abfd, _bfd_debug_info_start, (abfd))
++
++#define bfd_debug_info_end(abfd) \
++ BFD_SEND (abfd, _bfd_debug_info_end, (abfd))
++
++#define bfd_debug_info_accumulate(abfd, section) \
++ BFD_SEND (abfd, _bfd_debug_info_accumulate, (abfd, section))
++
++#define bfd_stat_arch_elt(abfd, stat) \
++ BFD_SEND (abfd, _bfd_stat_arch_elt,(abfd, stat))
++
++#define bfd_update_armap_timestamp(abfd) \
++ BFD_SEND (abfd, _bfd_update_armap_timestamp, (abfd))
++
++#define bfd_set_arch_mach(abfd, arch, mach)\
++ BFD_SEND ( abfd, _bfd_set_arch_mach, (abfd, arch, mach))
++
++#define bfd_relax_section(abfd, section, link_info, again) \
++ BFD_SEND (abfd, _bfd_relax_section, (abfd, section, link_info, again))
++
++#define bfd_gc_sections(abfd, link_info) \
++ BFD_SEND (abfd, _bfd_gc_sections, (abfd, link_info))
++
++#define bfd_merge_sections(abfd, link_info) \
++ BFD_SEND (abfd, _bfd_merge_sections, (abfd, link_info))
++
++#define bfd_is_group_section(abfd, sec) \
++ BFD_SEND (abfd, _bfd_is_group_section, (abfd, sec))
++
++#define bfd_discard_group(abfd, sec) \
++ BFD_SEND (abfd, _bfd_discard_group, (abfd, sec))
++
++#define bfd_link_hash_table_create(abfd) \
++ BFD_SEND (abfd, _bfd_link_hash_table_create, (abfd))
++
++#define bfd_link_hash_table_free(abfd, hash) \
++ BFD_SEND (abfd, _bfd_link_hash_table_free, (hash))
++
++#define bfd_link_add_symbols(abfd, info) \
++ BFD_SEND (abfd, _bfd_link_add_symbols, (abfd, info))
++
++#define bfd_link_just_syms(abfd, sec, info) \
++ BFD_SEND (abfd, _bfd_link_just_syms, (sec, info))
++
++#define bfd_final_link(abfd, info) \
++ BFD_SEND (abfd, _bfd_final_link, (abfd, info))
++
++#define bfd_free_cached_info(abfd) \
++ BFD_SEND (abfd, _bfd_free_cached_info, (abfd))
++
++#define bfd_get_dynamic_symtab_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_dynamic_symtab_upper_bound, (abfd))
++
++#define bfd_print_private_bfd_data(abfd, file)\
++ BFD_SEND (abfd, _bfd_print_private_bfd_data, (abfd, file))
++
++#define bfd_canonicalize_dynamic_symtab(abfd, asymbols) \
++ BFD_SEND (abfd, _bfd_canonicalize_dynamic_symtab, (abfd, asymbols))
++
++#define bfd_get_synthetic_symtab(abfd, count, syms, dyncount, dynsyms, ret) \
++ BFD_SEND (abfd, _bfd_get_synthetic_symtab, (abfd, count, syms, \
++ dyncount, dynsyms, ret))
++
++#define bfd_get_dynamic_reloc_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_dynamic_reloc_upper_bound, (abfd))
++
++#define bfd_canonicalize_dynamic_reloc(abfd, arels, asyms) \
++ BFD_SEND (abfd, _bfd_canonicalize_dynamic_reloc, (abfd, arels, asyms))
++
++extern bfd_byte *bfd_get_relocated_section_contents
++ (bfd *, struct bfd_link_info *, struct bfd_link_order *, bfd_byte *,
++ bfd_boolean, asymbol **);
++
++bfd_boolean bfd_alt_mach_code (bfd *abfd, int alternative);
++
++struct bfd_preserve
++{
++ void *marker;
++ void *tdata;
++ flagword flags;
++ const struct bfd_arch_info *arch_info;
++ struct bfd_section *sections;
++ struct bfd_section *section_last;
++ unsigned int section_count;
++ struct bfd_hash_table section_htab;
++};
++
++bfd_boolean bfd_preserve_save (bfd *, struct bfd_preserve *);
++
++void bfd_preserve_restore (bfd *, struct bfd_preserve *);
++
++void bfd_preserve_finish (bfd *, struct bfd_preserve *);
++
++/* Extracted from archive.c. */
++symindex bfd_get_next_mapent
++ (bfd *abfd, symindex previous, carsym **sym);
++
++bfd_boolean bfd_set_archive_head (bfd *output, bfd *new_head);
++
++bfd *bfd_openr_next_archived_file (bfd *archive, bfd *previous);
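
bfd_openr_next_archived_file is the archive walker: passing NULL as the
previous element starts the walk, and each returned element is itself a
BFD. A sketch of the conventional loop, assuming `archive' has already
passed bfd_check_format (archive, bfd_archive):

    #include "bfd.h"
    #include <stdio.h>

    static void list_members (bfd *archive)
    {
      bfd *elt = NULL;
      while ((elt = bfd_openr_next_archived_file (archive, elt)) != NULL)
        printf ("%s\n", bfd_get_filename (elt));   /* the member name */
    }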
++
++/* Extracted from corefile.c. */
++const char *bfd_core_file_failing_command (bfd *abfd);
++
++int bfd_core_file_failing_signal (bfd *abfd);
++
++bfd_boolean core_file_matches_executable_p
++ (bfd *core_bfd, bfd *exec_bfd);
++
++/* Extracted from targets.c. */
++#define BFD_SEND(bfd, message, arglist) \
++ ((*((bfd)->xvec->message)) arglist)
++
++#ifdef DEBUG_BFD_SEND
++#undef BFD_SEND
++#define BFD_SEND(bfd, message, arglist) \
++ (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? \
++ ((*((bfd)->xvec->message)) arglist) : \
++ (bfd_assert (__FILE__,__LINE__), NULL))
++#endif
++#define BFD_SEND_FMT(bfd, message, arglist) \
++ (((bfd)->xvec->message[(int) ((bfd)->format)]) arglist)
++
++#ifdef DEBUG_BFD_SEND
++#undef BFD_SEND_FMT
++#define BFD_SEND_FMT(bfd, message, arglist) \
++ (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? \
++ (((bfd)->xvec->message[(int) ((bfd)->format)]) arglist) : \
++ (bfd_assert (__FILE__,__LINE__), NULL))
++#endif
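
BFD_SEND is the dispatch mechanism behind most of the wrapper macros in
this header: it pulls a function pointer out of the BFD's target vector
(xvec) and calls it, so the same call works for every backend. Purely
as an illustration of the expansion:

    /* bfd_set_arch_mach (abfd, arch, mach) expands, via BFD_SEND, to:  */
    (*(abfd)->xvec->_bfd_set_arch_mach) (abfd, arch, mach);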
++
++enum bfd_flavour
++{
++ bfd_target_unknown_flavour,
++ bfd_target_aout_flavour,
++ bfd_target_coff_flavour,
++ bfd_target_ecoff_flavour,
++ bfd_target_xcoff_flavour,
++ bfd_target_elf_flavour,
++ bfd_target_ieee_flavour,
++ bfd_target_nlm_flavour,
++ bfd_target_oasys_flavour,
++ bfd_target_tekhex_flavour,
++ bfd_target_srec_flavour,
++ bfd_target_ihex_flavour,
++ bfd_target_som_flavour,
++ bfd_target_os9k_flavour,
++ bfd_target_versados_flavour,
++ bfd_target_msdos_flavour,
++ bfd_target_ovax_flavour,
++ bfd_target_evax_flavour,
++ bfd_target_mmo_flavour,
++ bfd_target_mach_o_flavour,
++ bfd_target_pef_flavour,
++ bfd_target_pef_xlib_flavour,
++ bfd_target_sym_flavour
++};
++
++enum bfd_endian { BFD_ENDIAN_BIG, BFD_ENDIAN_LITTLE, BFD_ENDIAN_UNKNOWN };
++
++/* Forward declaration. */
++typedef struct bfd_link_info _bfd_link_info;
++
++typedef struct bfd_target
++{
++ /* Identifies the kind of target, e.g., SunOS4, Ultrix, etc. */
++ char *name;
++
++ /* The "flavour" of a back end is a general indication about
++ the contents of a file. */
++ enum bfd_flavour flavour;
++
++ /* The order of bytes within the data area of a file. */
++ enum bfd_endian byteorder;
++
++ /* The order of bytes within the header parts of a file. */
++ enum bfd_endian header_byteorder;
++
++ /* A mask of all the flags which an executable may have set -
++ from the set <<BFD_NO_FLAGS>>, <<HAS_RELOC>>, ...<<D_PAGED>>. */
++ flagword object_flags;
++
++ /* A mask of all the flags which a section may have set - from
++ the set <<SEC_NO_FLAGS>>, <<SEC_ALLOC>>, ...<<SET_NEVER_LOAD>>. */
++ flagword section_flags;
++
++  /* The character normally found at the front of a symbol
++     (if any), perhaps `_'.  */
++ char symbol_leading_char;
++
++ /* The pad character for file names within an archive header. */
++ char ar_pad_char;
++
++ /* The maximum number of characters in an archive header. */
++ unsigned short ar_max_namelen;
++
++ /* Entries for byte swapping for data. These are different from the
++ other entry points, since they don't take a BFD as the first argument.
++ Certain other handlers could do the same. */
++ bfd_uint64_t (*bfd_getx64) (const void *);
++ bfd_int64_t (*bfd_getx_signed_64) (const void *);
++ void (*bfd_putx64) (bfd_uint64_t, void *);
++ bfd_vma (*bfd_getx32) (const void *);
++ bfd_signed_vma (*bfd_getx_signed_32) (const void *);
++ void (*bfd_putx32) (bfd_vma, void *);
++ bfd_vma (*bfd_getx16) (const void *);
++ bfd_signed_vma (*bfd_getx_signed_16) (const void *);
++ void (*bfd_putx16) (bfd_vma, void *);
++
++ /* Byte swapping for the headers. */
++ bfd_uint64_t (*bfd_h_getx64) (const void *);
++ bfd_int64_t (*bfd_h_getx_signed_64) (const void *);
++ void (*bfd_h_putx64) (bfd_uint64_t, void *);
++ bfd_vma (*bfd_h_getx32) (const void *);
++ bfd_signed_vma (*bfd_h_getx_signed_32) (const void *);
++ void (*bfd_h_putx32) (bfd_vma, void *);
++ bfd_vma (*bfd_h_getx16) (const void *);
++ bfd_signed_vma (*bfd_h_getx_signed_16) (const void *);
++ void (*bfd_h_putx16) (bfd_vma, void *);
++
++ /* Format dependent routines: these are vectors of entry points
++ within the target vector structure, one for each format to check. */
++
++ /* Check the format of a file being read. Return a <<bfd_target *>> or zero. */
++ const struct bfd_target *(*_bfd_check_format[bfd_type_end]) (bfd *);
++
++ /* Set the format of a file being written. */
++ bfd_boolean (*_bfd_set_format[bfd_type_end]) (bfd *);
++
++ /* Write cached information into a file being written, at <<bfd_close>>. */
++ bfd_boolean (*_bfd_write_contents[bfd_type_end]) (bfd *);
++
++
++ /* Generic entry points. */
++#define BFD_JUMP_TABLE_GENERIC(NAME) \
++ NAME##_close_and_cleanup, \
++ NAME##_bfd_free_cached_info, \
++ NAME##_new_section_hook, \
++ NAME##_get_section_contents, \
++ NAME##_get_section_contents_in_window
++
++ /* Called when the BFD is being closed to do any necessary cleanup. */
++ bfd_boolean (*_close_and_cleanup) (bfd *);
++ /* Ask the BFD to free all cached information. */
++ bfd_boolean (*_bfd_free_cached_info) (bfd *);
++ /* Called when a new section is created. */
++ bfd_boolean (*_new_section_hook) (bfd *, sec_ptr);
++ /* Read the contents of a section. */
++ bfd_boolean (*_bfd_get_section_contents)
++ (bfd *, sec_ptr, void *, file_ptr, bfd_size_type);
++ bfd_boolean (*_bfd_get_section_contents_in_window)
++ (bfd *, sec_ptr, bfd_window *, file_ptr, bfd_size_type);
++
++ /* Entry points to copy private data. */
++#define BFD_JUMP_TABLE_COPY(NAME) \
++ NAME##_bfd_copy_private_bfd_data, \
++ NAME##_bfd_merge_private_bfd_data, \
++ NAME##_bfd_copy_private_section_data, \
++ NAME##_bfd_copy_private_symbol_data, \
++ NAME##_bfd_copy_private_header_data, \
++ NAME##_bfd_set_private_flags, \
++ NAME##_bfd_print_private_bfd_data
++
++ /* Called to copy BFD general private data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_bfd_data) (bfd *, bfd *);
++ /* Called to merge BFD general private data from one object file
++ to a common output file when linking. */
++ bfd_boolean (*_bfd_merge_private_bfd_data) (bfd *, bfd *);
++ /* Called to copy BFD private section data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_section_data)
++ (bfd *, sec_ptr, bfd *, sec_ptr);
++ /* Called to copy BFD private symbol data from one symbol
++ to another. */
++ bfd_boolean (*_bfd_copy_private_symbol_data)
++ (bfd *, asymbol *, bfd *, asymbol *);
++ /* Called to copy BFD private header data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_header_data)
++ (bfd *, bfd *);
++ /* Called to set private backend flags. */
++ bfd_boolean (*_bfd_set_private_flags) (bfd *, flagword);
++
++ /* Called to print private BFD data. */
++ bfd_boolean (*_bfd_print_private_bfd_data) (bfd *, void *);
++
++ /* Core file entry points. */
++#define BFD_JUMP_TABLE_CORE(NAME) \
++ NAME##_core_file_failing_command, \
++ NAME##_core_file_failing_signal, \
++ NAME##_core_file_matches_executable_p
++
++ char * (*_core_file_failing_command) (bfd *);
++ int (*_core_file_failing_signal) (bfd *);
++ bfd_boolean (*_core_file_matches_executable_p) (bfd *, bfd *);
++
++ /* Archive entry points. */
++#define BFD_JUMP_TABLE_ARCHIVE(NAME) \
++ NAME##_slurp_armap, \
++ NAME##_slurp_extended_name_table, \
++ NAME##_construct_extended_name_table, \
++ NAME##_truncate_arname, \
++ NAME##_write_armap, \
++ NAME##_read_ar_hdr, \
++ NAME##_openr_next_archived_file, \
++ NAME##_get_elt_at_index, \
++ NAME##_generic_stat_arch_elt, \
++ NAME##_update_armap_timestamp
++
++ bfd_boolean (*_bfd_slurp_armap) (bfd *);
++ bfd_boolean (*_bfd_slurp_extended_name_table) (bfd *);
++ bfd_boolean (*_bfd_construct_extended_name_table)
++ (bfd *, char **, bfd_size_type *, const char **);
++ void (*_bfd_truncate_arname) (bfd *, const char *, char *);
++ bfd_boolean (*write_armap)
++ (bfd *, unsigned int, struct orl *, unsigned int, int);
++ void * (*_bfd_read_ar_hdr_fn) (bfd *);
++ bfd * (*openr_next_archived_file) (bfd *, bfd *);
++#define bfd_get_elt_at_index(b,i) BFD_SEND (b, _bfd_get_elt_at_index, (b,i))
++ bfd * (*_bfd_get_elt_at_index) (bfd *, symindex);
++ int (*_bfd_stat_arch_elt) (bfd *, struct stat *);
++ bfd_boolean (*_bfd_update_armap_timestamp) (bfd *);
++
++ /* Entry points used for symbols. */
++#define BFD_JUMP_TABLE_SYMBOLS(NAME) \
++ NAME##_get_symtab_upper_bound, \
++ NAME##_canonicalize_symtab, \
++ NAME##_make_empty_symbol, \
++ NAME##_print_symbol, \
++ NAME##_get_symbol_info, \
++ NAME##_bfd_is_local_label_name, \
++ NAME##_bfd_is_target_special_symbol, \
++ NAME##_get_lineno, \
++ NAME##_find_nearest_line, \
++ _bfd_generic_find_line, \
++ NAME##_find_inliner_info, \
++ NAME##_bfd_make_debug_symbol, \
++ NAME##_read_minisymbols, \
++ NAME##_minisymbol_to_symbol
++
++ long (*_bfd_get_symtab_upper_bound) (bfd *);
++ long (*_bfd_canonicalize_symtab)
++ (bfd *, struct bfd_symbol **);
++ struct bfd_symbol *
++ (*_bfd_make_empty_symbol) (bfd *);
++ void (*_bfd_print_symbol)
++ (bfd *, void *, struct bfd_symbol *, bfd_print_symbol_type);
++#define bfd_print_symbol(b,p,s,e) BFD_SEND (b, _bfd_print_symbol, (b,p,s,e))
++ void (*_bfd_get_symbol_info)
++ (bfd *, struct bfd_symbol *, symbol_info *);
++#define bfd_get_symbol_info(b,p,e) BFD_SEND (b, _bfd_get_symbol_info, (b,p,e))
++ bfd_boolean (*_bfd_is_local_label_name) (bfd *, const char *);
++ bfd_boolean (*_bfd_is_target_special_symbol) (bfd *, asymbol *);
++ alent * (*_get_lineno) (bfd *, struct bfd_symbol *);
++ bfd_boolean (*_bfd_find_nearest_line)
++ (bfd *, struct bfd_section *, struct bfd_symbol **, bfd_vma,
++ const char **, const char **, unsigned int *);
++ bfd_boolean (*_bfd_find_line)
++ (bfd *, struct bfd_symbol **, struct bfd_symbol *,
++ const char **, unsigned int *);
++ bfd_boolean (*_bfd_find_inliner_info)
++ (bfd *, const char **, const char **, unsigned int *);
++ /* Back-door to allow format-aware applications to create debug symbols
++ while using BFD for everything else. Currently used by the assembler
++ when creating COFF files. */
++ asymbol * (*_bfd_make_debug_symbol)
++ (bfd *, void *, unsigned long size);
++#define bfd_read_minisymbols(b, d, m, s) \
++ BFD_SEND (b, _read_minisymbols, (b, d, m, s))
++ long (*_read_minisymbols)
++ (bfd *, bfd_boolean, void **, unsigned int *);
++#define bfd_minisymbol_to_symbol(b, d, m, f) \
++ BFD_SEND (b, _minisymbol_to_symbol, (b, d, m, f))
++ asymbol * (*_minisymbol_to_symbol)
++ (bfd *, bfd_boolean, const void *, asymbol *);
++
++ /* Routines for relocs. */
++#define BFD_JUMP_TABLE_RELOCS(NAME) \
++ NAME##_get_reloc_upper_bound, \
++ NAME##_canonicalize_reloc, \
++ NAME##_bfd_reloc_type_lookup
++
++ long (*_get_reloc_upper_bound) (bfd *, sec_ptr);
++ long (*_bfd_canonicalize_reloc)
++ (bfd *, sec_ptr, arelent **, struct bfd_symbol **);
++ /* See documentation on reloc types. */
++ reloc_howto_type *
++ (*reloc_type_lookup) (bfd *, bfd_reloc_code_real_type);
++
++ /* Routines used when writing an object file. */
++#define BFD_JUMP_TABLE_WRITE(NAME) \
++ NAME##_set_arch_mach, \
++ NAME##_set_section_contents
++
++ bfd_boolean (*_bfd_set_arch_mach)
++ (bfd *, enum bfd_architecture, unsigned long);
++ bfd_boolean (*_bfd_set_section_contents)
++ (bfd *, sec_ptr, const void *, file_ptr, bfd_size_type);
++
++ /* Routines used by the linker. */
++#define BFD_JUMP_TABLE_LINK(NAME) \
++ NAME##_sizeof_headers, \
++ NAME##_bfd_get_relocated_section_contents, \
++ NAME##_bfd_relax_section, \
++ NAME##_bfd_link_hash_table_create, \
++ NAME##_bfd_link_hash_table_free, \
++ NAME##_bfd_link_add_symbols, \
++ NAME##_bfd_link_just_syms, \
++ NAME##_bfd_final_link, \
++ NAME##_bfd_link_split_section, \
++ NAME##_bfd_gc_sections, \
++ NAME##_bfd_merge_sections, \
++ NAME##_bfd_is_group_section, \
++ NAME##_bfd_discard_group, \
++ NAME##_section_already_linked \
++
++ int (*_bfd_sizeof_headers) (bfd *, bfd_boolean);
++ bfd_byte * (*_bfd_get_relocated_section_contents)
++ (bfd *, struct bfd_link_info *, struct bfd_link_order *,
++ bfd_byte *, bfd_boolean, struct bfd_symbol **);
++
++ bfd_boolean (*_bfd_relax_section)
++ (bfd *, struct bfd_section *, struct bfd_link_info *, bfd_boolean *);
++
++ /* Create a hash table for the linker. Different backends store
++ different information in this table. */
++ struct bfd_link_hash_table *
++ (*_bfd_link_hash_table_create) (bfd *);
++
++ /* Release the memory associated with the linker hash table. */
++ void (*_bfd_link_hash_table_free) (struct bfd_link_hash_table *);
++
++ /* Add symbols from this object file into the hash table. */
++ bfd_boolean (*_bfd_link_add_symbols) (bfd *, struct bfd_link_info *);
++
++ /* Indicate that we are only retrieving symbol values from this section. */
++ void (*_bfd_link_just_syms) (asection *, struct bfd_link_info *);
++
++ /* Do a link based on the link_order structures attached to each
++ section of the BFD. */
++ bfd_boolean (*_bfd_final_link) (bfd *, struct bfd_link_info *);
++
++ /* Should this section be split up into smaller pieces during linking. */
++ bfd_boolean (*_bfd_link_split_section) (bfd *, struct bfd_section *);
++
++ /* Remove sections that are not referenced from the output. */
++ bfd_boolean (*_bfd_gc_sections) (bfd *, struct bfd_link_info *);
++
++ /* Attempt to merge SEC_MERGE sections. */
++ bfd_boolean (*_bfd_merge_sections) (bfd *, struct bfd_link_info *);
++
++ /* Is this section a member of a group? */
++ bfd_boolean (*_bfd_is_group_section) (bfd *, const struct bfd_section *);
++
++ /* Discard members of a group. */
++ bfd_boolean (*_bfd_discard_group) (bfd *, struct bfd_section *);
++
++  /* Check whether SEC has already been linked during a relocatable
++     or final link.  */
++ void (*_section_already_linked) (bfd *, struct bfd_section *);
++
++ /* Routines to handle dynamic symbols and relocs. */
++#define BFD_JUMP_TABLE_DYNAMIC(NAME) \
++ NAME##_get_dynamic_symtab_upper_bound, \
++ NAME##_canonicalize_dynamic_symtab, \
++ NAME##_get_synthetic_symtab, \
++ NAME##_get_dynamic_reloc_upper_bound, \
++ NAME##_canonicalize_dynamic_reloc
++
++ /* Get the amount of memory required to hold the dynamic symbols. */
++ long (*_bfd_get_dynamic_symtab_upper_bound) (bfd *);
++ /* Read in the dynamic symbols. */
++ long (*_bfd_canonicalize_dynamic_symtab)
++ (bfd *, struct bfd_symbol **);
++  /* Create synthesized symbols.  */
++ long (*_bfd_get_synthetic_symtab)
++ (bfd *, long, struct bfd_symbol **, long, struct bfd_symbol **,
++ struct bfd_symbol **);
++ /* Get the amount of memory required to hold the dynamic relocs. */
++ long (*_bfd_get_dynamic_reloc_upper_bound) (bfd *);
++ /* Read in the dynamic relocs. */
++ long (*_bfd_canonicalize_dynamic_reloc)
++ (bfd *, arelent **, struct bfd_symbol **);
++
++ /* Opposite endian version of this target. */
++ const struct bfd_target * alternative_target;
++
++ /* Data for use by back-end routines, which isn't
++ generic enough to belong in this structure. */
++ const void *backend_data;
++
++} bfd_target;
++
++bfd_boolean bfd_set_default_target (const char *name);
++
++const bfd_target *bfd_find_target (const char *target_name, bfd *abfd);
++
++const char ** bfd_target_list (void);
++
++const bfd_target *bfd_search_for_target
++ (int (*search_func) (const bfd_target *, void *),
++ void *);
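
bfd_target_list returns a NULL-terminated, malloc'd vector of the names
of every configured target, which is mainly useful for diagnostics and
--help output. A minimal sketch (freeing the vector is omitted):

    #include "bfd.h"
    #include <stdio.h>

    static void print_targets (void)
    {
      const char **list = bfd_target_list ();
      const char **p;
      for (p = list; p != NULL && *p != NULL; p++)
        printf ("%s\n", *p);
    }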
++
++/* Extracted from format.c. */
++bfd_boolean bfd_check_format (bfd *abfd, bfd_format format);
++
++bfd_boolean bfd_check_format_matches
++ (bfd *abfd, bfd_format format, char ***matching);
++
++bfd_boolean bfd_set_format (bfd *abfd, bfd_format format);
++
++const char *bfd_format_string (bfd_format format);
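
Combined with the open routines from opncls.c, bfd_check_format is the
canonical way to identify a file. A hedged sketch of the usual probe
sequence (the helper is invented for the example):

    #include "bfd.h"

    static bfd_format classify (const char *filename)
    {
      bfd *abfd = bfd_openr (filename, NULL);
      bfd_format fmt = bfd_unknown;

      if (abfd == NULL)
        return fmt;
      if (bfd_check_format (abfd, bfd_object))
        fmt = bfd_object;
      else if (bfd_check_format (abfd, bfd_archive))
        fmt = bfd_archive;
      bfd_close (abfd);
      return fmt;
    }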
++
++/* Extracted from linker.c. */
++bfd_boolean bfd_link_split_section (bfd *abfd, asection *sec);
++
++#define bfd_link_split_section(abfd, sec) \
++ BFD_SEND (abfd, _bfd_link_split_section, (abfd, sec))
++
++void bfd_section_already_linked (bfd *abfd, asection *sec);
++
++#define bfd_section_already_linked(abfd, sec) \
++ BFD_SEND (abfd, _section_already_linked, (abfd, sec))
++
++/* Extracted from simple.c. */
++bfd_byte *bfd_simple_get_relocated_section_contents
++ (bfd *abfd, asection *sec, bfd_byte *outbuf, asymbol **symbol_table);
++
++#ifdef __cplusplus
++}
++#endif
++#endif
--- /dev/null
--- /dev/null
++/* DO NOT EDIT! -*- buffer-read-only: t -*- This file is automatically
++ generated from "bfd-in.h", "init.c", "opncls.c", "libbfd.c",
++ "bfdio.c", "bfdwin.c", "section.c", "archures.c", "reloc.c",
++ "syms.c", "bfd.c", "archive.c", "corefile.c", "targets.c", "format.c",
++ "linker.c" and "simple.c".
++ Run "make headers" in your build bfd/ to regenerate. */
++
++/* Main header file for the bfd library -- portable access to object files.
++
++ Copyright 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
++ 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
++
++ Contributed by Cygnus Support.
++
++ This file is part of BFD, the Binary File Descriptor library.
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2 of the License, or
++ (at your option) any later version.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
++
++/* Extracted from binutils 2.16.91.0.2 (OpenSUSE 10.0) and modified for kdb use.
++ * Any trailing whitespace was removed and #ifdef/ifndef __KERNEL__ added as
++ * required.
++ * Keith Owens <kaos@sgi.com> 15 May 2006
++ */
++
++#ifndef __BFD_H_SEEN__
++#define __BFD_H_SEEN__
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#ifdef __KERNEL__
++#include <asm/ansidecl.h>
++#else /* __KERNEL__ */
++#include "ansidecl.h"
++#include "symcat.h"
++#endif /* __KERNEL__ */
++#if defined (__STDC__) || defined (ALMOST_STDC) || defined (HAVE_STRINGIZE)
++#ifndef SABER
++/* This hack is to avoid a problem with some strict ANSI C preprocessors.
++ The problem is, "32_" is not a valid preprocessing token, and we don't
++ want extra underscores (e.g., "nlm_32_"). The XCONCAT2 macro will
++ cause the inner CONCAT2 macros to be evaluated first, producing
++ still-valid pp-tokens. Then the final concatenation can be done. */
++#undef CONCAT4
++#define CONCAT4(a,b,c,d) XCONCAT2(CONCAT2(a,b),CONCAT2(c,d))
++#endif
++#endif
++
++/* The word size used by BFD on the host. This may be 64 with a 32
++ bit target if the host is 64 bit, or if other 64 bit targets have
++ been selected with --enable-targets, or if --enable-64-bit-bfd. */
++#define BFD_ARCH_SIZE 64
++
++/* The word size of the default bfd target. */
++#define BFD_DEFAULT_TARGET_SIZE 64
++
++#define BFD_HOST_64BIT_LONG 1
++#define BFD_HOST_LONG_LONG 1
++#if 1
++#define BFD_HOST_64_BIT long
++#define BFD_HOST_U_64_BIT unsigned long
++typedef BFD_HOST_64_BIT bfd_int64_t;
++typedef BFD_HOST_U_64_BIT bfd_uint64_t;
++#endif
++
++#if BFD_ARCH_SIZE >= 64
++#define BFD64
++#endif
++
++#ifndef INLINE
++#if __GNUC__ >= 2
++#define INLINE __inline__
++#else
++#define INLINE
++#endif
++#endif
++
++/* Forward declaration. */
++typedef struct bfd bfd;
++
++/* Boolean type used in bfd. Too many systems define their own
++ versions of "boolean" for us to safely typedef a "boolean" of
++ our own. Using an enum for "bfd_boolean" has its own set of
++ problems, with strange looking casts required to avoid warnings
++ on some older compilers. Thus we just use an int.
++
++ General rule: Functions which are bfd_boolean return TRUE on
++ success and FALSE on failure (unless they're a predicate). */
++
++typedef int bfd_boolean;
++#undef FALSE
++#undef TRUE
++#define FALSE 0
++#define TRUE 1
++
++#ifdef BFD64
++
++#ifndef BFD_HOST_64_BIT
++ #error No 64 bit integer type available
++#endif /* ! defined (BFD_HOST_64_BIT) */
++
++typedef BFD_HOST_U_64_BIT bfd_vma;
++typedef BFD_HOST_64_BIT bfd_signed_vma;
++typedef BFD_HOST_U_64_BIT bfd_size_type;
++typedef BFD_HOST_U_64_BIT symvalue;
++
++#ifndef fprintf_vma
++#if BFD_HOST_64BIT_LONG
++#define sprintf_vma(s,x) sprintf (s, "%016lx", x)
++#define fprintf_vma(f,x) fprintf (f, "%016lx", x)
++#else
++#define _bfd_int64_low(x) ((unsigned long) (((x) & 0xffffffff)))
++#define _bfd_int64_high(x) ((unsigned long) (((x) >> 32) & 0xffffffff))
++#define fprintf_vma(s,x) \
++ fprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x))
++#define sprintf_vma(s,x) \
++ sprintf ((s), "%08lx%08lx", _bfd_int64_high (x), _bfd_int64_low (x))
++#endif
++#endif
++
++#else /* not BFD64 */
++
++/* Represent a target address. Also used as a generic unsigned type
++ which is guaranteed to be big enough to hold any arithmetic types
++ we need to deal with. */
++typedef unsigned long bfd_vma;
++
++/* A generic signed type which is guaranteed to be big enough to hold any
++ arithmetic types we need to deal with. Can be assumed to be compatible
++ with bfd_vma in the same way that signed and unsigned ints are compatible
++ (as parameters, in assignment, etc). */
++typedef long bfd_signed_vma;
++
++typedef unsigned long symvalue;
++typedef unsigned long bfd_size_type;
++
++/* Print a bfd_vma x on stream s. */
++#define fprintf_vma(s,x) fprintf (s, "%08lx", x)
++#define sprintf_vma(s,x) sprintf (s, "%08lx", x)
++
++#endif /* not BFD64 */
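
Because bfd_vma may be 32 or 64 bits depending on the build, the
*printf_vma macros exist so callers never pick a format string
themselves. A fragment:

    char buf[32];
    bfd_vma addr = 0x400000;

    sprintf_vma (buf, addr);   /* "0000000000400000" on a 64-bit BFD */
    printf_vma (addr);         /* same digits, written to stdout */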
++
++#define HALF_BFD_SIZE_TYPE \
++ (((bfd_size_type) 1) << (8 * sizeof (bfd_size_type) / 2))
++
++#ifndef BFD_HOST_64_BIT
++/* Fall back on a 32 bit type. The idea is to make these types always
++ available for function return types, but in the case that
++ BFD_HOST_64_BIT is undefined such a function should abort or
++ otherwise signal an error. */
++typedef bfd_signed_vma bfd_int64_t;
++typedef bfd_vma bfd_uint64_t;
++#endif
++
++/* An offset into a file. BFD always uses the largest possible offset
++ based on the build time availability of fseek, fseeko, or fseeko64. */
++typedef BFD_HOST_64_BIT file_ptr;
++typedef unsigned BFD_HOST_64_BIT ufile_ptr;
++
++extern void bfd_sprintf_vma (bfd *, char *, bfd_vma);
++extern void bfd_fprintf_vma (bfd *, void *, bfd_vma);
++
++#define printf_vma(x) fprintf_vma(stdout,x)
++#define bfd_printf_vma(abfd,x) bfd_fprintf_vma (abfd,stdout,x)
++
++typedef unsigned int flagword; /* 32 bits of flags */
++typedef unsigned char bfd_byte;
++\f
++/* File formats. */
++
++typedef enum bfd_format
++{
++ bfd_unknown = 0, /* File format is unknown. */
++ bfd_object, /* Linker/assembler/compiler output. */
++ bfd_archive, /* Object archive file. */
++ bfd_core, /* Core dump. */
++ bfd_type_end /* Marks the end; don't use it! */
++}
++bfd_format;
++
++/* Values that may appear in the flags field of a BFD. These also
++ appear in the object_flags field of the bfd_target structure, where
++ they indicate the set of flags used by that backend (not all flags
++ are meaningful for all object file formats) (FIXME: at the moment,
++   the object_flags values have mostly just been copied from one
++   backend to another, and are not necessarily correct).  */
++
++/* No flags. */
++#define BFD_NO_FLAGS 0x00
++
++/* BFD contains relocation entries. */
++#define HAS_RELOC 0x01
++
++/* BFD is directly executable. */
++#define EXEC_P 0x02
++
++/* BFD has line number information (basically used for F_LNNO in a
++ COFF header). */
++#define HAS_LINENO 0x04
++
++/* BFD has debugging information. */
++#define HAS_DEBUG 0x08
++
++/* BFD has symbols. */
++#define HAS_SYMS 0x10
++
++/* BFD has local symbols (basically used for F_LSYMS in a COFF
++ header). */
++#define HAS_LOCALS 0x20
++
++/* BFD is a dynamic object. */
++#define DYNAMIC 0x40
++
++/* Text section is write protected (if D_PAGED is not set, this is
++ like an a.out NMAGIC file) (the linker sets this by default, but
++ clears it for -r or -N). */
++#define WP_TEXT 0x80
++
++/* BFD is dynamically paged (this is like an a.out ZMAGIC file) (the
++ linker sets this by default, but clears it for -r or -n or -N). */
++#define D_PAGED 0x100
++
++/* BFD is relaxable (this means that bfd_relax_section may be able to
++ do something) (sometimes bfd_relax_section can do something even if
++ this is not set). */
++#define BFD_IS_RELAXABLE 0x200
++
++/* This may be set before writing out a BFD to request using a
++ traditional format. For example, this is used to request that when
++ writing out an a.out object the symbols not be hashed to eliminate
++ duplicates. */
++#define BFD_TRADITIONAL_FORMAT 0x400
++
++/* This flag indicates that the BFD contents are actually cached in
++ memory. If this is set, iostream points to a bfd_in_memory struct. */
++#define BFD_IN_MEMORY 0x800
++
++/* The sections in this BFD specify a memory page. */
++#define HAS_LOAD_PAGE 0x1000
++
++/* This BFD has been created by the linker and doesn't correspond
++ to any input file. */
++#define BFD_LINKER_CREATED 0x2000
++\f
++/* Symbols and relocation. */
++
++/* A count of carsyms (canonical archive symbols). */
++typedef unsigned long symindex;
++
++/* How to perform a relocation. */
++typedef const struct reloc_howto_struct reloc_howto_type;
++
++#define BFD_NO_MORE_SYMBOLS ((symindex) ~0)
++
++/* General purpose part of a symbol X;
++ target specific parts are in libcoff.h, libaout.h, etc. */
++
++#define bfd_get_section(x) ((x)->section)
++#define bfd_get_output_section(x) ((x)->section->output_section)
++#define bfd_set_section(x,y) ((x)->section) = (y)
++#define bfd_asymbol_base(x) ((x)->section->vma)
++#define bfd_asymbol_value(x) (bfd_asymbol_base(x) + (x)->value)
++#define bfd_asymbol_name(x) ((x)->name)
++/*Perhaps future: #define bfd_asymbol_bfd(x) ((x)->section->owner)*/
++#define bfd_asymbol_bfd(x) ((x)->the_bfd)
++#define bfd_asymbol_flavour(x) (bfd_asymbol_bfd(x)->xvec->flavour)
++
++/* A canonical archive symbol. */
++/* This is a type pun with struct ranlib on purpose! */
++typedef struct carsym
++{
++ char *name;
++ file_ptr file_offset; /* Look here to find the file. */
++}
++carsym; /* To make these you call a carsymogen. */
++
++/* Used in generating armaps (archive tables of contents).
++ Perhaps just a forward definition would do? */
++struct orl /* Output ranlib. */
++{
++ char **name; /* Symbol name. */
++ union
++ {
++ file_ptr pos;
++ bfd *abfd;
++ } u; /* bfd* or file position. */
++ int namidx; /* Index into string table. */
++};
++\f
++/* Linenumber stuff. */
++typedef struct lineno_cache_entry
++{
++ unsigned int line_number; /* Linenumber from start of function. */
++ union
++ {
++ struct bfd_symbol *sym; /* Function name. */
++ bfd_vma offset; /* Offset into section. */
++ } u;
++}
++alent;
++\f
++/* Object and core file sections. */
++
++#define align_power(addr, align) \
++ (((addr) + ((bfd_vma) 1 << (align)) - 1) & ((bfd_vma) -1 << (align)))
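
align_power rounds an address up to a power-of-two boundary given as a
log2 value, matching how section alignment is stored in
alignment_power. For instance, aligning to 16 bytes (align = 4):

    /* align_power (0x1234, 4) == (0x1234 + 0xf) & ~(bfd_vma) 0xf
       == 0x1240 */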
++
++typedef struct bfd_section *sec_ptr;
++
++#define bfd_get_section_name(bfd, ptr) ((ptr)->name + 0)
++#define bfd_get_section_vma(bfd, ptr) ((ptr)->vma + 0)
++#define bfd_get_section_lma(bfd, ptr) ((ptr)->lma + 0)
++#define bfd_get_section_alignment(bfd, ptr) ((ptr)->alignment_power + 0)
++#define bfd_section_name(bfd, ptr) ((ptr)->name)
++#define bfd_section_size(bfd, ptr) ((ptr)->size)
++#define bfd_get_section_size(ptr) ((ptr)->size)
++#define bfd_section_vma(bfd, ptr) ((ptr)->vma)
++#define bfd_section_lma(bfd, ptr) ((ptr)->lma)
++#define bfd_section_alignment(bfd, ptr) ((ptr)->alignment_power)
++#define bfd_get_section_flags(bfd, ptr) ((ptr)->flags + 0)
++#define bfd_get_section_userdata(bfd, ptr) ((ptr)->userdata)
++
++#define bfd_is_com_section(ptr) (((ptr)->flags & SEC_IS_COMMON) != 0)
++
++#define bfd_set_section_vma(bfd, ptr, val) (((ptr)->vma = (ptr)->lma = (val)), ((ptr)->user_set_vma = TRUE), TRUE)
++#define bfd_set_section_alignment(bfd, ptr, val) (((ptr)->alignment_power = (val)),TRUE)
++#define bfd_set_section_userdata(bfd, ptr, val) (((ptr)->userdata = (val)),TRUE)
++/* Find the address one past the end of SEC. */
++#define bfd_get_section_limit(bfd, sec) \
++ (((sec)->rawsize ? (sec)->rawsize : (sec)->size) \
++ / bfd_octets_per_byte (bfd))
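
These accessors are typically used while walking a BFD's section list,
which is chained through the `next' pointers of struct bfd_section
(defined later in this header). A hedged sketch (the helper and format
strings are illustrative only):

    #include "bfd.h"
    #include <stdio.h>

    static void dump_sections (bfd *abfd)
    {
      asection *sec;
      for (sec = abfd->sections; sec != NULL; sec = sec->next)
        printf ("%-20s vma=0x%lx size=0x%lx\n",
                bfd_section_name (abfd, sec),
                (unsigned long) bfd_section_vma (abfd, sec),
                (unsigned long) bfd_section_size (abfd, sec));
    }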
++
++typedef struct stat stat_type;
++\f
++typedef enum bfd_print_symbol
++{
++ bfd_print_symbol_name,
++ bfd_print_symbol_more,
++ bfd_print_symbol_all
++} bfd_print_symbol_type;
++
++/* Information about a symbol that nm needs. */
++
++typedef struct _symbol_info
++{
++ symvalue value;
++ char type;
++ const char *name; /* Symbol name. */
++ unsigned char stab_type; /* Stab type. */
++ char stab_other; /* Stab other. */
++ short stab_desc; /* Stab desc. */
++ const char *stab_name; /* String for stab type. */
++} symbol_info;
++
++/* Get the name of a stabs type code. */
++
++extern const char *bfd_get_stab_name (int);
++\f
++/* Hash table routines. There is no way to free up a hash table. */
++
++/* An element in the hash table. Most uses will actually use a larger
++ structure, and an instance of this will be the first field. */
++
++struct bfd_hash_entry
++{
++ /* Next entry for this hash code. */
++ struct bfd_hash_entry *next;
++ /* String being hashed. */
++ const char *string;
++ /* Hash code. This is the full hash code, not the index into the
++ table. */
++ unsigned long hash;
++};
++
++/* A hash table. */
++
++struct bfd_hash_table
++{
++ /* The hash array. */
++ struct bfd_hash_entry **table;
++ /* The number of slots in the hash table. */
++ unsigned int size;
++ /* A function used to create new elements in the hash table. The
++ first entry is itself a pointer to an element. When this
++ function is first invoked, this pointer will be NULL. However,
++ having the pointer permits a hierarchy of method functions to be
++ built each of which calls the function in the superclass. Thus
++ each function should be written to allocate a new block of memory
++ only if the argument is NULL. */
++ struct bfd_hash_entry *(*newfunc)
++ (struct bfd_hash_entry *, struct bfd_hash_table *, const char *);
++ /* An objalloc for this hash table. This is a struct objalloc *,
++ but we use void * to avoid requiring the inclusion of objalloc.h. */
++ void *memory;
++};
++
++/* Initialize a hash table. */
++extern bfd_boolean bfd_hash_table_init
++ (struct bfd_hash_table *,
++ struct bfd_hash_entry *(*) (struct bfd_hash_entry *,
++ struct bfd_hash_table *,
++ const char *));
++
++/* Initialize a hash table specifying a size. */
++extern bfd_boolean bfd_hash_table_init_n
++ (struct bfd_hash_table *,
++ struct bfd_hash_entry *(*) (struct bfd_hash_entry *,
++ struct bfd_hash_table *,
++ const char *),
++ unsigned int size);
++
++/* Free up a hash table. */
++extern void bfd_hash_table_free
++ (struct bfd_hash_table *);
++
++/* Look up a string in a hash table. If CREATE is TRUE, a new entry
++ will be created for this string if one does not already exist. The
++ COPY argument must be TRUE if this routine should copy the string
++ into newly allocated memory when adding an entry. */
++extern struct bfd_hash_entry *bfd_hash_lookup
++ (struct bfd_hash_table *, const char *, bfd_boolean create,
++ bfd_boolean copy);
++
++/* Replace an entry in a hash table. */
++extern void bfd_hash_replace
++ (struct bfd_hash_table *, struct bfd_hash_entry *old,
++ struct bfd_hash_entry *nw);
++
++/* Base method for creating a hash table entry. */
++extern struct bfd_hash_entry *bfd_hash_newfunc
++ (struct bfd_hash_entry *, struct bfd_hash_table *, const char *);
++
++/* Grab some space for a hash table entry. */
++extern void *bfd_hash_allocate
++ (struct bfd_hash_table *, unsigned int);
++
++/* Traverse a hash table in a random order, calling a function on each
++ element. If the function returns FALSE, the traversal stops. The
++ INFO argument is passed to the function. */
++extern void bfd_hash_traverse
++ (struct bfd_hash_table *,
++ bfd_boolean (*) (struct bfd_hash_entry *, void *),
++ void *info);
++
++/* Allows the default size of a hash table to be configured. New hash
++ tables allocated using bfd_hash_table_init will be created with
++ this size. */
++extern void bfd_hash_set_default_size (bfd_size_type);
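
A sketch of these routines in their simplest form, using the base entry
type and the stock bfd_hash_newfunc; real users normally embed struct
bfd_hash_entry as the first field of a larger structure and supply
their own newfunc:

    #include "bfd.h"

    static void hash_demo (void)
    {
      struct bfd_hash_table table;
      struct bfd_hash_entry *ent;

      if (!bfd_hash_table_init (&table, bfd_hash_newfunc))
        return;
      /* create=TRUE adds the string if absent; copy=TRUE duplicates it
         into the table's own storage.  */
      ent = bfd_hash_lookup (&table, "main", TRUE, TRUE);
      (void) ent;
      bfd_hash_table_free (&table);
    }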
++
++/* This structure is used to keep track of stabs in sections
++ information while linking. */
++
++struct stab_info
++{
++ /* A hash table used to hold stabs strings. */
++ struct bfd_strtab_hash *strings;
++ /* The header file hash table. */
++ struct bfd_hash_table includes;
++ /* The first .stabstr section. */
++ struct bfd_section *stabstr;
++};
++
++#define COFF_SWAP_TABLE (void *) &bfd_coff_std_swap_table
++
++/* User program access to BFD facilities. */
++
++/* Direct I/O routines, for programs which know more about the object
++ file than BFD does. Use higher level routines if possible. */
++
++extern bfd_size_type bfd_bread (void *, bfd_size_type, bfd *);
++extern bfd_size_type bfd_bwrite (const void *, bfd_size_type, bfd *);
++extern int bfd_seek (bfd *, file_ptr, int);
++extern file_ptr bfd_tell (bfd *);
++extern int bfd_flush (bfd *);
++extern int bfd_stat (bfd *, struct stat *);
++
++/* Deprecated old routines. */
++#if __GNUC__
++#define bfd_read(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_read", __FILE__, __LINE__, __FUNCTION__), \
++ bfd_bread ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#define bfd_write(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_write", __FILE__, __LINE__, __FUNCTION__), \
++ bfd_bwrite ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#else
++#define bfd_read(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_read", (const char *) 0, 0, (const char *) 0), \
++ bfd_bread ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#define bfd_write(BUF, ELTSIZE, NITEMS, ABFD) \
++ (warn_deprecated ("bfd_write", (const char *) 0, 0, (const char *) 0),\
++ bfd_bwrite ((BUF), (ELTSIZE) * (NITEMS), (ABFD)))
++#endif
++extern void warn_deprecated (const char *, const char *, int, const char *);
++
++/* Cast from const char * to char * so that caller can assign to
++ a char * without a warning. */
++#define bfd_get_filename(abfd) ((char *) (abfd)->filename)
++#define bfd_get_cacheable(abfd) ((abfd)->cacheable)
++#define bfd_get_format(abfd) ((abfd)->format)
++#define bfd_get_target(abfd) ((abfd)->xvec->name)
++#define bfd_get_flavour(abfd) ((abfd)->xvec->flavour)
++#define bfd_family_coff(abfd) \
++ (bfd_get_flavour (abfd) == bfd_target_coff_flavour || \
++ bfd_get_flavour (abfd) == bfd_target_xcoff_flavour)
++#define bfd_big_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_BIG)
++#define bfd_little_endian(abfd) ((abfd)->xvec->byteorder == BFD_ENDIAN_LITTLE)
++#define bfd_header_big_endian(abfd) \
++ ((abfd)->xvec->header_byteorder == BFD_ENDIAN_BIG)
++#define bfd_header_little_endian(abfd) \
++ ((abfd)->xvec->header_byteorder == BFD_ENDIAN_LITTLE)
++#define bfd_get_file_flags(abfd) ((abfd)->flags)
++#define bfd_applicable_file_flags(abfd) ((abfd)->xvec->object_flags)
++#define bfd_applicable_section_flags(abfd) ((abfd)->xvec->section_flags)
++#define bfd_my_archive(abfd) ((abfd)->my_archive)
++#define bfd_has_map(abfd) ((abfd)->has_armap)
++
++#define bfd_valid_reloc_types(abfd) ((abfd)->xvec->valid_reloc_types)
++#define bfd_usrdata(abfd) ((abfd)->usrdata)
++
++#define bfd_get_start_address(abfd) ((abfd)->start_address)
++#define bfd_get_symcount(abfd) ((abfd)->symcount)
++#define bfd_get_outsymbols(abfd) ((abfd)->outsymbols)
++#define bfd_count_sections(abfd) ((abfd)->section_count)
++
++#define bfd_get_dynamic_symcount(abfd) ((abfd)->dynsymcount)
++
++#define bfd_get_symbol_leading_char(abfd) ((abfd)->xvec->symbol_leading_char)
++
++#define bfd_set_cacheable(abfd,bool) (((abfd)->cacheable = bool), TRUE)
++
++extern bfd_boolean bfd_cache_close
++ (bfd *abfd);
++/* NB: This declaration should match the autogenerated one in libbfd.h. */
++
++extern bfd_boolean bfd_cache_close_all (void);
++
++extern bfd_boolean bfd_record_phdr
++ (bfd *, unsigned long, bfd_boolean, flagword, bfd_boolean, bfd_vma,
++ bfd_boolean, bfd_boolean, unsigned int, struct bfd_section **);
++
++/* Byte swapping routines. */
++
++bfd_uint64_t bfd_getb64 (const void *);
++bfd_uint64_t bfd_getl64 (const void *);
++bfd_int64_t bfd_getb_signed_64 (const void *);
++bfd_int64_t bfd_getl_signed_64 (const void *);
++bfd_vma bfd_getb32 (const void *);
++bfd_vma bfd_getl32 (const void *);
++bfd_signed_vma bfd_getb_signed_32 (const void *);
++bfd_signed_vma bfd_getl_signed_32 (const void *);
++bfd_vma bfd_getb16 (const void *);
++bfd_vma bfd_getl16 (const void *);
++bfd_signed_vma bfd_getb_signed_16 (const void *);
++bfd_signed_vma bfd_getl_signed_16 (const void *);
++void bfd_putb64 (bfd_uint64_t, void *);
++void bfd_putl64 (bfd_uint64_t, void *);
++void bfd_putb32 (bfd_vma, void *);
++void bfd_putl32 (bfd_vma, void *);
++void bfd_putb16 (bfd_vma, void *);
++void bfd_putl16 (bfd_vma, void *);
++
++/* Byte swapping routines which take size and endianness as arguments.  */
++
++bfd_uint64_t bfd_get_bits (const void *, int, bfd_boolean);
++void bfd_put_bits (bfd_uint64_t, void *, int, bfd_boolean);
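
The getters and putters above read and write integers of a fixed byte
order regardless of the host's endianness; the b/l infix selects big-
or little-endian. A small sketch swapping a value between the two:

    #include "bfd.h"

    static bfd_vma swap_demo (void)
    {
      bfd_byte buf[4];

      bfd_putb32 (0x12345678, buf);   /* buf = 12 34 56 78 */
      return bfd_getl32 (buf);        /* 0x78563412 on any host */
    }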
++
++extern bfd_boolean bfd_section_already_linked_table_init (void);
++extern void bfd_section_already_linked_table_free (void);
++\f
++/* Externally visible ECOFF routines. */
++
++#if defined(__STDC__) || defined(ALMOST_STDC)
++struct ecoff_debug_info;
++struct ecoff_debug_swap;
++struct ecoff_extr;
++struct bfd_symbol;
++struct bfd_link_info;
++struct bfd_link_hash_entry;
++struct bfd_elf_version_tree;
++#endif
++extern bfd_vma bfd_ecoff_get_gp_value
++ (bfd * abfd);
++extern bfd_boolean bfd_ecoff_set_gp_value
++ (bfd *abfd, bfd_vma gp_value);
++extern bfd_boolean bfd_ecoff_set_regmasks
++ (bfd *abfd, unsigned long gprmask, unsigned long fprmask,
++ unsigned long *cprmask);
++extern void *bfd_ecoff_debug_init
++ (bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, struct bfd_link_info *);
++extern void bfd_ecoff_debug_free
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_accumulate
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, bfd *input_bfd,
++ struct ecoff_debug_info *input_debug,
++ const struct ecoff_debug_swap *input_swap, struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_accumulate_other
++ (void *handle, bfd *output_bfd, struct ecoff_debug_info *output_debug,
++ const struct ecoff_debug_swap *output_swap, bfd *input_bfd,
++ struct bfd_link_info *);
++extern bfd_boolean bfd_ecoff_debug_externals
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, bfd_boolean relocatable,
++ bfd_boolean (*get_extr) (struct bfd_symbol *, struct ecoff_extr *),
++ void (*set_index) (struct bfd_symbol *, bfd_size_type));
++extern bfd_boolean bfd_ecoff_debug_one_external
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, const char *name,
++ struct ecoff_extr *esym);
++extern bfd_size_type bfd_ecoff_debug_size
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap);
++extern bfd_boolean bfd_ecoff_write_debug
++ (bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap, file_ptr where);
++extern bfd_boolean bfd_ecoff_write_accumulated_debug
++ (void *handle, bfd *abfd, struct ecoff_debug_info *debug,
++ const struct ecoff_debug_swap *swap,
++ struct bfd_link_info *info, file_ptr where);
++
++/* Externally visible ELF routines. */
++
++struct bfd_link_needed_list
++{
++ struct bfd_link_needed_list *next;
++ bfd *by;
++ const char *name;
++};
++
++enum dynamic_lib_link_class {
++ DYN_NORMAL = 0,
++ DYN_AS_NEEDED = 1,
++ DYN_DT_NEEDED = 2,
++ DYN_NO_ADD_NEEDED = 4,
++ DYN_NO_NEEDED = 8
++};
++
++extern bfd_boolean bfd_elf_record_link_assignment
++ (struct bfd_link_info *, const char *, bfd_boolean);
++extern struct bfd_link_needed_list *bfd_elf_get_needed_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_elf_get_bfd_needed_list
++ (bfd *, struct bfd_link_needed_list **);
++extern bfd_boolean bfd_elf_size_dynamic_sections
++ (bfd *, const char *, const char *, const char *, const char * const *,
++ struct bfd_link_info *, struct bfd_section **,
++ struct bfd_elf_version_tree *);
++extern bfd_boolean bfd_elf_size_dynsym_hash_dynstr
++ (bfd *, struct bfd_link_info *);
++extern void bfd_elf_set_dt_needed_name
++ (bfd *, const char *);
++extern const char *bfd_elf_get_dt_soname
++ (bfd *);
++extern void bfd_elf_set_dyn_lib_class
++ (bfd *, int);
++extern int bfd_elf_get_dyn_lib_class
++ (bfd *);
++extern struct bfd_link_needed_list *bfd_elf_get_runpath_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_elf_discard_info
++ (bfd *, struct bfd_link_info *);
++extern unsigned int _bfd_elf_default_action_discarded
++ (struct bfd_section *);
++
++/* Return an upper bound on the number of bytes required to store a
++ copy of ABFD's program header table entries. Return -1 if an error
++ occurs; bfd_get_error will return an appropriate code. */
++extern long bfd_get_elf_phdr_upper_bound
++ (bfd *abfd);
++
++/* Copy ABFD's program header table entries to *PHDRS. The entries
++ will be stored as an array of Elf_Internal_Phdr structures, as
++ defined in include/elf/internal.h. To find out how large the
++ buffer needs to be, call bfd_get_elf_phdr_upper_bound.
++
++ Return the number of program header table entries read, or -1 if an
++ error occurs; bfd_get_error will return an appropriate code. */
++extern int bfd_get_elf_phdrs
++ (bfd *abfd, void *phdrs);
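
These two declarations follow the same upper-bound/fetch pattern as the
symbol and reloc readers. A hedged sketch (the wrapper is invented;
allocation-failure handling is trimmed):

    #include "bfd.h"
    #include <stdlib.h>

    static int read_phdrs (bfd *abfd, void **phdrs_out)
    {
      long size = bfd_get_elf_phdr_upper_bound (abfd);
      if (size < 0)
        return -1;
      *phdrs_out = malloc (size);
      /* Returns the number of Elf_Internal_Phdr entries stored.  */
      return bfd_get_elf_phdrs (abfd, *phdrs_out);
    }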
++
++/* Create a new BFD as if by bfd_openr. Rather than opening a file,
++ reconstruct an ELF file by reading the segments out of remote memory
++ based on the ELF file header at EHDR_VMA and the ELF program headers it
++ points to. If not null, *LOADBASEP is filled in with the difference
++ between the VMAs from which the segments were read, and the VMAs the
++ file headers (and hence BFD's idea of each section's VMA) put them at.
++
++ The function TARGET_READ_MEMORY is called to copy LEN bytes from the
++ remote memory at target address VMA into the local buffer at MYADDR; it
++ should return zero on success or an `errno' code on failure. TEMPL must
++ be a BFD for an ELF target with the word size and byte order found in
++ the remote memory. */
++extern bfd *bfd_elf_bfd_from_remote_memory
++ (bfd *templ, bfd_vma ehdr_vma, bfd_vma *loadbasep,
++ int (*target_read_memory) (bfd_vma vma, bfd_byte *myaddr, int len));
++
++/* Return the arch_size field of an elf bfd, or -1 if not elf. */
++extern int bfd_get_arch_size
++ (bfd *);
++
++/* Return TRUE if address "naturally" sign extends, or -1 if not elf. */
++extern int bfd_get_sign_extend_vma
++ (bfd *);
++
++extern struct bfd_section *_bfd_elf_tls_setup
++ (bfd *, struct bfd_link_info *);
++
++extern void _bfd_elf_provide_symbol
++ (struct bfd_link_info *, const char *, bfd_vma, struct bfd_section *);
++
++extern void _bfd_elf_provide_section_bound_symbols
++ (struct bfd_link_info *, struct bfd_section *, const char *, const char *);
++
++extern void _bfd_elf_fix_excluded_sec_syms
++ (bfd *, struct bfd_link_info *);
++
++extern bfd_boolean bfd_m68k_elf32_create_embedded_relocs
++ (bfd *, struct bfd_link_info *, struct bfd_section *, struct bfd_section *,
++ char **);
++
++/* SunOS shared library support routines for the linker. */
++
++extern struct bfd_link_needed_list *bfd_sunos_get_needed_list
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_sunos_record_link_assignment
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_sunos_size_dynamic_sections
++ (bfd *, struct bfd_link_info *, struct bfd_section **,
++ struct bfd_section **, struct bfd_section **);
++
++/* Linux shared library support routines for the linker. */
++
++extern bfd_boolean bfd_i386linux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_m68klinux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++extern bfd_boolean bfd_sparclinux_size_dynamic_sections
++ (bfd *, struct bfd_link_info *);
++
++/* mmap hacks */
++
++struct _bfd_window_internal;
++typedef struct _bfd_window_internal bfd_window_internal;
++
++typedef struct _bfd_window
++{
++ /* What the user asked for. */
++ void *data;
++ bfd_size_type size;
++ /* The actual window used by BFD. Small user-requested read-only
++ regions sharing a page may share a single window into the object
++     file.  Read-write versions shouldn't share windows until I've
++     fixed things to keep track of which portions have been claimed
++     by the application; we don't want to give the same region back
++     when the application wants two writable copies!  */
++ struct _bfd_window_internal *i;
++}
++bfd_window;
++
++extern void bfd_init_window
++ (bfd_window *);
++extern void bfd_free_window
++ (bfd_window *);
++extern bfd_boolean bfd_get_file_window
++ (bfd *, file_ptr, bfd_size_type, bfd_window *, bfd_boolean);
++
++/* XCOFF support routines for the linker. */
++
++extern bfd_boolean bfd_xcoff_link_record_set
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, bfd_size_type);
++extern bfd_boolean bfd_xcoff_import_symbol
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *, bfd_vma,
++ const char *, const char *, const char *, unsigned int);
++extern bfd_boolean bfd_xcoff_export_symbol
++ (bfd *, struct bfd_link_info *, struct bfd_link_hash_entry *);
++extern bfd_boolean bfd_xcoff_link_count_reloc
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_xcoff_record_link_assignment
++ (bfd *, struct bfd_link_info *, const char *);
++extern bfd_boolean bfd_xcoff_size_dynamic_sections
++ (bfd *, struct bfd_link_info *, const char *, const char *,
++ unsigned long, unsigned long, unsigned long, bfd_boolean,
++ int, bfd_boolean, bfd_boolean, struct bfd_section **, bfd_boolean);
++extern bfd_boolean bfd_xcoff_link_generate_rtinit
++ (bfd *, const char *, const char *, bfd_boolean);
++
++/* XCOFF support routines for ar. */
++extern bfd_boolean bfd_xcoff_ar_archive_set_magic
++ (bfd *, char *);
++
++/* Externally visible COFF routines. */
++
++#if defined(__STDC__) || defined(ALMOST_STDC)
++struct internal_syment;
++union internal_auxent;
++#endif
++
++extern bfd_boolean bfd_coff_get_syment
++ (bfd *, struct bfd_symbol *, struct internal_syment *);
++
++extern bfd_boolean bfd_coff_get_auxent
++ (bfd *, struct bfd_symbol *, int, union internal_auxent *);
++
++extern bfd_boolean bfd_coff_set_symbol_class
++ (bfd *, struct bfd_symbol *, unsigned int);
++
++extern bfd_boolean bfd_m68k_coff_create_embedded_relocs
++ (bfd *, struct bfd_link_info *, struct bfd_section *, struct bfd_section *, char **);
++
++/* ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_arm_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_arm_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++extern bfd_boolean bfd_arm_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++/* PE ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_arm_pe_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_arm_pe_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++extern bfd_boolean bfd_arm_pe_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++/* ELF ARM Interworking support. Called from linker. */
++extern bfd_boolean bfd_elf32_arm_allocate_interworking_sections
++ (struct bfd_link_info *);
++
++extern bfd_boolean bfd_elf32_arm_process_before_allocation
++ (bfd *, struct bfd_link_info *, int);
++
++void bfd_elf32_arm_set_target_relocs
++ (struct bfd_link_info *, int, char *, int, int);
++
++extern bfd_boolean bfd_elf32_arm_get_bfd_for_interworking
++ (bfd *, struct bfd_link_info *);
++
++extern bfd_boolean bfd_elf32_arm_add_glue_sections_to_bfd
++ (bfd *, struct bfd_link_info *);
++
++/* ELF ARM mapping symbol support */
++extern bfd_boolean bfd_is_arm_mapping_symbol_name
++ (const char * name);
++
++/* ARM Note section processing. */
++extern bfd_boolean bfd_arm_merge_machines
++ (bfd *, bfd *);
++
++extern bfd_boolean bfd_arm_update_notes
++ (bfd *, const char *);
++
++extern unsigned int bfd_arm_get_mach_from_notes
++ (bfd *, const char *);
++
++/* TI COFF load page support. */
++extern void bfd_ticoff_set_section_load_page
++ (struct bfd_section *, int);
++
++extern int bfd_ticoff_get_section_load_page
++ (struct bfd_section *);
++
++/* H8/300 functions. */
++extern bfd_vma bfd_h8300_pad_address
++ (bfd *, bfd_vma);
++
++/* IA64 Itanium code generation. Called from linker. */
++extern void bfd_elf32_ia64_after_parse
++ (int);
++
++extern void bfd_elf64_ia64_after_parse
++ (int);
++
++/* This structure is used for a comdat section, as in PE. A comdat
++ section is associated with a particular symbol. When the linker
++ sees a comdat section, it keeps only one of the sections with a
++ given name and associated with a given symbol. */
++
++struct coff_comdat_info
++{
++ /* The name of the symbol associated with a comdat section. */
++ const char *name;
++
++ /* The local symbol table index of the symbol associated with a
++ comdat section. This is only meaningful to the object file format
++ specific code; it is not an index into the list returned by
++ bfd_canonicalize_symtab. */
++ long symbol;
++};
++
++extern struct coff_comdat_info *bfd_coff_get_comdat_section
++ (bfd *, struct bfd_section *);
++
++/* Extracted from init.c. */
++void bfd_init (void);
++
++/* Extracted from opncls.c. */
++bfd *bfd_fopen (const char *filename, const char *target,
++ const char *mode, int fd);
++
++bfd *bfd_openr (const char *filename, const char *target);
++
++bfd *bfd_fdopenr (const char *filename, const char *target, int fd);
++
++bfd *bfd_openstreamr (const char *, const char *, void *);
++
++bfd *bfd_openr_iovec (const char *filename, const char *target,
++ void *(*open) (struct bfd *nbfd,
++ void *open_closure),
++ void *open_closure,
++ file_ptr (*pread) (struct bfd *nbfd,
++ void *stream,
++ void *buf,
++ file_ptr nbytes,
++ file_ptr offset),
++ int (*close) (struct bfd *nbfd,
++ void *stream));
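
bfd_openr_iovec lets an application supply its own I/O callbacks in
place of a real file, matching the prototypes above. A minimal
in-memory sketch (the mem struct and callback names are invented for
the example):

    #include "bfd.h"
    #include <string.h>

    struct mem { const char *data; file_ptr size; };

    static void *mem_open (struct bfd *nbfd, void *open_closure)
    {
      return open_closure;              /* the stream is the closure */
    }

    static file_ptr mem_pread (struct bfd *nbfd, void *stream, void *buf,
                               file_ptr nbytes, file_ptr offset)
    {
      struct mem *m = stream;
      if (offset >= m->size)
        return 0;
      if (offset + nbytes > m->size)
        nbytes = m->size - offset;
      memcpy (buf, m->data + offset, nbytes);
      return nbytes;
    }

    static int mem_close (struct bfd *nbfd, void *stream)
    {
      return 0;
    }

    /* Usage: bfd_openr_iovec ("<in-memory>", NULL, mem_open, &my_mem,
                               mem_pread, mem_close);  */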
++
++bfd *bfd_openw (const char *filename, const char *target);
++
++bfd_boolean bfd_close (bfd *abfd);
++
++bfd_boolean bfd_close_all_done (bfd *);
++
++bfd *bfd_create (const char *filename, bfd *templ);
++
++bfd_boolean bfd_make_writable (bfd *abfd);
++
++bfd_boolean bfd_make_readable (bfd *abfd);
++
++unsigned long bfd_calc_gnu_debuglink_crc32
++ (unsigned long crc, const unsigned char *buf, bfd_size_type len);
++
++char *bfd_follow_gnu_debuglink (bfd *abfd, const char *dir);
++
++struct bfd_section *bfd_create_gnu_debuglink_section
++ (bfd *abfd, const char *filename);
++
++bfd_boolean bfd_fill_in_gnu_debuglink_section
++ (bfd *abfd, struct bfd_section *sect, const char *filename);
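
The consumer side of the debuglink support above: given a (typically
stripped) BFD, follow its .gnu_debuglink section to the separate debug
file. The search directory below is a placeholder:

    #include "bfd.h"
    #include <stdio.h>
    #include <stdlib.h>

    static void find_debug_file (bfd *abfd)
    {
      /* Follows .gnu_debuglink, verifying the CRC32; returns a
         malloc'd path or NULL.  */
      char *path = bfd_follow_gnu_debuglink (abfd, "/usr/lib/debug");
      if (path != NULL)
        {
          printf ("separate debug info: %s\n", path);
          free (path);
        }
    }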
++
++/* Extracted from libbfd.c. */
++
++/* Byte swapping macros for user section data. */
++
++#define bfd_put_8(abfd, val, ptr) \
++ ((void) (*((unsigned char *) (ptr)) = (val) & 0xff))
++#define bfd_put_signed_8 \
++ bfd_put_8
++#define bfd_get_8(abfd, ptr) \
++ (*(unsigned char *) (ptr) & 0xff)
++#define bfd_get_signed_8(abfd, ptr) \
++ (((*(unsigned char *) (ptr) & 0xff) ^ 0x80) - 0x80)
++
++#define bfd_put_16(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx16, ((val),(ptr)))
++#define bfd_put_signed_16 \
++ bfd_put_16
++#define bfd_get_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx16, (ptr))
++#define bfd_get_signed_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_16, (ptr))
++
++#define bfd_put_32(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx32, ((val),(ptr)))
++#define bfd_put_signed_32 \
++ bfd_put_32
++#define bfd_get_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx32, (ptr))
++#define bfd_get_signed_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_32, (ptr))
++
++#define bfd_put_64(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_putx64, ((val), (ptr)))
++#define bfd_put_signed_64 \
++ bfd_put_64
++#define bfd_get_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx64, (ptr))
++#define bfd_get_signed_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_getx_signed_64, (ptr))
++
++#define bfd_get(bits, abfd, ptr) \
++ ((bits) == 8 ? (bfd_vma) bfd_get_8 (abfd, ptr) \
++ : (bits) == 16 ? bfd_get_16 (abfd, ptr) \
++ : (bits) == 32 ? bfd_get_32 (abfd, ptr) \
++ : (bits) == 64 ? bfd_get_64 (abfd, ptr) \
++ : (abort (), (bfd_vma) - 1))
++
++#define bfd_put(bits, abfd, val, ptr) \
++ ((bits) == 8 ? bfd_put_8 (abfd, val, ptr) \
++ : (bits) == 16 ? bfd_put_16 (abfd, val, ptr) \
++ : (bits) == 32 ? bfd_put_32 (abfd, val, ptr) \
++ : (bits) == 64 ? bfd_put_64 (abfd, val, ptr) \
++ : (abort (), (void) 0))
++
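++/* Illustrative sketch, not part of the header proper: round-tripping a
++   32-bit value through the swapping macros above.  ABFD supplies the
++   target byte order, so the code is endian-neutral.  */
++static bfd_vma
++example_swap_roundtrip (bfd *abfd, bfd_vma val)
++{
++  bfd_byte buf[4];
++
++  bfd_put_32 (abfd, val, buf);    /* Encode VAL in ABFD's byte order.  */
++  return bfd_get_32 (abfd, buf);  /* Decode it again; yields VAL.  */
++}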
++
++/* Byte swapping macros for file header data. */
++
++#define bfd_h_put_8(abfd, val, ptr) \
++ bfd_put_8 (abfd, val, ptr)
++#define bfd_h_put_signed_8(abfd, val, ptr) \
++ bfd_put_8 (abfd, val, ptr)
++#define bfd_h_get_8(abfd, ptr) \
++ bfd_get_8 (abfd, ptr)
++#define bfd_h_get_signed_8(abfd, ptr) \
++ bfd_get_signed_8 (abfd, ptr)
++
++#define bfd_h_put_16(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx16, (val, ptr))
++#define bfd_h_put_signed_16 \
++ bfd_h_put_16
++#define bfd_h_get_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx16, (ptr))
++#define bfd_h_get_signed_16(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_16, (ptr))
++
++#define bfd_h_put_32(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx32, (val, ptr))
++#define bfd_h_put_signed_32 \
++ bfd_h_put_32
++#define bfd_h_get_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx32, (ptr))
++#define bfd_h_get_signed_32(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_32, (ptr))
++
++#define bfd_h_put_64(abfd, val, ptr) \
++ BFD_SEND (abfd, bfd_h_putx64, (val, ptr))
++#define bfd_h_put_signed_64 \
++ bfd_h_put_64
++#define bfd_h_get_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx64, (ptr))
++#define bfd_h_get_signed_64(abfd, ptr) \
++ BFD_SEND (abfd, bfd_h_getx_signed_64, (ptr))
++
++/* Aliases for the above, which should eventually go away. */
++
++#define H_PUT_64 bfd_h_put_64
++#define H_PUT_32 bfd_h_put_32
++#define H_PUT_16 bfd_h_put_16
++#define H_PUT_8 bfd_h_put_8
++#define H_PUT_S64 bfd_h_put_signed_64
++#define H_PUT_S32 bfd_h_put_signed_32
++#define H_PUT_S16 bfd_h_put_signed_16
++#define H_PUT_S8 bfd_h_put_signed_8
++#define H_GET_64 bfd_h_get_64
++#define H_GET_32 bfd_h_get_32
++#define H_GET_16 bfd_h_get_16
++#define H_GET_8 bfd_h_get_8
++#define H_GET_S64 bfd_h_get_signed_64
++#define H_GET_S32 bfd_h_get_signed_32
++#define H_GET_S16 bfd_h_get_signed_16
++#define H_GET_S8 bfd_h_get_signed_8
++
++
++/* Extracted from bfdio.c. */
++long bfd_get_mtime (bfd *abfd);
++
++long bfd_get_size (bfd *abfd);
++
++/* Extracted from bfdwin.c. */
++/* Extracted from section.c. */
++typedef struct bfd_section
++{
++ /* The name of the section; the name isn't a copy, the pointer is
++ the same as that passed to bfd_make_section. */
++ const char *name;
++
++ /* A unique sequence number. */
++ int id;
++
++ /* Which section in the bfd; 0..n-1 as sections are created in a bfd. */
++ int index;
++
++ /* The next section in the list belonging to the BFD, or NULL. */
++ struct bfd_section *next;
++
++ /* The previous section in the list belonging to the BFD, or NULL. */
++ struct bfd_section *prev;
++
++ /* The field flags contains attributes of the section. Some
++ flags are read in from the object file, and some are
++ synthesized from other information. */
++ flagword flags;
++
++#define SEC_NO_FLAGS 0x000
++
++ /* Tells the OS to allocate space for this section when loading.
++ This is clear for a section containing debug information only. */
++#define SEC_ALLOC 0x001
++
++ /* Tells the OS to load the section from the file when loading.
++ This is clear for a .bss section. */
++#define SEC_LOAD 0x002
++
++ /* The section contains data still to be relocated, so there is
++ some relocation information too. */
++#define SEC_RELOC 0x004
++
++ /* A signal to the OS that the section contains read only data. */
++#define SEC_READONLY 0x008
++
++ /* The section contains code only. */
++#define SEC_CODE 0x010
++
++ /* The section contains data only. */
++#define SEC_DATA 0x020
++
++ /* The section will reside in ROM. */
++#define SEC_ROM 0x040
++
++ /* The section contains constructor information. This section
++ type is used by the linker to create lists of constructors and
++ destructors used by <<g++>>. When a back end sees a symbol
++ which should be used in a constructor list, it creates a new
++ section for the type of name (e.g., <<__CTOR_LIST__>>), attaches
++ the symbol to it, and builds a relocation. To build the lists
++ of constructors, all the linker has to do is catenate all the
++ sections called <<__CTOR_LIST__>> and relocate the data
++ contained within - exactly the operations it would perform on
++ standard data. */
++#define SEC_CONSTRUCTOR 0x080
++
++ /* The section has contents - a data section could be
++ <<SEC_ALLOC>> | <<SEC_HAS_CONTENTS>>; a debug section could be
++ <<SEC_HAS_CONTENTS>>. */
++#define SEC_HAS_CONTENTS 0x100
++
++ /* An instruction to the linker to not output the section
++ even if it has information which would normally be written. */
++#define SEC_NEVER_LOAD 0x200
++
++ /* The section contains thread local data. */
++#define SEC_THREAD_LOCAL 0x400
++
++ /* The section has GOT references. This flag is only for the
++ linker, and is currently only used by the elf32-hppa back end.
++ It will be set if global offset table references were detected
++ in this section, which indicate to the linker that the section
++ contains PIC code, and must be handled specially when doing a
++ static link. */
++#define SEC_HAS_GOT_REF 0x800
++
++ /* The section contains common symbols (symbols may be defined
++ multiple times, the value of a symbol is the amount of
++ space it requires, and the largest symbol value is the one
++ used). Most targets have exactly one of these (which we
++ translate to bfd_com_section_ptr), but ECOFF has two. */
++#define SEC_IS_COMMON 0x1000
++
++ /* The section contains only debugging information. For
++ example, this is set for ELF .debug and .stab sections.
++ strip tests this flag to see if a section can be
++ discarded. */
++#define SEC_DEBUGGING 0x2000
++
++ /* The contents of this section are held in memory pointed to
++ by the contents field. This is checked by bfd_get_section_contents,
++ and the data is retrieved from memory if appropriate. */
++#define SEC_IN_MEMORY 0x4000
++
++ /* The contents of this section are to be excluded by the
++ linker for executable and shared objects unless those
++ objects are to be further relocated. */
++#define SEC_EXCLUDE 0x8000
++
++ /* The contents of this section are to be sorted based on the sum of
++ the symbol and addend values specified by the associated relocation
++ entries. Entries without associated relocation entries will be
++ appended to the end of the section in an unspecified order. */
++#define SEC_SORT_ENTRIES 0x10000
++
++ /* When linking, duplicate sections of the same name should be
++ discarded, rather than being combined into a single section as
++ is usually done. This is similar to how common symbols are
++ handled. See SEC_LINK_DUPLICATES below. */
++#define SEC_LINK_ONCE 0x20000
++
++ /* If SEC_LINK_ONCE is set, this bitfield describes how the linker
++ should handle duplicate sections. */
++#define SEC_LINK_DUPLICATES 0x40000
++
++ /* This value for SEC_LINK_DUPLICATES means that duplicate
++ sections with the same name should simply be discarded. */
++#define SEC_LINK_DUPLICATES_DISCARD 0x0
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if there are any duplicate sections, although
++ it should still only link one copy. */
++#define SEC_LINK_DUPLICATES_ONE_ONLY 0x80000
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if any duplicate sections are a different size. */
++#define SEC_LINK_DUPLICATES_SAME_SIZE 0x100000
++
++ /* This value for SEC_LINK_DUPLICATES means that the linker
++ should warn if any duplicate sections contain different
++ contents. */
++#define SEC_LINK_DUPLICATES_SAME_CONTENTS \
++ (SEC_LINK_DUPLICATES_ONE_ONLY | SEC_LINK_DUPLICATES_SAME_SIZE)
++
++ /* This section was created by the linker as part of dynamic
++ relocation or other arcane processing. It is skipped when
++ going through the first-pass output, trusting that someone
++ else up the line will take care of it later. */
++#define SEC_LINKER_CREATED 0x200000
++
++ /* This section should not be subject to garbage collection. */
++#define SEC_KEEP 0x400000
++
++ /* This section contains "short" data, and should be placed
++ "near" the GP. */
++#define SEC_SMALL_DATA 0x800000
++
++ /* Attempt to merge identical entities in the section.
++ Entity size is given in the entsize field. */
++#define SEC_MERGE 0x1000000
++
++ /* If given with SEC_MERGE, entities to merge are zero terminated
++ strings where entsize specifies character size instead of fixed
++ size entries. */
++#define SEC_STRINGS 0x2000000
++
++ /* This section contains data about section groups. */
++#define SEC_GROUP 0x4000000
++
++ /* The section is a COFF shared library section. This flag is
++ only for the linker. If this type of section appears in
++ the input file, the linker must copy it to the output file
++ without changing the vma or size. FIXME: Although this
++ was originally intended to be general, it really is COFF
++ specific (and the flag was renamed to indicate this). It
++ might be cleaner to have some more general mechanism to
++ allow the back end to control what the linker does with
++ sections. */
++#define SEC_COFF_SHARED_LIBRARY 0x10000000
++
++ /* This section contains data which may be shared with other
++ executables or shared objects. This is for COFF only. */
++#define SEC_COFF_SHARED 0x20000000
++
++ /* When a section with this flag is being linked, then if the size of
++ the input section is less than a page, it should not cross a page
++ boundary. If the size of the input section is one page or more,
++ it should be aligned on a page boundary. This is for TI
++ TMS320C54X only. */
++#define SEC_TIC54X_BLOCK 0x40000000
++
++ /* Conditionally link this section; do not link if there are no
++ references found to any symbol in the section. This is for TI
++ TMS320C54X only. */
++#define SEC_TIC54X_CLINK 0x80000000
++
++ /* End of section flags. */
++
++ /* Some internal packed boolean fields. */
++
++ /* See the vma field. */
++ unsigned int user_set_vma : 1;
++
++ /* A mark flag used by some of the linker backends. */
++ unsigned int linker_mark : 1;
++
++ /* Another mark flag used by some of the linker backends. Set for
++ output sections that have an input section. */
++ unsigned int linker_has_input : 1;
++
++ /* Mark flags used by some linker backends for garbage collection. */
++ unsigned int gc_mark : 1;
++ unsigned int gc_mark_from_eh : 1;
++
++ /* The following flags are used by the ELF linker. */
++
++ /* Mark sections which have been allocated to segments. */
++ unsigned int segment_mark : 1;
++
++ /* Type of sec_info information. */
++ unsigned int sec_info_type:3;
++#define ELF_INFO_TYPE_NONE 0
++#define ELF_INFO_TYPE_STABS 1
++#define ELF_INFO_TYPE_MERGE 2
++#define ELF_INFO_TYPE_EH_FRAME 3
++#define ELF_INFO_TYPE_JUST_SYMS 4
++
++ /* Nonzero if this section uses RELA relocations, rather than REL. */
++ unsigned int use_rela_p:1;
++
++ /* Bits used by various backends. The generic code doesn't touch
++ these fields. */
++
++ /* Nonzero if this section has TLS related relocations. */
++ unsigned int has_tls_reloc:1;
++
++ /* Nonzero if this section has a gp reloc. */
++ unsigned int has_gp_reloc:1;
++
++ /* Nonzero if this section needs the relax finalize pass. */
++ unsigned int need_finalize_relax:1;
++
++ /* Whether relocations have been processed. */
++ unsigned int reloc_done : 1;
++
++ /* End of internal packed boolean fields. */
++
++ /* The virtual memory address of the section - where it will be
++ at run time. The symbols are relocated against this. The
++ user_set_vma flag is maintained by bfd; if it's not set, the
++ backend can assign addresses (for example, in <<a.out>>, where
++ the default address for <<.data>> is dependent on the specific
++ target and various flags). */
++ bfd_vma vma;
++
++ /* The load address of the section - where it would be in a
++ rom image; really only used for writing section header
++ information. */
++ bfd_vma lma;
++
++ /* The size of the section in octets, as it will be output.
++ Contains a value even if the section has no contents (e.g., the
++ size of <<.bss>>). */
++ bfd_size_type size;
++
++ /* For input sections, the original size on disk of the section, in
++ octets. This field is used by the linker relaxation code. It is
++ currently only set for sections where the linker relaxation scheme
++ doesn't cache altered section and reloc contents (stabs, eh_frame,
++ SEC_MERGE, some coff relaxing targets), and thus the original size
++ needs to be kept to read the section multiple times.
++ For output sections, rawsize holds the section size calculated on
++ a previous linker relaxation pass. */
++ bfd_size_type rawsize;
++
++ /* If this section is going to be output, then this value is the
++ offset in *bytes* into the output section of the first byte in the
++ input section (byte ==> smallest addressable unit on the
++ target). In most cases, if this was going to start at the
++ 100th octet (8-bit quantity) in the output section, this value
++ would be 100. However, if the target byte size is 16 bits
++ (bfd_octets_per_byte is "2"), this value would be 50. */
++ bfd_vma output_offset;
++
++ /* The output section through which to map on output. */
++ struct bfd_section *output_section;
++
++ /* The alignment requirement of the section, as an exponent of 2 -
++ e.g., 3 aligns to 2^3 (or 8). */
++ unsigned int alignment_power;
++
++ /* If an input section, a pointer to a vector of relocation
++ records for the data in this section. */
++ struct reloc_cache_entry *relocation;
++
++ /* If an output section, a pointer to a vector of pointers to
++ relocation records for the data in this section. */
++ struct reloc_cache_entry **orelocation;
++
++ /* The number of relocation records in one of the above. */
++ unsigned reloc_count;
++
++ /* Information below is back end specific - and not always used
++ or updated. */
++
++ /* File position of section data. */
++ file_ptr filepos;
++
++ /* File position of relocation info. */
++ file_ptr rel_filepos;
++
++ /* File position of line data. */
++ file_ptr line_filepos;
++
++ /* Pointer to data for applications. */
++ void *userdata;
++
++ /* If the SEC_IN_MEMORY flag is set, this points to the actual
++ contents. */
++ unsigned char *contents;
++
++ /* Attached line number information. */
++ alent *lineno;
++
++ /* Number of line number records. */
++ unsigned int lineno_count;
++
++ /* Entity size for merging purposes. */
++ unsigned int entsize;
++
++ /* Points to the kept section if this section is a link-once section,
++ and is discarded. */
++ struct bfd_section *kept_section;
++
++ /* When a section is being output, this value changes as more
++ line numbers are written out. */
++ file_ptr moving_line_filepos;
++
++ /* What the section number is in the target world. */
++ int target_index;
++
++ void *used_by_bfd;
++
++ /* If this is a constructor section then here is a list of the
++ relocations created to relocate items within it. */
++ struct relent_chain *constructor_chain;
++
++ /* The BFD which owns the section. */
++ bfd *owner;
++
++ /* A symbol which points at this section only. */
++ struct bfd_symbol *symbol;
++ struct bfd_symbol **symbol_ptr_ptr;
++
++ /* Early in the link process, map_head and map_tail are used to build
++ a list of input sections attached to an output section. Later,
++ output sections use these fields for a list of bfd_link_order
++ structs. */
++ union {
++ struct bfd_link_order *link_order;
++ struct bfd_section *s;
++ } map_head, map_tail;
++} asection;
++
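++/* Illustrative sketch, not part of the header proper: walking the
++   per-BFD section list via the NEXT links; ABFD->sections is the
++   list head.  */
++static unsigned int
++example_count_loaded (bfd *abfd)
++{
++  asection *s;
++  unsigned int n = 0;
++
++  for (s = abfd->sections; s != NULL; s = s->next)
++    if ((s->flags & SEC_LOAD) != 0)
++      n++;
++  return n;
++}
++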
++/* These sections are global, and are managed by BFD. The application
++ and target back end are not permitted to change the values in
++ these sections. New code should use the section_ptr macros rather
++ than referring directly to the const sections. The const sections
++ may eventually vanish. */
++#define BFD_ABS_SECTION_NAME "*ABS*"
++#define BFD_UND_SECTION_NAME "*UND*"
++#define BFD_COM_SECTION_NAME "*COM*"
++#define BFD_IND_SECTION_NAME "*IND*"
++
++/* The absolute section. */
++extern asection bfd_abs_section;
++#define bfd_abs_section_ptr ((asection *) &bfd_abs_section)
++#define bfd_is_abs_section(sec) ((sec) == bfd_abs_section_ptr)
++/* Pointer to the undefined section. */
++extern asection bfd_und_section;
++#define bfd_und_section_ptr ((asection *) &bfd_und_section)
++#define bfd_is_und_section(sec) ((sec) == bfd_und_section_ptr)
++/* Pointer to the common section. */
++extern asection bfd_com_section;
++#define bfd_com_section_ptr ((asection *) &bfd_com_section)
++/* Pointer to the indirect section. */
++extern asection bfd_ind_section;
++#define bfd_ind_section_ptr ((asection *) &bfd_ind_section)
++#define bfd_is_ind_section(sec) ((sec) == bfd_ind_section_ptr)
++
++#define bfd_is_const_section(SEC) \
++ ( ((SEC) == bfd_abs_section_ptr) \
++ || ((SEC) == bfd_und_section_ptr) \
++ || ((SEC) == bfd_com_section_ptr) \
++ || ((SEC) == bfd_ind_section_ptr))
++
++extern const struct bfd_symbol * const bfd_abs_symbol;
++extern const struct bfd_symbol * const bfd_com_symbol;
++extern const struct bfd_symbol * const bfd_und_symbol;
++extern const struct bfd_symbol * const bfd_ind_symbol;
++
++/* Macros to handle insertion and deletion of a bfd's sections. These
++ only handle the list pointers, ie. do not adjust section_count,
++ target_index etc. */
++#define bfd_section_list_remove(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ asection *_next = _s->next; \
++ asection *_prev = _s->prev; \
++ if (_prev) \
++ _prev->next = _next; \
++ else \
++ (ABFD)->sections = _next; \
++ if (_next) \
++ _next->prev = _prev; \
++ else \
++ (ABFD)->section_last = _prev; \
++ } \
++ while (0)
++#define bfd_section_list_append(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ bfd *_abfd = ABFD; \
++ _s->next = NULL; \
++ if (_abfd->section_last) \
++ { \
++ _s->prev = _abfd->section_last; \
++ _abfd->section_last->next = _s; \
++ } \
++ else \
++ { \
++ _s->prev = NULL; \
++ _abfd->sections = _s; \
++ } \
++ _abfd->section_last = _s; \
++ } \
++ while (0)
++#define bfd_section_list_prepend(ABFD, S) \
++ do \
++ { \
++ asection *_s = S; \
++ bfd *_abfd = ABFD; \
++ _s->prev = NULL; \
++ if (_abfd->sections) \
++ { \
++ _s->next = _abfd->sections; \
++ _abfd->sections->prev = _s; \
++ } \
++ else \
++ { \
++ _s->next = NULL; \
++ _abfd->section_last = _s; \
++ } \
++ _abfd->sections = _s; \
++ } \
++ while (0)
++#define bfd_section_list_insert_after(ABFD, A, S) \
++ do \
++ { \
++ asection *_a = A; \
++ asection *_s = S; \
++ asection *_next = _a->next; \
++ _s->next = _next; \
++ _s->prev = _a; \
++ _a->next = _s; \
++ if (_next) \
++ _next->prev = _s; \
++ else \
++ (ABFD)->section_last = _s; \
++ } \
++ while (0)
++#define bfd_section_list_insert_before(ABFD, B, S) \
++ do \
++ { \
++ asection *_b = B; \
++ asection *_s = S; \
++ asection *_prev = _b->prev; \
++ _s->prev = _prev; \
++ _s->next = _b; \
++ _b->prev = _s; \
++ if (_prev) \
++ _prev->next = _s; \
++ else \
++ (ABFD)->sections = _s; \
++ } \
++ while (0)
++#define bfd_section_removed_from_list(ABFD, S) \
++ ((S)->next == NULL ? (ABFD)->section_last != (S) : (S)->next->prev != (S))
++
++void bfd_section_list_clear (bfd *);
++
++asection *bfd_get_section_by_name (bfd *abfd, const char *name);
++
++asection *bfd_get_section_by_name_if
++ (bfd *abfd,
++ const char *name,
++ bfd_boolean (*func) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
++
++char *bfd_get_unique_section_name
++ (bfd *abfd, const char *templat, int *count);
++
++asection *bfd_make_section_old_way (bfd *abfd, const char *name);
++
++asection *bfd_make_section_anyway_with_flags
++ (bfd *abfd, const char *name, flagword flags);
++
++asection *bfd_make_section_anyway (bfd *abfd, const char *name);
++
++asection *bfd_make_section_with_flags
++ (bfd *, const char *name, flagword flags);
++
++asection *bfd_make_section (bfd *, const char *name);
++
++bfd_boolean bfd_set_section_flags
++ (bfd *abfd, asection *sec, flagword flags);
++
++void bfd_map_over_sections
++ (bfd *abfd,
++ void (*func) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
++
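++/* Illustrative sketch, not part of the header proper: the callback
++   style of iteration via bfd_map_over_sections above.  OBJ is passed
++   through unchanged; here it accumulates the total section size.  */
++static void
++example_sum_sizes (bfd *abfd, asection *sect, void *obj)
++{
++  (void) abfd;
++  *(bfd_size_type *) obj += sect->size;
++}
++/* Usage:  bfd_size_type total = 0;
++           bfd_map_over_sections (abfd, example_sum_sizes, &total);  */
++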
++asection *bfd_sections_find_if
++ (bfd *abfd,
++ bfd_boolean (*operation) (bfd *abfd, asection *sect, void *obj),
++ void *obj);
++
++bfd_boolean bfd_set_section_size
++ (bfd *abfd, asection *sec, bfd_size_type val);
++
++bfd_boolean bfd_set_section_contents
++ (bfd *abfd, asection *section, const void *data,
++ file_ptr offset, bfd_size_type count);
++
++bfd_boolean bfd_get_section_contents
++ (bfd *abfd, asection *section, void *location, file_ptr offset,
++ bfd_size_type count);
++
++bfd_boolean bfd_malloc_and_get_section
++ (bfd *abfd, asection *section, bfd_byte **buf);
++
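++/* Illustrative sketch, not part of the header proper: reading an
++   entire named section into a malloc'd buffer.  The caller owns the
++   buffer and must free it (assumes <stdlib.h>).  */
++static bfd_byte *
++example_read_section (bfd *abfd, const char *name)
++{
++  asection *sec = bfd_get_section_by_name (abfd, name);
++  bfd_byte *buf;
++
++  if (sec == NULL || !bfd_malloc_and_get_section (abfd, sec, &buf))
++    return NULL;
++  return buf;    /* sec->size octets of section contents.  */
++}
++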
++bfd_boolean bfd_copy_private_section_data
++ (bfd *ibfd, asection *isec, bfd *obfd, asection *osec);
++
++#define bfd_copy_private_section_data(ibfd, isection, obfd, osection) \
++ BFD_SEND (obfd, _bfd_copy_private_section_data, \
++ (ibfd, isection, obfd, osection))
++bfd_boolean bfd_generic_is_group_section (bfd *, const asection *sec);
++
++bfd_boolean bfd_generic_discard_group (bfd *abfd, asection *group);
++
++/* Extracted from archures.c. */
++enum bfd_architecture
++{
++ bfd_arch_unknown, /* File arch not known. */
++ bfd_arch_obscure, /* Arch known, not one of these. */
++ bfd_arch_m68k, /* Motorola 68xxx */
++#define bfd_mach_m68000 1
++#define bfd_mach_m68008 2
++#define bfd_mach_m68010 3
++#define bfd_mach_m68020 4
++#define bfd_mach_m68030 5
++#define bfd_mach_m68040 6
++#define bfd_mach_m68060 7
++#define bfd_mach_cpu32 8
++#define bfd_mach_mcf5200 9
++#define bfd_mach_mcf5206e 10
++#define bfd_mach_mcf5307 11
++#define bfd_mach_mcf5407 12
++#define bfd_mach_mcf528x 13
++#define bfd_mach_mcfv4e 14
++#define bfd_mach_mcf521x 15
++#define bfd_mach_mcf5249 16
++#define bfd_mach_mcf547x 17
++#define bfd_mach_mcf548x 18
++ bfd_arch_vax, /* DEC Vax */
++ bfd_arch_i960, /* Intel 960 */
++ /* The order of the following is important.
++ A lower number indicates a machine type that
++ only accepts a subset of the instructions
++ available to machines with higher numbers.
++ The exception is the "ca", which is
++ incompatible with all other machines except
++ "core". */
++
++#define bfd_mach_i960_core 1
++#define bfd_mach_i960_ka_sa 2
++#define bfd_mach_i960_kb_sb 3
++#define bfd_mach_i960_mc 4
++#define bfd_mach_i960_xa 5
++#define bfd_mach_i960_ca 6
++#define bfd_mach_i960_jx 7
++#define bfd_mach_i960_hx 8
++
++ bfd_arch_or32, /* OpenRISC 32 */
++
++ bfd_arch_a29k, /* AMD 29000 */
++ bfd_arch_sparc, /* SPARC */
++#define bfd_mach_sparc 1
++/* The difference between v8plus and v9 is that v9 is a true 64 bit env. */
++#define bfd_mach_sparc_sparclet 2
++#define bfd_mach_sparc_sparclite 3
++#define bfd_mach_sparc_v8plus 4
++#define bfd_mach_sparc_v8plusa 5 /* with ultrasparc add'ns. */
++#define bfd_mach_sparc_sparclite_le 6
++#define bfd_mach_sparc_v9 7
++#define bfd_mach_sparc_v9a 8 /* with ultrasparc add'ns. */
++#define bfd_mach_sparc_v8plusb 9 /* with cheetah add'ns. */
++#define bfd_mach_sparc_v9b 10 /* with cheetah add'ns. */
++/* Nonzero if MACH has the v9 instruction set. */
++#define bfd_mach_sparc_v9_p(mach) \
++ ((mach) >= bfd_mach_sparc_v8plus && (mach) <= bfd_mach_sparc_v9b \
++ && (mach) != bfd_mach_sparc_sparclite_le)
++/* Nonzero if MACH is a 64 bit sparc architecture. */
++#define bfd_mach_sparc_64bit_p(mach) \
++ ((mach) >= bfd_mach_sparc_v9 && (mach) != bfd_mach_sparc_v8plusb)
++ bfd_arch_mips, /* MIPS Rxxxx */
++#define bfd_mach_mips3000 3000
++#define bfd_mach_mips3900 3900
++#define bfd_mach_mips4000 4000
++#define bfd_mach_mips4010 4010
++#define bfd_mach_mips4100 4100
++#define bfd_mach_mips4111 4111
++#define bfd_mach_mips4120 4120
++#define bfd_mach_mips4300 4300
++#define bfd_mach_mips4400 4400
++#define bfd_mach_mips4600 4600
++#define bfd_mach_mips4650 4650
++#define bfd_mach_mips5000 5000
++#define bfd_mach_mips5400 5400
++#define bfd_mach_mips5500 5500
++#define bfd_mach_mips6000 6000
++#define bfd_mach_mips7000 7000
++#define bfd_mach_mips8000 8000
++#define bfd_mach_mips9000 9000
++#define bfd_mach_mips10000 10000
++#define bfd_mach_mips12000 12000
++#define bfd_mach_mips16 16
++#define bfd_mach_mips5 5
++#define bfd_mach_mips_sb1 12310201 /* octal 'SB', 01 */
++#define bfd_mach_mipsisa32 32
++#define bfd_mach_mipsisa32r2 33
++#define bfd_mach_mipsisa64 64
++#define bfd_mach_mipsisa64r2 65
++ bfd_arch_i386, /* Intel 386 */
++#define bfd_mach_i386_i386 1
++#define bfd_mach_i386_i8086 2
++#define bfd_mach_i386_i386_intel_syntax 3
++#define bfd_mach_x86_64 64
++#define bfd_mach_x86_64_intel_syntax 65
++ bfd_arch_we32k, /* AT&T WE32xxx */
++ bfd_arch_tahoe, /* CCI/Harris Tahoe */
++ bfd_arch_i860, /* Intel 860 */
++ bfd_arch_i370, /* IBM 360/370 Mainframes */
++ bfd_arch_romp, /* IBM ROMP PC/RT */
++ bfd_arch_alliant, /* Alliant */
++ bfd_arch_convex, /* Convex */
++ bfd_arch_m88k, /* Motorola 88xxx */
++ bfd_arch_m98k, /* Motorola 98xxx */
++ bfd_arch_pyramid, /* Pyramid Technology */
++ bfd_arch_h8300, /* Renesas H8/300 (formerly Hitachi H8/300) */
++#define bfd_mach_h8300 1
++#define bfd_mach_h8300h 2
++#define bfd_mach_h8300s 3
++#define bfd_mach_h8300hn 4
++#define bfd_mach_h8300sn 5
++#define bfd_mach_h8300sx 6
++#define bfd_mach_h8300sxn 7
++ bfd_arch_pdp11, /* DEC PDP-11 */
++ bfd_arch_powerpc, /* PowerPC */
++#define bfd_mach_ppc 32
++#define bfd_mach_ppc64 64
++#define bfd_mach_ppc_403 403
++#define bfd_mach_ppc_403gc 4030
++#define bfd_mach_ppc_505 505
++#define bfd_mach_ppc_601 601
++#define bfd_mach_ppc_602 602
++#define bfd_mach_ppc_603 603
++#define bfd_mach_ppc_ec603e 6031
++#define bfd_mach_ppc_604 604
++#define bfd_mach_ppc_620 620
++#define bfd_mach_ppc_630 630
++#define bfd_mach_ppc_750 750
++#define bfd_mach_ppc_860 860
++#define bfd_mach_ppc_a35 35
++#define bfd_mach_ppc_rs64ii 642
++#define bfd_mach_ppc_rs64iii 643
++#define bfd_mach_ppc_7400 7400
++#define bfd_mach_ppc_e500 500
++ bfd_arch_rs6000, /* IBM RS/6000 */
++#define bfd_mach_rs6k 6000
++#define bfd_mach_rs6k_rs1 6001
++#define bfd_mach_rs6k_rsc 6003
++#define bfd_mach_rs6k_rs2 6002
++ bfd_arch_hppa, /* HP PA RISC */
++#define bfd_mach_hppa10 10
++#define bfd_mach_hppa11 11
++#define bfd_mach_hppa20 20
++#define bfd_mach_hppa20w 25
++ bfd_arch_d10v, /* Mitsubishi D10V */
++#define bfd_mach_d10v 1
++#define bfd_mach_d10v_ts2 2
++#define bfd_mach_d10v_ts3 3
++ bfd_arch_d30v, /* Mitsubishi D30V */
++ bfd_arch_dlx, /* DLX */
++ bfd_arch_m68hc11, /* Motorola 68HC11 */
++ bfd_arch_m68hc12, /* Motorola 68HC12 */
++#define bfd_mach_m6812_default 0
++#define bfd_mach_m6812 1
++#define bfd_mach_m6812s 2
++ bfd_arch_z8k, /* Zilog Z8000 */
++#define bfd_mach_z8001 1
++#define bfd_mach_z8002 2
++ bfd_arch_h8500, /* Renesas H8/500 (formerly Hitachi H8/500) */
++ bfd_arch_sh, /* Renesas / SuperH SH (formerly Hitachi SH) */
++#define bfd_mach_sh 1
++#define bfd_mach_sh2 0x20
++#define bfd_mach_sh_dsp 0x2d
++#define bfd_mach_sh2a 0x2a
++#define bfd_mach_sh2a_nofpu 0x2b
++#define bfd_mach_sh2a_nofpu_or_sh4_nommu_nofpu 0x2a1
++#define bfd_mach_sh2a_nofpu_or_sh3_nommu 0x2a2
++#define bfd_mach_sh2a_or_sh4 0x2a3
++#define bfd_mach_sh2a_or_sh3e 0x2a4
++#define bfd_mach_sh2e 0x2e
++#define bfd_mach_sh3 0x30
++#define bfd_mach_sh3_nommu 0x31
++#define bfd_mach_sh3_dsp 0x3d
++#define bfd_mach_sh3e 0x3e
++#define bfd_mach_sh4 0x40
++#define bfd_mach_sh4_nofpu 0x41
++#define bfd_mach_sh4_nommu_nofpu 0x42
++#define bfd_mach_sh4a 0x4a
++#define bfd_mach_sh4a_nofpu 0x4b
++#define bfd_mach_sh4al_dsp 0x4d
++#define bfd_mach_sh5 0x50
++ bfd_arch_alpha, /* Dec Alpha */
++#define bfd_mach_alpha_ev4 0x10
++#define bfd_mach_alpha_ev5 0x20
++#define bfd_mach_alpha_ev6 0x30
++ bfd_arch_arm, /* Advanced RISC Machines ARM. */
++#define bfd_mach_arm_unknown 0
++#define bfd_mach_arm_2 1
++#define bfd_mach_arm_2a 2
++#define bfd_mach_arm_3 3
++#define bfd_mach_arm_3M 4
++#define bfd_mach_arm_4 5
++#define bfd_mach_arm_4T 6
++#define bfd_mach_arm_5 7
++#define bfd_mach_arm_5T 8
++#define bfd_mach_arm_5TE 9
++#define bfd_mach_arm_XScale 10
++#define bfd_mach_arm_ep9312 11
++#define bfd_mach_arm_iWMMXt 12
++ bfd_arch_ns32k, /* National Semiconductors ns32000 */
++ bfd_arch_w65, /* WDC 65816 */
++ bfd_arch_tic30, /* Texas Instruments TMS320C30 */
++ bfd_arch_tic4x, /* Texas Instruments TMS320C3X/4X */
++#define bfd_mach_tic3x 30
++#define bfd_mach_tic4x 40
++ bfd_arch_tic54x, /* Texas Instruments TMS320C54X */
++ bfd_arch_tic80, /* TI TMS320c80 (MVP) */
++ bfd_arch_v850, /* NEC V850 */
++#define bfd_mach_v850 1
++#define bfd_mach_v850e 'E'
++#define bfd_mach_v850e1 '1'
++ bfd_arch_arc, /* ARC Cores */
++#define bfd_mach_arc_5 5
++#define bfd_mach_arc_6 6
++#define bfd_mach_arc_7 7
++#define bfd_mach_arc_8 8
++ bfd_arch_m32c, /* Renesas M16C/M32C. */
++#define bfd_mach_m16c 0x75
++#define bfd_mach_m32c 0x78
++ bfd_arch_m32r, /* Renesas M32R (formerly Mitsubishi M32R/D) */
++#define bfd_mach_m32r 1 /* For backwards compatibility. */
++#define bfd_mach_m32rx 'x'
++#define bfd_mach_m32r2 '2'
++ bfd_arch_mn10200, /* Matsushita MN10200 */
++ bfd_arch_mn10300, /* Matsushita MN10300 */
++#define bfd_mach_mn10300 300
++#define bfd_mach_am33 330
++#define bfd_mach_am33_2 332
++ bfd_arch_fr30,
++#define bfd_mach_fr30 0x46523330
++ bfd_arch_frv,
++#define bfd_mach_frv 1
++#define bfd_mach_frvsimple 2
++#define bfd_mach_fr300 300
++#define bfd_mach_fr400 400
++#define bfd_mach_fr450 450
++#define bfd_mach_frvtomcat 499 /* fr500 prototype */
++#define bfd_mach_fr500 500
++#define bfd_mach_fr550 550
++ bfd_arch_mcore,
++ bfd_arch_ia64, /* HP/Intel ia64 */
++#define bfd_mach_ia64_elf64 64
++#define bfd_mach_ia64_elf32 32
++ bfd_arch_ip2k, /* Ubicom IP2K microcontrollers. */
++#define bfd_mach_ip2022 1
++#define bfd_mach_ip2022ext 2
++ bfd_arch_iq2000, /* Vitesse IQ2000. */
++#define bfd_mach_iq2000 1
++#define bfd_mach_iq10 2
++ bfd_arch_ms1,
++#define bfd_mach_ms1 1
++#define bfd_mach_mrisc2 2
++ bfd_arch_pj,
++ bfd_arch_avr, /* Atmel AVR microcontrollers. */
++#define bfd_mach_avr1 1
++#define bfd_mach_avr2 2
++#define bfd_mach_avr3 3
++#define bfd_mach_avr4 4
++#define bfd_mach_avr5 5
++ bfd_arch_cr16c, /* National Semiconductor CompactRISC. */
++#define bfd_mach_cr16c 1
++ bfd_arch_crx, /* National Semiconductor CRX. */
++#define bfd_mach_crx 1
++ bfd_arch_cris, /* Axis CRIS */
++#define bfd_mach_cris_v0_v10 255
++#define bfd_mach_cris_v32 32
++#define bfd_mach_cris_v10_v32 1032
++ bfd_arch_s390, /* IBM s390 */
++#define bfd_mach_s390_31 31
++#define bfd_mach_s390_64 64
++ bfd_arch_openrisc, /* OpenRISC */
++ bfd_arch_mmix, /* Donald Knuth's educational processor. */
++ bfd_arch_xstormy16,
++#define bfd_mach_xstormy16 1
++ bfd_arch_msp430, /* Texas Instruments MSP430 architecture. */
++#define bfd_mach_msp11 11
++#define bfd_mach_msp110 110
++#define bfd_mach_msp12 12
++#define bfd_mach_msp13 13
++#define bfd_mach_msp14 14
++#define bfd_mach_msp15 15
++#define bfd_mach_msp16 16
++#define bfd_mach_msp31 31
++#define bfd_mach_msp32 32
++#define bfd_mach_msp33 33
++#define bfd_mach_msp41 41
++#define bfd_mach_msp42 42
++#define bfd_mach_msp43 43
++#define bfd_mach_msp44 44
++ bfd_arch_xtensa, /* Tensilica's Xtensa cores. */
++#define bfd_mach_xtensa 1
++ bfd_arch_maxq, /* Dallas MAXQ 10/20 */
++#define bfd_mach_maxq10 10
++#define bfd_mach_maxq20 20
++ bfd_arch_last
++ };
++
++typedef struct bfd_arch_info
++{
++ int bits_per_word;
++ int bits_per_address;
++ int bits_per_byte;
++ enum bfd_architecture arch;
++ unsigned long mach;
++ const char *arch_name;
++ const char *printable_name;
++ unsigned int section_align_power;
++ /* TRUE if this is the default machine for the architecture.
++ The default arch should be the first entry for an arch so that
++ all the entries for that arch can be accessed via <<next>>. */
++ bfd_boolean the_default;
++ const struct bfd_arch_info * (*compatible)
++ (const struct bfd_arch_info *a, const struct bfd_arch_info *b);
++
++ bfd_boolean (*scan) (const struct bfd_arch_info *, const char *);
++
++ const struct bfd_arch_info *next;
++}
++bfd_arch_info_type;
++
++const char *bfd_printable_name (bfd *abfd);
++
++const bfd_arch_info_type *bfd_scan_arch (const char *string);
++
++const char **bfd_arch_list (void);
++
++const bfd_arch_info_type *bfd_arch_get_compatible
++ (const bfd *abfd, const bfd *bbfd, bfd_boolean accept_unknowns);
++
++void bfd_set_arch_info (bfd *abfd, const bfd_arch_info_type *arg);
++
++enum bfd_architecture bfd_get_arch (bfd *abfd);
++
++unsigned long bfd_get_mach (bfd *abfd);
++
++unsigned int bfd_arch_bits_per_byte (bfd *abfd);
++
++unsigned int bfd_arch_bits_per_address (bfd *abfd);
++
++const bfd_arch_info_type *bfd_get_arch_info (bfd *abfd);
++
++const bfd_arch_info_type *bfd_lookup_arch
++ (enum bfd_architecture arch, unsigned long machine);
++
++const char *bfd_printable_arch_mach
++ (enum bfd_architecture arch, unsigned long machine);
++
++unsigned int bfd_octets_per_byte (bfd *abfd);
++
++unsigned int bfd_arch_mach_octets_per_byte
++ (enum bfd_architecture arch, unsigned long machine);
++
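++/* Illustrative sketch, not part of the header proper: querying the
++   architecture information recorded for an open BFD.  */
++static const char *
++example_arch_name (bfd *abfd)
++{
++  const bfd_arch_info_type *info = bfd_get_arch_info (abfd);
++
++  /* Equivalent to bfd_printable_name (abfd).  */
++  return info != NULL ? info->printable_name : "unknown";
++}
++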
++/* Extracted from reloc.c. */
++typedef enum bfd_reloc_status
++{
++ /* No errors detected. */
++ bfd_reloc_ok,
++
++ /* The relocation was performed, but there was an overflow. */
++ bfd_reloc_overflow,
++
++ /* The address to relocate was not within the section supplied. */
++ bfd_reloc_outofrange,
++
++ /* Used by special functions. */
++ bfd_reloc_continue,
++
++ /* Unsupported relocation size requested. */
++ bfd_reloc_notsupported,
++
++ /* Unused. */
++ bfd_reloc_other,
++
++ /* The symbol to relocate against was undefined. */
++ bfd_reloc_undefined,
++
++ /* The relocation was performed, but may not be ok - presently
++ generated only when linking i960 coff files with i960 b.out
++ symbols. If this type is returned, the error_message argument
++ to bfd_perform_relocation will be set. */
++ bfd_reloc_dangerous
++ }
++ bfd_reloc_status_type;
++
++
++typedef struct reloc_cache_entry
++{
++ /* A pointer into the canonical table of pointers. */
++ struct bfd_symbol **sym_ptr_ptr;
++
++ /* Offset in section. */
++ bfd_size_type address;
++
++ /* Addend for relocation value. */
++ bfd_vma addend;
++
++ /* Pointer to how to perform the required relocation. */
++ reloc_howto_type *howto;
++
++}
++arelent;
++
++enum complain_overflow
++{
++ /* Do not complain on overflow. */
++ complain_overflow_dont,
++
++ /* Complain if the bitfield overflows, whether it is considered
++ as signed or unsigned. */
++ complain_overflow_bitfield,
++
++ /* Complain if the value overflows when considered as signed
++ number. */
++ complain_overflow_signed,
++
++ /* Complain if the value overflows when considered as an
++ unsigned number. */
++ complain_overflow_unsigned
++};
++
++struct reloc_howto_struct
++{
++ /* The type field has mainly a documentary use - the back end can
++ do what it wants with it, though normally the back end's
++ external idea of a reloc number is stored in this
++ field. For example, a PC relative word relocation
++ in a coff environment has the type 023 - because that's
++ what the outside world calls a R_PCRWORD reloc. */
++ unsigned int type;
++
++ /* The value the final relocation is shifted right by. This drops
++ unwanted data from the relocation. */
++ unsigned int rightshift;
++
++ /* The size of the item to be relocated. This is *not* a
++ power-of-two measure. To get the number of bytes operated
++ on by a type of relocation, use bfd_get_reloc_size. */
++ int size;
++
++ /* The number of bits in the item to be relocated. This is used
++ when doing overflow checking. */
++ unsigned int bitsize;
++
++ /* Notes that the relocation is relative to the location in the
++ data section of the addend. The relocation function will
++ subtract from the relocation value the address of the location
++ being relocated. */
++ bfd_boolean pc_relative;
++
++ /* The bit position of the reloc value in the destination.
++ The relocated value is left shifted by this amount. */
++ unsigned int bitpos;
++
++ /* What type of overflow error should be checked for when
++ relocating. */
++ enum complain_overflow complain_on_overflow;
++
++ /* If this field is non null, then the supplied function is
++ called rather than the normal function. This allows really
++ strange relocation methods to be accommodated (e.g., i960 callj
++ instructions). */
++ bfd_reloc_status_type (*special_function)
++ (bfd *, arelent *, struct bfd_symbol *, void *, asection *,
++ bfd *, char **);
++
++ /* The textual name of the relocation type. */
++ char *name;
++
++ /* Some formats record a relocation addend in the section contents
++ rather than with the relocation. For ELF formats this is the
++ distinction between USE_REL and USE_RELA (though the code checks
++ for USE_REL == 1/0). The value of this field is TRUE if the
++ addend is recorded with the section contents; when performing a
++ partial link (ld -r) the section contents (the data) will be
++ modified. The value of this field is FALSE if addends are
++ recorded with the relocation (in arelent.addend); when performing
++ a partial link the relocation will be modified.
++ All relocations for all ELF USE_RELA targets should set this field
++ to FALSE (values of TRUE should be looked on with suspicion).
++ However, the converse is not true: not all relocations of all ELF
++ USE_REL targets set this field to TRUE. Why this is so is peculiar
++ to each particular target. For relocs that aren't used in partial
++ links (e.g. GOT stuff) it doesn't matter what this is set to. */
++ bfd_boolean partial_inplace;
++
++ /* src_mask selects the part of the instruction (or data) to be used
++ in the relocation sum. If the target relocations don't have an
++ addend in the reloc, e.g. ELF USE_REL, src_mask will normally equal
++ dst_mask to extract the addend from the section contents. If
++ relocations do have an addend in the reloc, e.g. ELF USE_RELA, this
++ field should be zero. Non-zero values for ELF USE_RELA targets are
++ bogus as in those cases the value in the dst_mask part of the
++ section contents should be treated as garbage. */
++ bfd_vma src_mask;
++
++ /* dst_mask selects which parts of the instruction (or data) are
++ replaced with a relocated value. */
++ bfd_vma dst_mask;
++
++ /* When some formats create PC relative instructions, they leave
++ the value of the pc of the place being relocated in the offset
++ slot of the instruction, so that a PC relative relocation can
++ be made just by adding in an ordinary offset (e.g., sun3 a.out).
++ Some formats leave the displacement part of an instruction
++ empty (e.g., m88k bcs); this flag signals the fact. */
++ bfd_boolean pcrel_offset;
++};
++
++#define HOWTO(C, R, S, B, P, BI, O, SF, NAME, INPLACE, MASKSRC, MASKDST, PC) \
++ { (unsigned) C, R, S, B, P, BI, O, SF, NAME, INPLACE, MASKSRC, MASKDST, PC }
++#define NEWHOWTO(FUNCTION, NAME, SIZE, REL, IN) \
++ HOWTO (0, 0, SIZE, 0, REL, 0, complain_overflow_dont, FUNCTION, \
++ NAME, FALSE, 0, 0, IN)
++
++#define EMPTY_HOWTO(C) \
++ HOWTO ((C), 0, 0, 0, FALSE, 0, complain_overflow_dont, NULL, \
++ NULL, FALSE, 0, 0, FALSE)
++
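++/* Illustrative sketch, not part of the header proper: how a back end
++   would describe a plain 32-bit absolute relocation with the HOWTO
++   macro above.  The type number and name are hypothetical; size 2
++   selects a 4-octet field (see bfd_get_reloc_size below).  */
++static reloc_howto_type example_howto_32 =
++  HOWTO (0,                /* type (back-end specific number) */
++         0,                /* rightshift */
++         2,                /* size */
++         32,               /* bitsize */
++         FALSE,            /* pc_relative */
++         0,                /* bitpos */
++         complain_overflow_bitfield, /* complain_on_overflow */
++         NULL,             /* special_function */
++         "EXAMPLE_32",     /* name */
++         FALSE,            /* partial_inplace */
++         0,                /* src_mask */
++         0xffffffff,       /* dst_mask */
++         FALSE);           /* pcrel_offset */
++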
++#define HOWTO_PREPARE(relocation, symbol) \
++ { \
++ if (symbol != NULL) \
++ { \
++ if (bfd_is_com_section (symbol->section)) \
++ { \
++ relocation = 0; \
++ } \
++ else \
++ { \
++ relocation = symbol->value; \
++ } \
++ } \
++ }
++
++unsigned int bfd_get_reloc_size (reloc_howto_type *);
++
++typedef struct relent_chain
++{
++ arelent relent;
++ struct relent_chain *next;
++}
++arelent_chain;
++
++bfd_reloc_status_type bfd_check_overflow
++ (enum complain_overflow how,
++ unsigned int bitsize,
++ unsigned int rightshift,
++ unsigned int addrsize,
++ bfd_vma relocation);
++
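++/* Illustrative sketch, not part of the header proper: testing whether
++   VALUE fits a signed 16-bit relocation field on ABFD's target.  */
++static bfd_boolean
++example_fits_signed_16 (bfd *abfd, bfd_vma value)
++{
++  return (bfd_check_overflow (complain_overflow_signed, 16, 0,
++                              bfd_arch_bits_per_address (abfd),
++                              value)
++          == bfd_reloc_ok) ? TRUE : FALSE;
++}
++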
++bfd_reloc_status_type bfd_perform_relocation
++ (bfd *abfd,
++ arelent *reloc_entry,
++ void *data,
++ asection *input_section,
++ bfd *output_bfd,
++ char **error_message);
++
++bfd_reloc_status_type bfd_install_relocation
++ (bfd *abfd,
++ arelent *reloc_entry,
++ void *data, bfd_vma data_start,
++ asection *input_section,
++ char **error_message);
++
++enum bfd_reloc_code_real {
++ _dummy_first_bfd_reloc_code_real,
++
++
++/* Basic absolute relocations of N bits. */
++ BFD_RELOC_64,
++ BFD_RELOC_32,
++ BFD_RELOC_26,
++ BFD_RELOC_24,
++ BFD_RELOC_16,
++ BFD_RELOC_14,
++ BFD_RELOC_8,
++
++/* PC-relative relocations. Sometimes these are relative to the address
++of the relocation itself; sometimes they are relative to the start of
++the section containing the relocation. It depends on the specific target.
++
++The 24-bit relocation is used in some Intel 960 configurations. */
++ BFD_RELOC_64_PCREL,
++ BFD_RELOC_32_PCREL,
++ BFD_RELOC_24_PCREL,
++ BFD_RELOC_16_PCREL,
++ BFD_RELOC_12_PCREL,
++ BFD_RELOC_8_PCREL,
++
++/* Section relative relocations. Some targets need this for DWARF2. */
++ BFD_RELOC_32_SECREL,
++
++/* For ELF. */
++ BFD_RELOC_32_GOT_PCREL,
++ BFD_RELOC_16_GOT_PCREL,
++ BFD_RELOC_8_GOT_PCREL,
++ BFD_RELOC_32_GOTOFF,
++ BFD_RELOC_16_GOTOFF,
++ BFD_RELOC_LO16_GOTOFF,
++ BFD_RELOC_HI16_GOTOFF,
++ BFD_RELOC_HI16_S_GOTOFF,
++ BFD_RELOC_8_GOTOFF,
++ BFD_RELOC_64_PLT_PCREL,
++ BFD_RELOC_32_PLT_PCREL,
++ BFD_RELOC_24_PLT_PCREL,
++ BFD_RELOC_16_PLT_PCREL,
++ BFD_RELOC_8_PLT_PCREL,
++ BFD_RELOC_64_PLTOFF,
++ BFD_RELOC_32_PLTOFF,
++ BFD_RELOC_16_PLTOFF,
++ BFD_RELOC_LO16_PLTOFF,
++ BFD_RELOC_HI16_PLTOFF,
++ BFD_RELOC_HI16_S_PLTOFF,
++ BFD_RELOC_8_PLTOFF,
++
++/* Relocations used by 68K ELF. */
++ BFD_RELOC_68K_GLOB_DAT,
++ BFD_RELOC_68K_JMP_SLOT,
++ BFD_RELOC_68K_RELATIVE,
++
++/* Linkage-table relative. */
++ BFD_RELOC_32_BASEREL,
++ BFD_RELOC_16_BASEREL,
++ BFD_RELOC_LO16_BASEREL,
++ BFD_RELOC_HI16_BASEREL,
++ BFD_RELOC_HI16_S_BASEREL,
++ BFD_RELOC_8_BASEREL,
++ BFD_RELOC_RVA,
++
++/* Absolute 8-bit relocation, but used to form an address like 0xFFnn. */
++ BFD_RELOC_8_FFnn,
++
++/* These PC-relative relocations are stored as word displacements --
++i.e., byte displacements shifted right two bits. The 30-bit word
++displacement (<<32_PCREL_S2>> -- 32 bits, shifted 2) is used on the
++SPARC. (SPARC tools generally refer to this as <<WDISP30>>.) The
++signed 16-bit displacement is used on the MIPS, and the 23-bit
++displacement is used on the Alpha. */
++ BFD_RELOC_32_PCREL_S2,
++ BFD_RELOC_16_PCREL_S2,
++ BFD_RELOC_23_PCREL_S2,
++
++/* High 22 bits and low 10 bits of 32-bit value, placed into lower bits of
++the target word. These are used on the SPARC. */
++ BFD_RELOC_HI22,
++ BFD_RELOC_LO10,
++
++/* For systems that allocate a Global Pointer register, these are
++displacements off that register. These relocation types are
++handled specially, because the value the register will have is
++decided relatively late. */
++ BFD_RELOC_GPREL16,
++ BFD_RELOC_GPREL32,
++
++/* Reloc types used for i960/b.out. */
++ BFD_RELOC_I960_CALLJ,
++
++/* SPARC ELF relocations. There is probably some overlap with other
++relocation types already defined. */
++ BFD_RELOC_NONE,
++ BFD_RELOC_SPARC_WDISP22,
++ BFD_RELOC_SPARC22,
++ BFD_RELOC_SPARC13,
++ BFD_RELOC_SPARC_GOT10,
++ BFD_RELOC_SPARC_GOT13,
++ BFD_RELOC_SPARC_GOT22,
++ BFD_RELOC_SPARC_PC10,
++ BFD_RELOC_SPARC_PC22,
++ BFD_RELOC_SPARC_WPLT30,
++ BFD_RELOC_SPARC_COPY,
++ BFD_RELOC_SPARC_GLOB_DAT,
++ BFD_RELOC_SPARC_JMP_SLOT,
++ BFD_RELOC_SPARC_RELATIVE,
++ BFD_RELOC_SPARC_UA16,
++ BFD_RELOC_SPARC_UA32,
++ BFD_RELOC_SPARC_UA64,
++
++/* I think these are specific to SPARC a.out (e.g., Sun 4). */
++ BFD_RELOC_SPARC_BASE13,
++ BFD_RELOC_SPARC_BASE22,
++
++/* SPARC64 relocations */
++#define BFD_RELOC_SPARC_64 BFD_RELOC_64
++ BFD_RELOC_SPARC_10,
++ BFD_RELOC_SPARC_11,
++ BFD_RELOC_SPARC_OLO10,
++ BFD_RELOC_SPARC_HH22,
++ BFD_RELOC_SPARC_HM10,
++ BFD_RELOC_SPARC_LM22,
++ BFD_RELOC_SPARC_PC_HH22,
++ BFD_RELOC_SPARC_PC_HM10,
++ BFD_RELOC_SPARC_PC_LM22,
++ BFD_RELOC_SPARC_WDISP16,
++ BFD_RELOC_SPARC_WDISP19,
++ BFD_RELOC_SPARC_7,
++ BFD_RELOC_SPARC_6,
++ BFD_RELOC_SPARC_5,
++#define BFD_RELOC_SPARC_DISP64 BFD_RELOC_64_PCREL
++ BFD_RELOC_SPARC_PLT32,
++ BFD_RELOC_SPARC_PLT64,
++ BFD_RELOC_SPARC_HIX22,
++ BFD_RELOC_SPARC_LOX10,
++ BFD_RELOC_SPARC_H44,
++ BFD_RELOC_SPARC_M44,
++ BFD_RELOC_SPARC_L44,
++ BFD_RELOC_SPARC_REGISTER,
++
++/* SPARC little endian relocation */
++ BFD_RELOC_SPARC_REV32,
++
++/* SPARC TLS relocations */
++ BFD_RELOC_SPARC_TLS_GD_HI22,
++ BFD_RELOC_SPARC_TLS_GD_LO10,
++ BFD_RELOC_SPARC_TLS_GD_ADD,
++ BFD_RELOC_SPARC_TLS_GD_CALL,
++ BFD_RELOC_SPARC_TLS_LDM_HI22,
++ BFD_RELOC_SPARC_TLS_LDM_LO10,
++ BFD_RELOC_SPARC_TLS_LDM_ADD,
++ BFD_RELOC_SPARC_TLS_LDM_CALL,
++ BFD_RELOC_SPARC_TLS_LDO_HIX22,
++ BFD_RELOC_SPARC_TLS_LDO_LOX10,
++ BFD_RELOC_SPARC_TLS_LDO_ADD,
++ BFD_RELOC_SPARC_TLS_IE_HI22,
++ BFD_RELOC_SPARC_TLS_IE_LO10,
++ BFD_RELOC_SPARC_TLS_IE_LD,
++ BFD_RELOC_SPARC_TLS_IE_LDX,
++ BFD_RELOC_SPARC_TLS_IE_ADD,
++ BFD_RELOC_SPARC_TLS_LE_HIX22,
++ BFD_RELOC_SPARC_TLS_LE_LOX10,
++ BFD_RELOC_SPARC_TLS_DTPMOD32,
++ BFD_RELOC_SPARC_TLS_DTPMOD64,
++ BFD_RELOC_SPARC_TLS_DTPOFF32,
++ BFD_RELOC_SPARC_TLS_DTPOFF64,
++ BFD_RELOC_SPARC_TLS_TPOFF32,
++ BFD_RELOC_SPARC_TLS_TPOFF64,
++
++/* Alpha ECOFF and ELF relocations. Some of these treat the symbol or
++"addend" in some special way.
++For GPDISP_HI16 ("gpdisp") relocations, the symbol is ignored when
++writing; when reading, it will be the absolute section symbol. The
++addend is the displacement in bytes of the "lda" instruction from
++the "ldah" instruction (which is at the address of this reloc). */
++ BFD_RELOC_ALPHA_GPDISP_HI16,
++
++/* For GPDISP_LO16 ("ignore") relocations, the symbol is handled as
++with GPDISP_HI16 relocs. The addend is ignored when writing the
++relocations out, and is filled in with the file's GP value on
++reading, for convenience. */
++ BFD_RELOC_ALPHA_GPDISP_LO16,
++
++/* The ELF GPDISP relocation is exactly the same as the GPDISP_HI16
++relocation except that there is no accompanying GPDISP_LO16
++relocation. */
++ BFD_RELOC_ALPHA_GPDISP,
++
++/* The Alpha LITERAL/LITUSE relocs are produced by a symbol reference;
++the assembler turns it into a LDQ instruction to load the address of
++the symbol, and then fills in a register in the real instruction.
++
++The LITERAL reloc, at the LDQ instruction, refers to the .lita
++section symbol. The addend is ignored when writing, but is filled
++in with the file's GP value on reading, for convenience, as with the
++GPDISP_LO16 reloc.
++
++The ELF_LITERAL reloc is somewhere between 16_GOTOFF and GPDISP_LO16.
++It should refer to the symbol to be referenced, as with 16_GOTOFF,
++but it generates output not based on the position within the .got
++section, but relative to the GP value chosen for the file during the
++final link stage.
++
++The LITUSE reloc, on the instruction using the loaded address, gives
++information to the linker that it might be able to use to optimize
++away some literal section references. The symbol is ignored (read
++as the absolute section symbol), and the "addend" indicates the type
++of instruction using the register:
++1 - "memory" fmt insn
++2 - byte-manipulation (byte offset reg)
++3 - jsr (target of branch) */
++ BFD_RELOC_ALPHA_LITERAL,
++ BFD_RELOC_ALPHA_ELF_LITERAL,
++ BFD_RELOC_ALPHA_LITUSE,
++
++/* The HINT relocation indicates a value that should be filled into the
++"hint" field of a jmp/jsr/ret instruction, for possible branch-
++prediction logic which may be provided on some processors. */
++ BFD_RELOC_ALPHA_HINT,
++
++/* The LINKAGE relocation outputs a linkage pair in the object file,
++which is filled by the linker. */
++ BFD_RELOC_ALPHA_LINKAGE,
++
++/* The CODEADDR relocation outputs a STO_CA in the object file,
++which is filled by the linker. */
++ BFD_RELOC_ALPHA_CODEADDR,
++
++/* The GPREL_HI/LO relocations together form a 32-bit offset from the
++GP register. */
++ BFD_RELOC_ALPHA_GPREL_HI16,
++ BFD_RELOC_ALPHA_GPREL_LO16,
++
++/* Like BFD_RELOC_23_PCREL_S2, except that the source and target must
++share a common GP, and the target address is adjusted for
++STO_ALPHA_STD_GPLOAD. */
++ BFD_RELOC_ALPHA_BRSGP,
++
++/* Alpha thread-local storage relocations. */
++ BFD_RELOC_ALPHA_TLSGD,
++ BFD_RELOC_ALPHA_TLSLDM,
++ BFD_RELOC_ALPHA_DTPMOD64,
++ BFD_RELOC_ALPHA_GOTDTPREL16,
++ BFD_RELOC_ALPHA_DTPREL64,
++ BFD_RELOC_ALPHA_DTPREL_HI16,
++ BFD_RELOC_ALPHA_DTPREL_LO16,
++ BFD_RELOC_ALPHA_DTPREL16,
++ BFD_RELOC_ALPHA_GOTTPREL16,
++ BFD_RELOC_ALPHA_TPREL64,
++ BFD_RELOC_ALPHA_TPREL_HI16,
++ BFD_RELOC_ALPHA_TPREL_LO16,
++ BFD_RELOC_ALPHA_TPREL16,
++
++/* Bits 27..2 of the relocation address shifted right 2 bits;
++simple reloc otherwise. */
++ BFD_RELOC_MIPS_JMP,
++
++/* The MIPS16 jump instruction. */
++ BFD_RELOC_MIPS16_JMP,
++
++/* MIPS16 GP relative reloc. */
++ BFD_RELOC_MIPS16_GPREL,
++
++/* High 16 bits of 32-bit value; simple reloc. */
++ BFD_RELOC_HI16,
++
++/* High 16 bits of 32-bit value but the low 16 bits will be sign
++extended and added to form the final result. If the low 16
++bits form a negative number, we need to add one to the high value
++to compensate for the borrow when the low bits are added. */
++ BFD_RELOC_HI16_S,
++
++/* Low 16 bits. */
++ BFD_RELOC_LO16,
++
++/* High 16 bits of 32-bit pc-relative value */
++ BFD_RELOC_HI16_PCREL,
++
++/* High 16 bits of 32-bit pc-relative value, adjusted */
++ BFD_RELOC_HI16_S_PCREL,
++
++/* Low 16 bits of pc-relative value */
++ BFD_RELOC_LO16_PCREL,
++
++/* MIPS16 high 16 bits of 32-bit value. */
++ BFD_RELOC_MIPS16_HI16,
++
++/* MIPS16 high 16 bits of 32-bit value but the low 16 bits will be sign
++extended and added to form the final result. If the low 16
++bits form a negative number, we need to add one to the high value
++to compensate for the borrow when the low bits are added. */
++ BFD_RELOC_MIPS16_HI16_S,
++
++/* MIPS16 low 16 bits. */
++ BFD_RELOC_MIPS16_LO16,
++
++/* Relocation against a MIPS literal section. */
++ BFD_RELOC_MIPS_LITERAL,
++
++/* MIPS ELF relocations. */
++ BFD_RELOC_MIPS_GOT16,
++ BFD_RELOC_MIPS_CALL16,
++ BFD_RELOC_MIPS_GOT_HI16,
++ BFD_RELOC_MIPS_GOT_LO16,
++ BFD_RELOC_MIPS_CALL_HI16,
++ BFD_RELOC_MIPS_CALL_LO16,
++ BFD_RELOC_MIPS_SUB,
++ BFD_RELOC_MIPS_GOT_PAGE,
++ BFD_RELOC_MIPS_GOT_OFST,
++ BFD_RELOC_MIPS_GOT_DISP,
++ BFD_RELOC_MIPS_SHIFT5,
++ BFD_RELOC_MIPS_SHIFT6,
++ BFD_RELOC_MIPS_INSERT_A,
++ BFD_RELOC_MIPS_INSERT_B,
++ BFD_RELOC_MIPS_DELETE,
++ BFD_RELOC_MIPS_HIGHEST,
++ BFD_RELOC_MIPS_HIGHER,
++ BFD_RELOC_MIPS_SCN_DISP,
++ BFD_RELOC_MIPS_REL16,
++ BFD_RELOC_MIPS_RELGOT,
++ BFD_RELOC_MIPS_JALR,
++ BFD_RELOC_MIPS_TLS_DTPMOD32,
++ BFD_RELOC_MIPS_TLS_DTPREL32,
++ BFD_RELOC_MIPS_TLS_DTPMOD64,
++ BFD_RELOC_MIPS_TLS_DTPREL64,
++ BFD_RELOC_MIPS_TLS_GD,
++ BFD_RELOC_MIPS_TLS_LDM,
++ BFD_RELOC_MIPS_TLS_DTPREL_HI16,
++ BFD_RELOC_MIPS_TLS_DTPREL_LO16,
++ BFD_RELOC_MIPS_TLS_GOTTPREL,
++ BFD_RELOC_MIPS_TLS_TPREL32,
++ BFD_RELOC_MIPS_TLS_TPREL64,
++ BFD_RELOC_MIPS_TLS_TPREL_HI16,
++ BFD_RELOC_MIPS_TLS_TPREL_LO16,
++
++
++/* Fujitsu Frv Relocations. */
++ BFD_RELOC_FRV_LABEL16,
++ BFD_RELOC_FRV_LABEL24,
++ BFD_RELOC_FRV_LO16,
++ BFD_RELOC_FRV_HI16,
++ BFD_RELOC_FRV_GPREL12,
++ BFD_RELOC_FRV_GPRELU12,
++ BFD_RELOC_FRV_GPREL32,
++ BFD_RELOC_FRV_GPRELHI,
++ BFD_RELOC_FRV_GPRELLO,
++ BFD_RELOC_FRV_GOT12,
++ BFD_RELOC_FRV_GOTHI,
++ BFD_RELOC_FRV_GOTLO,
++ BFD_RELOC_FRV_FUNCDESC,
++ BFD_RELOC_FRV_FUNCDESC_GOT12,
++ BFD_RELOC_FRV_FUNCDESC_GOTHI,
++ BFD_RELOC_FRV_FUNCDESC_GOTLO,
++ BFD_RELOC_FRV_FUNCDESC_VALUE,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFF12,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFFHI,
++ BFD_RELOC_FRV_FUNCDESC_GOTOFFLO,
++ BFD_RELOC_FRV_GOTOFF12,
++ BFD_RELOC_FRV_GOTOFFHI,
++ BFD_RELOC_FRV_GOTOFFLO,
++ BFD_RELOC_FRV_GETTLSOFF,
++ BFD_RELOC_FRV_TLSDESC_VALUE,
++ BFD_RELOC_FRV_GOTTLSDESC12,
++ BFD_RELOC_FRV_GOTTLSDESCHI,
++ BFD_RELOC_FRV_GOTTLSDESCLO,
++ BFD_RELOC_FRV_TLSMOFF12,
++ BFD_RELOC_FRV_TLSMOFFHI,
++ BFD_RELOC_FRV_TLSMOFFLO,
++ BFD_RELOC_FRV_GOTTLSOFF12,
++ BFD_RELOC_FRV_GOTTLSOFFHI,
++ BFD_RELOC_FRV_GOTTLSOFFLO,
++ BFD_RELOC_FRV_TLSOFF,
++ BFD_RELOC_FRV_TLSDESC_RELAX,
++ BFD_RELOC_FRV_GETTLSOFF_RELAX,
++ BFD_RELOC_FRV_TLSOFF_RELAX,
++ BFD_RELOC_FRV_TLSMOFF,
++
++
++/* This is a 24bit GOT-relative reloc for the mn10300. */
++ BFD_RELOC_MN10300_GOTOFF24,
++
++/* This is a 32bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT32,
++
++/* This is a 24bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT24,
++
++/* This is a 16bit GOT-relative reloc for the mn10300, offset by two bytes
++in the instruction. */
++ BFD_RELOC_MN10300_GOT16,
++
++/* Copy symbol at runtime. */
++ BFD_RELOC_MN10300_COPY,
++
++/* Create GOT entry. */
++ BFD_RELOC_MN10300_GLOB_DAT,
++
++/* Create PLT entry. */
++ BFD_RELOC_MN10300_JMP_SLOT,
++
++/* Adjust by program base. */
++ BFD_RELOC_MN10300_RELATIVE,
++
++
++/* i386/elf relocations */
++ BFD_RELOC_386_GOT32,
++ BFD_RELOC_386_PLT32,
++ BFD_RELOC_386_COPY,
++ BFD_RELOC_386_GLOB_DAT,
++ BFD_RELOC_386_JUMP_SLOT,
++ BFD_RELOC_386_RELATIVE,
++ BFD_RELOC_386_GOTOFF,
++ BFD_RELOC_386_GOTPC,
++ BFD_RELOC_386_TLS_TPOFF,
++ BFD_RELOC_386_TLS_IE,
++ BFD_RELOC_386_TLS_GOTIE,
++ BFD_RELOC_386_TLS_LE,
++ BFD_RELOC_386_TLS_GD,
++ BFD_RELOC_386_TLS_LDM,
++ BFD_RELOC_386_TLS_LDO_32,
++ BFD_RELOC_386_TLS_IE_32,
++ BFD_RELOC_386_TLS_LE_32,
++ BFD_RELOC_386_TLS_DTPMOD32,
++ BFD_RELOC_386_TLS_DTPOFF32,
++ BFD_RELOC_386_TLS_TPOFF32,
++
++/* x86-64/elf relocations */
++ BFD_RELOC_X86_64_GOT32,
++ BFD_RELOC_X86_64_PLT32,
++ BFD_RELOC_X86_64_COPY,
++ BFD_RELOC_X86_64_GLOB_DAT,
++ BFD_RELOC_X86_64_JUMP_SLOT,
++ BFD_RELOC_X86_64_RELATIVE,
++ BFD_RELOC_X86_64_GOTPCREL,
++ BFD_RELOC_X86_64_32S,
++ BFD_RELOC_X86_64_DTPMOD64,
++ BFD_RELOC_X86_64_DTPOFF64,
++ BFD_RELOC_X86_64_TPOFF64,
++ BFD_RELOC_X86_64_TLSGD,
++ BFD_RELOC_X86_64_TLSLD,
++ BFD_RELOC_X86_64_DTPOFF32,
++ BFD_RELOC_X86_64_GOTTPOFF,
++ BFD_RELOC_X86_64_TPOFF32,
++ BFD_RELOC_X86_64_GOTOFF64,
++ BFD_RELOC_X86_64_GOTPC32,
++
++/* ns32k relocations */
++ BFD_RELOC_NS32K_IMM_8,
++ BFD_RELOC_NS32K_IMM_16,
++ BFD_RELOC_NS32K_IMM_32,
++ BFD_RELOC_NS32K_IMM_8_PCREL,
++ BFD_RELOC_NS32K_IMM_16_PCREL,
++ BFD_RELOC_NS32K_IMM_32_PCREL,
++ BFD_RELOC_NS32K_DISP_8,
++ BFD_RELOC_NS32K_DISP_16,
++ BFD_RELOC_NS32K_DISP_32,
++ BFD_RELOC_NS32K_DISP_8_PCREL,
++ BFD_RELOC_NS32K_DISP_16_PCREL,
++ BFD_RELOC_NS32K_DISP_32_PCREL,
++
++/* PDP11 relocations */
++ BFD_RELOC_PDP11_DISP_8_PCREL,
++ BFD_RELOC_PDP11_DISP_6_PCREL,
++
++/* Picojava relocs. Not all of these appear in object files. */
++ BFD_RELOC_PJ_CODE_HI16,
++ BFD_RELOC_PJ_CODE_LO16,
++ BFD_RELOC_PJ_CODE_DIR16,
++ BFD_RELOC_PJ_CODE_DIR32,
++ BFD_RELOC_PJ_CODE_REL16,
++ BFD_RELOC_PJ_CODE_REL32,
++
++/* Power(rs6000) and PowerPC relocations. */
++ BFD_RELOC_PPC_B26,
++ BFD_RELOC_PPC_BA26,
++ BFD_RELOC_PPC_TOC16,
++ BFD_RELOC_PPC_B16,
++ BFD_RELOC_PPC_B16_BRTAKEN,
++ BFD_RELOC_PPC_B16_BRNTAKEN,
++ BFD_RELOC_PPC_BA16,
++ BFD_RELOC_PPC_BA16_BRTAKEN,
++ BFD_RELOC_PPC_BA16_BRNTAKEN,
++ BFD_RELOC_PPC_COPY,
++ BFD_RELOC_PPC_GLOB_DAT,
++ BFD_RELOC_PPC_JMP_SLOT,
++ BFD_RELOC_PPC_RELATIVE,
++ BFD_RELOC_PPC_LOCAL24PC,
++ BFD_RELOC_PPC_EMB_NADDR32,
++ BFD_RELOC_PPC_EMB_NADDR16,
++ BFD_RELOC_PPC_EMB_NADDR16_LO,
++ BFD_RELOC_PPC_EMB_NADDR16_HI,
++ BFD_RELOC_PPC_EMB_NADDR16_HA,
++ BFD_RELOC_PPC_EMB_SDAI16,
++ BFD_RELOC_PPC_EMB_SDA2I16,
++ BFD_RELOC_PPC_EMB_SDA2REL,
++ BFD_RELOC_PPC_EMB_SDA21,
++ BFD_RELOC_PPC_EMB_MRKREF,
++ BFD_RELOC_PPC_EMB_RELSEC16,
++ BFD_RELOC_PPC_EMB_RELST_LO,
++ BFD_RELOC_PPC_EMB_RELST_HI,
++ BFD_RELOC_PPC_EMB_RELST_HA,
++ BFD_RELOC_PPC_EMB_BIT_FLD,
++ BFD_RELOC_PPC_EMB_RELSDA,
++ BFD_RELOC_PPC64_HIGHER,
++ BFD_RELOC_PPC64_HIGHER_S,
++ BFD_RELOC_PPC64_HIGHEST,
++ BFD_RELOC_PPC64_HIGHEST_S,
++ BFD_RELOC_PPC64_TOC16_LO,
++ BFD_RELOC_PPC64_TOC16_HI,
++ BFD_RELOC_PPC64_TOC16_HA,
++ BFD_RELOC_PPC64_TOC,
++ BFD_RELOC_PPC64_PLTGOT16,
++ BFD_RELOC_PPC64_PLTGOT16_LO,
++ BFD_RELOC_PPC64_PLTGOT16_HI,
++ BFD_RELOC_PPC64_PLTGOT16_HA,
++ BFD_RELOC_PPC64_ADDR16_DS,
++ BFD_RELOC_PPC64_ADDR16_LO_DS,
++ BFD_RELOC_PPC64_GOT16_DS,
++ BFD_RELOC_PPC64_GOT16_LO_DS,
++ BFD_RELOC_PPC64_PLT16_LO_DS,
++ BFD_RELOC_PPC64_SECTOFF_DS,
++ BFD_RELOC_PPC64_SECTOFF_LO_DS,
++ BFD_RELOC_PPC64_TOC16_DS,
++ BFD_RELOC_PPC64_TOC16_LO_DS,
++ BFD_RELOC_PPC64_PLTGOT16_DS,
++ BFD_RELOC_PPC64_PLTGOT16_LO_DS,
++
++/* PowerPC and PowerPC64 thread-local storage relocations. */
++ BFD_RELOC_PPC_TLS,
++ BFD_RELOC_PPC_DTPMOD,
++ BFD_RELOC_PPC_TPREL16,
++ BFD_RELOC_PPC_TPREL16_LO,
++ BFD_RELOC_PPC_TPREL16_HI,
++ BFD_RELOC_PPC_TPREL16_HA,
++ BFD_RELOC_PPC_TPREL,
++ BFD_RELOC_PPC_DTPREL16,
++ BFD_RELOC_PPC_DTPREL16_LO,
++ BFD_RELOC_PPC_DTPREL16_HI,
++ BFD_RELOC_PPC_DTPREL16_HA,
++ BFD_RELOC_PPC_DTPREL,
++ BFD_RELOC_PPC_GOT_TLSGD16,
++ BFD_RELOC_PPC_GOT_TLSGD16_LO,
++ BFD_RELOC_PPC_GOT_TLSGD16_HI,
++ BFD_RELOC_PPC_GOT_TLSGD16_HA,
++ BFD_RELOC_PPC_GOT_TLSLD16,
++ BFD_RELOC_PPC_GOT_TLSLD16_LO,
++ BFD_RELOC_PPC_GOT_TLSLD16_HI,
++ BFD_RELOC_PPC_GOT_TLSLD16_HA,
++ BFD_RELOC_PPC_GOT_TPREL16,
++ BFD_RELOC_PPC_GOT_TPREL16_LO,
++ BFD_RELOC_PPC_GOT_TPREL16_HI,
++ BFD_RELOC_PPC_GOT_TPREL16_HA,
++ BFD_RELOC_PPC_GOT_DTPREL16,
++ BFD_RELOC_PPC_GOT_DTPREL16_LO,
++ BFD_RELOC_PPC_GOT_DTPREL16_HI,
++ BFD_RELOC_PPC_GOT_DTPREL16_HA,
++ BFD_RELOC_PPC64_TPREL16_DS,
++ BFD_RELOC_PPC64_TPREL16_LO_DS,
++ BFD_RELOC_PPC64_TPREL16_HIGHER,
++ BFD_RELOC_PPC64_TPREL16_HIGHERA,
++ BFD_RELOC_PPC64_TPREL16_HIGHEST,
++ BFD_RELOC_PPC64_TPREL16_HIGHESTA,
++ BFD_RELOC_PPC64_DTPREL16_DS,
++ BFD_RELOC_PPC64_DTPREL16_LO_DS,
++ BFD_RELOC_PPC64_DTPREL16_HIGHER,
++ BFD_RELOC_PPC64_DTPREL16_HIGHERA,
++ BFD_RELOC_PPC64_DTPREL16_HIGHEST,
++ BFD_RELOC_PPC64_DTPREL16_HIGHESTA,
++
++/* IBM 370/390 relocations */
++ BFD_RELOC_I370_D12,
++
++/* The type of reloc used to build a constructor table - at the moment
++probably a 32 bit wide absolute relocation, but the target can choose.
++It generally does map to one of the other relocation types. */
++ BFD_RELOC_CTOR,
++
++/* ARM 26 bit pc-relative branch. The lowest two bits must be zero and are
++not stored in the instruction. */
++ BFD_RELOC_ARM_PCREL_BRANCH,
++
++/* ARM 26 bit pc-relative branch. The lowest bit must be zero and is
++not stored in the instruction. The 2nd lowest bit comes from a 1 bit
++field in the instruction. */
++ BFD_RELOC_ARM_PCREL_BLX,
++
++/* Thumb 22 bit pc-relative branch. The lowest bit must be zero and is
++not stored in the instruction. The 2nd lowest bit comes from a 1 bit
++field in the instruction. */
++ BFD_RELOC_THUMB_PCREL_BLX,
++
++/* Thumb 7-, 9-, 12-, 20-, 23-, and 25-bit pc-relative branches.
++The lowest bit must be zero and is not stored in the instruction.
++Note that the corresponding ELF R_ARM_THM_JUMPnn constant has an
++"nn" one smaller in all cases. Note further that BRANCH23
++corresponds to R_ARM_THM_CALL. */
++ BFD_RELOC_THUMB_PCREL_BRANCH7,
++ BFD_RELOC_THUMB_PCREL_BRANCH9,
++ BFD_RELOC_THUMB_PCREL_BRANCH12,
++ BFD_RELOC_THUMB_PCREL_BRANCH20,
++ BFD_RELOC_THUMB_PCREL_BRANCH23,
++ BFD_RELOC_THUMB_PCREL_BRANCH25,
++
++/* 12-bit immediate offset, used in ARM-format ldr and str instructions. */
++ BFD_RELOC_ARM_OFFSET_IMM,
++
++/* 5-bit immediate offset, used in Thumb-format ldr and str instructions. */
++ BFD_RELOC_ARM_THUMB_OFFSET,
++
++/* Pc-relative or absolute relocation depending on target. Used for
++entries in .init_array sections. */
++ BFD_RELOC_ARM_TARGET1,
++
++/* Read-only segment base relative address. */
++ BFD_RELOC_ARM_ROSEGREL32,
++
++/* Data segment base relative address. */
++ BFD_RELOC_ARM_SBREL32,
++
++/* This reloc is used for references to RTTI data from exception handling
++tables. The actual definition depends on the target. It may be a
++pc-relative or some form of GOT-indirect relocation. */
++ BFD_RELOC_ARM_TARGET2,
++
++/* 31-bit PC relative address. */
++ BFD_RELOC_ARM_PREL31,
++
++/* Relocations for setting up GOTs and PLTs for shared libraries. */
++ BFD_RELOC_ARM_JUMP_SLOT,
++ BFD_RELOC_ARM_GLOB_DAT,
++ BFD_RELOC_ARM_GOT32,
++ BFD_RELOC_ARM_PLT32,
++ BFD_RELOC_ARM_RELATIVE,
++ BFD_RELOC_ARM_GOTOFF,
++ BFD_RELOC_ARM_GOTPC,
++
++/* ARM thread-local storage relocations. */
++ BFD_RELOC_ARM_TLS_GD32,
++ BFD_RELOC_ARM_TLS_LDO32,
++ BFD_RELOC_ARM_TLS_LDM32,
++ BFD_RELOC_ARM_TLS_DTPOFF32,
++ BFD_RELOC_ARM_TLS_DTPMOD32,
++ BFD_RELOC_ARM_TLS_TPOFF32,
++ BFD_RELOC_ARM_TLS_IE32,
++ BFD_RELOC_ARM_TLS_LE32,
++
++/* These relocs are only used within the ARM assembler. They are not
++(at present) written to any object files. */
++ BFD_RELOC_ARM_IMMEDIATE,
++ BFD_RELOC_ARM_ADRL_IMMEDIATE,
++ BFD_RELOC_ARM_T32_IMMEDIATE,
++ BFD_RELOC_ARM_SHIFT_IMM,
++ BFD_RELOC_ARM_SMI,
++ BFD_RELOC_ARM_SWI,
++ BFD_RELOC_ARM_MULTI,
++ BFD_RELOC_ARM_CP_OFF_IMM,
++ BFD_RELOC_ARM_CP_OFF_IMM_S2,
++ BFD_RELOC_ARM_ADR_IMM,
++ BFD_RELOC_ARM_LDR_IMM,
++ BFD_RELOC_ARM_LITERAL,
++ BFD_RELOC_ARM_IN_POOL,
++ BFD_RELOC_ARM_OFFSET_IMM8,
++ BFD_RELOC_ARM_T32_OFFSET_U8,
++ BFD_RELOC_ARM_T32_OFFSET_IMM,
++ BFD_RELOC_ARM_HWLITERAL,
++ BFD_RELOC_ARM_THUMB_ADD,
++ BFD_RELOC_ARM_THUMB_IMM,
++ BFD_RELOC_ARM_THUMB_SHIFT,
++
++/* Renesas / SuperH SH relocs. Not all of these appear in object files. */
++ BFD_RELOC_SH_PCDISP8BY2,
++ BFD_RELOC_SH_PCDISP12BY2,
++ BFD_RELOC_SH_IMM3,
++ BFD_RELOC_SH_IMM3U,
++ BFD_RELOC_SH_DISP12,
++ BFD_RELOC_SH_DISP12BY2,
++ BFD_RELOC_SH_DISP12BY4,
++ BFD_RELOC_SH_DISP12BY8,
++ BFD_RELOC_SH_DISP20,
++ BFD_RELOC_SH_DISP20BY8,
++ BFD_RELOC_SH_IMM4,
++ BFD_RELOC_SH_IMM4BY2,
++ BFD_RELOC_SH_IMM4BY4,
++ BFD_RELOC_SH_IMM8,
++ BFD_RELOC_SH_IMM8BY2,
++ BFD_RELOC_SH_IMM8BY4,
++ BFD_RELOC_SH_PCRELIMM8BY2,
++ BFD_RELOC_SH_PCRELIMM8BY4,
++ BFD_RELOC_SH_SWITCH16,
++ BFD_RELOC_SH_SWITCH32,
++ BFD_RELOC_SH_USES,
++ BFD_RELOC_SH_COUNT,
++ BFD_RELOC_SH_ALIGN,
++ BFD_RELOC_SH_CODE,
++ BFD_RELOC_SH_DATA,
++ BFD_RELOC_SH_LABEL,
++ BFD_RELOC_SH_LOOP_START,
++ BFD_RELOC_SH_LOOP_END,
++ BFD_RELOC_SH_COPY,
++ BFD_RELOC_SH_GLOB_DAT,
++ BFD_RELOC_SH_JMP_SLOT,
++ BFD_RELOC_SH_RELATIVE,
++ BFD_RELOC_SH_GOTPC,
++ BFD_RELOC_SH_GOT_LOW16,
++ BFD_RELOC_SH_GOT_MEDLOW16,
++ BFD_RELOC_SH_GOT_MEDHI16,
++ BFD_RELOC_SH_GOT_HI16,
++ BFD_RELOC_SH_GOTPLT_LOW16,
++ BFD_RELOC_SH_GOTPLT_MEDLOW16,
++ BFD_RELOC_SH_GOTPLT_MEDHI16,
++ BFD_RELOC_SH_GOTPLT_HI16,
++ BFD_RELOC_SH_PLT_LOW16,
++ BFD_RELOC_SH_PLT_MEDLOW16,
++ BFD_RELOC_SH_PLT_MEDHI16,
++ BFD_RELOC_SH_PLT_HI16,
++ BFD_RELOC_SH_GOTOFF_LOW16,
++ BFD_RELOC_SH_GOTOFF_MEDLOW16,
++ BFD_RELOC_SH_GOTOFF_MEDHI16,
++ BFD_RELOC_SH_GOTOFF_HI16,
++ BFD_RELOC_SH_GOTPC_LOW16,
++ BFD_RELOC_SH_GOTPC_MEDLOW16,
++ BFD_RELOC_SH_GOTPC_MEDHI16,
++ BFD_RELOC_SH_GOTPC_HI16,
++ BFD_RELOC_SH_COPY64,
++ BFD_RELOC_SH_GLOB_DAT64,
++ BFD_RELOC_SH_JMP_SLOT64,
++ BFD_RELOC_SH_RELATIVE64,
++ BFD_RELOC_SH_GOT10BY4,
++ BFD_RELOC_SH_GOT10BY8,
++ BFD_RELOC_SH_GOTPLT10BY4,
++ BFD_RELOC_SH_GOTPLT10BY8,
++ BFD_RELOC_SH_GOTPLT32,
++ BFD_RELOC_SH_SHMEDIA_CODE,
++ BFD_RELOC_SH_IMMU5,
++ BFD_RELOC_SH_IMMS6,
++ BFD_RELOC_SH_IMMS6BY32,
++ BFD_RELOC_SH_IMMU6,
++ BFD_RELOC_SH_IMMS10,
++ BFD_RELOC_SH_IMMS10BY2,
++ BFD_RELOC_SH_IMMS10BY4,
++ BFD_RELOC_SH_IMMS10BY8,
++ BFD_RELOC_SH_IMMS16,
++ BFD_RELOC_SH_IMMU16,
++ BFD_RELOC_SH_IMM_LOW16,
++ BFD_RELOC_SH_IMM_LOW16_PCREL,
++ BFD_RELOC_SH_IMM_MEDLOW16,
++ BFD_RELOC_SH_IMM_MEDLOW16_PCREL,
++ BFD_RELOC_SH_IMM_MEDHI16,
++ BFD_RELOC_SH_IMM_MEDHI16_PCREL,
++ BFD_RELOC_SH_IMM_HI16,
++ BFD_RELOC_SH_IMM_HI16_PCREL,
++ BFD_RELOC_SH_PT_16,
++ BFD_RELOC_SH_TLS_GD_32,
++ BFD_RELOC_SH_TLS_LD_32,
++ BFD_RELOC_SH_TLS_LDO_32,
++ BFD_RELOC_SH_TLS_IE_32,
++ BFD_RELOC_SH_TLS_LE_32,
++ BFD_RELOC_SH_TLS_DTPMOD32,
++ BFD_RELOC_SH_TLS_DTPOFF32,
++ BFD_RELOC_SH_TLS_TPOFF32,
++
++/* ARC Cores relocs.
++ARC 22 bit pc-relative branch. The lowest two bits must be zero and are
++not stored in the instruction. The high 20 bits are installed in bits 26
++through 7 of the instruction. */
++ BFD_RELOC_ARC_B22_PCREL,
++
++/* ARC 26 bit absolute branch. The lowest two bits must be zero and are not
++stored in the instruction. The high 24 bits are installed in bits 23
++through 0. */
++ BFD_RELOC_ARC_B26,
++
++/* Mitsubishi D10V relocs.
++This is a 10-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_10_PCREL_R,
++
++/* Mitsubishi D10V relocs.
++This is a 10-bit reloc with the right 2 bits
++assumed to be 0. This is the same as the previous reloc
++except it is in the left container, i.e.,
++shifted left 15 bits. */
++ BFD_RELOC_D10V_10_PCREL_L,
++
++/* This is an 18-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_18,
++
++/* This is an 18-bit reloc with the right 2 bits
++assumed to be 0. */
++ BFD_RELOC_D10V_18_PCREL,
++
++/* Mitsubishi D30V relocs.
++This is a 6-bit absolute reloc. */
++ BFD_RELOC_D30V_6,
++
++/* This is a 6-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_9_PCREL,
++
++/* This is a 6-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_9_PCREL_R,
++
++/* This is a 12-bit absolute reloc with the
++right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_15,
++
++/* This is a 12-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_15_PCREL,
++
++/* This is a 12-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_15_PCREL_R,
++
++/* This is an 18-bit absolute reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_21,
++
++/* This is an 18-bit pc-relative reloc with
++the right 3 bits assumed to be 0. */
++ BFD_RELOC_D30V_21_PCREL,
++
++/* This is an 18-bit pc-relative reloc with
++the right 3 bits assumed to be 0. Same
++as the previous reloc but on the right side
++of the container. */
++ BFD_RELOC_D30V_21_PCREL_R,
++
++/* This is a 32-bit absolute reloc. */
++ BFD_RELOC_D30V_32,
++
++/* This is a 32-bit pc-relative reloc. */
++ BFD_RELOC_D30V_32_PCREL,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_HI16_S,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_LO16,
++
++/* DLX relocs */
++ BFD_RELOC_DLX_JMP26,
++
++/* Renesas M16C/M32C Relocations. */
++ BFD_RELOC_M16C_8_PCREL8,
++ BFD_RELOC_M16C_16_PCREL8,
++ BFD_RELOC_M16C_8_PCREL16,
++ BFD_RELOC_M16C_8_ELABEL24,
++ BFD_RELOC_M16C_8_ABS16,
++ BFD_RELOC_M16C_16_ABS16,
++ BFD_RELOC_M16C_16_ABS24,
++ BFD_RELOC_M16C_16_ABS32,
++ BFD_RELOC_M16C_24_ABS16,
++ BFD_RELOC_M16C_24_ABS24,
++ BFD_RELOC_M16C_24_ABS32,
++ BFD_RELOC_M16C_32_ABS16,
++ BFD_RELOC_M16C_32_ABS24,
++ BFD_RELOC_M16C_32_ABS32,
++ BFD_RELOC_M16C_40_ABS16,
++ BFD_RELOC_M16C_40_ABS24,
++ BFD_RELOC_M16C_40_ABS32,
++
++/* Renesas M32R (formerly Mitsubishi M32R) relocs.
++This is a 24 bit absolute address. */
++ BFD_RELOC_M32R_24,
++
++/* This is a 10-bit pc-relative reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_10_PCREL,
++
++/* This is an 18-bit reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_18_PCREL,
++
++/* This is a 26-bit reloc with the right 2 bits assumed to be 0. */
++ BFD_RELOC_M32R_26_PCREL,
++
++/* This is a 16-bit reloc containing the high 16 bits of an address
++used when the lower 16 bits are treated as unsigned. */
++ BFD_RELOC_M32R_HI16_ULO,
++
++/* This is a 16-bit reloc containing the high 16 bits of an address
++used when the lower 16 bits are treated as signed. */
++ BFD_RELOC_M32R_HI16_SLO,
++
++/* This is a 16-bit reloc containing the lower 16 bits of an address. */
++ BFD_RELOC_M32R_LO16,
++
++/* This is a 16-bit reloc containing the small data area offset for use in
++add3, load, and store instructions. */
++ BFD_RELOC_M32R_SDA16,
++
++/* For PIC. */
++ BFD_RELOC_M32R_GOT24,
++ BFD_RELOC_M32R_26_PLTREL,
++ BFD_RELOC_M32R_COPY,
++ BFD_RELOC_M32R_GLOB_DAT,
++ BFD_RELOC_M32R_JMP_SLOT,
++ BFD_RELOC_M32R_RELATIVE,
++ BFD_RELOC_M32R_GOTOFF,
++ BFD_RELOC_M32R_GOTOFF_HI_ULO,
++ BFD_RELOC_M32R_GOTOFF_HI_SLO,
++ BFD_RELOC_M32R_GOTOFF_LO,
++ BFD_RELOC_M32R_GOTPC24,
++ BFD_RELOC_M32R_GOT16_HI_ULO,
++ BFD_RELOC_M32R_GOT16_HI_SLO,
++ BFD_RELOC_M32R_GOT16_LO,
++ BFD_RELOC_M32R_GOTPC_HI_ULO,
++ BFD_RELOC_M32R_GOTPC_HI_SLO,
++ BFD_RELOC_M32R_GOTPC_LO,
++
++/* This is a 9-bit reloc */
++ BFD_RELOC_V850_9_PCREL,
++
++/* This is a 22-bit reloc */
++ BFD_RELOC_V850_22_PCREL,
++
++/* This is a 16 bit offset from the short data area pointer. */
++ BFD_RELOC_V850_SDA_16_16_OFFSET,
++
++/* This is a 16 bit offset (of which only 15 bits are used) from the
++short data area pointer. */
++ BFD_RELOC_V850_SDA_15_16_OFFSET,
++
++/* This is a 16 bit offset from the zero data area pointer. */
++ BFD_RELOC_V850_ZDA_16_16_OFFSET,
++
++/* This is a 16 bit offset (of which only 15 bits are used) from the
++zero data area pointer. */
++ BFD_RELOC_V850_ZDA_15_16_OFFSET,
++
++/* This is an 8 bit offset (of which only 6 bits are used) from the
++tiny data area pointer. */
++ BFD_RELOC_V850_TDA_6_8_OFFSET,
++
++/* This is an 8bit offset (of which only 7 bits are used) from the tiny
++data area pointer. */
++ BFD_RELOC_V850_TDA_7_8_OFFSET,
++
++/* This is a 7 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_7_7_OFFSET,
++
++/* This is a 16 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_16_16_OFFSET,
++
++/* This is a 5 bit offset (of which only 4 bits are used) from the tiny
++data area pointer. */
++ BFD_RELOC_V850_TDA_4_5_OFFSET,
++
++/* This is a 4 bit offset from the tiny data area pointer. */
++ BFD_RELOC_V850_TDA_4_4_OFFSET,
++
++/* This is a 16 bit offset from the short data area pointer, with the
++bits placed non-contiguously in the instruction. */
++ BFD_RELOC_V850_SDA_16_16_SPLIT_OFFSET,
++
++/* This is a 16 bit offset from the zero data area pointer, with the
++bits placed non-contiguously in the instruction. */
++ BFD_RELOC_V850_ZDA_16_16_SPLIT_OFFSET,
++
++/* This is a 6 bit offset from the call table base pointer. */
++ BFD_RELOC_V850_CALLT_6_7_OFFSET,
++
++/* This is a 16 bit offset from the call table base pointer. */
++ BFD_RELOC_V850_CALLT_16_16_OFFSET,
++
++/* Used for relaxing indirect function calls. */
++ BFD_RELOC_V850_LONGCALL,
++
++/* Used for relaxing indirect jumps. */
++ BFD_RELOC_V850_LONGJUMP,
++
++/* Used to maintain alignment whilst relaxing. */
++ BFD_RELOC_V850_ALIGN,
++
++/* This is a variation of BFD_RELOC_LO16 that can be used in v850e ld.bu
++instructions. */
++ BFD_RELOC_V850_LO16_SPLIT_OFFSET,
++
++/* This is a 32bit pcrel reloc for the mn10300, offset by two bytes in the
++instruction. */
++ BFD_RELOC_MN10300_32_PCREL,
++
++/* This is a 16bit pcrel reloc for the mn10300, offset by two bytes in the
++instruction. */
++ BFD_RELOC_MN10300_16_PCREL,
++
++/* This is an 8-bit DP reloc for the tms320c30, where the most
++significant 8 bits of a 24 bit word are placed into the least
++significant 8 bits of the opcode. */
++ BFD_RELOC_TIC30_LDP,
++
++/* This is a 7-bit reloc for the tms320c54x, where the least
++significant 7 bits of a 16 bit word are placed into the least
++significant 7 bits of the opcode. */
++ BFD_RELOC_TIC54X_PARTLS7,
++
++/* This is a 9-bit DP reloc for the tms320c54x, where the most
++significant 9 bits of a 16 bit word are placed into the least
++significant 9 bits of the opcode. */
++ BFD_RELOC_TIC54X_PARTMS9,
++
++/* This is an extended address 23-bit reloc for the tms320c54x. */
++ BFD_RELOC_TIC54X_23,
++
++/* This is a 16-bit reloc for the tms320c54x, where the least
++significant 16 bits of a 23-bit extended address are placed into
++the opcode. */
++ BFD_RELOC_TIC54X_16_OF_23,
++
++/* This is a reloc for the tms320c54x, where the most
++significant 7 bits of a 23-bit extended address are placed into
++the opcode. */
++ BFD_RELOC_TIC54X_MS7_OF_23,
++
++/* This is a 48 bit reloc for the FR30 that stores 32 bits. */
++ BFD_RELOC_FR30_48,
++
++/* This is a 32 bit reloc for the FR30 that stores 20 bits split up into
++two sections. */
++ BFD_RELOC_FR30_20,
++
++/* This is a 16 bit reloc for the FR30 that stores a 6 bit word offset in
++4 bits. */
++ BFD_RELOC_FR30_6_IN_4,
++
++/* This is a 16 bit reloc for the FR30 that stores an 8 bit byte offset
++into 8 bits. */
++ BFD_RELOC_FR30_8_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 9 bit short offset
++into 8 bits. */
++ BFD_RELOC_FR30_9_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 10 bit word offset
++into 8 bits. */
++ BFD_RELOC_FR30_10_IN_8,
++
++/* This is a 16 bit reloc for the FR30 that stores a 9 bit pc relative
++short offset into 8 bits. */
++ BFD_RELOC_FR30_9_PCREL,
++
++/* This is a 16 bit reloc for the FR30 that stores a 12 bit pc relative
++short offset into 11 bits. */
++ BFD_RELOC_FR30_12_PCREL,
++
++/* Motorola Mcore relocations. */
++ BFD_RELOC_MCORE_PCREL_IMM8BY4,
++ BFD_RELOC_MCORE_PCREL_IMM11BY2,
++ BFD_RELOC_MCORE_PCREL_IMM4BY2,
++ BFD_RELOC_MCORE_PCREL_32,
++ BFD_RELOC_MCORE_PCREL_JSR_IMM11BY2,
++ BFD_RELOC_MCORE_RVA,
++
++/* These are relocations for the GETA instruction. */
++ BFD_RELOC_MMIX_GETA,
++ BFD_RELOC_MMIX_GETA_1,
++ BFD_RELOC_MMIX_GETA_2,
++ BFD_RELOC_MMIX_GETA_3,
++
++/* These are relocations for a conditional branch instruction. */
++ BFD_RELOC_MMIX_CBRANCH,
++ BFD_RELOC_MMIX_CBRANCH_J,
++ BFD_RELOC_MMIX_CBRANCH_1,
++ BFD_RELOC_MMIX_CBRANCH_2,
++ BFD_RELOC_MMIX_CBRANCH_3,
++
++/* These are relocations for the PUSHJ instruction. */
++ BFD_RELOC_MMIX_PUSHJ,
++ BFD_RELOC_MMIX_PUSHJ_1,
++ BFD_RELOC_MMIX_PUSHJ_2,
++ BFD_RELOC_MMIX_PUSHJ_3,
++ BFD_RELOC_MMIX_PUSHJ_STUBBABLE,
++
++/* These are relocations for the JMP instruction. */
++ BFD_RELOC_MMIX_JMP,
++ BFD_RELOC_MMIX_JMP_1,
++ BFD_RELOC_MMIX_JMP_2,
++ BFD_RELOC_MMIX_JMP_3,
++
++/* This is a relocation for a relative address as in a GETA instruction or
++a branch. */
++ BFD_RELOC_MMIX_ADDR19,
++
++/* This is a relocation for a relative address as in a JMP instruction. */
++ BFD_RELOC_MMIX_ADDR27,
++
++/* This is a relocation for an instruction field that may be a general
++register or a value 0..255. */
++ BFD_RELOC_MMIX_REG_OR_BYTE,
++
++/* This is a relocation for an instruction field that may be a general
++register. */
++ BFD_RELOC_MMIX_REG,
++
++/* This is a relocation for two instruction fields holding a register and
++an offset, the equivalent of the relocation. */
++ BFD_RELOC_MMIX_BASE_PLUS_OFFSET,
++
++/* This relocation is an assertion that the expression is not allocated as
++a global register. It does not modify contents. */
++ BFD_RELOC_MMIX_LOCAL,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit pc relative
++short offset into 7 bits. */
++ BFD_RELOC_AVR_7_PCREL,
++
++/* This is a 16 bit reloc for the AVR that stores 13 bit pc relative
++short offset into 12 bits. */
++ BFD_RELOC_AVR_13_PCREL,
++
++/* This is a 16 bit reloc for the AVR that stores 17 bit value (usually
++program memory address) into 16 bits. */
++ BFD_RELOC_AVR_16_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (usually
++data memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_LO8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (high 8 bit
++of data memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HI8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (most high 8 bit
++of program memory address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HH8_LDI,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(usually data memory address) into 8 bit immediate value of SUBI insn. */
++ BFD_RELOC_AVR_LO8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 8 bit of data memory address) into 8 bit immediate value of
++SUBI insn. */
++ BFD_RELOC_AVR_HI8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(most high 8 bit of program memory address) into 8 bit immediate value
++of LDI or SUBI insn. */
++ BFD_RELOC_AVR_HH8_LDI_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (usually
++command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_LO8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (high 8 bit
++of command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HI8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores 8 bit value (most high 8 bit
++of command address) into 8 bit immediate value of LDI insn. */
++ BFD_RELOC_AVR_HH8_LDI_PM,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(usually command address) into 8 bit immediate value of SUBI insn. */
++ BFD_RELOC_AVR_LO8_LDI_PM_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 8 bit of 16 bit command address) into 8 bit immediate value
++of SUBI insn. */
++ BFD_RELOC_AVR_HI8_LDI_PM_NEG,
++
++/* This is a 16 bit reloc for the AVR that stores negated 8 bit value
++(high 6 bit of 22 bit command address) into 8 bit immediate
++value of SUBI insn. */
++ BFD_RELOC_AVR_HH8_LDI_PM_NEG,
++
++/* This is a 32 bit reloc for the AVR that stores 23 bit value
++into 22 bits. */
++ BFD_RELOC_AVR_CALL,
++
++/* This is a 16 bit reloc for the AVR that stores all bits needed
++for absolute addressing with ldi, with the overflow check deferred
++to link time. */
++ BFD_RELOC_AVR_LDI,
++
++/* This is a 6 bit reloc for the AVR that stores the offset for ldd/std
++instructions. */
++ BFD_RELOC_AVR_6,
++
++/* This is a 6 bit reloc for the AVR that stores the offset for adiw/sbiw
++instructions. */
++ BFD_RELOC_AVR_6_ADIW,
++
++/* Direct 12 bit. */
++ BFD_RELOC_390_12,
++
++/* 12 bit GOT offset. */
++ BFD_RELOC_390_GOT12,
++
++/* 32 bit PC relative PLT address. */
++ BFD_RELOC_390_PLT32,
++
++/* Copy symbol at runtime. */
++ BFD_RELOC_390_COPY,
++
++/* Create GOT entry. */
++ BFD_RELOC_390_GLOB_DAT,
++
++/* Create PLT entry. */
++ BFD_RELOC_390_JMP_SLOT,
++
++/* Adjust by program base. */
++ BFD_RELOC_390_RELATIVE,
++
++/* 32 bit PC relative offset to GOT. */
++ BFD_RELOC_390_GOTPC,
++
++/* 16 bit GOT offset. */
++ BFD_RELOC_390_GOT16,
++
++/* PC relative 16 bit shifted by 1. */
++ BFD_RELOC_390_PC16DBL,
++
++/* 16 bit PC rel. PLT shifted by 1. */
++ BFD_RELOC_390_PLT16DBL,
++
++/* PC relative 32 bit shifted by 1. */
++ BFD_RELOC_390_PC32DBL,
++
++/* 32 bit PC rel. PLT shifted by 1. */
++ BFD_RELOC_390_PLT32DBL,
++
++/* 32 bit PC rel. GOT shifted by 1. */
++ BFD_RELOC_390_GOTPCDBL,
++
++/* 64 bit GOT offset. */
++ BFD_RELOC_390_GOT64,
++
++/* 64 bit PC relative PLT address. */
++ BFD_RELOC_390_PLT64,
++
++/* 32 bit rel. offset to GOT entry. */
++ BFD_RELOC_390_GOTENT,
++
++/* 64 bit offset to GOT. */
++ BFD_RELOC_390_GOTOFF64,
++
++/* 12-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT12,
++
++/* 16-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT16,
++
++/* 32-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT32,
++
++/* 64-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLT64,
++
++/* 32-bit rel. offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_390_GOTPLTENT,
++
++/* 16-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF16,
++
++/* 32-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF32,
++
++/* 64-bit rel. offset from the GOT to a PLT entry. */
++ BFD_RELOC_390_PLTOFF64,
++
++/* s390 tls relocations. */
++ BFD_RELOC_390_TLS_LOAD,
++ BFD_RELOC_390_TLS_GDCALL,
++ BFD_RELOC_390_TLS_LDCALL,
++ BFD_RELOC_390_TLS_GD32,
++ BFD_RELOC_390_TLS_GD64,
++ BFD_RELOC_390_TLS_GOTIE12,
++ BFD_RELOC_390_TLS_GOTIE32,
++ BFD_RELOC_390_TLS_GOTIE64,
++ BFD_RELOC_390_TLS_LDM32,
++ BFD_RELOC_390_TLS_LDM64,
++ BFD_RELOC_390_TLS_IE32,
++ BFD_RELOC_390_TLS_IE64,
++ BFD_RELOC_390_TLS_IEENT,
++ BFD_RELOC_390_TLS_LE32,
++ BFD_RELOC_390_TLS_LE64,
++ BFD_RELOC_390_TLS_LDO32,
++ BFD_RELOC_390_TLS_LDO64,
++ BFD_RELOC_390_TLS_DTPMOD,
++ BFD_RELOC_390_TLS_DTPOFF,
++ BFD_RELOC_390_TLS_TPOFF,
++
++/* Long displacement extension. */
++ BFD_RELOC_390_20,
++ BFD_RELOC_390_GOT20,
++ BFD_RELOC_390_GOTPLT20,
++ BFD_RELOC_390_TLS_GOTIE20,
++
++/* Scenix IP2K - 9-bit register number / data address */
++ BFD_RELOC_IP2K_FR9,
++
++/* Scenix IP2K - 4-bit register/data bank number */
++ BFD_RELOC_IP2K_BANK,
++
++/* Scenix IP2K - low 13 bits of instruction word address */
++ BFD_RELOC_IP2K_ADDR16CJP,
++
++/* Scenix IP2K - high 3 bits of instruction word address */
++ BFD_RELOC_IP2K_PAGE3,
++
++/* Scenix IP2K - ext/low/high 8 bits of data address */
++ BFD_RELOC_IP2K_LO8DATA,
++ BFD_RELOC_IP2K_HI8DATA,
++ BFD_RELOC_IP2K_EX8DATA,
++
++/* Scenix IP2K - low/high 8 bits of instruction word address */
++ BFD_RELOC_IP2K_LO8INSN,
++ BFD_RELOC_IP2K_HI8INSN,
++
++/* Scenix IP2K - even/odd PC modifier to modify snb pcl.0 */
++ BFD_RELOC_IP2K_PC_SKIP,
++
++/* Scenix IP2K - 16 bit word address in text section. */
++ BFD_RELOC_IP2K_TEXT,
++
++/* Scenix IP2K - 7-bit sp or dp offset */
++ BFD_RELOC_IP2K_FR_OFFSET,
++
++/* Scenix VPE4K coprocessor - data/insn-space addressing */
++ BFD_RELOC_VPE4KMATH_DATA,
++ BFD_RELOC_VPE4KMATH_INSN,
++
++/* These two relocations are used by the linker to determine which of
++the entries in a C++ virtual function table are actually used. When
++the --gc-sections option is given, the linker will zero out the entries
++that are not used, so that the code for those functions need not be
++included in the output.
++
++VTABLE_INHERIT is a zero-space relocation used to describe to the
++linker the inheritance tree of a C++ virtual function table. The
++relocation's symbol should be the parent class' vtable, and the
++relocation should be located at the child vtable.
++
++VTABLE_ENTRY is a zero-space relocation that describes the use of a
++virtual function table entry. The reloc's symbol should refer to the
++table of the class mentioned in the code. Off of that base, an offset
++describes the entry that is being used. For Rela hosts, this offset
++is stored in the reloc's addend. For Rel hosts, we are forced to put
++this offset in the reloc's section offset. */
++ BFD_RELOC_VTABLE_INHERIT,
++ BFD_RELOC_VTABLE_ENTRY,
++
++/* Intel IA64 Relocations. */
++ BFD_RELOC_IA64_IMM14,
++ BFD_RELOC_IA64_IMM22,
++ BFD_RELOC_IA64_IMM64,
++ BFD_RELOC_IA64_DIR32MSB,
++ BFD_RELOC_IA64_DIR32LSB,
++ BFD_RELOC_IA64_DIR64MSB,
++ BFD_RELOC_IA64_DIR64LSB,
++ BFD_RELOC_IA64_GPREL22,
++ BFD_RELOC_IA64_GPREL64I,
++ BFD_RELOC_IA64_GPREL32MSB,
++ BFD_RELOC_IA64_GPREL32LSB,
++ BFD_RELOC_IA64_GPREL64MSB,
++ BFD_RELOC_IA64_GPREL64LSB,
++ BFD_RELOC_IA64_LTOFF22,
++ BFD_RELOC_IA64_LTOFF64I,
++ BFD_RELOC_IA64_PLTOFF22,
++ BFD_RELOC_IA64_PLTOFF64I,
++ BFD_RELOC_IA64_PLTOFF64MSB,
++ BFD_RELOC_IA64_PLTOFF64LSB,
++ BFD_RELOC_IA64_FPTR64I,
++ BFD_RELOC_IA64_FPTR32MSB,
++ BFD_RELOC_IA64_FPTR32LSB,
++ BFD_RELOC_IA64_FPTR64MSB,
++ BFD_RELOC_IA64_FPTR64LSB,
++ BFD_RELOC_IA64_PCREL21B,
++ BFD_RELOC_IA64_PCREL21BI,
++ BFD_RELOC_IA64_PCREL21M,
++ BFD_RELOC_IA64_PCREL21F,
++ BFD_RELOC_IA64_PCREL22,
++ BFD_RELOC_IA64_PCREL60B,
++ BFD_RELOC_IA64_PCREL64I,
++ BFD_RELOC_IA64_PCREL32MSB,
++ BFD_RELOC_IA64_PCREL32LSB,
++ BFD_RELOC_IA64_PCREL64MSB,
++ BFD_RELOC_IA64_PCREL64LSB,
++ BFD_RELOC_IA64_LTOFF_FPTR22,
++ BFD_RELOC_IA64_LTOFF_FPTR64I,
++ BFD_RELOC_IA64_LTOFF_FPTR32MSB,
++ BFD_RELOC_IA64_LTOFF_FPTR32LSB,
++ BFD_RELOC_IA64_LTOFF_FPTR64MSB,
++ BFD_RELOC_IA64_LTOFF_FPTR64LSB,
++ BFD_RELOC_IA64_SEGREL32MSB,
++ BFD_RELOC_IA64_SEGREL32LSB,
++ BFD_RELOC_IA64_SEGREL64MSB,
++ BFD_RELOC_IA64_SEGREL64LSB,
++ BFD_RELOC_IA64_SECREL32MSB,
++ BFD_RELOC_IA64_SECREL32LSB,
++ BFD_RELOC_IA64_SECREL64MSB,
++ BFD_RELOC_IA64_SECREL64LSB,
++ BFD_RELOC_IA64_REL32MSB,
++ BFD_RELOC_IA64_REL32LSB,
++ BFD_RELOC_IA64_REL64MSB,
++ BFD_RELOC_IA64_REL64LSB,
++ BFD_RELOC_IA64_LTV32MSB,
++ BFD_RELOC_IA64_LTV32LSB,
++ BFD_RELOC_IA64_LTV64MSB,
++ BFD_RELOC_IA64_LTV64LSB,
++ BFD_RELOC_IA64_IPLTMSB,
++ BFD_RELOC_IA64_IPLTLSB,
++ BFD_RELOC_IA64_COPY,
++ BFD_RELOC_IA64_LTOFF22X,
++ BFD_RELOC_IA64_LDXMOV,
++ BFD_RELOC_IA64_TPREL14,
++ BFD_RELOC_IA64_TPREL22,
++ BFD_RELOC_IA64_TPREL64I,
++ BFD_RELOC_IA64_TPREL64MSB,
++ BFD_RELOC_IA64_TPREL64LSB,
++ BFD_RELOC_IA64_LTOFF_TPREL22,
++ BFD_RELOC_IA64_DTPMOD64MSB,
++ BFD_RELOC_IA64_DTPMOD64LSB,
++ BFD_RELOC_IA64_LTOFF_DTPMOD22,
++ BFD_RELOC_IA64_DTPREL14,
++ BFD_RELOC_IA64_DTPREL22,
++ BFD_RELOC_IA64_DTPREL64I,
++ BFD_RELOC_IA64_DTPREL32MSB,
++ BFD_RELOC_IA64_DTPREL32LSB,
++ BFD_RELOC_IA64_DTPREL64MSB,
++ BFD_RELOC_IA64_DTPREL64LSB,
++ BFD_RELOC_IA64_LTOFF_DTPREL22,
++
++/* Motorola 68HC11 reloc.
++This is the 8 bit high part of an absolute address. */
++ BFD_RELOC_M68HC11_HI8,
++
++/* Motorola 68HC11 reloc.
++This is the 8 bit low part of an absolute address. */
++ BFD_RELOC_M68HC11_LO8,
++
++/* Motorola 68HC11 reloc.
++This is the 3-bit part of a value. */
++ BFD_RELOC_M68HC11_3B,
++
++/* Motorola 68HC11 reloc.
++This reloc marks the beginning of a jump/call instruction.
++It is used for linker relaxation to correctly identify beginning
++of instruction and change some branches to use PC-relative
++addressing mode. */
++ BFD_RELOC_M68HC11_RL_JUMP,
++
++/* Motorola 68HC11 reloc.
++This reloc marks a group of several instructions that gcc generates
++and for which the linker relaxation pass can modify and/or remove
++some of them. */
++ BFD_RELOC_M68HC11_RL_GROUP,
++
++/* Motorola 68HC11 reloc.
++This is the 16-bit lower part of an address. It is used for 'call'
++instruction to specify the symbol address without any special
++transformation (due to memory bank window). */
++ BFD_RELOC_M68HC11_LO16,
++
++/* Motorola 68HC11 reloc.
++This is an 8-bit reloc that specifies the page number of an address.
++It is used by 'call' instruction to specify the page number of
++the symbol. */
++ BFD_RELOC_M68HC11_PAGE,
++
++/* Motorola 68HC11 reloc.
++This is a 24-bit reloc that represents the address with a 16-bit
++value and an 8-bit page number. The symbol address is transformed
++to follow the 16K memory bank of 68HC12 (seen as mapped in the window). */
++ BFD_RELOC_M68HC11_24,
++
++/* Motorola 68HC12 reloc.
++This is the 5-bit part of a value. */
++ BFD_RELOC_M68HC12_5B,
++
++/* NS CR16C Relocations. */
++ BFD_RELOC_16C_NUM08,
++ BFD_RELOC_16C_NUM08_C,
++ BFD_RELOC_16C_NUM16,
++ BFD_RELOC_16C_NUM16_C,
++ BFD_RELOC_16C_NUM32,
++ BFD_RELOC_16C_NUM32_C,
++ BFD_RELOC_16C_DISP04,
++ BFD_RELOC_16C_DISP04_C,
++ BFD_RELOC_16C_DISP08,
++ BFD_RELOC_16C_DISP08_C,
++ BFD_RELOC_16C_DISP16,
++ BFD_RELOC_16C_DISP16_C,
++ BFD_RELOC_16C_DISP24,
++ BFD_RELOC_16C_DISP24_C,
++ BFD_RELOC_16C_DISP24a,
++ BFD_RELOC_16C_DISP24a_C,
++ BFD_RELOC_16C_REG04,
++ BFD_RELOC_16C_REG04_C,
++ BFD_RELOC_16C_REG04a,
++ BFD_RELOC_16C_REG04a_C,
++ BFD_RELOC_16C_REG14,
++ BFD_RELOC_16C_REG14_C,
++ BFD_RELOC_16C_REG16,
++ BFD_RELOC_16C_REG16_C,
++ BFD_RELOC_16C_REG20,
++ BFD_RELOC_16C_REG20_C,
++ BFD_RELOC_16C_ABS20,
++ BFD_RELOC_16C_ABS20_C,
++ BFD_RELOC_16C_ABS24,
++ BFD_RELOC_16C_ABS24_C,
++ BFD_RELOC_16C_IMM04,
++ BFD_RELOC_16C_IMM04_C,
++ BFD_RELOC_16C_IMM16,
++ BFD_RELOC_16C_IMM16_C,
++ BFD_RELOC_16C_IMM20,
++ BFD_RELOC_16C_IMM20_C,
++ BFD_RELOC_16C_IMM24,
++ BFD_RELOC_16C_IMM24_C,
++ BFD_RELOC_16C_IMM32,
++ BFD_RELOC_16C_IMM32_C,
++
++/* NS CRX Relocations. */
++ BFD_RELOC_CRX_REL4,
++ BFD_RELOC_CRX_REL8,
++ BFD_RELOC_CRX_REL8_CMP,
++ BFD_RELOC_CRX_REL16,
++ BFD_RELOC_CRX_REL24,
++ BFD_RELOC_CRX_REL32,
++ BFD_RELOC_CRX_REGREL12,
++ BFD_RELOC_CRX_REGREL22,
++ BFD_RELOC_CRX_REGREL28,
++ BFD_RELOC_CRX_REGREL32,
++ BFD_RELOC_CRX_ABS16,
++ BFD_RELOC_CRX_ABS32,
++ BFD_RELOC_CRX_NUM8,
++ BFD_RELOC_CRX_NUM16,
++ BFD_RELOC_CRX_NUM32,
++ BFD_RELOC_CRX_IMM16,
++ BFD_RELOC_CRX_IMM32,
++ BFD_RELOC_CRX_SWITCH8,
++ BFD_RELOC_CRX_SWITCH16,
++ BFD_RELOC_CRX_SWITCH32,
++
++/* These relocs are only used within the CRIS assembler. They are not
++(at present) written to any object files. */
++ BFD_RELOC_CRIS_BDISP8,
++ BFD_RELOC_CRIS_UNSIGNED_5,
++ BFD_RELOC_CRIS_SIGNED_6,
++ BFD_RELOC_CRIS_UNSIGNED_6,
++ BFD_RELOC_CRIS_SIGNED_8,
++ BFD_RELOC_CRIS_UNSIGNED_8,
++ BFD_RELOC_CRIS_SIGNED_16,
++ BFD_RELOC_CRIS_UNSIGNED_16,
++ BFD_RELOC_CRIS_LAPCQ_OFFSET,
++ BFD_RELOC_CRIS_UNSIGNED_4,
++
++/* Relocs used in ELF shared libraries for CRIS. */
++ BFD_RELOC_CRIS_COPY,
++ BFD_RELOC_CRIS_GLOB_DAT,
++ BFD_RELOC_CRIS_JUMP_SLOT,
++ BFD_RELOC_CRIS_RELATIVE,
++
++/* 32-bit offset to symbol-entry within GOT. */
++ BFD_RELOC_CRIS_32_GOT,
++
++/* 16-bit offset to symbol-entry within GOT. */
++ BFD_RELOC_CRIS_16_GOT,
++
++/* 32-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_CRIS_32_GOTPLT,
++
++/* 16-bit offset to symbol-entry within GOT, with PLT handling. */
++ BFD_RELOC_CRIS_16_GOTPLT,
++
++/* 32-bit offset to symbol, relative to GOT. */
++ BFD_RELOC_CRIS_32_GOTREL,
++
++/* 32-bit offset to symbol with PLT entry, relative to GOT. */
++ BFD_RELOC_CRIS_32_PLT_GOTREL,
++
++/* 32-bit offset to symbol with PLT entry, relative to this relocation. */
++ BFD_RELOC_CRIS_32_PLT_PCREL,
++
++/* Intel i860 Relocations. */
++ BFD_RELOC_860_COPY,
++ BFD_RELOC_860_GLOB_DAT,
++ BFD_RELOC_860_JUMP_SLOT,
++ BFD_RELOC_860_RELATIVE,
++ BFD_RELOC_860_PC26,
++ BFD_RELOC_860_PLT26,
++ BFD_RELOC_860_PC16,
++ BFD_RELOC_860_LOW0,
++ BFD_RELOC_860_SPLIT0,
++ BFD_RELOC_860_LOW1,
++ BFD_RELOC_860_SPLIT1,
++ BFD_RELOC_860_LOW2,
++ BFD_RELOC_860_SPLIT2,
++ BFD_RELOC_860_LOW3,
++ BFD_RELOC_860_LOGOT0,
++ BFD_RELOC_860_SPGOT0,
++ BFD_RELOC_860_LOGOT1,
++ BFD_RELOC_860_SPGOT1,
++ BFD_RELOC_860_LOGOTOFF0,
++ BFD_RELOC_860_SPGOTOFF0,
++ BFD_RELOC_860_LOGOTOFF1,
++ BFD_RELOC_860_SPGOTOFF1,
++ BFD_RELOC_860_LOGOTOFF2,
++ BFD_RELOC_860_LOGOTOFF3,
++ BFD_RELOC_860_LOPC,
++ BFD_RELOC_860_HIGHADJ,
++ BFD_RELOC_860_HAGOT,
++ BFD_RELOC_860_HAGOTOFF,
++ BFD_RELOC_860_HAPC,
++ BFD_RELOC_860_HIGH,
++ BFD_RELOC_860_HIGOT,
++ BFD_RELOC_860_HIGOTOFF,
++
++/* OpenRISC Relocations. */
++ BFD_RELOC_OPENRISC_ABS_26,
++ BFD_RELOC_OPENRISC_REL_26,
++
++/* H8 elf Relocations. */
++ BFD_RELOC_H8_DIR16A8,
++ BFD_RELOC_H8_DIR16R8,
++ BFD_RELOC_H8_DIR24A8,
++ BFD_RELOC_H8_DIR24R8,
++ BFD_RELOC_H8_DIR32A16,
++
++/* Sony Xstormy16 Relocations. */
++ BFD_RELOC_XSTORMY16_REL_12,
++ BFD_RELOC_XSTORMY16_12,
++ BFD_RELOC_XSTORMY16_24,
++ BFD_RELOC_XSTORMY16_FPTR16,
++
++/* Relocations used by VAX ELF. */
++ BFD_RELOC_VAX_GLOB_DAT,
++ BFD_RELOC_VAX_JMP_SLOT,
++ BFD_RELOC_VAX_RELATIVE,
++
++/* Morpho MS1 - 16 bit immediate relocation. */
++ BFD_RELOC_MS1_PC16,
++
++/* Morpho MS1 - Hi 16 bits of an address. */
++ BFD_RELOC_MS1_HI16,
++
++/* Morpho MS1 - Low 16 bits of an address. */
++ BFD_RELOC_MS1_LO16,
++
++/* Morpho MS1 - Used to tell the linker which vtable entries are used. */
++ BFD_RELOC_MS1_GNU_VTINHERIT,
++
++/* Morpho MS1 - Used to tell the linker which vtable entries are used. */
++ BFD_RELOC_MS1_GNU_VTENTRY,
++
++/* msp430 specific relocation codes */
++ BFD_RELOC_MSP430_10_PCREL,
++ BFD_RELOC_MSP430_16_PCREL,
++ BFD_RELOC_MSP430_16,
++ BFD_RELOC_MSP430_16_PCREL_BYTE,
++ BFD_RELOC_MSP430_16_BYTE,
++ BFD_RELOC_MSP430_2X_PCREL,
++ BFD_RELOC_MSP430_RL_PCREL,
++
++/* IQ2000 Relocations. */
++ BFD_RELOC_IQ2000_OFFSET_16,
++ BFD_RELOC_IQ2000_OFFSET_21,
++ BFD_RELOC_IQ2000_UHI16,
++
++/* Special Xtensa relocation used only by PLT entries in ELF shared
++objects to indicate that the runtime linker should set the value
++to one of its own internal functions or data structures. */
++ BFD_RELOC_XTENSA_RTLD,
++
++/* Xtensa relocations for ELF shared objects. */
++ BFD_RELOC_XTENSA_GLOB_DAT,
++ BFD_RELOC_XTENSA_JMP_SLOT,
++ BFD_RELOC_XTENSA_RELATIVE,
++
++/* Xtensa relocation used in ELF object files for symbols that may require
++PLT entries. Otherwise, this is just a generic 32-bit relocation. */
++ BFD_RELOC_XTENSA_PLT,
++
++/* Xtensa relocations to mark the difference of two local symbols.
++These are only needed to support linker relaxation and can be ignored
++when not relaxing. The field is set to the value of the difference
++assuming no relaxation. The relocation encodes the position of the
++first symbol so the linker can determine whether to adjust the field
++value. */
++ BFD_RELOC_XTENSA_DIFF8,
++ BFD_RELOC_XTENSA_DIFF16,
++ BFD_RELOC_XTENSA_DIFF32,
++
++/* Generic Xtensa relocations for instruction operands. Only the slot
++number is encoded in the relocation. The relocation applies to the
++last PC-relative immediate operand, or if there are no PC-relative
++immediates, to the last immediate operand. */
++ BFD_RELOC_XTENSA_SLOT0_OP,
++ BFD_RELOC_XTENSA_SLOT1_OP,
++ BFD_RELOC_XTENSA_SLOT2_OP,
++ BFD_RELOC_XTENSA_SLOT3_OP,
++ BFD_RELOC_XTENSA_SLOT4_OP,
++ BFD_RELOC_XTENSA_SLOT5_OP,
++ BFD_RELOC_XTENSA_SLOT6_OP,
++ BFD_RELOC_XTENSA_SLOT7_OP,
++ BFD_RELOC_XTENSA_SLOT8_OP,
++ BFD_RELOC_XTENSA_SLOT9_OP,
++ BFD_RELOC_XTENSA_SLOT10_OP,
++ BFD_RELOC_XTENSA_SLOT11_OP,
++ BFD_RELOC_XTENSA_SLOT12_OP,
++ BFD_RELOC_XTENSA_SLOT13_OP,
++ BFD_RELOC_XTENSA_SLOT14_OP,
++
++/* Alternate Xtensa relocations. Only the slot is encoded in the
++relocation. The meaning of these relocations is opcode-specific. */
++ BFD_RELOC_XTENSA_SLOT0_ALT,
++ BFD_RELOC_XTENSA_SLOT1_ALT,
++ BFD_RELOC_XTENSA_SLOT2_ALT,
++ BFD_RELOC_XTENSA_SLOT3_ALT,
++ BFD_RELOC_XTENSA_SLOT4_ALT,
++ BFD_RELOC_XTENSA_SLOT5_ALT,
++ BFD_RELOC_XTENSA_SLOT6_ALT,
++ BFD_RELOC_XTENSA_SLOT7_ALT,
++ BFD_RELOC_XTENSA_SLOT8_ALT,
++ BFD_RELOC_XTENSA_SLOT9_ALT,
++ BFD_RELOC_XTENSA_SLOT10_ALT,
++ BFD_RELOC_XTENSA_SLOT11_ALT,
++ BFD_RELOC_XTENSA_SLOT12_ALT,
++ BFD_RELOC_XTENSA_SLOT13_ALT,
++ BFD_RELOC_XTENSA_SLOT14_ALT,
++
++/* Xtensa relocations for backward compatibility. These have all been
++replaced by BFD_RELOC_XTENSA_SLOT0_OP. */
++ BFD_RELOC_XTENSA_OP0,
++ BFD_RELOC_XTENSA_OP1,
++ BFD_RELOC_XTENSA_OP2,
++
++/* Xtensa relocation to mark that the assembler expanded the
++instructions from an original target. The expansion size is
++encoded in the reloc size. */
++ BFD_RELOC_XTENSA_ASM_EXPAND,
++
++/* Xtensa relocation to mark that the linker should simplify
++assembler-expanded instructions. This is commonly used
++internally by the linker after analysis of a
++BFD_RELOC_XTENSA_ASM_EXPAND. */
++ BFD_RELOC_XTENSA_ASM_SIMPLIFY,
++ BFD_RELOC_UNUSED };
++typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
++reloc_howto_type *bfd_reloc_type_lookup
++ (bfd *abfd, bfd_reloc_code_real_type code);
++
++const char *bfd_get_reloc_code_name (bfd_reloc_code_real_type code);
++
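The two declarations above are the main consumers of the enum: an assembler or disassembler maps a target-independent bfd_reloc_code_real_type onto the target's howto entry with bfd_reloc_type_lookup(), and bfd_get_reloc_code_name() turns the code back into a printable name. A minimal sketch of that lookup, assuming a BFD that has already been opened and format-checked, and the generic BFD_RELOC_32 code defined earlier in the enum:

    #include <stdio.h>
    #include <bfd.h>

    /* Show which howto a target uses for a generic reloc code.
       Sketch only: link with -lbfd; "abfd" must already be open.  */
    static void show_reloc (bfd *abfd, bfd_reloc_code_real_type code)
    {
      reloc_howto_type *howto = bfd_reloc_type_lookup (abfd, code);

      if (howto == NULL)
        printf ("%s: not supported by this target\n",
                bfd_get_reloc_code_name (code));
      else
        printf ("%s -> howto \"%s\"\n",
                bfd_get_reloc_code_name (code), howto->name);
    }

    /* Typical call: show_reloc (abfd, BFD_RELOC_32);  */
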
++/* Extracted from syms.c. */
++
++typedef struct bfd_symbol
++{
++ /* A pointer to the BFD which owns the symbol. This information
++ is necessary so that a back end can work out what additional
++ information (invisible to the application writer) is carried
++ with the symbol.
++
++ This field is *almost* redundant, since you can use section->owner
++ instead, except that some symbols point to the global sections
++ bfd_{abs,com,und}_section. This could be fixed by making
++ these globals be per-bfd (or per-target-flavor). FIXME. */
++ struct bfd *the_bfd; /* Use bfd_asymbol_bfd(sym) to access this field. */
++
++ /* The text of the symbol. The name is left alone, and not copied; the
++ application may not alter it. */
++ const char *name;
++
++ /* The value of the symbol. This really should be a union of a
++ numeric value with a pointer, since some flags indicate that
++ a pointer to another symbol is stored here. */
++ symvalue value;
++
++ /* Attributes of a symbol. */
++#define BSF_NO_FLAGS 0x00
++
++ /* The symbol has local scope; <<static>> in <<C>>. The value
++ is the offset into the section of the data. */
++#define BSF_LOCAL 0x01
++
++ /* The symbol has global scope; initialized data in <<C>>. The
++ value is the offset into the section of the data. */
++#define BSF_GLOBAL 0x02
++
++ /* The symbol has global scope and is exported. The value is
++ the offset into the section of the data. */
++#define BSF_EXPORT BSF_GLOBAL /* No real difference. */
++
++ /* A normal C symbol would be one of:
++ <<BSF_LOCAL>>, <<BSF_FORT_COMM>>, <<BSF_UNDEFINED>> or
++ <<BSF_GLOBAL>>. */
++
++ /* The symbol is a debugging record. The value has an arbitrary
++ meaning, unless BSF_DEBUGGING_RELOC is also set. */
++#define BSF_DEBUGGING 0x08
++
++ /* The symbol denotes a function entry point. Used in ELF,
++ perhaps others someday. */
++#define BSF_FUNCTION 0x10
++
++ /* Used by the linker. */
++#define BSF_KEEP 0x20
++#define BSF_KEEP_G 0x40
++
++ /* A weak global symbol, overridable without warnings by
++ a regular global symbol of the same name. */
++#define BSF_WEAK 0x80
++
++ /* This symbol was created to point to a section, e.g. ELF's
++ STT_SECTION symbols. */
++#define BSF_SECTION_SYM 0x100
++
++ /* The symbol used to be a common symbol, but now it is
++ allocated. */
++#define BSF_OLD_COMMON 0x200
++
++ /* The default value for common data. */
++#define BFD_FORT_COMM_DEFAULT_VALUE 0
++
++ /* In some files the type of a symbol sometimes alters its
++ location in an output file - i.e., in COFF an <<ISFCN>> symbol
++ which is also a <<C_EXT>> symbol appears where it was
++ declared and not at the end of a section. This bit is set
++ by the target BFD part to convey this information. */
++#define BSF_NOT_AT_END 0x400
++
++ /* Signal that the symbol is the label of a constructor section. */
++#define BSF_CONSTRUCTOR 0x800
++
++ /* Signal that the symbol is a warning symbol. The name is a
++ warning. The name of the next symbol is the one to warn about;
++ if a reference is made to a symbol with the same name as the next
++ symbol, a warning is issued by the linker. */
++#define BSF_WARNING 0x1000
++
++ /* Signal that the symbol is indirect. This symbol is an indirect
++ pointer to the symbol with the same name as the next symbol. */
++#define BSF_INDIRECT 0x2000
++
++ /* BSF_FILE marks symbols that contain a file name. This is used
++ for ELF STT_FILE symbols. */
++#define BSF_FILE 0x4000
++
++ /* Symbol is from dynamic linking information. */
++#define BSF_DYNAMIC 0x8000
++
++ /* The symbol denotes a data object. Used in ELF, and perhaps
++ others someday. */
++#define BSF_OBJECT 0x10000
++
++ /* This symbol is a debugging symbol. The value is the offset
++ into the section of the data. BSF_DEBUGGING should be set
++ as well. */
++#define BSF_DEBUGGING_RELOC 0x20000
++
++ /* This symbol is thread local. Used in ELF. */
++#define BSF_THREAD_LOCAL 0x40000
++
++ flagword flags;
++
++ /* A pointer to the section to which this symbol is
++ relative. This will always be non-NULL; there are special
++ sections for undefined and absolute symbols. */
++ struct bfd_section *section;
++
++ /* Back end special data. */
++ union
++ {
++ void *p;
++ bfd_vma i;
++ }
++ udata;
++}
++asymbol;
++
++#define bfd_get_symtab_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_symtab_upper_bound, (abfd))
++
++bfd_boolean bfd_is_local_label (bfd *abfd, asymbol *sym);
++
++bfd_boolean bfd_is_local_label_name (bfd *abfd, const char *name);
++
++#define bfd_is_local_label_name(abfd, name) \
++ BFD_SEND (abfd, _bfd_is_local_label_name, (abfd, name))
++
++bfd_boolean bfd_is_target_special_symbol (bfd *abfd, asymbol *sym);
++
++#define bfd_is_target_special_symbol(abfd, sym) \
++ BFD_SEND (abfd, _bfd_is_target_special_symbol, (abfd, sym))
++
++#define bfd_canonicalize_symtab(abfd, location) \
++ BFD_SEND (abfd, _bfd_canonicalize_symtab, (abfd, location))
++
++bfd_boolean bfd_set_symtab
++ (bfd *abfd, asymbol **location, unsigned int count);
++
++void bfd_print_symbol_vandf (bfd *abfd, void *file, asymbol *symbol);
++
++#define bfd_make_empty_symbol(abfd) \
++ BFD_SEND (abfd, _bfd_make_empty_symbol, (abfd))
++
++asymbol *_bfd_generic_make_empty_symbol (bfd *);
++
++#define bfd_make_debug_symbol(abfd,ptr,size) \
++ BFD_SEND (abfd, _bfd_make_debug_symbol, (abfd, ptr, size))
++
++int bfd_decode_symclass (asymbol *symbol);
++
++bfd_boolean bfd_is_undefined_symclass (int symclass);
++
++void bfd_symbol_info (asymbol *symbol, symbol_info *ret);
++
++bfd_boolean bfd_copy_private_symbol_data
++ (bfd *ibfd, asymbol *isym, bfd *obfd, asymbol *osym);
++
++#define bfd_copy_private_symbol_data(ibfd, isymbol, obfd, osymbol) \
++ BFD_SEND (obfd, _bfd_copy_private_symbol_data, \
++ (ibfd, isymbol, obfd, osymbol))
++
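Reading a symbol table with the accessors above is a fixed two-step dance: size a buffer with bfd_get_symtab_upper_bound(), fill it with bfd_canonicalize_symtab(), then test the BSF_* bits in each asymbol's flags word. A hedged sketch, assuming "abfd" was opened with bfd_openr() and passed bfd_check_format (abfd, bfd_object):

    #include <stdio.h>
    #include <stdlib.h>
    #include <bfd.h>

    /* Print the names of all global symbols in an object file.  */
    static void list_globals (bfd *abfd)
    {
      long storage = bfd_get_symtab_upper_bound (abfd);
      asymbol **syms;
      long i, count;

      if (storage <= 0)
        return;                     /* error, or no symbols */
      syms = malloc (storage);
      count = bfd_canonicalize_symtab (abfd, syms);
      for (i = 0; i < count; i++)
        if (syms[i]->flags & BSF_GLOBAL)
          printf ("%s\n", syms[i]->name);
      free (syms);
    }
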
++/* Extracted from bfd.c. */
++struct bfd
++{
++ /* A unique identifier of the BFD */
++ unsigned int id;
++
++ /* The filename the application opened the BFD with. */
++ const char *filename;
++
++ /* A pointer to the target jump table. */
++ const struct bfd_target *xvec;
++
++ /* The IOSTREAM, and corresponding IO vector that provide access
++ to the file backing the BFD. */
++ void *iostream;
++ const struct bfd_iovec *iovec;
++
++ /* Is the file descriptor being cached? That is, can it be closed as
++ needed, and re-opened when accessed later? */
++ bfd_boolean cacheable;
++
++ /* Marks whether there was a default target specified when the
++ BFD was opened. This is used to select which matching algorithm
++ to use to choose the back end. */
++ bfd_boolean target_defaulted;
++
++ /* The caching routines use these to maintain a
++ least-recently-used list of BFDs. */
++ struct bfd *lru_prev, *lru_next;
++
++ /* When a file is closed by the caching routines, BFD retains
++ state information on the file here... */
++ ufile_ptr where;
++
++ /* ... and here: (``once'' means at least once). */
++ bfd_boolean opened_once;
++
++ /* Set if we have a locally maintained mtime value, rather than
++ getting it from the file each time. */
++ bfd_boolean mtime_set;
++
++ /* File modified time, if mtime_set is TRUE. */
++ long mtime;
++
++ /* Reserved for an unimplemented file locking extension. */
++ int ifd;
++
++ /* The format which belongs to the BFD (object, core, etc.). */
++ bfd_format format;
++
++ /* The direction with which the BFD was opened. */
++ enum bfd_direction
++ {
++ no_direction = 0,
++ read_direction = 1,
++ write_direction = 2,
++ both_direction = 3
++ }
++ direction;
++
++ /* Format-specific flags. */
++ flagword flags;
++
++ /* Currently my_archive is tested before adding origin to
++ anything. I believe that this can always become an add of
++ origin, with origin set to 0 for non-archive files. */
++ ufile_ptr origin;
++
++ /* Remember when output has begun, to stop strange things
++ from happening. */
++ bfd_boolean output_has_begun;
++
++ /* A hash table for section names. */
++ struct bfd_hash_table section_htab;
++
++ /* Pointer to linked list of sections. */
++ struct bfd_section *sections;
++
++ /* The last section on the section list. */
++ struct bfd_section *section_last;
++
++ /* The number of sections. */
++ unsigned int section_count;
++
++ /* Stuff only useful for object files:
++ The start address. */
++ bfd_vma start_address;
++
++ /* Used for input and output. */
++ unsigned int symcount;
++
++ /* Symbol table for output BFD (with symcount entries). */
++ struct bfd_symbol **outsymbols;
++
++ /* Used for slurped dynamic symbol tables. */
++ unsigned int dynsymcount;
++
++ /* Pointer to structure which contains architecture information. */
++ const struct bfd_arch_info *arch_info;
++
++ /* Flag set if symbols from this BFD should not be exported. */
++ bfd_boolean no_export;
++
++ /* Stuff only useful for archives. */
++ void *arelt_data;
++ struct bfd *my_archive; /* The containing archive BFD. */
++ struct bfd *next; /* The next BFD in the archive. */
++ struct bfd *archive_head; /* The first BFD in the archive. */
++ bfd_boolean has_armap;
++
++ /* A chain of BFD structures involved in a link. */
++ struct bfd *link_next;
++
++ /* A field used by _bfd_generic_link_add_archive_symbols. This will
++ be used only for archive elements. */
++ int archive_pass;
++
++ /* Used by the back end to hold private data. */
++ union
++ {
++ struct aout_data_struct *aout_data;
++ struct artdata *aout_ar_data;
++ struct _oasys_data *oasys_obj_data;
++ struct _oasys_ar_data *oasys_ar_data;
++ struct coff_tdata *coff_obj_data;
++ struct pe_tdata *pe_obj_data;
++ struct xcoff_tdata *xcoff_obj_data;
++ struct ecoff_tdata *ecoff_obj_data;
++ struct ieee_data_struct *ieee_data;
++ struct ieee_ar_data_struct *ieee_ar_data;
++ struct srec_data_struct *srec_data;
++ struct ihex_data_struct *ihex_data;
++ struct tekhex_data_struct *tekhex_data;
++ struct elf_obj_tdata *elf_obj_data;
++ struct nlm_obj_tdata *nlm_obj_data;
++ struct bout_data_struct *bout_data;
++ struct mmo_data_struct *mmo_data;
++ struct sun_core_struct *sun_core_data;
++ struct sco5_core_struct *sco5_core_data;
++ struct trad_core_struct *trad_core_data;
++ struct som_data_struct *som_data;
++ struct hpux_core_struct *hpux_core_data;
++ struct hppabsd_core_struct *hppabsd_core_data;
++ struct sgi_core_struct *sgi_core_data;
++ struct lynx_core_struct *lynx_core_data;
++ struct osf_core_struct *osf_core_data;
++ struct cisco_core_struct *cisco_core_data;
++ struct versados_data_struct *versados_data;
++ struct netbsd_core_struct *netbsd_core_data;
++ struct mach_o_data_struct *mach_o_data;
++ struct mach_o_fat_data_struct *mach_o_fat_data;
++ struct bfd_pef_data_struct *pef_data;
++ struct bfd_pef_xlib_data_struct *pef_xlib_data;
++ struct bfd_sym_data_struct *sym_data;
++ void *any;
++ }
++ tdata;
++
++ /* Used by the application to hold private data. */
++ void *usrdata;
++
++ /* Where all the allocated stuff under this BFD goes. This is a
++ struct objalloc *, but we use void * to avoid requiring the inclusion
++ of objalloc.h. */
++ void *memory;
++};
++
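Applications normally treat struct bfd as read-only and let the library maintain it; the fields most commonly read are filename, format, and the sections list, which is threaded through each section's next pointer. A short sketch (asection and its name/next members come from the section.c extract of this header):

    #include <stdio.h>
    #include <bfd.h>

    /* Dump basic facts about an already format-checked BFD.  */
    static void dump_bfd (bfd *abfd)
    {
      asection *sec;

      printf ("%s: %u sections\n", abfd->filename, abfd->section_count);
      for (sec = abfd->sections; sec != NULL; sec = sec->next)
        printf ("  %s\n", sec->name);
    }
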
++typedef enum bfd_error
++{
++ bfd_error_no_error = 0,
++ bfd_error_system_call,
++ bfd_error_invalid_target,
++ bfd_error_wrong_format,
++ bfd_error_wrong_object_format,
++ bfd_error_invalid_operation,
++ bfd_error_no_memory,
++ bfd_error_no_symbols,
++ bfd_error_no_armap,
++ bfd_error_no_more_archived_files,
++ bfd_error_malformed_archive,
++ bfd_error_file_not_recognized,
++ bfd_error_file_ambiguously_recognized,
++ bfd_error_no_contents,
++ bfd_error_nonrepresentable_section,
++ bfd_error_no_debug_section,
++ bfd_error_bad_value,
++ bfd_error_file_truncated,
++ bfd_error_file_too_big,
++ bfd_error_invalid_error_code
++}
++bfd_error_type;
++
++bfd_error_type bfd_get_error (void);
++
++void bfd_set_error (bfd_error_type error_tag);
++
++const char *bfd_errmsg (bfd_error_type error_tag);
++
++void bfd_perror (const char *message);
++
++typedef void (*bfd_error_handler_type) (const char *, ...);
++
++bfd_error_handler_type bfd_set_error_handler (bfd_error_handler_type);
++
++void bfd_set_error_program_name (const char *);
++
++bfd_error_handler_type bfd_get_error_handler (void);
++
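The error interface is errno-like: a failing call records a bfd_error_type which bfd_get_error() retrieves and bfd_errmsg() renders as text, while bfd_perror() plays the role of perror(3). A sketch of the usual open-and-check sequence, assuming the standard entry points bfd_init(), bfd_openr() and bfd_check_format() declared elsewhere in this header:

    #include <stdio.h>
    #include <bfd.h>

    int main (int argc, char **argv)
    {
      bfd *abfd;

      if (argc < 2)
        return 1;
      bfd_init ();
      abfd = bfd_openr (argv[1], NULL);   /* NULL target: guess */
      if (abfd == NULL)
        {
          bfd_perror (argv[1]);           /* one-line diagnostic */
          return 1;
        }
      if (!bfd_check_format (abfd, bfd_object))
        {
          /* The same information, taken apart by hand.  */
          fprintf (stderr, "%s: %s\n", argv[1],
                   bfd_errmsg (bfd_get_error ()));
          bfd_close (abfd);
          return 1;
        }
      printf ("%s: looks like an object file\n", argv[1]);
      bfd_close (abfd);
      return 0;
    }
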
++long bfd_get_reloc_upper_bound (bfd *abfd, asection *sect);
++
++long bfd_canonicalize_reloc
++ (bfd *abfd, asection *sec, arelent **loc, asymbol **syms);
++
++void bfd_set_reloc
++ (bfd *abfd, asection *sec, arelent **rel, unsigned int count);
++
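Relocations follow the same upper-bound/canonicalize pattern as symbols, except that bfd_canonicalize_reloc() also needs the canonical symbol table so each arelent can point at its symbol. Sketch, assuming "syms" came from bfd_canonicalize_symtab() as in the earlier example and that arelent carries the usual address/howto/sym_ptr_ptr members:

    #include <stdio.h>
    #include <stdlib.h>
    #include <bfd.h>

    /* Print the relocations attached to one section.  */
    static void list_relocs (bfd *abfd, asection *sec, asymbol **syms)
    {
      long size = bfd_get_reloc_upper_bound (abfd, sec);
      arelent **rels;
      long i, count;

      if (size <= 0)
        return;
      rels = malloc (size);
      count = bfd_canonicalize_reloc (abfd, sec, rels, syms);
      for (i = 0; i < count; i++)
        printf ("0x%lx %s %s\n",
                (unsigned long) rels[i]->address,
                rels[i]->howto->name,
                (*rels[i]->sym_ptr_ptr)->name);
      free (rels);
    }
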
++bfd_boolean bfd_set_file_flags (bfd *abfd, flagword flags);
++
++int bfd_get_arch_size (bfd *abfd);
++
++int bfd_get_sign_extend_vma (bfd *abfd);
++
++bfd_boolean bfd_set_start_address (bfd *abfd, bfd_vma vma);
++
++unsigned int bfd_get_gp_size (bfd *abfd);
++
++void bfd_set_gp_size (bfd *abfd, unsigned int i);
++
++bfd_vma bfd_scan_vma (const char *string, const char **end, int base);
++
++bfd_boolean bfd_copy_private_header_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_copy_private_header_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_copy_private_header_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_copy_private_bfd_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_copy_private_bfd_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_copy_private_bfd_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_merge_private_bfd_data (bfd *ibfd, bfd *obfd);
++
++#define bfd_merge_private_bfd_data(ibfd, obfd) \
++ BFD_SEND (obfd, _bfd_merge_private_bfd_data, \
++ (ibfd, obfd))
++bfd_boolean bfd_set_private_flags (bfd *abfd, flagword flags);
++
++#define bfd_set_private_flags(abfd, flags) \
++ BFD_SEND (abfd, _bfd_set_private_flags, (abfd, flags))
++#define bfd_sizeof_headers(abfd, reloc) \
++ BFD_SEND (abfd, _bfd_sizeof_headers, (abfd, reloc))
++
++#define bfd_find_nearest_line(abfd, sec, syms, off, file, func, line) \
++ BFD_SEND (abfd, _bfd_find_nearest_line, \
++ (abfd, sec, syms, off, file, func, line))
++
++#define bfd_find_line(abfd, syms, sym, file, line) \
++ BFD_SEND (abfd, _bfd_find_line, \
++ (abfd, syms, sym, file, line))
++
++#define bfd_find_inliner_info(abfd, file, func, line) \
++ BFD_SEND (abfd, _bfd_find_inliner_info, \
++ (abfd, file, func, line))
++
++#define bfd_debug_info_start(abfd) \
++ BFD_SEND (abfd, _bfd_debug_info_start, (abfd))
++
++#define bfd_debug_info_end(abfd) \
++ BFD_SEND (abfd, _bfd_debug_info_end, (abfd))
++
++#define bfd_debug_info_accumulate(abfd, section) \
++ BFD_SEND (abfd, _bfd_debug_info_accumulate, (abfd, section))
++
++#define bfd_stat_arch_elt(abfd, stat) \
++ BFD_SEND (abfd, _bfd_stat_arch_elt,(abfd, stat))
++
++#define bfd_update_armap_timestamp(abfd) \
++ BFD_SEND (abfd, _bfd_update_armap_timestamp, (abfd))
++
++#define bfd_set_arch_mach(abfd, arch, mach)\
++ BFD_SEND ( abfd, _bfd_set_arch_mach, (abfd, arch, mach))
++
++#define bfd_relax_section(abfd, section, link_info, again) \
++ BFD_SEND (abfd, _bfd_relax_section, (abfd, section, link_info, again))
++
++#define bfd_gc_sections(abfd, link_info) \
++ BFD_SEND (abfd, _bfd_gc_sections, (abfd, link_info))
++
++#define bfd_merge_sections(abfd, link_info) \
++ BFD_SEND (abfd, _bfd_merge_sections, (abfd, link_info))
++
++#define bfd_is_group_section(abfd, sec) \
++ BFD_SEND (abfd, _bfd_is_group_section, (abfd, sec))
++
++#define bfd_discard_group(abfd, sec) \
++ BFD_SEND (abfd, _bfd_discard_group, (abfd, sec))
++
++#define bfd_link_hash_table_create(abfd) \
++ BFD_SEND (abfd, _bfd_link_hash_table_create, (abfd))
++
++#define bfd_link_hash_table_free(abfd, hash) \
++ BFD_SEND (abfd, _bfd_link_hash_table_free, (hash))
++
++#define bfd_link_add_symbols(abfd, info) \
++ BFD_SEND (abfd, _bfd_link_add_symbols, (abfd, info))
++
++#define bfd_link_just_syms(abfd, sec, info) \
++ BFD_SEND (abfd, _bfd_link_just_syms, (sec, info))
++
++#define bfd_final_link(abfd, info) \
++ BFD_SEND (abfd, _bfd_final_link, (abfd, info))
++
++#define bfd_free_cached_info(abfd) \
++ BFD_SEND (abfd, _bfd_free_cached_info, (abfd))
++
++#define bfd_get_dynamic_symtab_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_dynamic_symtab_upper_bound, (abfd))
++
++#define bfd_print_private_bfd_data(abfd, file)\
++ BFD_SEND (abfd, _bfd_print_private_bfd_data, (abfd, file))
++
++#define bfd_canonicalize_dynamic_symtab(abfd, asymbols) \
++ BFD_SEND (abfd, _bfd_canonicalize_dynamic_symtab, (abfd, asymbols))
++
++#define bfd_get_synthetic_symtab(abfd, count, syms, dyncount, dynsyms, ret) \
++ BFD_SEND (abfd, _bfd_get_synthetic_symtab, (abfd, count, syms, \
++ dyncount, dynsyms, ret))
++
++#define bfd_get_dynamic_reloc_upper_bound(abfd) \
++ BFD_SEND (abfd, _bfd_get_dynamic_reloc_upper_bound, (abfd))
++
++#define bfd_canonicalize_dynamic_reloc(abfd, arels, asyms) \
++ BFD_SEND (abfd, _bfd_canonicalize_dynamic_reloc, (abfd, arels, asyms))
++
++extern bfd_byte *bfd_get_relocated_section_contents
++ (bfd *, struct bfd_link_info *, struct bfd_link_order *, bfd_byte *,
++ bfd_boolean, asymbol **);
++
++bfd_boolean bfd_alt_mach_code (bfd *abfd, int alternative);
++
++struct bfd_preserve
++{
++ void *marker;
++ void *tdata;
++ flagword flags;
++ const struct bfd_arch_info *arch_info;
++ struct bfd_section *sections;
++ struct bfd_section *section_last;
++ unsigned int section_count;
++ struct bfd_hash_table section_htab;
++};
++
++bfd_boolean bfd_preserve_save (bfd *, struct bfd_preserve *);
++
++void bfd_preserve_restore (bfd *, struct bfd_preserve *);
++
++void bfd_preserve_finish (bfd *, struct bfd_preserve *);
++
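struct bfd_preserve exists so a check-format (object_p) routine can speculatively rebuild a BFD's tdata and section list and still back out cleanly when the file turns out not to match. The intended shape, as a hypothetical back-end recognizer (try_parse is a stand-in for the real parsing work, not a real BFD function):

    #include <bfd.h>

    static bfd_boolean try_parse (bfd *);   /* hypothetical */

    static const struct bfd_target *
    example_object_p (bfd *abfd)
    {
      struct bfd_preserve preserve;

      if (!bfd_preserve_save (abfd, &preserve))
        return NULL;

      if (!try_parse (abfd))
        {
          /* Not ours: put back the saved tdata/sections/arch_info.  */
          bfd_preserve_restore (abfd, &preserve);
          return NULL;
        }

      /* Matched: commit, discarding the saved state.  */
      bfd_preserve_finish (abfd, &preserve);
      return abfd->xvec;
    }
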
++/* Extracted from archive.c. */
++symindex bfd_get_next_mapent
++ (bfd *abfd, symindex previous, carsym **sym);
++
++bfd_boolean bfd_set_archive_head (bfd *output, bfd *new_head);
++
++bfd *bfd_openr_next_archived_file (bfd *archive, bfd *previous);
++
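bfd_openr_next_archived_file() iterates by handing back the member after "previous", with NULL meaning "start at the first", so listing an archive the way ar t does is one loop. A minimal sketch:

    #include <stdio.h>
    #include <bfd.h>

    /* Print archive member names, ar-t style.  Error handling
       is deliberately minimal.  */
    static void list_members (const char *path)
    {
      bfd *arch = bfd_openr (path, NULL);
      bfd *elt = NULL;

      if (arch == NULL || !bfd_check_format (arch, bfd_archive))
        return;
      while ((elt = bfd_openr_next_archived_file (arch, elt)) != NULL)
        printf ("%s\n", elt->filename);
      bfd_close (arch);
    }
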
++/* Extracted from corefile.c. */
++const char *bfd_core_file_failing_command (bfd *abfd);
++
++int bfd_core_file_failing_signal (bfd *abfd);
++
++bfd_boolean core_file_matches_executable_p
++ (bfd *core_bfd, bfd *exec_bfd);
++
++/* Extracted from targets.c. */
++#define BFD_SEND(bfd, message, arglist) \
++ ((*((bfd)->xvec->message)) arglist)
++
++#ifdef DEBUG_BFD_SEND
++#undef BFD_SEND
++#define BFD_SEND(bfd, message, arglist) \
++ (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? \
++ ((*((bfd)->xvec->message)) arglist) : \
++ (bfd_assert (__FILE__,__LINE__), NULL))
++#endif
++#define BFD_SEND_FMT(bfd, message, arglist) \
++ (((bfd)->xvec->message[(int) ((bfd)->format)]) arglist)
++
++#ifdef DEBUG_BFD_SEND
++#undef BFD_SEND_FMT
++#define BFD_SEND_FMT(bfd, message, arglist) \
++ (((bfd) && (bfd)->xvec && (bfd)->xvec->message) ? \
++ (((bfd)->xvec->message[(int) ((bfd)->format)]) arglist) : \
++ (bfd_assert (__FILE__,__LINE__), NULL))
++#endif
++
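BFD_SEND is the whole dispatch mechanism in one line: it indexes the per-target jump table hanging off bfd->xvec by member name and calls through the function pointer, which is how the bfd_* wrapper macros in this header reach back-end code. A stripped-down, purely hypothetical analogue of the pattern (none of these toy names are real BFD identifiers):

    #include <stdio.h>

    struct toy_target { int (*_answer) (int); };
    struct toy { const struct toy_target *xvec; };

    /* Same shape as BFD_SEND: pick the slot, call through it.  */
    #define TOY_SEND(t, message, arglist) ((*((t)->xvec->message)) arglist)
    #define toy_answer(t, x) TOY_SEND (t, _answer, (x))

    static int doubling_answer (int x) { return x * 2; }
    static const struct toy_target doubling = { doubling_answer };

    int main (void)
    {
      struct toy t = { &doubling };
      printf ("%d\n", toy_answer (&t, 21));   /* dispatches: prints 42 */
      return 0;
    }
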
++enum bfd_flavour
++{
++ bfd_target_unknown_flavour,
++ bfd_target_aout_flavour,
++ bfd_target_coff_flavour,
++ bfd_target_ecoff_flavour,
++ bfd_target_xcoff_flavour,
++ bfd_target_elf_flavour,
++ bfd_target_ieee_flavour,
++ bfd_target_nlm_flavour,
++ bfd_target_oasys_flavour,
++ bfd_target_tekhex_flavour,
++ bfd_target_srec_flavour,
++ bfd_target_ihex_flavour,
++ bfd_target_som_flavour,
++ bfd_target_os9k_flavour,
++ bfd_target_versados_flavour,
++ bfd_target_msdos_flavour,
++ bfd_target_ovax_flavour,
++ bfd_target_evax_flavour,
++ bfd_target_mmo_flavour,
++ bfd_target_mach_o_flavour,
++ bfd_target_pef_flavour,
++ bfd_target_pef_xlib_flavour,
++ bfd_target_sym_flavour
++};
++
++enum bfd_endian { BFD_ENDIAN_BIG, BFD_ENDIAN_LITTLE, BFD_ENDIAN_UNKNOWN };
++
++/* Forward declaration. */
++typedef struct bfd_link_info _bfd_link_info;
++
++typedef struct bfd_target
++{
++ /* Identifies the kind of target, e.g., SunOS4, Ultrix, etc. */
++ char *name;
++
++ /* The "flavour" of a back end is a general indication about
++ the contents of a file. */
++ enum bfd_flavour flavour;
++
++ /* The order of bytes within the data area of a file. */
++ enum bfd_endian byteorder;
++
++ /* The order of bytes within the header parts of a file. */
++ enum bfd_endian header_byteorder;
++
++ /* A mask of all the flags which an executable may have set -
++ from the set <<BFD_NO_FLAGS>>, <<HAS_RELOC>>, ...<<D_PAGED>>. */
++ flagword object_flags;
++
++ /* A mask of all the flags which a section may have set - from
++ the set <<SEC_NO_FLAGS>>, <<SEC_ALLOC>>, ...<<SET_NEVER_LOAD>>. */
++ flagword section_flags;
++
++  /* The character normally found at the front of a symbol
++     (if any), perhaps `_'.  */
++ char symbol_leading_char;
++
++ /* The pad character for file names within an archive header. */
++ char ar_pad_char;
++
++ /* The maximum number of characters in an archive header. */
++ unsigned short ar_max_namelen;
++
++ /* Entries for byte swapping for data. These are different from the
++ other entry points, since they don't take a BFD as the first argument.
++ Certain other handlers could do the same. */
++ bfd_uint64_t (*bfd_getx64) (const void *);
++ bfd_int64_t (*bfd_getx_signed_64) (const void *);
++ void (*bfd_putx64) (bfd_uint64_t, void *);
++ bfd_vma (*bfd_getx32) (const void *);
++ bfd_signed_vma (*bfd_getx_signed_32) (const void *);
++ void (*bfd_putx32) (bfd_vma, void *);
++ bfd_vma (*bfd_getx16) (const void *);
++ bfd_signed_vma (*bfd_getx_signed_16) (const void *);
++ void (*bfd_putx16) (bfd_vma, void *);
++
++ /* Byte swapping for the headers. */
++ bfd_uint64_t (*bfd_h_getx64) (const void *);
++ bfd_int64_t (*bfd_h_getx_signed_64) (const void *);
++ void (*bfd_h_putx64) (bfd_uint64_t, void *);
++ bfd_vma (*bfd_h_getx32) (const void *);
++ bfd_signed_vma (*bfd_h_getx_signed_32) (const void *);
++ void (*bfd_h_putx32) (bfd_vma, void *);
++ bfd_vma (*bfd_h_getx16) (const void *);
++ bfd_signed_vma (*bfd_h_getx_signed_16) (const void *);
++ void (*bfd_h_putx16) (bfd_vma, void *);
++
++ /* Format dependent routines: these are vectors of entry points
++ within the target vector structure, one for each format to check. */
++
++ /* Check the format of a file being read. Return a <<bfd_target *>> or zero. */
++ const struct bfd_target *(*_bfd_check_format[bfd_type_end]) (bfd *);
++
++ /* Set the format of a file being written. */
++ bfd_boolean (*_bfd_set_format[bfd_type_end]) (bfd *);
++
++ /* Write cached information into a file being written, at <<bfd_close>>. */
++ bfd_boolean (*_bfd_write_contents[bfd_type_end]) (bfd *);
++
++
++ /* Generic entry points. */
++#define BFD_JUMP_TABLE_GENERIC(NAME) \
++ NAME##_close_and_cleanup, \
++ NAME##_bfd_free_cached_info, \
++ NAME##_new_section_hook, \
++ NAME##_get_section_contents, \
++ NAME##_get_section_contents_in_window
++
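++  /* Illustration: a backend instantiates these slots via token
++     pasting; for example BFD_JUMP_TABLE_GENERIC (coff) expands to
++
++       coff_close_and_cleanup,
++       coff_bfd_free_cached_info,
++       coff_new_section_hook,
++       coff_get_section_contents,
++       coff_get_section_contents_in_window
++
++     inside that backend's bfd_target initializer ("coff" is just an
++     example NAME).  */
++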
++ /* Called when the BFD is being closed to do any necessary cleanup. */
++ bfd_boolean (*_close_and_cleanup) (bfd *);
++ /* Ask the BFD to free all cached information. */
++ bfd_boolean (*_bfd_free_cached_info) (bfd *);
++ /* Called when a new section is created. */
++ bfd_boolean (*_new_section_hook) (bfd *, sec_ptr);
++ /* Read the contents of a section. */
++ bfd_boolean (*_bfd_get_section_contents)
++ (bfd *, sec_ptr, void *, file_ptr, bfd_size_type);
++ bfd_boolean (*_bfd_get_section_contents_in_window)
++ (bfd *, sec_ptr, bfd_window *, file_ptr, bfd_size_type);
++
++ /* Entry points to copy private data. */
++#define BFD_JUMP_TABLE_COPY(NAME) \
++ NAME##_bfd_copy_private_bfd_data, \
++ NAME##_bfd_merge_private_bfd_data, \
++ NAME##_bfd_copy_private_section_data, \
++ NAME##_bfd_copy_private_symbol_data, \
++ NAME##_bfd_copy_private_header_data, \
++ NAME##_bfd_set_private_flags, \
++ NAME##_bfd_print_private_bfd_data
++
++ /* Called to copy BFD general private data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_bfd_data) (bfd *, bfd *);
++ /* Called to merge BFD general private data from one object file
++ to a common output file when linking. */
++ bfd_boolean (*_bfd_merge_private_bfd_data) (bfd *, bfd *);
++ /* Called to copy BFD private section data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_section_data)
++ (bfd *, sec_ptr, bfd *, sec_ptr);
++ /* Called to copy BFD private symbol data from one symbol
++ to another. */
++ bfd_boolean (*_bfd_copy_private_symbol_data)
++ (bfd *, asymbol *, bfd *, asymbol *);
++ /* Called to copy BFD private header data from one object file
++ to another. */
++ bfd_boolean (*_bfd_copy_private_header_data)
++ (bfd *, bfd *);
++ /* Called to set private backend flags. */
++ bfd_boolean (*_bfd_set_private_flags) (bfd *, flagword);
++
++ /* Called to print private BFD data. */
++ bfd_boolean (*_bfd_print_private_bfd_data) (bfd *, void *);
++
++ /* Core file entry points. */
++#define BFD_JUMP_TABLE_CORE(NAME) \
++ NAME##_core_file_failing_command, \
++ NAME##_core_file_failing_signal, \
++ NAME##_core_file_matches_executable_p
++
++ char * (*_core_file_failing_command) (bfd *);
++ int (*_core_file_failing_signal) (bfd *);
++ bfd_boolean (*_core_file_matches_executable_p) (bfd *, bfd *);
++
++ /* Archive entry points. */
++#define BFD_JUMP_TABLE_ARCHIVE(NAME) \
++ NAME##_slurp_armap, \
++ NAME##_slurp_extended_name_table, \
++ NAME##_construct_extended_name_table, \
++ NAME##_truncate_arname, \
++ NAME##_write_armap, \
++ NAME##_read_ar_hdr, \
++ NAME##_openr_next_archived_file, \
++ NAME##_get_elt_at_index, \
++ NAME##_generic_stat_arch_elt, \
++ NAME##_update_armap_timestamp
++
++ bfd_boolean (*_bfd_slurp_armap) (bfd *);
++ bfd_boolean (*_bfd_slurp_extended_name_table) (bfd *);
++ bfd_boolean (*_bfd_construct_extended_name_table)
++ (bfd *, char **, bfd_size_type *, const char **);
++ void (*_bfd_truncate_arname) (bfd *, const char *, char *);
++ bfd_boolean (*write_armap)
++ (bfd *, unsigned int, struct orl *, unsigned int, int);
++ void * (*_bfd_read_ar_hdr_fn) (bfd *);
++ bfd * (*openr_next_archived_file) (bfd *, bfd *);
++#define bfd_get_elt_at_index(b,i) BFD_SEND (b, _bfd_get_elt_at_index, (b,i))
++ bfd * (*_bfd_get_elt_at_index) (bfd *, symindex);
++ int (*_bfd_stat_arch_elt) (bfd *, struct stat *);
++ bfd_boolean (*_bfd_update_armap_timestamp) (bfd *);
++
++ /* Entry points used for symbols. */
++#define BFD_JUMP_TABLE_SYMBOLS(NAME) \
++ NAME##_get_symtab_upper_bound, \
++ NAME##_canonicalize_symtab, \
++ NAME##_make_empty_symbol, \
++ NAME##_print_symbol, \
++ NAME##_get_symbol_info, \
++ NAME##_bfd_is_local_label_name, \
++ NAME##_bfd_is_target_special_symbol, \
++ NAME##_get_lineno, \
++ NAME##_find_nearest_line, \
++ _bfd_generic_find_line, \
++ NAME##_find_inliner_info, \
++ NAME##_bfd_make_debug_symbol, \
++ NAME##_read_minisymbols, \
++ NAME##_minisymbol_to_symbol
++
++ long (*_bfd_get_symtab_upper_bound) (bfd *);
++ long (*_bfd_canonicalize_symtab)
++ (bfd *, struct bfd_symbol **);
++ struct bfd_symbol *
++ (*_bfd_make_empty_symbol) (bfd *);
++ void (*_bfd_print_symbol)
++ (bfd *, void *, struct bfd_symbol *, bfd_print_symbol_type);
++#define bfd_print_symbol(b,p,s,e) BFD_SEND (b, _bfd_print_symbol, (b,p,s,e))
++ void (*_bfd_get_symbol_info)
++ (bfd *, struct bfd_symbol *, symbol_info *);
++#define bfd_get_symbol_info(b,p,e) BFD_SEND (b, _bfd_get_symbol_info, (b,p,e))
++ bfd_boolean (*_bfd_is_local_label_name) (bfd *, const char *);
++ bfd_boolean (*_bfd_is_target_special_symbol) (bfd *, asymbol *);
++ alent * (*_get_lineno) (bfd *, struct bfd_symbol *);
++ bfd_boolean (*_bfd_find_nearest_line)
++ (bfd *, struct bfd_section *, struct bfd_symbol **, bfd_vma,
++ const char **, const char **, unsigned int *);
++ bfd_boolean (*_bfd_find_line)
++ (bfd *, struct bfd_symbol **, struct bfd_symbol *,
++ const char **, unsigned int *);
++ bfd_boolean (*_bfd_find_inliner_info)
++ (bfd *, const char **, const char **, unsigned int *);
++ /* Back-door to allow format-aware applications to create debug symbols
++ while using BFD for everything else. Currently used by the assembler
++ when creating COFF files. */
++ asymbol * (*_bfd_make_debug_symbol)
++ (bfd *, void *, unsigned long size);
++#define bfd_read_minisymbols(b, d, m, s) \
++ BFD_SEND (b, _read_minisymbols, (b, d, m, s))
++ long (*_read_minisymbols)
++ (bfd *, bfd_boolean, void **, unsigned int *);
++#define bfd_minisymbol_to_symbol(b, d, m, f) \
++ BFD_SEND (b, _minisymbol_to_symbol, (b, d, m, f))
++ asymbol * (*_minisymbol_to_symbol)
++ (bfd *, bfd_boolean, const void *, asymbol *);
++
++ /* Routines for relocs. */
++#define BFD_JUMP_TABLE_RELOCS(NAME) \
++ NAME##_get_reloc_upper_bound, \
++ NAME##_canonicalize_reloc, \
++ NAME##_bfd_reloc_type_lookup
++
++ long (*_get_reloc_upper_bound) (bfd *, sec_ptr);
++ long (*_bfd_canonicalize_reloc)
++ (bfd *, sec_ptr, arelent **, struct bfd_symbol **);
++ /* See documentation on reloc types. */
++ reloc_howto_type *
++ (*reloc_type_lookup) (bfd *, bfd_reloc_code_real_type);
++
++ /* Routines used when writing an object file. */
++#define BFD_JUMP_TABLE_WRITE(NAME) \
++ NAME##_set_arch_mach, \
++ NAME##_set_section_contents
++
++ bfd_boolean (*_bfd_set_arch_mach)
++ (bfd *, enum bfd_architecture, unsigned long);
++ bfd_boolean (*_bfd_set_section_contents)
++ (bfd *, sec_ptr, const void *, file_ptr, bfd_size_type);
++
++ /* Routines used by the linker. */
++#define BFD_JUMP_TABLE_LINK(NAME) \
++ NAME##_sizeof_headers, \
++ NAME##_bfd_get_relocated_section_contents, \
++ NAME##_bfd_relax_section, \
++ NAME##_bfd_link_hash_table_create, \
++ NAME##_bfd_link_hash_table_free, \
++ NAME##_bfd_link_add_symbols, \
++ NAME##_bfd_link_just_syms, \
++ NAME##_bfd_final_link, \
++ NAME##_bfd_link_split_section, \
++ NAME##_bfd_gc_sections, \
++ NAME##_bfd_merge_sections, \
++ NAME##_bfd_is_group_section, \
++ NAME##_bfd_discard_group, \
++ NAME##_section_already_linked \
++
++ int (*_bfd_sizeof_headers) (bfd *, bfd_boolean);
++ bfd_byte * (*_bfd_get_relocated_section_contents)
++ (bfd *, struct bfd_link_info *, struct bfd_link_order *,
++ bfd_byte *, bfd_boolean, struct bfd_symbol **);
++
++ bfd_boolean (*_bfd_relax_section)
++ (bfd *, struct bfd_section *, struct bfd_link_info *, bfd_boolean *);
++
++ /* Create a hash table for the linker. Different backends store
++ different information in this table. */
++ struct bfd_link_hash_table *
++ (*_bfd_link_hash_table_create) (bfd *);
++
++ /* Release the memory associated with the linker hash table. */
++ void (*_bfd_link_hash_table_free) (struct bfd_link_hash_table *);
++
++ /* Add symbols from this object file into the hash table. */
++ bfd_boolean (*_bfd_link_add_symbols) (bfd *, struct bfd_link_info *);
++
++ /* Indicate that we are only retrieving symbol values from this section. */
++ void (*_bfd_link_just_syms) (asection *, struct bfd_link_info *);
++
++ /* Do a link based on the link_order structures attached to each
++ section of the BFD. */
++ bfd_boolean (*_bfd_final_link) (bfd *, struct bfd_link_info *);
++
++  /* Should this section be split up into smaller pieces during linking? */
++ bfd_boolean (*_bfd_link_split_section) (bfd *, struct bfd_section *);
++
++ /* Remove sections that are not referenced from the output. */
++ bfd_boolean (*_bfd_gc_sections) (bfd *, struct bfd_link_info *);
++
++ /* Attempt to merge SEC_MERGE sections. */
++ bfd_boolean (*_bfd_merge_sections) (bfd *, struct bfd_link_info *);
++
++ /* Is this section a member of a group? */
++ bfd_boolean (*_bfd_is_group_section) (bfd *, const struct bfd_section *);
++
++ /* Discard members of a group. */
++ bfd_boolean (*_bfd_discard_group) (bfd *, struct bfd_section *);
++
++  /* Check if SEC has already been linked during a relocatable or
++     final link.  */
++ void (*_section_already_linked) (bfd *, struct bfd_section *);
++
++ /* Routines to handle dynamic symbols and relocs. */
++#define BFD_JUMP_TABLE_DYNAMIC(NAME) \
++ NAME##_get_dynamic_symtab_upper_bound, \
++ NAME##_canonicalize_dynamic_symtab, \
++ NAME##_get_synthetic_symtab, \
++ NAME##_get_dynamic_reloc_upper_bound, \
++ NAME##_canonicalize_dynamic_reloc
++
++ /* Get the amount of memory required to hold the dynamic symbols. */
++ long (*_bfd_get_dynamic_symtab_upper_bound) (bfd *);
++ /* Read in the dynamic symbols. */
++ long (*_bfd_canonicalize_dynamic_symtab)
++ (bfd *, struct bfd_symbol **);
++  /* Create synthesized symbols. */
++ long (*_bfd_get_synthetic_symtab)
++ (bfd *, long, struct bfd_symbol **, long, struct bfd_symbol **,
++ struct bfd_symbol **);
++ /* Get the amount of memory required to hold the dynamic relocs. */
++ long (*_bfd_get_dynamic_reloc_upper_bound) (bfd *);
++ /* Read in the dynamic relocs. */
++ long (*_bfd_canonicalize_dynamic_reloc)
++ (bfd *, arelent **, struct bfd_symbol **);
++
++ /* Opposite endian version of this target. */
++ const struct bfd_target * alternative_target;
++
++ /* Data for use by back-end routines, which isn't
++ generic enough to belong in this structure. */
++ const void *backend_data;
++
++} bfd_target;
++
++bfd_boolean bfd_set_default_target (const char *name);
++
++const bfd_target *bfd_find_target (const char *target_name, bfd *abfd);
++
++const char ** bfd_target_list (void);
++
++const bfd_target *bfd_search_for_target
++ (int (*search_func) (const bfd_target *, void *),
++ void *);
++
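++/* Usage sketch: enumerate the configured target names.  bfd_target_list
++   returns a NULL-terminated vector of names such as "elf64-x86-64"
++   (the exact list depends on how BFD was configured):
++
++     const char **t = bfd_target_list ();
++     for (; t != NULL && *t != NULL; t++)
++       printf ("%s\n", *t);  */
++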
++/* Extracted from format.c. */
++bfd_boolean bfd_check_format (bfd *abfd, bfd_format format);
++
++bfd_boolean bfd_check_format_matches
++ (bfd *abfd, bfd_format format, char ***matching);
++
++bfd_boolean bfd_set_format (bfd *abfd, bfd_format format);
++
++const char *bfd_format_string (bfd_format format);
++
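++/* Usage sketch: the usual open-and-identify idiom.  bfd_openr and the
++   bfd_get_error/bfd_errmsg error reporting are declared elsewhere in
++   this header; "a.out" is an illustrative file name:
++
++     bfd *abfd = bfd_openr ("a.out", NULL);
++     if (abfd == NULL || !bfd_check_format (abfd, bfd_object))
++       fprintf (stderr, "not an object: %s\n",
++                bfd_errmsg (bfd_get_error ()));  */
++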
++/* Extracted from linker.c. */
++bfd_boolean bfd_link_split_section (bfd *abfd, asection *sec);
++
++#define bfd_link_split_section(abfd, sec) \
++ BFD_SEND (abfd, _bfd_link_split_section, (abfd, sec))
++
++void bfd_section_already_linked (bfd *abfd, asection *sec);
++
++#define bfd_section_already_linked(abfd, sec) \
++ BFD_SEND (abfd, _section_already_linked, (abfd, sec))
++
++/* Extracted from simple.c. */
++bfd_byte *bfd_simple_get_relocated_section_contents
++ (bfd *abfd, asection *sec, bfd_byte *outbuf, asymbol **symbol_table);
++
++#ifdef __cplusplus
++}
++#endif
++#endif
--- /dev/null
+ #ifndef _ASM_X86_CACHEFLUSH_H
+ #define _ASM_X86_CACHEFLUSH_H
+
+ /* Keep includes the same across arches. */
+ #include <linux/mm.h>
+
+ /* Caches aren't brain-dead on Intel. */
+ #define flush_cache_all() do { } while (0)
+ #define flush_cache_mm(mm) do { } while (0)
+ #define flush_cache_dup_mm(mm) do { } while (0)
+ #define flush_cache_range(vma, start, end) do { } while (0)
+ #define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+ #define flush_dcache_page(page) do { } while (0)
+ #define flush_dcache_mmap_lock(mapping) do { } while (0)
+ #define flush_dcache_mmap_unlock(mapping) do { } while (0)
+ #define flush_icache_range(start, end) do { } while (0)
+ #define flush_icache_page(vma, pg) do { } while (0)
+ #define flush_icache_user_range(vma, pg, adr, len) do { } while (0)
+ #define flush_cache_vmap(start, end) do { } while (0)
+ #define flush_cache_vunmap(start, end) do { } while (0)
+
+ #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+ memcpy((dst), (src), (len))
+ #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+ memcpy((dst), (src), (len))
+
+ #define PG_non_WB PG_arch_1
+ PAGEFLAG(NonWB, non_WB)
+
+ /*
+ * The set_memory_* API can be used to change various attributes of a virtual
+ * address range. The attributes include:
+  *    Cacheability  : UnCached, WriteCombining, WriteBack
+  *    Executability : eXecutable, NoteXecutable
+  *    Read/Write    : ReadOnly, ReadWrite
+  *    Presence      : NotPresent
+  *
+  * Within a category, the attributes are mutually exclusive.
+ *
+ * The implementation of this API will take care of various aspects that
+ * are associated with changing such attributes, such as:
+ * - Flushing TLBs
+ * - Flushing CPU caches
+ * - Making sure aliases of the memory behind the mapping don't violate
+ * coherency rules as defined by the CPU in the system.
+ *
+ * What this API does not do:
+ * - Provide exclusion between various callers - including callers that
+  *    operate on other mappings of the same physical page
+ * - Restore default attributes when a page is freed
+ * - Guarantee that mappings other than the requested one are
+ * in any state, other than that these do not violate rules for
+  *    the CPU you have. Do not depend on any effects on other mappings;
+  *    CPUs other than the one you have may have more relaxed rules.
+ * The caller is required to take care of these.
+ */
+
+ int _set_memory_uc(unsigned long addr, int numpages);
+ int _set_memory_wc(unsigned long addr, int numpages);
+ int _set_memory_wb(unsigned long addr, int numpages);
+ int set_memory_uc(unsigned long addr, int numpages);
+ int set_memory_wc(unsigned long addr, int numpages);
+ int set_memory_wb(unsigned long addr, int numpages);
+ int set_memory_x(unsigned long addr, int numpages);
+ int set_memory_nx(unsigned long addr, int numpages);
+ int set_memory_ro(unsigned long addr, int numpages);
+ int set_memory_rw(unsigned long addr, int numpages);
++int set_memory_rw_force(unsigned long addr, int numpages);
+ int set_memory_np(unsigned long addr, int numpages);
+ int set_memory_4k(unsigned long addr, int numpages);
+
+ int set_memory_array_uc(unsigned long *addr, int addrinarray);
+ int set_memory_array_wb(unsigned long *addr, int addrinarray);
+
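+ /*
+  * Usage sketch (illustrative, not part of the interface above): mark a
+  * page-aligned kernel buffer read-only and then writable again; "buf"
+  * and "npages" are assumed names:
+  *
+  *	set_memory_ro((unsigned long)buf, npages);
+  *	...
+  *	set_memory_rw((unsigned long)buf, npages);
+  *
+  * Both calls take care of the TLB/cache flushing described above.
+  */
+ 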
+ /*
+ * For legacy compatibility with the old APIs, a few functions
+ * are provided that work on a "struct page".
+ * These functions operate ONLY on the 1:1 kernel mapping of the
+ * memory that the struct page represents, and internally just
+ * call the set_memory_* function. See the description of the
+ * set_memory_* function for more details on conventions.
+ *
+ * These APIs should be considered *deprecated* and are likely going to
+ * be removed in the future.
+ * The reason for this is the implicit operation on the 1:1 mapping only,
+ * making this not a generally useful API.
+ *
+ * Specifically, many users of the old APIs had a virtual address,
+ * called virt_to_page() or vmalloc_to_page() on that address to
+ * get a struct page* that the old API required.
+ * To convert these cases, use set_memory_*() on the original
+ * virtual address, do not use these functions.
+ */
+
+ int set_pages_uc(struct page *page, int numpages);
+ int set_pages_wb(struct page *page, int numpages);
+ int set_pages_x(struct page *page, int numpages);
+ int set_pages_nx(struct page *page, int numpages);
+ int set_pages_ro(struct page *page, int numpages);
+ int set_pages_rw(struct page *page, int numpages);
++int set_pages_rw_force(struct page *page, int numpages);
+
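+ /*
+  * Conversion sketch, restating the guidance above: legacy code of the
+  * form
+  *
+  *	set_pages_ro(virt_to_page(addr), n);
+  *
+  * should instead call the set_memory_* variant on the original virtual
+  * address ("addr" and "n" are assumed names):
+  *
+  *	set_memory_ro((unsigned long)addr, n);
+  */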
+
+ void clflush_cache_range(void *addr, unsigned int size);
+
+ #ifdef CONFIG_DEBUG_RODATA
+ void mark_rodata_ro(void);
++void mark_rodata_rw(void);
+ extern const int rodata_test_data;
++#else
++static inline void mark_rodata_ro(void) {}
++static inline void mark_rodata_rw(void) {}
+ #endif
+
+ #ifdef CONFIG_DEBUG_RODATA_TEST
+ int rodata_test(void);
+ #else
+ static inline int rodata_test(void)
+ {
+ return 0;
+ }
+ #endif
+
+ #endif /* _ASM_X86_CACHEFLUSH_H */
--- /dev/null
+ #ifndef _ASM_X86_IOCTLS_H
+ #define _ASM_X86_IOCTLS_H
+
+ #include <asm/ioctl.h>
+
+ /* 0x54 is just a magic number to make these relatively unique ('T') */
+
+ #define TCGETS 0x5401
+ #define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
+ #define TCSETSW 0x5403
+ #define TCSETSF 0x5404
+ #define TCGETA 0x5405
+ #define TCSETA 0x5406
+ #define TCSETAW 0x5407
+ #define TCSETAF 0x5408
+ #define TCSBRK 0x5409
+ #define TCXONC 0x540A
+ #define TCFLSH 0x540B
+ #define TIOCEXCL 0x540C
+ #define TIOCNXCL 0x540D
+ #define TIOCSCTTY 0x540E
+ #define TIOCGPGRP 0x540F
+ #define TIOCSPGRP 0x5410
+ #define TIOCOUTQ 0x5411
+ #define TIOCSTI 0x5412
+ #define TIOCGWINSZ 0x5413
+ #define TIOCSWINSZ 0x5414
+ #define TIOCMGET 0x5415
+ #define TIOCMBIS 0x5416
+ #define TIOCMBIC 0x5417
+ #define TIOCMSET 0x5418
+ #define TIOCGSOFTCAR 0x5419
+ #define TIOCSSOFTCAR 0x541A
+ #define FIONREAD 0x541B
+ #define TIOCINQ FIONREAD
+ #define TIOCLINUX 0x541C
+ #define TIOCCONS 0x541D
+ #define TIOCGSERIAL 0x541E
+ #define TIOCSSERIAL 0x541F
+ #define TIOCPKT 0x5420
+ #define FIONBIO 0x5421
+ #define TIOCNOTTY 0x5422
+ #define TIOCSETD 0x5423
+ #define TIOCGETD 0x5424
+ #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
+ /* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
+ #define TIOCSBRK 0x5427 /* BSD compatibility */
+ #define TIOCCBRK 0x5428 /* BSD compatibility */
+ #define TIOCGSID 0x5429 /* Return the session ID of FD */
+ #define TCGETS2 _IOR('T', 0x2A, struct termios2)
+ #define TCSETS2 _IOW('T', 0x2B, struct termios2)
+ #define TCSETSW2 _IOW('T', 0x2C, struct termios2)
+ #define TCSETSF2 _IOW('T', 0x2D, struct termios2)
+ #define TIOCGRS485 0x542E
+ #define TIOCSRS485 0x542F
+ #define TIOCGPTN _IOR('T', 0x30, unsigned int)
+ /* Get Pty Number (of pty-mux device) */
+ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
++#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get real dev no below /dev/console */
+ #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */
+ #define TCSETX 0x5433
+ #define TCSETXF 0x5434
+ #define TCSETXW 0x5435
+
+ #define FIONCLEX 0x5450
+ #define FIOCLEX 0x5451
+ #define FIOASYNC 0x5452
+ #define TIOCSERCONFIG 0x5453
+ #define TIOCSERGWILD 0x5454
+ #define TIOCSERSWILD 0x5455
+ #define TIOCGLCKTRMIOS 0x5456
+ #define TIOCSLCKTRMIOS 0x5457
+ #define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+ #define TIOCSERGETLSR 0x5459 /* Get line status register */
+ #define TIOCSERGETMULTI 0x545A /* Get multiport config */
+ #define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+ #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
+ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
+ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */
+ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */
+ #define FIOQSIZE 0x5460
+
+ /* Used for packet mode */
+ #define TIOCPKT_DATA 0
+ #define TIOCPKT_FLUSHREAD 1
+ #define TIOCPKT_FLUSHWRITE 2
+ #define TIOCPKT_STOP 4
+ #define TIOCPKT_START 8
+ #define TIOCPKT_NOSTOP 16
+ #define TIOCPKT_DOSTOP 32
+
+ #define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
+
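+ /*
+  * Userspace usage sketch for one of the ioctls above; "fd" is assumed
+  * to be an open terminal descriptor:
+  *
+  *	struct winsize ws;
+  *	if (ioctl(fd, TIOCGWINSZ, &ws) == 0)
+  *		printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);
+  */
+ 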
+ #endif /* _ASM_X86_IOCTLS_H */
--- /dev/null
+ #ifndef _ASM_X86_IRQ_VECTORS_H
+ #define _ASM_X86_IRQ_VECTORS_H
+
+ #include <linux/threads.h>
+
+ #define NMI_VECTOR 0x02
+
+ /*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+ #define FIRST_EXTERNAL_VECTOR 0x20
+
+ #ifdef CONFIG_X86_32
+ # define SYSCALL_VECTOR 0x80
+ #else
+ # define IA32_SYSCALL_VECTOR 0x80
+ #endif
++#define KDBENTER_VECTOR 0x81
+
+ /*
+ * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration.
+ */
+ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
+
+ /*
+ * Vectors 0x30-0x3f are used for ISA interrupts.
+ */
+ #define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
+ #define IRQ1_VECTOR (IRQ0_VECTOR + 1)
+ #define IRQ2_VECTOR (IRQ0_VECTOR + 2)
+ #define IRQ3_VECTOR (IRQ0_VECTOR + 3)
+ #define IRQ4_VECTOR (IRQ0_VECTOR + 4)
+ #define IRQ5_VECTOR (IRQ0_VECTOR + 5)
+ #define IRQ6_VECTOR (IRQ0_VECTOR + 6)
+ #define IRQ7_VECTOR (IRQ0_VECTOR + 7)
+ #define IRQ8_VECTOR (IRQ0_VECTOR + 8)
+ #define IRQ9_VECTOR (IRQ0_VECTOR + 9)
+ #define IRQ10_VECTOR (IRQ0_VECTOR + 10)
+ #define IRQ11_VECTOR (IRQ0_VECTOR + 11)
+ #define IRQ12_VECTOR (IRQ0_VECTOR + 12)
+ #define IRQ13_VECTOR (IRQ0_VECTOR + 13)
+ #define IRQ14_VECTOR (IRQ0_VECTOR + 14)
+ #define IRQ15_VECTOR (IRQ0_VECTOR + 15)
+
+ /*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+  * Some of the following vectors are 'rare'; they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+ #ifdef CONFIG_X86_32
+
+ # define SPURIOUS_APIC_VECTOR 0xff
+ # define ERROR_APIC_VECTOR 0xfe
+ # define INVALIDATE_TLB_VECTOR 0xfd
+ # define RESCHEDULE_VECTOR 0xfc
+ # define CALL_FUNCTION_VECTOR 0xfb
+ # define CALL_FUNCTION_SINGLE_VECTOR 0xfa
+ # define THERMAL_APIC_VECTOR 0xf0
+
+ #else
+
+ #define SPURIOUS_APIC_VECTOR 0xff
+ #define ERROR_APIC_VECTOR 0xfe
+ #define RESCHEDULE_VECTOR 0xfd
+ #define CALL_FUNCTION_VECTOR 0xfc
+ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb
+ #define THERMAL_APIC_VECTOR 0xfa
+ #define THRESHOLD_APIC_VECTOR 0xf9
+ #define UV_BAU_MESSAGE 0xf8
+ #define INVALIDATE_TLB_VECTOR_END 0xf7
+ #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
+
+ #define NUM_INVALIDATE_TLB_VECTORS 8
+
+ #endif
+
+ /*
++ * KDB_VECTOR will take over vector 0xfe when it is needed, as in theory
++ * it should not be used anyway.
++ */
++#define KDB_VECTOR 0xfe
++
++/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+ #define LOCAL_TIMER_VECTOR 0xef
+
+ /*
++ * Perfmon PMU interrupt vector
++ */
++#define LOCAL_PERFMON_VECTOR 0xee
++
++/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee) we
+ * start at 0x31(0x41) to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
+
+ #define NR_VECTORS 256
+
+ #define FPU_IRQ 13
+
+ #define FIRST_VM86_IRQ 3
+ #define LAST_VM86_IRQ 15
+ #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+
+ #define NR_IRQS_LEGACY 16
+
+ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+ #ifndef CONFIG_SPARSE_IRQ
+ # if NR_CPUS < MAX_IO_APICS
+ # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
+ # else
+ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+ # endif
+ #else
+ # if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+ # define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+ # else
+ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+ # endif
+ #endif
+
+ #elif defined(CONFIG_X86_VOYAGER)
+
+ # define NR_IRQS 224
+
+ #else /* IO_APIC || VOYAGER */
+
+ # define NR_IRQS 16
+
+ #endif
+
+ /* Voyager specific defines */
+ /* These define the CPIs we use in linux */
+ #define VIC_CPI_LEVEL0 0
+ #define VIC_CPI_LEVEL1 1
+ /* now the fake CPIs */
+ #define VIC_TIMER_CPI 2
+ #define VIC_INVALIDATE_CPI 3
+ #define VIC_RESCHEDULE_CPI 4
+ #define VIC_ENABLE_IRQ_CPI 5
+ #define VIC_CALL_FUNCTION_CPI 6
+ #define VIC_CALL_FUNCTION_SINGLE_CPI 7
+
+ /* Now the QIC CPIs: Since we don't need the two initial levels,
+ * these are 2 less than the VIC CPIs */
+ #define QIC_CPI_OFFSET 1
+ #define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET)
+ #define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
+ #define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
+ #define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
+ #define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
+ #define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
+
+ #define VIC_START_FAKE_CPI VIC_TIMER_CPI
+ #define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI
+
+ /* this is the SYS_INT CPI. */
+ #define VIC_SYS_INT 8
+ #define VIC_CMN_INT 15
+
+ /* This is the boot CPI for alternate processors. It gets overwritten
+ * by the above once the system has activated all available processors */
+ #define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0
+ #define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8)
+
+
+ #endif /* _ASM_X86_IRQ_VECTORS_H */
--- /dev/null
--- /dev/null
++#ifndef _ASM_KDB_H
++#define _ASM_KDB_H
++
++/*
++ * Kernel Debugger Architecture Dependent (x86) Global Headers
++ *
++ * This file is subject to the terms and conditions of the GNU General Public
++ * License. See the file "COPYING" in the main directory of this archive
++ * for more details.
++ *
++ * Copyright (c) 1999-2008 Silicon Graphics, Inc. All Rights Reserved.
++ */
++
++/*
++ * KDB_ENTER() is a macro which causes entry into the kernel
++ * debugger from any point in the kernel code stream. If it
++ * is intended to be used from interrupt level, it must use
++ * a non-maskable entry method. The vector used here is
++ * KDBENTER_VECTOR (0x81, i.e. "int $129"), defined in irq_vectors.h.
++ */
++#define KDB_ENTER() do {if (kdb_on && !KDB_IS_RUNNING()) { asm("\tint $129\n"); }} while(0)
++
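++/*
++ * Usage sketch: drop into the debugger from an arbitrary code path;
++ * the macro itself checks kdb_on and KDB_IS_RUNNING(), and
++ * "something_is_badly_wrong" is an assumed condition:
++ *
++ *	if (something_is_badly_wrong)
++ *		KDB_ENTER();
++ */
++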
++/*
++ * Needed for exported symbols.
++ */
++typedef unsigned long kdb_machreg_t;
++
++/*
++ * Per-cpu arch-specific kdb state. Must lie within the 0xff000000 mask.
++ */
++#define KDB_STATE_A_IF 0x01000000 /* Saved IF flag */
++
++
++#ifdef CONFIG_X86_32
++
++#define kdb_machreg_fmt "0x%lx"
++#define kdb_machreg_fmt0 "0x%08lx"
++#define kdb_bfd_vma_fmt "0x%lx"
++#define kdb_bfd_vma_fmt0 "0x%08lx"
++#define kdb_elfw_addr_fmt "0x%x"
++#define kdb_elfw_addr_fmt0 "0x%08x"
++
++#else /* CONFIG_X86_32 */
++
++#define kdb_machreg_fmt "0x%lx"
++#define kdb_machreg_fmt0 "0x%016lx"
++#define kdb_bfd_vma_fmt "0x%lx"
++#define kdb_bfd_vma_fmt0 "0x%016lx"
++#define kdb_elfw_addr_fmt "0x%x"
++#define kdb_elfw_addr_fmt0 "0x%016x"
++
++/*
++ * Functions to safely read and write kernel areas. The {to,from}_xxx
++ * addresses are not necessarily valid, these functions must check for
++ * validity. If the arch already supports get and put routines with
++ * suitable validation and/or recovery on invalid addresses then use
++ * those routines, otherwise check it yourself.
++ */
++
++/*
++ * asm-i386 uaccess.h supplies __copy_to_user which relies on MMU to
++ * trap invalid addresses in the _xxx fields. Verify the other address
++ * of the pair is valid by accessing the first and last byte ourselves;
++ * any access violations should then only be caused by the _xxx
++ * addresses.
++ */
++
++#include <asm/uaccess.h>
++
++static inline int
++__kdba_putarea_size(unsigned long to_xxx, void *from, size_t size)
++{
++ mm_segment_t oldfs = get_fs();
++ int r;
++ char c;
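++	/* Touch the first and last byte of the trusted source buffer so
++	 * that any fault occurs here, not in the copy below; faults in
++	 * the copy can then only come from the to_xxx address. */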
++ c = *((volatile char *)from);
++ c = *((volatile char *)from + size - 1);
++
++ if (to_xxx < PAGE_OFFSET) {
++ return kdb_putuserarea_size(to_xxx, from, size);
++ }
++
++ set_fs(KERNEL_DS);
++ r = __copy_to_user_inatomic((void *)to_xxx, from, size);
++ set_fs(oldfs);
++ return r;
++}
++
++static inline int
++__kdba_getarea_size(void *to, unsigned long from_xxx, size_t size)
++{
++ mm_segment_t oldfs = get_fs();
++ int r;
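++	/* Touch the first and last byte of the trusted destination so
++	 * that any fault occurs here; faults in the copy below can then
++	 * only come from the from_xxx address. */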
++ *((volatile char *)to) = '\0';
++ *((volatile char *)to + size - 1) = '\0';
++
++ if (from_xxx < PAGE_OFFSET) {
++ return kdb_getuserarea_size(to, from_xxx, size);
++ }
++
++ set_fs(KERNEL_DS);
++ r = __copy_to_user_inatomic(to, (void *)from_xxx, size);
++ set_fs(oldfs);
++ return r;
++}
++
++/* For NUMA with replicated code/data, the platform must supply its own
++ * kdba_putarea_size and kdba_getarea_size routines. Without replication kdb
++ * uses the standard architecture routines.
++ */
++#ifdef CONFIG_NUMA_REPLICATE
++extern int kdba_putarea_size(unsigned long to_xxx, void *from, size_t size);
++extern int kdba_getarea_size(void *to, unsigned long from_xxx, size_t size);
++#else
++#define kdba_putarea_size __kdba_putarea_size
++#define kdba_getarea_size __kdba_getarea_size
++#endif
++
++static inline int
++kdba_verify_rw(unsigned long addr, size_t size)
++{
++ unsigned char data[size];
++ return(kdba_getarea_size(data, addr, size) || kdba_putarea_size(addr, data, size));
++}
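++
++/*
++ * Usage sketch: fetch one machine word from a possibly invalid kernel
++ * address without faulting kdb itself ("addr" is an assumed variable):
++ *
++ *	unsigned long word;
++ *	if (kdba_getarea_size(&word, addr, sizeof(word)) == 0)
++ *		kdb_printf("%016lx\n", word);
++ */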
++
++#endif /* !CONFIG_X86_32 */
++
++static inline unsigned long
++kdba_funcptr_value(void *fp)
++{
++ return (unsigned long)fp;
++}
++
++#ifdef CONFIG_SMP
++extern void kdba_giveback_vector(int);
++#endif
++
++#endif /* !_ASM_KDB_H */
--- /dev/null
--- /dev/null
++#ifndef _ASM_KDBPRIVATE_H
++#define _ASM_KDBPRIVATE_H
++
++/*
++ * Kernel Debugger Architecture Dependent (x86) Private Headers
++ *
++ * This file is subject to the terms and conditions of the GNU General Public
++ * License. See the file "COPYING" in the main directory of this archive
++ * for more details.
++ *
++ * Copyright (c) 1999-2008 Silicon Graphics, Inc. All Rights Reserved.
++ */
++
++typedef unsigned char kdb_machinst_t;
++
++/*
++ * KDB_MAXBPT describes the total number of breakpoints
++ * supported by this architecure.
++ */
++#define KDB_MAXBPT 16
++
++/*
++ * KDB_MAXHARDBPT describes the total number of hardware
++ * breakpoint registers that exist.
++ */
++#define KDB_MAXHARDBPT 4
++
++/* Maximum number of arguments to a function */
++#define KDBA_MAXARGS 16
++
++/*
++ * Support for ia32 debug registers
++ */
++typedef struct _kdbhard_bp {
++ kdb_machreg_t bph_reg; /* Register this breakpoint uses */
++
++ unsigned int bph_free:1; /* Register available for use */
++ unsigned int bph_data:1; /* Data Access breakpoint */
++
++ unsigned int bph_write:1; /* Write Data breakpoint */
++ unsigned int bph_mode:2; /* 0=inst, 1=write, 2=io, 3=read */
++ unsigned int bph_length:2; /* 0=1, 1=2, 2=BAD, 3=4 (bytes) */
++ unsigned int bph_installed; /* flag: hw bp is installed */
++} kdbhard_bp_t;
++
++#define IA32_BREAKPOINT_INSTRUCTION 0xcc
++
++#define DR6_BT 0x00008000
++#define DR6_BS 0x00004000
++#define DR6_BD 0x00002000
++
++#define DR6_B3 0x00000008
++#define DR6_B2 0x00000004
++#define DR6_B1 0x00000002
++#define DR6_B0 0x00000001
++#define DR6_DR_MASK 0x0000000F
++
++#define DR7_RW_VAL(dr, drnum) \
++ (((dr) >> (16 + (4 * (drnum)))) & 0x3)
++
++#define DR7_RW_SET(dr, drnum, rw) \
++ do { \
++ (dr) &= ~(0x3 << (16 + (4 * (drnum)))); \
++ (dr) |= (((rw) & 0x3) << (16 + (4 * (drnum)))); \
++ } while (0)
++
++#define DR7_RW0(dr) DR7_RW_VAL(dr, 0)
++#define DR7_RW0SET(dr,rw) DR7_RW_SET(dr, 0, rw)
++#define DR7_RW1(dr) DR7_RW_VAL(dr, 1)
++#define DR7_RW1SET(dr,rw) DR7_RW_SET(dr, 1, rw)
++#define DR7_RW2(dr) DR7_RW_VAL(dr, 2)
++#define DR7_RW2SET(dr,rw) DR7_RW_SET(dr, 2, rw)
++#define DR7_RW3(dr) DR7_RW_VAL(dr, 3)
++#define DR7_RW3SET(dr,rw) DR7_RW_SET(dr, 3, rw)
++
++
++#define DR7_LEN_VAL(dr, drnum) \
++ (((dr) >> (18 + (4 * (drnum)))) & 0x3)
++
++#define DR7_LEN_SET(dr, drnum, rw) \
++ do { \
++ (dr) &= ~(0x3 << (18 + (4 * (drnum)))); \
++ (dr) |= (((rw) & 0x3) << (18 + (4 * (drnum)))); \
++ } while (0)
++
++#define DR7_LEN0(dr) DR7_LEN_VAL(dr, 0)
++#define DR7_LEN0SET(dr,len) DR7_LEN_SET(dr, 0, len)
++#define DR7_LEN1(dr) DR7_LEN_VAL(dr, 1)
++#define DR7_LEN1SET(dr,len) DR7_LEN_SET(dr, 1, len)
++#define DR7_LEN2(dr) DR7_LEN_VAL(dr, 2)
++#define DR7_LEN2SET(dr,len) DR7_LEN_SET(dr, 2, len)
++#define DR7_LEN3(dr) DR7_LEN_VAL(dr, 3)
++#define DR7_LEN3SET(dr,len) DR7_LEN_SET(dr, 3, len)
++
++#define DR7_G0(dr) (((dr)>>1)&0x1)
++#define DR7_G0SET(dr) ((dr) |= 0x2)
++#define DR7_G0CLR(dr) ((dr) &= ~0x2)
++#define DR7_G1(dr) (((dr)>>3)&0x1)
++#define DR7_G1SET(dr) ((dr) |= 0x8)
++#define DR7_G1CLR(dr) ((dr) &= ~0x8)
++#define DR7_G2(dr) (((dr)>>5)&0x1)
++#define DR7_G2SET(dr) ((dr) |= 0x20)
++#define DR7_G2CLR(dr) ((dr) &= ~0x20)
++#define DR7_G3(dr) (((dr)>>7)&0x1)
++#define DR7_G3SET(dr) ((dr) |= 0x80)
++#define DR7_G3CLR(dr) ((dr) &= ~0x80)
++
++#define DR7_L0(dr) (((dr))&0x1)
++#define DR7_L0SET(dr) ((dr) |= 0x1)
++#define DR7_L0CLR(dr) ((dr) &= ~0x1)
++#define DR7_L1(dr) (((dr)>>2)&0x1)
++#define DR7_L1SET(dr) ((dr) |= 0x4)
++#define DR7_L1CLR(dr) ((dr) &= ~0x4)
++#define DR7_L2(dr) (((dr)>>4)&0x1)
++#define DR7_L2SET(dr) ((dr) |= 0x10)
++#define DR7_L2CLR(dr) ((dr) &= ~0x10)
++#define DR7_L3(dr) (((dr)>>6)&0x1)
++#define DR7_L3SET(dr) ((dr) |= 0x40)
++#define DR7_L3CLR(dr) ((dr) &= ~0x40)
++
++#define DR7_GD 0x00002000 /* General Detect Enable */
++#define DR7_GE 0x00000200 /* Global exact */
++#define DR7_LE 0x00000100 /* Local exact */
++
++extern kdb_machreg_t kdba_getdr6(void);
++extern void kdba_putdr6(kdb_machreg_t);
++
++extern kdb_machreg_t kdba_getdr7(void);
++
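++/*
++ * Usage sketch: program debug-register slot 0 as a 4-byte write
++ * watchpoint, globally enabled; "dr7" is a local copy of the register
++ * and the encodings follow the comments above (mode 1 = write,
++ * length encoding 3 = 4 bytes):
++ *
++ *	kdb_machreg_t dr7 = kdba_getdr7();
++ *	DR7_RW0SET(dr7, 1);
++ *	DR7_LEN0SET(dr7, 3);
++ *	DR7_G0SET(dr7);
++ */
++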
++struct kdba_running_process {
++ long sp; /* KDB may be on a different stack */
++ long ip; /* eip when esp was set */
++};
++
++static inline
++void kdba_unsave_running(struct kdba_running_process *k, struct pt_regs *regs)
++{
++}
++
++struct kdb_activation_record;
++extern void kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
++ struct kdb_activation_record *ar);
++
++extern void kdba_wait_for_cpus(void);
++
++
++#ifdef CONFIG_X86_32
++
++#define DR_TYPE_EXECUTE 0x0
++#define DR_TYPE_WRITE 0x1
++#define DR_TYPE_IO 0x2
++#define DR_TYPE_RW 0x3
++
++/*
++ * Platform specific environment entries
++ */
++#define KDB_PLATFORM_ENV "IDMODE=x86", "BYTESPERWORD=4", "IDCOUNT=16"
++
++/*
++ * Support for setjmp/longjmp
++ */
++#define JB_BX 0
++#define JB_SI 1
++#define JB_DI 2
++#define JB_BP 3
++#define JB_SP 4
++#define JB_PC 5
++
++typedef struct __kdb_jmp_buf {
++ unsigned long regs[6]; /* kdba_setjmp assumes fixed offsets here */
++} kdb_jmp_buf;
++
++extern int asmlinkage kdba_setjmp(kdb_jmp_buf *);
++extern void asmlinkage kdba_longjmp(kdb_jmp_buf *, int);
++#define kdba_setjmp kdba_setjmp
++
++extern kdb_jmp_buf *kdbjmpbuf;
++
++/* Arch specific data saved for running processes */
++static inline
++void kdba_save_running(struct kdba_running_process *k, struct pt_regs *regs)
++{
++ k->sp = current_stack_pointer;
++ __asm__ __volatile__ ( " lea 1f,%%eax; movl %%eax,%0 ; 1: " : "=r"(k->ip) : : "eax" );
++}
++
++extern void kdb_interrupt(void);
++
++#define KDB_INT_REGISTERS 8
++
++#else /* CONFIG_X86_32 */
++
++extern kdb_machreg_t kdba_getdr(int);
++extern void kdba_putdr(int, kdb_machreg_t);
++
++extern kdb_machreg_t kdb_getcr(int);
++
++/*
++ * Platform specific environment entries
++ */
++#define KDB_PLATFORM_ENV "IDMODE=x86_64", "BYTESPERWORD=8", "IDCOUNT=16"
++
++/*
++ * Register indices for x86_64 setjmp/longjmp
++ */
++#define JB_RBX 0
++#define JB_RBP 1
++#define JB_R12 2
++#define JB_R13 3
++#define JB_R14 4
++#define JB_R15 5
++#define JB_RSP 6
++#define JB_PC 7
++
++typedef struct __kdb_jmp_buf {
++ unsigned long regs[8]; /* kdba_setjmp assumes fixed offsets here */
++} kdb_jmp_buf;
++
++extern int asmlinkage kdba_setjmp(kdb_jmp_buf *);
++extern void asmlinkage kdba_longjmp(kdb_jmp_buf *, int);
++#define kdba_setjmp kdba_setjmp
++
++extern kdb_jmp_buf *kdbjmpbuf;
++
++/* Arch specific data saved for running processes */
++register unsigned long current_stack_pointer asm("rsp") __used;
++
++static inline
++void kdba_save_running(struct kdba_running_process *k, struct pt_regs *regs)
++{
++ k->sp = current_stack_pointer;
++ __asm__ __volatile__ ( " lea 0(%%rip),%%rax; movq %%rax,%0 ; " : "=r"(k->ip) : : "rax" );
++}
++
++extern asmlinkage void kdb_interrupt(void);
++
++#define KDB_INT_REGISTERS 16
++
++#endif /* !CONFIG_X86_32 */
++
++#endif /* !_ASM_KDBPRIVATE_H */
--- /dev/null
+ #ifndef _ASM_X86_KDEBUG_H
+ #define _ASM_X86_KDEBUG_H
+
+ #include <linux/notifier.h>
+
+ struct pt_regs;
+
+ /* Grossly misnamed. */
+ enum die_val {
+ DIE_OOPS = 1,
+ DIE_INT3,
+ DIE_DEBUG,
+ DIE_PANIC,
+ DIE_NMI,
+ DIE_DIE,
+ DIE_NMIWATCHDOG,
+ DIE_KERNELDEBUG,
++ DIE_KDEBUG_ENTER,
++ DIE_KDEBUG_LEAVE,
+ DIE_TRAP,
+ DIE_GPF,
+ DIE_CALL,
+ DIE_NMI_IPI,
+ DIE_PAGE_FAULT,
+ DIE_NMIUNKNOWN,
+ };
+
+ extern void printk_address(unsigned long address, int reliable);
+ extern void die(const char *, struct pt_regs *,long);
+ extern int __must_check __die(const char *, struct pt_regs *, long);
+ extern void show_registers(struct pt_regs *regs);
+ extern void show_trace(struct task_struct *t, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp);
+ extern void __show_regs(struct pt_regs *regs, int all);
+ extern void show_regs(struct pt_regs *regs);
+ extern unsigned long oops_begin(void);
+ extern void oops_end(unsigned long, struct pt_regs *, int signr);
+
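+ /*
+  * Usage sketch: a debugger consumes the new DIE_KDEBUG_* events through
+  * a die notifier; "my_entry_hook" is an assumed callback registered
+  * with register_die_notifier():
+  *
+  *	static int my_entry_hook(struct notifier_block *nb,
+  *				 unsigned long val, void *data)
+  *	{
+  *		if (val == DIE_KDEBUG_ENTER)
+  *			return NOTIFY_STOP;
+  *		return NOTIFY_DONE;
+  *	}
+  */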
+ #endif /* _ASM_X86_KDEBUG_H */
--- /dev/null
+ #ifndef _ASM_X86_KMAP_TYPES_H
+ #define _ASM_X86_KMAP_TYPES_H
+
+ #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
+ # define D(n) __KM_FENCE_##n ,
+ #else
+ # define D(n)
+ #endif
+
+ enum km_type {
+ D(0) KM_BOUNCE_READ,
+ D(1) KM_SKB_SUNRPC_DATA,
+ D(2) KM_SKB_DATA_SOFTIRQ,
+ D(3) KM_USER0,
+ D(4) KM_USER1,
+ D(5) KM_BIO_SRC_IRQ,
+ D(6) KM_BIO_DST_IRQ,
+ D(7) KM_PTE0,
+ D(8) KM_PTE1,
+ D(9) KM_IRQ0,
+ D(10) KM_IRQ1,
+ D(11) KM_SOFTIRQ0,
+ D(12) KM_SOFTIRQ1,
-D(13) KM_TYPE_NR
++D(13) KM_KDB,
++D(14) KM_TYPE_NR
+ };
+
+ #undef D
+
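+ /*
+  * Usage sketch for the new KM_KDB slot ("page" is an assumed
+  * struct page *); kunmap_atomic takes the returned address plus the
+  * same slot:
+  *
+  *	char *vaddr = kmap_atomic(page, KM_KDB);
+  *	...
+  *	kunmap_atomic(vaddr, KM_KDB);
+  */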
+ #endif /* _ASM_X86_KMAP_TYPES_H */
--- /dev/null
+ /*
+ * This file is designed to contain the BUILD_INTERRUPT specifications for
+ * all of the extra named interrupt vectors used by the architecture.
+ * Usually this is the Inter Process Interrupts (IPIs)
+ */
+
+ /*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+ #ifdef CONFIG_X86_SMP
+ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
+ BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+ #endif
+
+ /*
+  * Every Pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+ #ifdef CONFIG_X86_LOCAL_APIC
+ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+
+ #ifdef CONFIG_X86_MCE_P4THERMAL
+ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+ #endif
+
++#ifdef CONFIG_PERFMON
++BUILD_INTERRUPT(pmu_interrupt,LOCAL_PERFMON_VECTOR)
++#endif
++
+ #endif
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * This file contains i386/x86_64 specific definitions for the perfmon
++ * interface.
++ *
++ * This file MUST never be included directly. Use linux/perfmon.h.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ */
++#ifndef _ASM_X86_PERFMON__H_
++#define _ASM_X86_PERFMON__H_
++
++/*
++ * arch-specific user visible interface definitions
++ */
++
++#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW + 64 SW */
++#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW + 64 SW */
++
++#endif /* _ASM_X86_PERFMON__H_ */
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * Copyright (c) 2007 Advanced Micro Devices, Inc.
++ * Contributed by Robert Richter <robert.richter@amd.com>
++ *
++ * This file contains X86 Processor Family specific definitions
++ * for the perfmon interface. This covers P6, Pentium M, P4/Xeon
++ * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ */
++#ifndef _ASM_X86_PERFMON_KERN_H_
++#define _ASM_X86_PERFMON_KERN_H_
++
++#ifdef CONFIG_PERFMON
++#include <linux/unistd.h>
++#ifdef CONFIG_4KSTACKS
++#define PFM_ARCH_PMD_STK_ARG 2
++#define PFM_ARCH_PMC_STK_ARG 2
++#else
++#define PFM_ARCH_PMD_STK_ARG 4 /* about 700 bytes of stack space */
++#define PFM_ARCH_PMC_STK_ARG 4 /* about 200 bytes of stack space */
++#endif
++
++struct pfm_arch_pmu_info {
++ u32 flags; /* PMU feature flags */
++ /*
++ * mandatory model-specific callbacks
++ */
++ int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set);
++ int (*has_ovfls)(struct pfm_context *ctx);
++ void (*quiesce)(void);
++
++ /*
++ * optional model-specific callbacks
++ */
++ void (*acquire_pmu_percpu)(void);
++ void (*release_pmu_percpu)(void);
++ int (*create_context)(struct pfm_context *ctx, u32 ctx_flags);
++ void (*free_context)(struct pfm_context *ctx);
++ int (*load_context)(struct pfm_context *ctx);
++ void (*unload_context)(struct pfm_context *ctx);
++ void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value);
++ void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value);
++ u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum);
++ u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum);
++ void (*nmi_copy_state)(struct pfm_context *ctx);
++ void (*restore_pmcs)(struct pfm_context *ctx,
++ struct pfm_event_set *set);
++ void (*restore_pmds)(struct pfm_context *ctx,
++ struct pfm_event_set *set);
++};
++
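++/*
++ * Usage sketch: a model-specific PMU module fills in at least the
++ * mandatory callbacks above; the pfm_mymodel_* names are assumed:
++ *
++ *	static struct pfm_arch_pmu_info pfm_mymodel_pmu_info = {
++ *		.stop_save = pfm_mymodel_stop_save,
++ *		.has_ovfls = pfm_mymodel_has_ovfls,
++ *		.quiesce   = pfm_mymodel_quiesce,
++ *	};
++ */
++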
++/*
++ * PMU feature flags
++ */
++#define PFM_X86_FL_USE_NMI 0x01 /* user asking for NMI */
++#define PFM_X86_FL_NO_SHARING 0x02 /* no sharing with other subsystems */
++#define PFM_X86_FL_SHARING 0x04 /* PMU is being shared */
++
++struct pfm_x86_ctx_flags {
++ unsigned int insecure:1; /* rdpmc per-thread self-monitoring */
++ unsigned int use_pebs:1; /* PEBS used */
++ unsigned int use_ds:1; /* DS used */
++ unsigned int reserved:29; /* for future use */
++};
++
++struct pfm_arch_context {
++ u64 saved_real_iip; /* instr pointer of last NMI intr */
++ struct pfm_x86_ctx_flags flags; /* flags */
++ void *ds_area; /* address of DS area (to go away) */
++ void *data; /* model-specific data */
++};
++
++/*
++ * functions implemented as inline on x86
++ */
++
++/**
++ * pfm_arch_write_pmc - write a single PMC register
++ * @ctx: context to work on
++ * @cnum: PMC index
++ * @value: PMC 64-bit value
++ *
++ * in certain situations, ctx may be NULL
++ */
++static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
++ unsigned int cnum, u64 value)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++
++ pmu_info = pfm_pmu_info();
++
++ /*
++ * we only write to the actual register when monitoring is
++ * active (pfm_start was issued)
++ */
++ if (ctx && ctx->flags.started == 0)
++ return;
++
++ /*
++ * model-specific override, if any
++ */
++ if (pmu_info->write_pmc) {
++ pmu_info->write_pmc(ctx, cnum, value);
++ return;
++ }
++
++ PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)",
++ pfm_pmu_conf->pmc_desc[cnum].hw_addr,
++ (unsigned long long) value);
++
++ wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
++}
++
++/**
++ * pfm_arch_write_pmd - write a single PMD register
++ * @ctx: context to work on
++ * @cnum: PMD index
++ * @value: PMD 64-bit value
++ */
++static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
++ unsigned int cnum, u64 value)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++
++ pmu_info = pfm_pmu_info();
++
++ /*
++	 * To make sure the counter overflows, we set the
++	 * upper bits. We also clear any other unimplemented
++	 * bits, as these may cause a crash on some processors.
++ */
++ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
++ value = (value | ~pfm_pmu_conf->ovfl_mask)
++ & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
++
++ PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)",
++ pfm_pmu_conf->pmd_desc[cnum].hw_addr,
++ (unsigned long long) value);
++
++ /*
++ * model-specific override, if any
++ */
++ if (pmu_info->write_pmd) {
++ pmu_info->write_pmd(ctx, cnum, value);
++ return;
++ }
++
++ wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
++}
++
++/**
++ * pfm_arch_read_pmd - read a single PMD register
++ * @ctx: context to work on
++ * @cnum: PMD index
++ *
++ * return value is register 64-bit value
++ */
++static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++ u64 tmp;
++
++ pmu_info = pfm_pmu_info();
++
++ /*
++ * model-specific override, if any
++ */
++ if (pmu_info->read_pmd)
++ tmp = pmu_info->read_pmd(ctx, cnum);
++ else
++ rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
++
++ PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx",
++ pfm_pmu_conf->pmd_desc[cnum].hw_addr,
++ (unsigned long long) tmp);
++ return tmp;
++}
++
++/**
++ * pfm_arch_read_pmc - read a single PMC register
++ * @ctx: context to work on
++ * @cnum: PMC index
++ *
++ * return value is register 64-bit value
++ */
++static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++ u64 tmp;
++
++ pmu_info = pfm_pmu_info();
++
++ /*
++ * model-specific override, if any
++ */
++ if (pmu_info->read_pmc)
++ tmp = pmu_info->read_pmc(ctx, cnum);
++ else
++ rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
++
++ PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx",
++ pfm_pmu_conf->pmc_desc[cnum].hw_addr,
++ (unsigned long long) tmp);
++ return tmp;
++}
++
++/**
++ * pfm_arch_is_active - return non-zero if monitoring has been started
++ * @ctx: context to check
++ *
++ * At certain points, perfmon needs to know if monitoring has been
++ * explicitly started.
++ *
++ * On x86, there is no other way but to use pfm_start/pfm_stop
++ * to activate monitoring, thus we can simply check flags.started
++ */
++static inline int pfm_arch_is_active(struct pfm_context *ctx)
++{
++ return ctx->flags.started;
++}
++
++
++/**
++ * pfm_arch_unload_context - detach context from thread or CPU
++ * @ctx: context to detach
++ *
++ * in system-wide ctx->task is NULL, otherwise it points to the
++ * attached thread
++ */
++static inline void pfm_arch_unload_context(struct pfm_context *ctx)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++ struct pfm_arch_context *ctx_arch;
++
++ ctx_arch = pfm_ctx_arch(ctx);
++ pmu_info = pfm_pmu_info();
++
++ if (ctx_arch->flags.insecure) {
++ PFM_DBG("clear cr4.pce");
++ clear_in_cr4(X86_CR4_PCE);
++ }
++
++ if (pmu_info->unload_context)
++ pmu_info->unload_context(ctx);
++}
++
++/**
++ * pfm_arch_load_context - attach context to thread or CPU
++ * @ctx: context to attach
++ */
++static inline int pfm_arch_load_context(struct pfm_context *ctx)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++ struct pfm_arch_context *ctx_arch;
++ int ret = 0;
++
++ ctx_arch = pfm_ctx_arch(ctx);
++ pmu_info = pfm_pmu_info();
++
++ /*
++ * RDPMC authorized in system-wide and
++ * per-thread self-monitoring.
++ *
++ * RDPMC only gives access to counts.
++ *
++ * The context-switch routine code does not restore
++ * all the PMD registers (optimization), thus there
++ * is a possible leak of counts there in per-thread
++ * mode.
++ */
++ if (ctx->task == current || ctx->flags.system) {
++ PFM_DBG("set cr4.pce");
++ set_in_cr4(X86_CR4_PCE);
++ ctx_arch->flags.insecure = 1;
++ }
++
++ if (pmu_info->load_context)
++ ret = pmu_info->load_context(ctx);
++
++ return ret;
++}
++
++void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
++void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
++void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
++
++/**
++ * pfm_arch_unmask_monitoring - unmask monitoring
++ * @ctx: context to unmask
++ * @set: current event set
++ *
++ * Masking is slightly different from stopping in that it does not undo
++ * the pfm_start() issued by the user. This is used in conjunction with
++ * sampling. Masking means stop monitoring, but do not authorize the user
++ * to issue pfm_start/stop during that time. Unmasking is achieved via
++ * pfm_restart() and may also depend on the sampling format used.
++ *
++ * On x86, masking/unmasking use the start/stop mechanism, except
++ * that flags.started is not modified.
++ */
++static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ pfm_arch_start(current, ctx);
++}
++
++/**
++ * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt
++ * @ctx: current context
++ * @set: current event set
++ *
++ * called from __pfm_interrupt_handler().
++ * ctx is not NULL. ctx is locked. interrupts are masked
++ *
++ * The following actions must take place:
++ *   - stop all monitoring to ensure the handler has a consistent view.
++ *   - collect the bitmask of overflowed PMDs into povfls_pmds and
++ *     npend_ovfls. If no overflow is detected, npend_ovfls
++ *     must be set to zero.
++ */
++static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{
++ /*
++ * on X86, freezing is equivalent to stopping
++ */
++ pfm_arch_stop(current, ctx);
++
++ /*
++ * we mark monitoring as stopped to avoid
++ * certain side effects especially in
++ * pfm_switch_sets_from_intr() and
++ * pfm_arch_restore_pmcs()
++ */
++ ctx->flags.started = 0;
++}
++
++/**
++ * pfm_arch_intr_unfreeze_pmu - conditionally reactivate monitoring
++ * @ctx: current context
++ *
++ * The current context may be NULL when dealing with spurious interrupts.
++ *
++ * Must re-activate monitoring if the context is not MASKED.
++ * Interrupts are masked.
++ */
++static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
++{
++ if (ctx == NULL)
++ return;
++
++ PFM_DBG_ovfl("state=%d", ctx->state);
++
++ /*
++ * restore flags.started which is cleared in
++ * pfm_arch_intr_freeze_pmu()
++ */
++ ctx->flags.started = 1;
++
++ if (ctx->state == PFM_CTX_MASKED)
++ return;
++
++ pfm_arch_restore_pmcs(ctx, ctx->active_set);
++}
++
++/**
++ * pfm_arch_setfl_sane - check arch/model specific event set flags
++ * @ctx: context to work on
++ * @flags: event set flags as passed by user
++ *
++ * called from pfm_setfl_sane(). Context is locked. Interrupts are masked.
++ *
++ * Return:
++ * 0 when flags are valid
++ * 1 on error
++ */
++static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
++{
++ return 0;
++}
++
++/**
++ * pfm_arch_ovfl_reset_pmd - reset pmd on overflow
++ * @ctx: current context
++ * @cnum: PMD index
++ *
++ * On some CPUs, the upper bits of a counter must be set in order for the
++ * overflow interrupt to happen. On overflow, the counter has wrapped around,
++ * and the upper bits are cleared. This function may be used to set them back.
++ *
++ * For x86, the current version loses whatever is remaining in the counter,
++ * which is usually a small count. In order not to lose this count,
++ * we do a read-modify-write to set the upper bits while preserving the
++ * low-order bits. This is slow but works.
++ */
++static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum)
++{
++ u64 val;
++ val = pfm_arch_read_pmd(ctx, cnum);
++ pfm_arch_write_pmd(ctx, cnum, val);
++}
++
++/**
++ * pfm_arch_context_create - create context
++ * @ctx: newly created context
++ * @flags: context flags as passed by user
++ *
++ * called from __pfm_create_context()
++ */
++static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++
++ pmu_info = pfm_pmu_info();
++
++ if (pmu_info->create_context)
++ return pmu_info->create_context(ctx, ctx_flags);
++
++ return 0;
++}
++
++/**
++ * pfm_arch_context_free - free context
++ * @ctx: context to free
++ */
++static inline void pfm_arch_context_free(struct pfm_context *ctx)
++{
++ struct pfm_arch_pmu_info *pmu_info;
++
++ pmu_info = pfm_pmu_info();
++
++ if (pmu_info->free_context)
++ pmu_info->free_context(ctx);
++}
++
++/*
++ * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they
++ * will not cause interrupts when unused.
++ *
++ * This is a nop on x86
++ */
++static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{}
++
++/*
++ * functions implemented in arch/x86/perfmon/perfmon.c
++ */
++int pfm_arch_init(void);
++void pfm_arch_resend_irq(struct pfm_context *ctx);
++
++int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
++void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
++
++void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
++int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg);
++void pfm_arch_pmu_config_remove(void);
++char *pfm_arch_get_pmu_module_name(void);
++int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds);
++void pfm_arch_pmu_release(void);
++
++/*
++ * pfm_arch_serialize - make PMU modifications visible to subsequent instructions
++ *
++ * This is a nop on x86
++ */
++static inline void pfm_arch_serialize(void)
++{}
++
++/*
++ * On x86, the PMDs are already saved by pfm_arch_intr_freeze_pmu()
++ * when entering the PMU interrupt handler, thus we do not need
++ * to save them again in pfm_switch_sets_from_intr()
++ */
++static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{}
++
++
++static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
++ struct pfm_context *ctx)
++{}
++
++static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
++ struct pfm_context *ctx)
++{}
++
++static inline void pfm_arch_init_percpu(void)
++{}
++
++static inline void pfm_cacheflush(void *addr, unsigned int len)
++{}
++
++/*
++ * this function is called from the PMU interrupt handler ONLY.
++ * On x86, the PMU is frozen via arch_stop; masking would be implemented
++ * via arch_stop as well. Given that the PMU is already stopped when
++ * entering the interrupt handler, we do not need to stop it again, so
++ * this function is a nop.
++ */
++static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
++ struct pfm_event_set *set)
++{}
++
++
++static inline void pfm_arch_arm_handle_work(struct task_struct *task)
++{}
++
++static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
++{}
++
++#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
++/*
++ * x86 does not need extra alignment requirements for the sampling buffer
++ */
++#define PFM_ARCH_SMPL_ALIGN_SIZE 0
++
++asmlinkage void pmu_interrupt(void);
++
++#endif /* CONFIG_PERFMON */
++
++#endif /* _ASM_X86_PERFMON_KERN_H_ */
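The read-modify-write reset in pfm_arch_ovfl_reset_pmd() above can be
illustrated in isolation. A minimal user-space sketch, assuming a
hypothetical 40-bit counter width (real PMD widths vary by CPU model)
and a stand-in for the read/write helpers:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_WIDTH 40 /* assumed width; model-specific in reality */

    /* After an overflow the low PMD_WIDTH bits hold a small residual
     * count and the upper bits are clear, which would prevent the next
     * overflow interrupt. Set the upper bits back while preserving the
     * low-order bits, as the comment above describes. */
    static uint64_t pmd_set_upper_bits(uint64_t hw_val)
    {
            uint64_t mask = (1ULL << PMD_WIDTH) - 1;
            return (hw_val & mask) | ~mask;
    }

    int main(void)
    {
            uint64_t after_ovfl = 0x3; /* residual count after the wrap */
            printf("reset value: 0x%016llx\n",
                   (unsigned long long)pmd_set_upper_bits(after_ovfl));
            return 0;
    }
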
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ *
++ * This file implements the sampling format to support Intel
++ * Precise Event Based Sampling (PEBS) feature of Intel Core
++ * processors, such as Intel Core 2.
++ *
++ * What is PEBS?
++ * ------------
++ * This is a hardware feature to enhance sampling by providing
++ * better precision as to where a sample is taken. This avoids the
++ * typical skew in the instruction one can observe with any
++ * interrupt-based sampling technique.
++ *
++ * PEBS also lowers sampling overhead significantly by having the
++ * processor store samples instead of the OS. PMU interrupts are only
++ * generated after multiple samples are written.
++ *
++ * Another benefit of PEBS is that samples can be captured inside
++ * critical sections where interrupts are masked.
++ *
++ * How does it work?
++ * PEBS effectively implements a hardware buffer. The OS must pass a region
++ * of memory where samples are to be stored. The region can have any
++ * size. The OS must also specify the sampling period to reload. The PMU
++ * will interrupt when it reaches the end of the buffer or a specified
++ * threshold location inside the memory region.
++ *
++ * The description of the buffer is stored in the Data Save Area (DS).
++ * The samples are stored sequentially in the buffer. The format of the
++ * buffer is fixed and specified in the PEBS documentation. The sample
++ * format does not change between 32-bit and 64-bit modes unlike on the
++ * Pentium 4 version of PEBS.
++ *
++ * PEBS does not work when HyperThreading is enabled due to certain MSRs
++ * being shared between the two threads.
++ *
++ * What does the format do?
++ * It provides access to the PEBS feature for both 32-bit and 64-bit
++ * processors that support it.
++ *
++ * The same code and data structures are used for both 32-bit and 64-bit
++ * modes. A single format name is used for both modes. In 32-bit mode,
++ * some of the extended registers are written to zero in each sample.
++ *
++ * It is important to realize that the format provides a zero-copy
++ * environment for the samples, i.e., the OS never touches the
++ * samples. Whatever the processor writes is directly accessible to
++ * the user.
++ *
++ * Parameters to the buffer can be passed via pfm_create_context() in
++ * the pfm_pebs_smpl_arg structure.
++ */
++#ifndef __PERFMON_PEBS_CORE_SMPL_H__
++#define __PERFMON_PEBS_CORE_SMPL_H__ 1
++
++/*
++ * The 32-bit and 64-bit formats are identical, thus we use only
++ * one name for the format.
++ */
++#define PFM_PEBS_CORE_SMPL_NAME "pebs_core"
++
++/*
++ * format specific parameters (passed at context creation)
++ *
++ * intr_thres: index from start of buffer of entry where the
++ * PMU interrupt must be triggered. It must be several samples
++ * short of the end of the buffer.
++ */
++struct pfm_pebs_core_smpl_arg {
++ u64 cnt_reset; /* counter reset value */
++ size_t buf_size; /* size of the PEBS buffer in bytes */
++ size_t intr_thres;/* index of PEBS interrupt threshold entry */
++ u64 reserved[6]; /* for future use */
++};
++
++/*
++ * Data Save Area (32 and 64-bit mode)
++ *
++ * The DS area is exposed to the user. To determine the number
++ * of samples available in PEBS, it is necessary to subtract
++ * pebs_index from pebs_base.
++ *
++ * Layout of the structure is mandated by hardware and specified
++ * in the Intel documentation.
++ */
++struct pfm_ds_area_core {
++ u64 bts_buf_base;
++ u64 bts_index;
++ u64 bts_abs_max;
++ u64 bts_intr_thres;
++ u64 pebs_buf_base;
++ u64 pebs_index;
++ u64 pebs_abs_max;
++ u64 pebs_intr_thres;
++ u64 pebs_cnt_reset;
++};
++
++/*
++ * This header is at the beginning of the sampling buffer returned to the user.
++ *
++ * Because of PEBS alignment constraints, the actual PEBS buffer area does
++ * not necessarily begin right after the header. The start_offs field must
++ * be used to compute the first byte of the buffer. The offset is defined
++ * as the number of bytes between the end of the header and the beginning
++ * of the buffer. As such the formula is:
++ * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
++ */
++struct pfm_pebs_core_smpl_hdr {
++ u64 overflows; /* #overflows for buffer */
++ size_t buf_size; /* bytes in the buffer */
++ size_t start_offs; /* actual buffer start offset */
++ u32 version; /* smpl format version */
++ u32 reserved1; /* for future use */
++ u64 reserved2[5]; /* for future use */
++ struct pfm_ds_area_core ds; /* data save area */
++};
++
++/*
++ * Sample format as mandated by Intel documentation.
++ * The same format is used in both 32 and 64 bit modes.
++ */
++struct pfm_pebs_core_smpl_entry {
++ u64 eflags;
++ u64 ip;
++ u64 eax;
++ u64 ebx;
++ u64 ecx;
++ u64 edx;
++ u64 esi;
++ u64 edi;
++ u64 ebp;
++ u64 esp;
++ u64 r8; /* 0 in 32-bit mode */
++ u64 r9; /* 0 in 32-bit mode */
++ u64 r10; /* 0 in 32-bit mode */
++ u64 r11; /* 0 in 32-bit mode */
++ u64 r12; /* 0 in 32-bit mode */
++ u64 r13; /* 0 in 32-bit mode */
++ u64 r14; /* 0 in 32-bit mode */
++ u64 r15; /* 0 in 32-bit mode */
++};
++
++#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U
++#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U
++#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\
++ (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff))
++
++#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */
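Given the layout above, a consumer can locate the start of the PEBS
buffer and count the samples recorded so far. A minimal sketch, assuming
the structures from this header are in scope and hdr points at a mapped
sampling buffer (the buffer-start formula and the pebs_index minus
pebs_buf_base subtraction both come from the comments above):

    #include <stddef.h>

    /* first byte of the PEBS buffer: end of header plus alignment offset */
    static inline void *pebs_buffer_start(struct pfm_pebs_core_smpl_hdr *hdr)
    {
            return (void *)((unsigned long)(hdr + 1) + hdr->start_offs);
    }

    /* samples currently in the buffer: bytes between the hardware write
     * index and the buffer base, divided by the fixed entry size */
    static inline size_t pebs_sample_count(struct pfm_pebs_core_smpl_hdr *hdr)
    {
            return (hdr->ds.pebs_index - hdr->ds.pebs_buf_base) /
                    sizeof(struct pfm_pebs_core_smpl_entry);
    }
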
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
++ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of version 2 of the GNU General Public
++ * License as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++ * 02111-1307 USA
++ *
++ * This file implements the sampling format to support Intel
++ * Precise Event Based Sampling (PEBS) feature of Pentium 4
++ * and other Netburst-based processors. Not to be used for
++ * Intel Core-based processors.
++ *
++ * What is PEBS?
++ * ------------
++ * This is a hardware feature to enhance sampling by providing
++ * better precision as to where a sample is taken. This avoids the
++ * typical skew in the instruction one can observe with any
++ * interrupt-based sampling technique.
++ *
++ * PEBS also lowers sampling overhead significantly by having the
++ * processor store samples instead of the OS. PMU interrupts are only
++ * generated after multiple samples are written.
++ *
++ * Another benefit of PEBS is that samples can be captured inside
++ * critical sections where interrupts are masked.
++ *
++ * How does it work?
++ * PEBS effectively implements a hardware buffer. The OS must pass a region
++ * of memory where samples are to be stored. The region can have any
++ * size. The OS must also specify the sampling period to reload. The PMU
++ * will interrupt when it reaches the end of the buffer or a specified
++ * threshold location inside the memory region.
++ *
++ * The description of the buffer is stored in the Data Save Area (DS).
++ * The samples are stored sequentially in the buffer. The format of the
++ * buffer is fixed and specified in the PEBS documentation. The sample
++ * format changes between 32-bit and 64-bit modes due to extended register
++ * file.
++ *
++ * PEBS does not work when HyperThreading is enabled due to certain MSRs
++ * being shared between the two threads.
++ *
++ * What does the format do?
++ * It provides access to the PEBS feature for both 32-bit and 64-bit
++ * processors that support it.
++ *
++ * The same code is used for both 32-bit and 64-bit modes, but different
++ * format names are used because the two modes are not compatible due to
++ * data model and register file differences. Similarly the public data
++ * structures describing the samples are different.
++ *
++ * It is important to realize that the format provides a zero-copy environment
++ * for the samples, i.e., the OS never touches the samples. Whatever the
++ * processor writes is directly accessible to the user.
++ *
++ * Parameters to the buffer can be passed via pfm_create_context() in
++ * the pfm_pebs_smpl_arg structure.
++ *
++ * It is not possible to run a 32-bit PEBS application on top of a 64-bit
++ * host kernel.
++ */
++#ifndef __PERFMON_PEBS_P4_SMPL_H__
++#define __PERFMON_PEBS_P4_SMPL_H__ 1
++
++#ifdef __i386__
++/*
++ * The 32-bit and 64-bit formats are not compatible, thus we have
++ * two different identifications so that 32-bit programs running on
++ * 64-bit OS will fail to use the 64-bit PEBS support.
++ */
++#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4"
++#else
++#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4"
++#endif
++
++/*
++ * format specific parameters (passed at context creation)
++ *
++ * intr_thres: index from start of buffer of entry where the
++ * PMU interrupt must be triggered. It must be several samples
++ * short of the end of the buffer.
++ */
++struct pfm_pebs_p4_smpl_arg {
++ u64 cnt_reset; /* counter reset value */
++ size_t buf_size; /* size of the PEBS buffer in bytes */
++ size_t intr_thres;/* index of PEBS interrupt threshold entry */
++ u64 reserved[6]; /* for future use */
++};
++
++/*
++ * Data Save Area (32 and 64-bit mode)
++ *
++ * The DS area must be exposed to the user because this is the only
++ * way to report on the number of valid entries recorded by the CPU.
++ * This is required when the buffer is not full, i.e., there was no
++ * PMU interrupt.
++ *
++ * Layout of the structure is mandated by hardware and specified in
++ * the Intel documentation.
++ */
++struct pfm_ds_area_p4 {
++ unsigned long bts_buf_base;
++ unsigned long bts_index;
++ unsigned long bts_abs_max;
++ unsigned long bts_intr_thres;
++ unsigned long pebs_buf_base;
++ unsigned long pebs_index;
++ unsigned long pebs_abs_max;
++ unsigned long pebs_intr_thres;
++ u64 pebs_cnt_reset;
++};
++
++/*
++ * This header is at the beginning of the sampling buffer returned to the user.
++ *
++ * Because of PEBS alignment constraints, the actual PEBS buffer area does
++ * not necessarily begin right after the header. The start_offs field must
++ * be used to compute the first byte of the buffer. The offset is defined
++ * as the number of bytes between the end of the header and the beginning
++ * of the buffer. As such the formula is:
++ * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
++ */
++struct pfm_pebs_p4_smpl_hdr {
++ u64 overflows; /* #overflows for buffer */
++ size_t buf_size; /* bytes in the buffer */
++ size_t start_offs; /* actual buffer start offset */
++ u32 version; /* smpl format version */
++ u32 reserved1; /* for future use */
++ u64 reserved2[5]; /* for future use */
++ struct pfm_ds_area_p4 ds; /* data save area */
++};
++
++/*
++ * 64-bit PEBS record format is described in
++ * http://www.intel.com/technology/64bitextensions/30083502.pdf
++ *
++ * The format does not peek at samples. The sample structure is only
++ * used to ensure that the buffer is large enough to accommodate one
++ * sample.
++ */
++#ifdef __i386__
++struct pfm_pebs_p4_smpl_entry {
++ u32 eflags;
++ u32 ip;
++ u32 eax;
++ u32 ebx;
++ u32 ecx;
++ u32 edx;
++ u32 esi;
++ u32 edi;
++ u32 ebp;
++ u32 esp;
++};
++#else
++struct pfm_pebs_p4_smpl_entry {
++ u64 eflags;
++ u64 ip;
++ u64 eax;
++ u64 ebx;
++ u64 ecx;
++ u64 edx;
++ u64 esi;
++ u64 edi;
++ u64 ebp;
++ u64 esp;
++ u64 r8;
++ u64 r9;
++ u64 r10;
++ u64 r11;
++ u64 r12;
++ u64 r13;
++ u64 r14;
++ u64 r15;
++};
++#endif
++
++#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U
++#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U
++#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\
++ (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff))
++
++#endif /* __PERFMON_PEBS_P4_SMPL_H__ */
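As the DS comment above notes, the DS area is the only way to learn how
many entries the CPU recorded when no PMU interrupt has fired. A sketch,
assuming the structures from this header are in scope:

    #include <stddef.h>

    /* valid entries recorded so far, whether or not the buffer filled */
    static inline size_t
    pebs_p4_valid_entries(const struct pfm_pebs_p4_smpl_hdr *hdr)
    {
            return (hdr->ds.pebs_index - hdr->ds.pebs_buf_base) /
                    sizeof(struct pfm_pebs_p4_smpl_entry);
    }
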
--- /dev/null
+ #ifndef _ASM_X86_PTRACE_H
+ #define _ASM_X86_PTRACE_H
+
+ #include <linux/compiler.h> /* For __user */
+ #include <asm/ptrace-abi.h>
+ #include <asm/processor-flags.h>
+
+ #ifdef __KERNEL__
+ #include <asm/segment.h>
+ #endif
+
+ #ifndef __ASSEMBLY__
+
+ #ifdef __i386__
+ /* this struct defines the way the registers are stored on the
+ stack during a system call. */
+
++enum EFLAGS {
++ EF_CF = 0x00000001,
++ EF_PF = 0x00000004,
++ EF_AF = 0x00000010,
++ EF_ZF = 0x00000040,
++ EF_SF = 0x00000080,
++ EF_TF = 0x00000100,
++ EF_IE = 0x00000200,
++ EF_DF = 0x00000400,
++ EF_OF = 0x00000800,
++ EF_IOPL = 0x00003000,
++ EF_IOPL_RING0 = 0x00000000,
++ EF_IOPL_RING1 = 0x00001000,
++ EF_IOPL_RING2 = 0x00002000,
++ EF_NT = 0x00004000, /* nested task */
++ EF_RF = 0x00010000, /* resume */
++ EF_VM = 0x00020000, /* virtual mode */
++ EF_AC = 0x00040000, /* alignment */
++ EF_VIF = 0x00080000, /* virtual interrupt */
++ EF_VIP = 0x00100000, /* virtual interrupt pending */
++ EF_ID = 0x00200000, /* id */
++};
++
+ #ifndef __KERNEL__
+
+ struct pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ int xfs;
+ /* int gs; */
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+ };
+
+ #else /* __KERNEL__ */
+
+ struct pt_regs {
+ unsigned long bx;
+ unsigned long cx;
+ unsigned long dx;
+ unsigned long si;
+ unsigned long di;
+ unsigned long bp;
+ unsigned long ax;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ /* int gs; */
+ unsigned long orig_ax;
+ unsigned long ip;
+ unsigned long cs;
+ unsigned long flags;
+ unsigned long sp;
+ unsigned long ss;
+ };
+
+ #endif /* __KERNEL__ */
+
+ #else /* __i386__ */
+
+ #ifndef __KERNEL__
+
+ struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbp;
+ unsigned long rbx;
+ /* arguments: non interrupts/non tracing syscalls only save up to here */
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rdx;
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+ /* end of arguments */
+ /* cpu exception frame or undefined */
+ unsigned long rip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long rsp;
+ unsigned long ss;
+ /* top of stack page */
+ };
+
+ #else /* __KERNEL__ */
+
+ struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long bp;
+ unsigned long bx;
+ /* arguments: non interrupts/non tracing syscalls only save up to here */
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long ax;
+ unsigned long cx;
+ unsigned long dx;
+ unsigned long si;
+ unsigned long di;
+ unsigned long orig_ax;
+ /* end of arguments */
+ /* cpu exception frame or undefined */
+ unsigned long ip;
+ unsigned long cs;
+ unsigned long flags;
+ unsigned long sp;
+ unsigned long ss;
+ /* top of stack page */
+ };
+
+ #endif /* __KERNEL__ */
+ #endif /* !__i386__ */
+
+
+ #ifdef __KERNEL__
+
+ #include <linux/init.h>
+
+ struct cpuinfo_x86;
+ struct task_struct;
+
+ extern unsigned long profile_pc(struct pt_regs *regs);
+
+ extern unsigned long
+ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
+ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+ int error_code, int si_code);
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
+ extern long syscall_trace_enter(struct pt_regs *);
+ extern void syscall_trace_leave(struct pt_regs *);
+
+ static inline unsigned long regs_return_value(struct pt_regs *regs)
+ {
+ return regs->ax;
+ }
+
+ /*
+ * user_mode_vm(regs) determines whether a register set came from user mode.
+ * This is true if V8086 mode was enabled OR if the register set was from
+ * protected mode with RPL-3 CS value. This tricky test checks that with
+ * one comparison. Many places in the kernel can bypass this full check
+ * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ */
+ static inline int user_mode(struct pt_regs *regs)
+ {
+ #ifdef CONFIG_X86_32
+ return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+ #else
+ return !!(regs->cs & 3);
+ #endif
+ }
+
+ static inline int user_mode_vm(struct pt_regs *regs)
+ {
+ #ifdef CONFIG_X86_32
+ return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
+ USER_RPL;
+ #else
+ return user_mode(regs);
+ #endif
+ }
+
+ static inline int v8086_mode(struct pt_regs *regs)
+ {
+ #ifdef CONFIG_X86_32
+ return (regs->flags & X86_VM_MASK);
+ #else
+ return 0; /* No V86 mode support in long mode */
+ #endif
+ }
+
+ /*
+ * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
+ * when it traps. So regs will be the current sp.
+ *
+ * This is valid only for kernel mode traps.
+ */
+ static inline unsigned long kernel_trap_sp(struct pt_regs *regs)
+ {
+ #ifdef CONFIG_X86_32
+ return (unsigned long)regs;
+ #else
+ return regs->sp;
+ #endif
+ }
+
+ static inline unsigned long instruction_pointer(struct pt_regs *regs)
+ {
+ return regs->ip;
+ }
+
+ static inline unsigned long frame_pointer(struct pt_regs *regs)
+ {
+ return regs->bp;
+ }
+
+ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+ {
+ return regs->sp;
+ }
+
+ /*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+ #define arch_has_single_step() (1)
+ extern void user_enable_single_step(struct task_struct *);
+ extern void user_disable_single_step(struct task_struct *);
+
+ extern void user_enable_block_step(struct task_struct *);
+ #ifdef CONFIG_X86_DEBUGCTLMSR
+ #define arch_has_block_step() (1)
+ #else
+ #define arch_has_block_step() (boot_cpu_data.x86 >= 6)
+ #endif
+
+ struct user_desc;
+ extern int do_get_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *info);
+ extern int do_set_thread_area(struct task_struct *p, int idx,
+ struct user_desc __user *info, int can_allocate);
+
+ extern void x86_ptrace_untrace(struct task_struct *);
+ extern void x86_ptrace_fork(struct task_struct *child,
+ unsigned long clone_flags);
+
+ #define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
+ #define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+
+ #endif /* __KERNEL__ */
+
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_X86_PTRACE_H */
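The one-comparison test in user_mode_vm() above is easy to verify in
isolation. A user-space sketch, assuming the 32-bit constant values
SEGMENT_RPL_MASK == 0x3, USER_RPL == 0x3 and X86_VM_MASK == 0x00020000
(the EFLAGS VM bit); the selector values in main() are examples only:

    #include <assert.h>

    #define SEGMENT_RPL_MASK 0x3
    #define USER_RPL         0x3
    #define X86_VM_MASK      0x00020000 /* EFLAGS.VM */

    /* RPL 3 alone reaches USER_RPL; the VM bit ORs in a value well above
     * USER_RPL, so v8086 mode passes even with a ring-0 CS value */
    static int is_user_mode_vm(unsigned long cs, unsigned long flags)
    {
            return ((cs & SEGMENT_RPL_MASK) | (flags & X86_VM_MASK))
                    >= USER_RPL;
    }

    int main(void)
    {
            assert(is_user_mode_vm(0x73, 0));           /* CS with RPL 3 */
            assert(is_user_mode_vm(0x08, X86_VM_MASK)); /* v8086, RPL 0 CS */
            assert(!is_user_mode_vm(0x10, 0));          /* pure kernel mode */
            return 0;
    }
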
--- /dev/null
+ #ifndef _ASM_X86_SPINLOCK_H
+ #define _ASM_X86_SPINLOCK_H
+
+ #include <asm/atomic.h>
+ #include <asm/rwlock.h>
+ #include <asm/page.h>
+ #include <asm/processor.h>
+ #include <linux/compiler.h>
+ #include <asm/paravirt.h>
+ /*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ *
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * These are fair FIFO ticket locks, which are currently limited to 256
+ * CPUs.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+ #ifdef CONFIG_X86_32
+ # define LOCK_PTR_REG "a"
+ # define REG_PTR_MODE "k"
+ #else
+ # define LOCK_PTR_REG "D"
+ # define REG_PTR_MODE "q"
+ #endif
+
+ #if defined(CONFIG_X86_32) && \
+ (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
+ /*
+ * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
+ * (PPro errata 66, 92)
+ */
+ # define UNLOCK_LOCK_PREFIX LOCK_PREFIX
+ #else
+ # define UNLOCK_LOCK_PREFIX
+ #endif
+
+ /*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the initial value of the tail.
+ *
+ * We use an xadd covering *both* parts of the lock, to increment the tail and
+ * also load the position of the head, which takes care of memory ordering
+ * issues and should be optimal for the uncontended case. Note the tail must be
+ * in the high part, because a wide xadd increment of the low part would carry
+ * up and contaminate the high part.
+ *
+ * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
+ * save some instructions and make the code more elegant. There really isn't
+ * much between them in performance though, especially as locks are out of line.
+ */
+ #if (NR_CPUS < 256)
+ #define TICKET_SHIFT 8
+
+ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+ {
+ short inc = 0x0100;
+
+ asm volatile (
+ LOCK_PREFIX "xaddw %w0, %1\n"
+ "1:\t"
+ "cmpb %h0, %b0\n\t"
+ "je 2f\n\t"
+ "rep ; nop\n\t"
+ "movb %1, %b0\n\t"
+ /* don't need lfence here, because loads are in-order */
+ "jmp 1b\n"
+ "2:"
+ : "+Q" (inc), "+m" (lock->slock)
+ :
+ : "memory", "cc");
+ }
+
+ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+ {
+ int tmp, new;
+
+ asm volatile("movzwl %2, %0\n\t"
+ "cmpb %h0,%b0\n\t"
+ "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
+ "jne 1f\n\t"
+ LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+ "1:"
+ "sete %b1\n\t"
+ "movzbl %b1,%0\n\t"
+ : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
+ :
+ : "memory", "cc");
+
+ return tmp;
+ }
+
+ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+ {
+ asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
+ : "+m" (lock->slock)
+ :
+ : "memory", "cc");
+ }
+ #else
+ #define TICKET_SHIFT 16
+
+ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+ {
+ int inc = 0x00010000;
+ int tmp;
+
+ asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
+ "movzwl %w0, %2\n\t"
+ "shrl $16, %0\n\t"
+ "1:\t"
+ "cmpl %0, %2\n\t"
+ "je 2f\n\t"
+ "rep ; nop\n\t"
+ "movzwl %1, %2\n\t"
+ /* don't need lfence here, because loads are in-order */
+ "jmp 1b\n"
+ "2:"
+ : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
+ :
+ : "memory", "cc");
+ }
+
+ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+ {
+ int tmp;
+ int new;
+
+ asm volatile("movl %2,%0\n\t"
+ "movl %0,%1\n\t"
+ "roll $16, %0\n\t"
+ "cmpl %0,%1\n\t"
+ "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
+ "jne 1f\n\t"
+ LOCK_PREFIX "cmpxchgl %1,%2\n\t"
+ "1:"
+ "sete %b1\n\t"
+ "movzbl %b1,%0\n\t"
+ : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
+ :
+ : "memory", "cc");
+
+ return tmp;
+ }
+
+ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+ {
+ asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
+ : "+m" (lock->slock)
+ :
+ : "memory", "cc");
+ }
+ #endif
+
+ static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+ {
+ int tmp = ACCESS_ONCE(lock->slock);
+
+ return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+ }
+
+ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+ {
+ int tmp = ACCESS_ONCE(lock->slock);
+
+ return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+ }
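The head/tail encoding that the comment block above describes can be
modelled portably. A user-space sketch of the NR_CPUS < 256 layout (tail
in the high byte, head in the low byte), assuming GCC's __sync builtins
in place of the locked xadd/incb; note the real unlock uses incb so the
head can never carry into the tail:

    /* head in bits 0-7, tail in bits 8-15, as in the < 256 CPU case */
    static volatile unsigned short slock;

    static void ticket_lock(void)
    {
            /* xadd: take a ticket (the old tail) and advance the tail */
            unsigned short t = __sync_fetch_and_add(&slock, 0x0100);
            unsigned char my_ticket = t >> 8;

            while ((unsigned char)(slock & 0xff) != my_ticket)
                    ; /* spin until the head reaches our ticket */
    }

    static void ticket_unlock(void)
    {
            /* advance the head; safe here while head < 0xff, whereas the
             * kernel's incb touches only the low byte and never carries */
            __sync_fetch_and_add(&slock, 1);
    }

    static int ticket_is_locked(void)
    {
            unsigned short t = slock;
            return (((t >> 8) ^ t) & 0xff) != 0; /* head != tail => held */
    }
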
+
+ #ifdef CONFIG_PARAVIRT
+ /*
+ * Define virtualization-friendly old-style lock byte lock, for use in
+ * pv_lock_ops if desired.
+ *
+ * This differs from the pre-2.6.24 spinlock by always using xchgb
+ * rather than decb to take the lock; this allows it to use a
+ * zero-initialized lock structure. It also maintains a 1-byte
+ * contention counter, so that we can implement
+ * __byte_spin_is_contended.
+ */
+ struct __byte_spinlock {
+ s8 lock;
+ s8 spinners;
+ };
+
+ static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+ {
+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+ return bl->lock != 0;
+ }
+
+ static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+ {
+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+ return bl->spinners != 0;
+ }
+
+ static inline void __byte_spin_lock(raw_spinlock_t *lock)
+ {
+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+ s8 val = 1;
+
+ asm("1: xchgb %1, %0\n"
+ " test %1,%1\n"
+ " jz 3f\n"
+ " " LOCK_PREFIX "incb %2\n"
+ "2: rep;nop\n"
+ " cmpb $1, %0\n"
+ " je 2b\n"
+ " " LOCK_PREFIX "decb %2\n"
+ " jmp 1b\n"
+ "3:"
+ : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
+ }
+
+ static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+ {
+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+ u8 old = 1;
+
+ asm("xchgb %1,%0"
+ : "+m" (bl->lock), "+q" (old) : : "memory");
+
+ return old == 0;
+ }
+
+ static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+ {
+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+ smp_wmb();
+ bl->lock = 0;
+ }
+ #else /* !CONFIG_PARAVIRT */
+ static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+ {
+ return __ticket_spin_is_locked(lock);
+ }
+
+ static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+ {
+ return __ticket_spin_is_contended(lock);
+ }
+
+ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+ {
+ __ticket_spin_lock(lock);
+ }
+
+ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ return __ticket_spin_trylock(lock);
+ }
+
+ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ __ticket_spin_unlock(lock);
+ }
+
+ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+ unsigned long flags)
+ {
+ __raw_spin_lock(lock);
+ }
+
+ #endif /* CONFIG_PARAVIRT */
+
+ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+ {
+ while (__raw_spin_is_locked(lock))
+ cpu_relax();
+ }
+
+ /*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ */
+
+ /**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+ {
+ return (int)(lock)->lock > 0;
+ }
+
+ /**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+ static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+ {
+ return (lock)->lock == RW_LOCK_BIAS;
+ }
+
+ static inline void __raw_read_lock(raw_rwlock_t *rw)
+ {
+ asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+ "jns 1f\n"
+ "call __read_lock_failed\n\t"
+ "1:\n"
+ ::LOCK_PTR_REG (rw) : "memory");
+ }
+
+ static inline void __raw_write_lock(raw_rwlock_t *rw)
+ {
+ asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+ "jz 1f\n"
+ "call __write_lock_failed\n\t"
+ "1:\n"
+ ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+ }
+
+ static inline int __raw_read_trylock(raw_rwlock_t *lock)
+ {
+ atomic_t *count = (atomic_t *)lock;
+
+ atomic_dec(count);
+ if (atomic_read(count) >= 0)
+ return 1;
+ atomic_inc(count);
+ return 0;
+ }
+
+ static inline int __raw_write_trylock(raw_rwlock_t *lock)
+ {
+ atomic_t *count = (atomic_t *)lock;
+
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+ }
+
+ static inline void __raw_read_unlock(raw_rwlock_t *rw)
+ {
+ asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+ }
+
+ static inline void __raw_write_unlock(raw_rwlock_t *rw)
+ {
+ asm volatile(LOCK_PREFIX "addl %1, %0"
+ : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+ }
+
++#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
++#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
++
+ #define _raw_spin_relax(lock) cpu_relax()
+ #define _raw_read_relax(lock) cpu_relax()
+ #define _raw_write_relax(lock) cpu_relax()
+
+ #endif /* _ASM_X86_SPINLOCK_H */
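The 32-bit counter scheme described above (readers subtract one, a
writer subtracts the whole bias, the sign bit marks contention) can also
be sketched with portable atomics. Assuming RW_LOCK_BIAS == 0x01000000,
its conventional x86 value, and GCC __sync builtins for the locked
arithmetic:

    #define RW_LOCK_BIAS 0x01000000

    static int rwcount = RW_LOCK_BIAS;

    /* reader: take one unit; a negative result means a writer holds it */
    static int my_read_trylock(void)
    {
            if (__sync_sub_and_fetch(&rwcount, 1) >= 0)
                    return 1;
            __sync_add_and_fetch(&rwcount, 1); /* undo, as above */
            return 0;
    }

    /* writer: take the whole bias; zero means no readers or writers */
    static int my_write_trylock(void)
    {
            if (__sync_sub_and_fetch(&rwcount, RW_LOCK_BIAS) == 0)
                    return 1;
            __sync_add_and_fetch(&rwcount, RW_LOCK_BIAS);
            return 0;
    }
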
--- /dev/null
+ /* thread_info.h: low-level thread information
+ *
+ * Copyright (C) 2002 David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
+ #ifndef _ASM_X86_THREAD_INFO_H
+ #define _ASM_X86_THREAD_INFO_H
+
+ #include <linux/compiler.h>
+ #include <asm/page.h>
+ #include <asm/types.h>
+
+ /*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ */
+ #ifndef __ASSEMBLY__
+ struct task_struct;
+ struct exec_domain;
+ #include <asm/processor.h>
+ #include <asm/ftrace.h>
+ #include <asm/atomic.h>
+
+ struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ __u32 flags; /* low level flags */
+ __u32 status; /* thread synchronous flags */
+ __u32 cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable,
+ <0 => BUG */
+ mm_segment_t addr_limit;
+ struct restart_block restart_block;
+ void __user *sysenter_return;
+ #ifdef CONFIG_X86_32
+ unsigned long previous_esp; /* ESP of the previous stack in
+ case of nested (IRQ) stacks
+ */
+ __u8 supervisor_stack[0];
+ #endif
+ };
+
+ #define INIT_THREAD_INFO(tsk) \
+ { \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = 1, \
+ .addr_limit = KERNEL_DS, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+ }
+
+ #define init_thread_info (init_thread_union.thread_info)
+ #define init_stack (init_thread_union.stack)
+
+ #else /* !__ASSEMBLY__ */
+
+ #include <asm/asm-offsets.h>
+
+ #endif
+
+ /*
+ * thread information flags
+ * - these are process state flags that various assembly files
+ * may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ * Warning: layout of LSW is hardcoded in entry.S
+ */
+ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
+ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
+ #define TIF_SIGPENDING 2 /* signal pending */
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+ #define TIF_IRET 5 /* force IRET */
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
++#define TIF_PERFMON_WORK 9 /* work for pfm_handle_work() */
+ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+ #define TIF_NOTSC 16 /* TSC is not accessible in userland */
+ #define TIF_IA32 17 /* 32bit process */
+ #define TIF_FORK 18 /* ret_from_fork */
+ #define TIF_ABI_PENDING 19
+ #define TIF_MEMDIE 20
+ #define TIF_DEBUG 21 /* uses debug registers */
+ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+ #define TIF_FREEZE 23 /* is freezing for suspend */
+ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
+ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
+ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
++#define TIF_PERFMON_CTXSW 27 /* perfmon needs ctxsw calls */
+
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+ #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+ #define _TIF_IRET (1 << TIF_IRET)
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
+ #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+ #define _TIF_NOTSC (1 << TIF_NOTSC)
+ #define _TIF_IA32 (1 << TIF_IA32)
+ #define _TIF_FORK (1 << TIF_FORK)
+ #define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING)
+ #define _TIF_DEBUG (1 << TIF_DEBUG)
+ #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
+ #define _TIF_FREEZE (1 << TIF_FREEZE)
+ #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
+ #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
+ #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
++#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK)
++#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW)
+
+ /* work to do in syscall_trace_enter() */
+ #define _TIF_WORK_SYSCALL_ENTRY \
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
+
+ /* work to do in syscall_trace_leave() */
+ #define _TIF_WORK_SYSCALL_EXIT \
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+
+ /* work to do on interrupt/exception return */
+ #define _TIF_WORK_MASK \
+ (0x0000FFFF & \
+ ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
+ _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
+
+ /* work to do on any return to user space */
+ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+
+ /* Only used for 64 bit */
+ #define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
++ (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME|_TIF_PERFMON_WORK)
+
+ /* flags to check in __switch_to() */
+ #define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
++ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC| \
++ _TIF_PERFMON_CTXSW)
+
+ #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+
+ #define PREEMPT_ACTIVE 0x10000000
+
+ /* thread information allocation */
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ #define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
+ #else
+ #define THREAD_FLAGS GFP_KERNEL
+ #endif
+
+ #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
+ #define alloc_thread_info(tsk) \
+ ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
+ #ifdef CONFIG_X86_32
+
+ #define STACK_WARN (THREAD_SIZE/8)
+ /*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+ #ifndef __ASSEMBLY__
+
+
+ /* how to get the current stack pointer from C */
+ register unsigned long current_stack_pointer asm("esp") __used;
+
+ /* how to get the thread information struct from C */
+ static inline struct thread_info *current_thread_info(void)
+ {
+ return (struct thread_info *)
+ (current_stack_pointer & ~(THREAD_SIZE - 1));
+ }
+
+ #else /* !__ASSEMBLY__ */
+
+ /* how to get the thread information struct from ASM */
+ #define GET_THREAD_INFO(reg) \
+ movl $-THREAD_SIZE, reg; \
+ andl %esp, reg
+
+ /* use this one if reg already contains %esp */
+ #define GET_THREAD_INFO_WITH_ESP(reg) \
+ andl $-THREAD_SIZE, reg
+
+ #endif
+
+ #else /* X86_32 */
+
+ #include <asm/pda.h>
+
+ /*
+ * macros/functions for gaining access to the thread information structure
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+ #ifndef __ASSEMBLY__
+ static inline struct thread_info *current_thread_info(void)
+ {
+ struct thread_info *ti;
+ ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
+ return ti;
+ }
+
+ /* do not use in interrupt context */
+ static inline struct thread_info *stack_thread_info(void)
+ {
+ struct thread_info *ti;
+ asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+ return ti;
+ }
+
+ #else /* !__ASSEMBLY__ */
+
+ /* how to get the thread information struct from ASM */
+ #define GET_THREAD_INFO(reg) \
+ movq %gs:pda_kernelstack,reg ; \
+ subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+
+ #endif
+
+ #endif /* !X86_32 */
+
+ /*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+ #define TS_USEDFPU 0x0001 /* FPU was used by this task
+ this quantum (SMP) */
+ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+ #define TS_POLLING 0x0004 /* true if in idle loop
+ and not sleeping */
+ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
+ #define TS_XSAVE 0x0010 /* Use xsave/xrstor */
+
+ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+ #ifndef __ASSEMBLY__
+ #define HAVE_SET_RESTORE_SIGMASK 1
+ static inline void set_restore_sigmask(void)
+ {
+ struct thread_info *ti = current_thread_info();
+ ti->status |= TS_RESTORE_SIGMASK;
+ set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+ }
+ #endif /* !__ASSEMBLY__ */
+
+ #ifndef __ASSEMBLY__
+ extern void arch_task_cache_init(void);
+ extern void free_thread_info(struct thread_info *ti);
+ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+ #define arch_task_cache_init arch_task_cache_init
+ #endif
+ #endif /* _ASM_X86_THREAD_INFO_H */
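The 32-bit current_thread_info() above works because the thread stack is
THREAD_SIZE-aligned and struct thread_info sits at its base, so any
stack address can be masked down to it. A sketch of the arithmetic,
assuming an 8 KiB THREAD_SIZE (two pages, the common x86_32 value) and a
made-up stack pointer:

    #include <stdint.h>
    #include <stdio.h>

    #define THREAD_SIZE 8192UL /* assumed for the example */

    /* any address inside the thread stack maps to the stack base,
     * where struct thread_info lives */
    static uintptr_t thread_info_base(uintptr_t sp)
    {
            return sp & ~(THREAD_SIZE - 1);
    }

    int main(void)
    {
            uintptr_t sp = 0xc12345f0UL; /* hypothetical stack pointer */

            printf("thread_info at %#lx\n",
                   (unsigned long)thread_info_base(sp));
            return 0;
    }
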
--- /dev/null
--- /dev/null
++#ifndef _ASM_X86_UNWIND_H
++#define _ASM_X86_UNWIND_H
++
++/*
++ * Copyright (C) 2002-2007 Novell, Inc.
++ * Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <linux/uaccess.h>
++#include <asm/ptrace.h>
++
++struct unwind_frame_info
++{
++ struct pt_regs regs;
++ struct task_struct *task;
++ unsigned call_frame:1;
++};
++
++extern int try_stack_unwind(struct task_struct *task, struct pt_regs *regs,
++ unsigned long **stack, unsigned long *bp,
++ const struct stacktrace_ops *ops, void *data);
++
++#ifdef CONFIG_X86_64
++
++#include <asm/vsyscall.h>
++
++#define UNW_PC(frame) (frame)->regs.ip
++#define UNW_SP(frame) (frame)->regs.sp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.bp
++#define FRAME_RETADDR_OFFSET 8
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) (((tsk)->thread.sp0 - 1) & ~(THREAD_SIZE - 1))
++#define TSK_STACK_TOP(tsk) ((tsk)->thread.sp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0UL)
++#endif
++/* Might need to account for the special exception and interrupt handling
++ stacks here, since normally
++ EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
++ but the construct is needed only for getting across the stack switch to
++ the interrupt stack - thus considering the IRQ stack itself is unnecessary,
++ and the overhead of comparing against all exception handling stacks seems
++ not desirable. */
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(ax), \
++ PTREGS_INFO(dx), \
++ PTREGS_INFO(cx), \
++ PTREGS_INFO(bx), \
++ PTREGS_INFO(si), \
++ PTREGS_INFO(di), \
++ PTREGS_INFO(bp), \
++ PTREGS_INFO(sp), \
++ PTREGS_INFO(r8), \
++ PTREGS_INFO(r9), \
++ PTREGS_INFO(r10), \
++ PTREGS_INFO(r11), \
++ PTREGS_INFO(r12), \
++ PTREGS_INFO(r13), \
++ PTREGS_INFO(r14), \
++ PTREGS_INFO(r15), \
++ PTREGS_INFO(ip)
++
++#else
++
++#include <asm/fixmap.h>
++
++#define UNW_PC(frame) (frame)->regs.ip
++#define UNW_SP(frame) (frame)->regs.sp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.bp
++#define FRAME_RETADDR_OFFSET 4
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.sp0)
++#define TSK_STACK_TOP(tsk) ((tsk)->thread.sp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0UL)
++#endif
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(ax), \
++ PTREGS_INFO(cx), \
++ PTREGS_INFO(dx), \
++ PTREGS_INFO(bx), \
++ PTREGS_INFO(sp), \
++ PTREGS_INFO(bp), \
++ PTREGS_INFO(si), \
++ PTREGS_INFO(di), \
++ PTREGS_INFO(ip)
++
++#endif
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++ ((raItem).where == Memory && \
++ !((raItem).value * (dataAlign) + sizeof(void *)))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++ /*const*/ struct pt_regs *regs)
++{
++#ifdef CONFIG_X86_64
++ info->regs = *regs;
++#else
++ if (user_mode_vm(regs))
++ info->regs = *regs;
++ else {
++ memcpy(&info->regs, regs, offsetof(struct pt_regs, sp));
++		info->regs.sp = (unsigned long)&regs->sp;
++ info->regs.ss = __KERNEL_DS;
++ }
++#endif
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++#ifdef CONFIG_X86_64
++ extern const char thread_return[];
++
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.ip = (unsigned long)thread_return;
++ info->regs.cs = __KERNEL_CS;
++ probe_kernel_address(info->task->thread.sp, info->regs.bp);
++ info->regs.sp = info->task->thread.sp;
++ info->regs.ss = __KERNEL_DS;
++#else
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.ip = info->task->thread.ip;
++ info->regs.cs = __KERNEL_CS;
++ probe_kernel_address(info->task->thread.sp, info->regs.bp);
++ info->regs.sp = info->task->thread.sp;
++ info->regs.ss = __KERNEL_DS;
++ info->regs.ds = __USER_DS;
++ info->regs.es = __USER_DS;
++#endif
++}
++
++extern asmlinkage int
++arch_unwind_init_running(struct unwind_frame_info *,
++ asmlinkage unwind_callback_fn,
++ const struct stacktrace_ops *ops, void *data);
++
++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
++{
++#ifdef CONFIG_X86_64
++ return user_mode(&info->regs)
++ || (long)info->regs.ip >= 0
++ || (info->regs.ip >= VSYSCALL_START && info->regs.ip < VSYSCALL_END)
++ || (long)info->regs.sp >= 0;
++#else
++ return user_mode_vm(&info->regs)
++ || info->regs.ip < PAGE_OFFSET
++ || (info->regs.ip >= __fix_to_virt(FIX_VDSO)
++ && info->regs.ip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
++ || info->regs.sp < PAGE_OFFSET;
++#endif
++}
++#endif
++
++#endif /* _ASM_X86_UNWIND_H */
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2006, 2007-2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * Common code for doing accurate backtraces on i386 and x86_64, including
+ * printing the values of arguments.
+ */
+
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/nmi.h>
+#include <asm/asm-offsets.h>
+#include <asm/system.h>
+
+#define KDB_DEBUG_BB(fmt, ...) \
+ {if (KDB_DEBUG(BB)) kdb_printf(fmt, ## __VA_ARGS__);}
+#define KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix) \
+ kdb_printf(prefix "%c0x%x" suffix, \
+ offset >= 0 ? '+' : '-', \
+ offset >= 0 ? offset : -offset)
+#define KDB_DEBUG_BB_OFFSET(offset, prefix, suffix) \
+ {if (KDB_DEBUG(BB)) KDB_DEBUG_BB_OFFSET_PRINTF(offset, prefix, suffix);}
+
+#define BB_CHECK(expr, val, ret) \
+({ \
+ if (unlikely(expr)) { \
+ kdb_printf("%s, line %d: BB_CHECK(" #expr ") failed " \
+ #val "=%lx\n", \
+ __FUNCTION__, __LINE__, (long)val); \
+ bb_giveup = 1; \
+ return ret; \
+ } \
+})
+
+static int bb_giveup;
+
+/* Use BBRG_Rxx for both i386 and x86_64. RAX through R15 must be at the end,
+ * starting with RAX. Some of these codes do not reflect actual registers,
+ * such codes are special cases when parsing the record of register changes.
+ * When updating BBRG_ entries, update bbrg_name as well.
+ */
+
+enum bb_reg_code
+{
+ BBRG_UNDEFINED = 0, /* Register contents are undefined */
+ BBRG_OSP, /* original stack pointer on entry to function */
+ BBRG_RAX,
+ BBRG_RBX,
+ BBRG_RCX,
+ BBRG_RDX,
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R8,
+ BBRG_R9,
+ BBRG_R10,
+ BBRG_R11,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+const static char *bbrg_name[] = {
+ [BBRG_UNDEFINED] = "undefined",
+ [BBRG_OSP] = "osp",
+ [BBRG_RAX] = "rax",
+ [BBRG_RBX] = "rbx",
+ [BBRG_RCX] = "rcx",
+ [BBRG_RDX] = "rdx",
+ [BBRG_RDI] = "rdi",
+ [BBRG_RSI] = "rsi",
+ [BBRG_RBP] = "rbp",
+ [BBRG_RSP] = "rsp",
+ [BBRG_R8] = "r8",
+ [BBRG_R9] = "r9",
+ [BBRG_R10] = "r10",
+ [BBRG_R11] = "r11",
+ [BBRG_R12] = "r12",
+ [BBRG_R13] = "r13",
+ [BBRG_R14] = "r14",
+ [BBRG_R15] = "r15",
+};
+
+/* Map a register name to its register code. This includes the sub-register
+ * addressable fields, e.g. parts of rax can be addressed as ax, al, ah, eax.
+ * The list is sorted so it can be binary chopped, sort command is:
+ * LANG=C sort -t '"' -k2
+ */
+
+struct bb_reg_code_map {
+ enum bb_reg_code reg;
+ const char *name;
+};
+
+const static struct bb_reg_code_map
+bb_reg_code_map[] = {
+ { BBRG_RAX, "ah" },
+ { BBRG_RAX, "al" },
+ { BBRG_RAX, "ax" },
+ { BBRG_RBX, "bh" },
+ { BBRG_RBX, "bl" },
+ { BBRG_RBP, "bp" },
+ { BBRG_RBP, "bpl" },
+ { BBRG_RBX, "bx" },
+ { BBRG_RCX, "ch" },
+ { BBRG_RCX, "cl" },
+ { BBRG_RCX, "cx" },
+ { BBRG_RDX, "dh" },
+ { BBRG_RDI, "di" },
+ { BBRG_RDI, "dil" },
+ { BBRG_RDX, "dl" },
+ { BBRG_RDX, "dx" },
+ { BBRG_RAX, "eax" },
+ { BBRG_RBP, "ebp" },
+ { BBRG_RBX, "ebx" },
+ { BBRG_RCX, "ecx" },
+ { BBRG_RDI, "edi" },
+ { BBRG_RDX, "edx" },
+ { BBRG_RSI, "esi" },
+ { BBRG_RSP, "esp" },
+ { BBRG_R10, "r10" },
+ { BBRG_R10, "r10d" },
+ { BBRG_R10, "r10l" },
+ { BBRG_R10, "r10w" },
+ { BBRG_R11, "r11" },
+ { BBRG_R11, "r11d" },
+ { BBRG_R11, "r11l" },
+ { BBRG_R11, "r11w" },
+ { BBRG_R12, "r12" },
+ { BBRG_R12, "r12d" },
+ { BBRG_R12, "r12l" },
+ { BBRG_R12, "r12w" },
+ { BBRG_R13, "r13" },
+ { BBRG_R13, "r13d" },
+ { BBRG_R13, "r13l" },
+ { BBRG_R13, "r13w" },
+ { BBRG_R14, "r14" },
+ { BBRG_R14, "r14d" },
+ { BBRG_R14, "r14l" },
+ { BBRG_R14, "r14w" },
+ { BBRG_R15, "r15" },
+ { BBRG_R15, "r15d" },
+ { BBRG_R15, "r15l" },
+ { BBRG_R15, "r15w" },
+ { BBRG_R8, "r8" },
+ { BBRG_R8, "r8d" },
+ { BBRG_R8, "r8l" },
+ { BBRG_R8, "r8w" },
+ { BBRG_R9, "r9" },
+ { BBRG_R9, "r9d" },
+ { BBRG_R9, "r9l" },
+ { BBRG_R9, "r9w" },
+ { BBRG_RAX, "rax" },
+ { BBRG_RBP, "rbp" },
+ { BBRG_RBX, "rbx" },
+ { BBRG_RCX, "rcx" },
+ { BBRG_RDI, "rdi" },
+ { BBRG_RDX, "rdx" },
+ { BBRG_RSI, "rsi" },
+ { BBRG_RSP, "rsp" },
+ { BBRG_RSI, "si" },
+ { BBRG_RSI, "sil" },
+ { BBRG_RSP, "sp" },
+ { BBRG_RSP, "spl" },
+};
+
+/* Record register contents in terms of the values that were passed to this
+ * function, IOW track which registers contain an input value. A register's
+ * contents can be undefined, it can contain an input register value or it can
+ * contain an offset from the original stack pointer.
+ *
+ * This structure is used to represent the current contents of the integer
+ * registers, it is held in an array that is indexed by BBRG_xxx. The element
+ * for BBRG_xxx indicates what input value is currently in BBRG_xxx. When
+ * 'value' is BBRG_OSP then register BBRG_xxx contains a stack pointer,
+ * pointing at 'offset' from the original stack pointer on entry to the
+ * function. When 'value' is not BBRG_OSP then element BBRG_xxx contains the
+ * original contents of an input register and offset is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_reg_contains
+{
+ enum bb_reg_code value: 8;
+ short offset;
+};
+
+/* Note: the offsets in struct bb_mem_contains in this code are _NOT_ offsets
+ * from OSP, they are offsets from current RSP. It fits better with the way
+ * that struct pt_regs is built, some code pushes extra data before pt_regs so
+ * working with OSP relative offsets gets messy. struct bb_mem_contains
+ * entries must be in descending order of RSP offset.
+ */
+
+typedef struct { DECLARE_BITMAP(bits, BBRG_R15+1); } bbrgmask_t;
+#define BB_SKIP(reg) (1 << (BBRG_ ## reg))
+struct bb_mem_contains {
+ short offset_address;
+ enum bb_reg_code value: 8;
+};
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common restore path that expects known registers
+ * and/or a known memory state (e.g. struct pt_regs) then do a sanity check on
+ * the state at this point.
+ */
+
+struct bb_name_state {
+ const char *name; /* target function */
+ bfd_vma address; /* Address of target function */
+ const char *fname; /* optional from function name */
+ const struct bb_mem_contains *mem; /* expected memory state */
+ const struct bb_reg_contains *regs; /* expected register state */
+ const unsigned short mem_size; /* ARRAY_SIZE(mem) */
+ const unsigned short regs_size; /* ARRAY_SIZE(regs) */
+ const short osp_offset; /* RSP in regs == OSP+osp_offset */
+ const bbrgmask_t skip_mem; /* Some slots in mem may be undefined */
+ const bbrgmask_t skip_regs; /* Some slots in regs may be undefined */
+};
+
+/* NS (NAME_STATE) macros define the register and memory state when we transfer
+ * control to or start decoding a special case name. Use NS when the target
+ * label always has the same state. Use NS_FROM and specify the source label
+ * if the target state is slightly different depending on where it is branched
+ * from. This gives better state checking, by isolating the special cases.
+ *
+ * Note: for the same target label, NS_FROM entries must be followed by a
+ * single NS entry.
+ */
+
+#define NS_FROM(iname, ifname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ { \
+ .name = iname, \
+ .fname = ifname, \
+ .mem = imem, \
+ .regs = iregs, \
+ .mem_size = ARRAY_SIZE(imem), \
+ .regs_size = ARRAY_SIZE(iregs), \
+ .skip_mem.bits[0] = iskip_mem, \
+ .skip_regs.bits[0] = iskip_regs, \
+ .osp_offset = iosp_offset, \
+ .address = 0 \
+ }
+
+/* Shorter forms for the common cases */
+#define NS(iname, imem, iregs, iskip_mem, iskip_regs, iosp_offset) \
+ NS_FROM(iname, NULL, imem, iregs, iskip_mem, iskip_regs, iosp_offset)
+#define NS_MEM(iname, imem, iskip_mem) \
+ NS_FROM(iname, NULL, imem, no_regs, iskip_mem, 0, 0)
+#define NS_MEM_FROM(iname, ifname, imem, iskip_mem) \
+ NS_FROM(iname, ifname, imem, no_regs, iskip_mem, 0, 0)
+#define NS_REG(iname, iregs, iskip_regs) \
+ NS_FROM(iname, NULL, no_memory, iregs, 0, iskip_regs, 0)
+#define NS_REG_FROM(iname, ifname, iregs, iskip_regs) \
+ NS_FROM(iname, ifname, no_memory, iregs, 0, iskip_regs, 0)
+
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src);
+
+static const char *bb_mod_name, *bb_func_name;
+
+static int
+bb_noret(const char *name)
+{
+ if (strcmp(name, "panic") == 0 ||
+ strcmp(name, "do_exit") == 0 ||
+ strcmp(name, "do_group_exit") == 0 ||
+ strcmp(name, "complete_and_exit") == 0)
+ return 1;
+ return 0;
+}
+
+/*============================================================================*/
+/* */
+/* Most of the basic block code and data is common to x86_64 and i386. This */
+/* large ifdef contains almost all of the differences between the two */
+/* architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+
+#ifdef CONFIG_X86_64
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+const static enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RDI,
+ BBRG_RSI,
+ BBRG_RDX,
+ BBRG_RCX,
+ BBRG_R8,
+ BBRG_R9,
+};
+
+const static enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_R12,
+ BBRG_R13,
+ BBRG_R14,
+ BBRG_R15,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x70, BBRG_RDI },
+ { 0x68, BBRG_RSI },
+ { 0x60, BBRG_RDX },
+ { 0x58, BBRG_RCX },
+ { 0x50, BBRG_RAX },
+ { 0x48, BBRG_R8 },
+ { 0x40, BBRG_R9 },
+ { 0x38, BBRG_R10 },
+ { 0x30, BBRG_R11 },
+ { 0x28, BBRG_RBX },
+ { 0x20, BBRG_RBP },
+ { 0x18, BBRG_R12 },
+ { 0x10, BBRG_R13 },
+ { 0x08, BBRG_R14 },
+ { 0x00, BBRG_R15 },
+};
+static const struct bb_mem_contains partial_pt_regs[] = {
+ { 0x40, BBRG_RDI },
+ { 0x38, BBRG_RSI },
+ { 0x30, BBRG_RDX },
+ { 0x28, BBRG_RCX },
+ { 0x20, BBRG_RAX },
+ { 0x18, BBRG_R8 },
+ { 0x10, BBRG_R9 },
+ { 0x08, BBRG_R10 },
+ { 0x00, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_1[] = {
+ { 0x48, BBRG_RDI },
+ { 0x40, BBRG_RSI },
+ { 0x38, BBRG_RDX },
+ { 0x30, BBRG_RCX },
+ { 0x28, BBRG_RAX },
+ { 0x20, BBRG_R8 },
+ { 0x18, BBRG_R9 },
+ { 0x10, BBRG_R10 },
+ { 0x08, BBRG_R11 },
+};
+static const struct bb_mem_contains partial_pt_regs_plus_2[] = {
+ { 0x50, BBRG_RDI },
+ { 0x48, BBRG_RSI },
+ { 0x40, BBRG_RDX },
+ { 0x38, BBRG_RCX },
+ { 0x30, BBRG_RAX },
+ { 0x28, BBRG_R8 },
+ { 0x20, BBRG_R9 },
+ { 0x18, BBRG_R10 },
+ { 0x10, BBRG_R11 },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* error_code plus original rax */
+static const struct bb_mem_contains error_code_rax[] = {
+ { 0x8, BBRG_UNDEFINED },
+ { 0x0, BBRG_RAX },
+};
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+ [BBRG_R8 ] = { BBRG_R8, 0 },
+ [BBRG_R9 ] = { BBRG_R9, 0 },
+ [BBRG_R10] = { BBRG_R10, 0 },
+ [BBRG_R11] = { BBRG_R11, 0 },
+ [BBRG_R12] = { BBRG_R12, 0 },
+ [BBRG_R13] = { BBRG_R13, 0 },
+ [BBRG_R14] = { BBRG_R14, 0 },
+ [BBRG_R15] = { BBRG_R15, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("ia32_ptregs_common", partial_pt_regs_plus_1, 0),
+ NS_MEM("ia32_sysret", partial_pt_regs, 0),
+ NS_MEM("int_careful", partial_pt_regs, 0),
+ NS_MEM("int_restore_rest", full_pt_regs, 0),
+ NS_MEM("int_signal", full_pt_regs, 0),
+ NS_MEM("int_very_careful", partial_pt_regs, 0),
+ NS_MEM("int_with_check", partial_pt_regs, 0),
+#ifdef CONFIG_TRACE_IRQFLAGS
+ NS_MEM("paranoid_exit0", full_pt_regs, 0),
+#endif /* CONFIG_TRACE_IRQFLAGS */
+ NS_MEM("paranoid_exit1", full_pt_regs, 0),
+ NS_MEM("ptregscall_common", partial_pt_regs_plus_1, 0),
+ NS_MEM("restore_norax", partial_pt_regs, 0),
+ NS_MEM("restore", partial_pt_regs, 0),
+ NS_MEM("ret_from_intr", partial_pt_regs_plus_2, 0),
+ NS_MEM("stub32_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_rt_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_rt_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub32_vfork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_clone", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_execve", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_fork", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_iopl", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_rt_sigreturn", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_rt_sigsuspend", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_sigaltstack", partial_pt_regs_plus_1, 0),
+ NS_MEM("stub_vfork", partial_pt_regs_plus_1, 0),
+
+ NS_MEM_FROM("ia32_badsys", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("ia32_badsys", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM("ia32_badsys", partial_pt_regs, 0),
+
+ /* Various bits of code branch to int_ret_from_sys_call, with slightly
+ * different missing values in pt_regs.
+ */
+ NS_MEM_FROM("int_ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "stub_rt_sigreturn",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "kernel_execve",
+ partial_pt_regs,
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_syscall",
+ partial_pt_regs,
+ /* ia32_syscall only saves RDI through RCX. */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_sysenter_target",
+ partial_pt_regs,
+ /* ia32_sysenter_target uses CLEAR_RREGS to clear R8-R11 on
+ * some paths. It also stomps on RAX.
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX)),
+ NS_MEM_FROM("int_ret_from_sys_call", "ia32_cstar_target",
+ partial_pt_regs,
+ /* ia32_cstar_target uses CLEAR_RREGS to clear R8-R11 on some
+ * paths. It also stomps on RAX. Even more confusing, instead
+ * of storing RCX it stores RBP. WTF?
+ */
+ BB_SKIP(R8) | BB_SKIP(R9) | BB_SKIP(R10) | BB_SKIP(R11) |
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+ NS_MEM("int_ret_from_sys_call", partial_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("retint_kernel", partial_pt_regs, BB_SKIP(RAX)),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("retint_careful", partial_pt_regs, BB_SKIP(RAX)),
+
+ /* Horrible hack: For a brand new x86_64 task, switch_to() branches to
+ * ret_from_fork with a totally different stack state from all the
+ * other tasks that come out of switch_to(). This non-standard state
+ * cannot be represented so just ignore the branch from switch_to() to
+ * ret_from_fork. Due to inlining and linker labels, switch_to() can
+ * appear as several different function labels, including schedule,
+ * context_switch and __sched_text_start.
+ */
+ NS_MEM_FROM("ret_from_fork", "schedule", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "__sched_text_start", no_memory, 0),
+ NS_MEM_FROM("ret_from_fork", "context_switch", no_memory, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+
+
+ NS_MEM_FROM("ret_from_sys_call", "ret_from_fork",
+ partial_pt_regs,
+ BB_SKIP(R11)),
+ NS_MEM("ret_from_sys_call", partial_pt_regs, 0),
+
+ NS_MEM("retint_restore_args",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_MEM("retint_swapgs",
+ partial_pt_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("bad_put_user",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX) | BB_SKIP(R8)),
+
+ NS_REG("bad_get_user",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX) | BB_SKIP(R8)),
+
+ NS_REG("bad_to_user",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("ia32_ptregs_common",
+ all_regs,
+ 0),
+
+ NS_REG("copy_user_generic_unrolled",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("copy_user_generic_string",
+ all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RCX)),
+
+ NS_REG("irq_return",
+ all_regs,
+ 0),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ NS("invalid_TSS", error_code, all_regs, 0, 0, 0),
+ NS("segment_not_present", error_code, all_regs, 0, 0, 0),
+ NS("alignment_check", error_code, all_regs, 0, 0, 0),
+ NS("page_fault", error_code, all_regs, 0, 0, 0),
+ NS("general_protection", error_code, all_regs, 0, 0, 0),
+ NS("error_entry", error_code_rax, all_regs, 0, BB_SKIP(RAX), -0x10),
+ NS("common_interrupt", error_code, all_regs, 0, 0, -0x8),
+};
+
+static const char *bb_spurious[] = {
+ /* schedule */
+ "thread_return",
+ /* ret_from_fork */
+ "rff_action",
+ "rff_trace",
+ /* system_call */
+ "ret_from_sys_call",
+ "sysret_check",
+ "sysret_careful",
+ "sysret_signal",
+ "badsys",
+ "tracesys",
+ "int_ret_from_sys_call",
+ "int_with_check",
+ "int_careful",
+ "int_very_careful",
+ "int_signal",
+ "int_restore_rest",
+ /* common_interrupt */
+ "ret_from_intr",
+ "exit_intr",
+ "retint_with_reschedule",
+ "retint_check",
+ "retint_swapgs",
+ "retint_restore_args",
+ "restore_args",
+ "irq_return",
+ "bad_iret",
+ "retint_careful",
+ "retint_signal",
+#ifdef CONFIG_PREEMPT
+ "retint_kernel",
+#endif /* CONFIG_PREEMPT */
+ /* .macro paranoidexit */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ "paranoid_exit0",
+ "paranoid_userspace0",
+ "paranoid_restore0",
+ "paranoid_swapgs0",
+ "paranoid_schedule0",
+#endif /* CONFIG_TRACE_IRQFLAGS */
+ "paranoid_exit1",
+ "paranoid_swapgs1",
+ "paranoid_restore1",
+ "paranoid_userspace1",
+ "paranoid_schedule1",
+ /* error_entry */
+ "error_swapgs",
+ "error_sti",
+ "error_exit",
+ "error_kernelspace",
+ /* load_gs_index */
+ "gs_change",
+ "bad_gs",
+ /* ia32_sysenter_target */
+ "sysenter_do_call",
+ "sysenter_tracesys",
+ /* ia32_cstar_target */
+ "cstar_do_call",
+ "cstar_tracesys",
+ "ia32_badarg",
+ /* ia32_syscall */
+ "ia32_do_syscall",
+ "ia32_sysret",
+ "ia32_tracesys",
+ "ia32_badsys",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+ /* kretprobe_trampoline_holder */
+ "kretprobe_trampoline",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
+#ifdef CONFIG_XEN
+ /* arch/i386/xen/xen-asm.S */
+ "xen_irq_enable_direct_end",
+ "xen_irq_disable_direct_end",
+ "xen_save_fl_direct_end",
+ "xen_restore_fl_direct_end",
+ "xen_iret_start_crit",
+ "iret_restore_end",
+ "xen_iret_end_crit",
+ "hyper_iret",
+#endif /* CONFIG_XEN */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "system_call",
+ "common_interrupt",
+ "error_entry",
+ "debug",
+ "nmi",
+ "int3",
+ "double_fault",
+ "stack_segment",
+ "machine_check",
+ "kdb_call",
+};
+
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar)
+{
+	/* x86_64 interrupt stacks are 16-byte aligned and you must get the
+ * next rsp from stack, it cannot be statically calculated. Do not
+ * include the word at rsp, it is pushed by hardware but is treated as
+ * a normal software return value.
+ *
+ * When an IST switch occurs (e.g. NMI) then the saved rsp points to
+	 * another stack entirely. Assume that the IST stack is 16-byte
+ * aligned and just return the size of the hardware data on this stack.
+ * The stack unwind code will take care of the stack switch.
+ */
+ kdb_machreg_t saved_rsp = *((kdb_machreg_t *)rsp + 3);
+ int hardware_pushed = saved_rsp - rsp - KDB_WORD_SIZE;
+ if (hardware_pushed < 4 * KDB_WORD_SIZE ||
+ saved_rsp < ar->stack.logical_start ||
+ saved_rsp >= ar->stack.logical_end)
+ return 4 * KDB_WORD_SIZE;
+ else
+ return hardware_pushed;
+}
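+
+/* For example, an interrupt taken on the current stack with no alignment
+ * padding pushes ss, rsp, eflags, cs and rip; the saved rsp is read from
+ * the fourth word above the interrupt rsp and hardware_pushed works out to
+ * 4 * KDB_WORD_SIZE, the rip word being excluded as described above.
+ */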
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+ bb_reg_code_set_value(BBRG_R8, BBRG_R8);
+ bb_reg_code_set_value(BBRG_R9, BBRG_R9);
+ bb_reg_code_set_value(BBRG_R10, BBRG_R10);
+ bb_reg_code_set_value(BBRG_R11, BBRG_R11);
+ bb_reg_code_set_value(BBRG_R12, BBRG_R12);
+ bb_reg_code_set_value(BBRG_R13, BBRG_R13);
+ bb_reg_code_set_value(BBRG_R14, BBRG_R14);
+ bb_reg_code_set_value(BBRG_R15, BBRG_R15);
+}
+
+/* x86_64 does not have a special case for __switch_to */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strncmp(bb_func_name, "__down", 6) == 0 ||
+ strncmp(bb_func_name, "__up", 4) == 0 ||
+ strncmp(bb_func_name, "stub_", 5) == 0 ||
+ strcmp(bb_func_name, "ret_from_fork") == 0 ||
+ strcmp(bb_func_name, "ptregscall_common") == 0;
+}
+
+#else /* !CONFIG_X86_64 */
+
+/* Registers that can be used to pass parameters, in the order that parameters
+ * are passed.
+ */
+
+static const enum bb_reg_code
+bb_param_reg[] = {
+ BBRG_RAX,
+ BBRG_RDX,
+ BBRG_RCX,
+};
+
+static const enum bb_reg_code
+bb_preserved_reg[] = {
+ BBRG_RBX,
+ BBRG_RBP,
+ BBRG_RSP,
+ BBRG_RSI,
+ BBRG_RDI,
+};
+
+static const struct bb_mem_contains full_pt_regs[] = {
+ { 0x18, BBRG_RAX },
+ { 0x14, BBRG_RBP },
+ { 0x10, BBRG_RDI },
+ { 0x0c, BBRG_RSI },
+ { 0x08, BBRG_RDX },
+ { 0x04, BBRG_RCX },
+ { 0x00, BBRG_RBX },
+};
+static const struct bb_mem_contains no_memory[] = {
+};
+/* Hardware has already pushed an error_code on the stack. Use undefined just
+ * to set the initial stack offset.
+ */
+static const struct bb_mem_contains error_code[] = {
+ { 0x0, BBRG_UNDEFINED },
+};
+/* rbx already pushed */
+static const struct bb_mem_contains rbx_pushed[] = {
+ { 0x0, BBRG_RBX },
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_mem_contains mem_fpu_reg_round[] = {
+ { 0xc, BBRG_RBP },
+ { 0x8, BBRG_RSI },
+ { 0x4, BBRG_RDI },
+ { 0x0, BBRG_RBX },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static const struct bb_reg_contains all_regs[] = {
+ [BBRG_RAX] = { BBRG_RAX, 0 },
+ [BBRG_RBX] = { BBRG_RBX, 0 },
+ [BBRG_RCX] = { BBRG_RCX, 0 },
+ [BBRG_RDX] = { BBRG_RDX, 0 },
+ [BBRG_RDI] = { BBRG_RDI, 0 },
+ [BBRG_RSI] = { BBRG_RSI, 0 },
+ [BBRG_RBP] = { BBRG_RBP, 0 },
+ [BBRG_RSP] = { BBRG_OSP, 0 },
+};
+static const struct bb_reg_contains no_regs[] = {
+};
+#ifdef CONFIG_MATH_EMULATION
+static const struct bb_reg_contains reg_fpu_reg_round[] = {
+ [BBRG_RBP] = { BBRG_OSP, -0x4 },
+ [BBRG_RSP] = { BBRG_OSP, -0x10 },
+};
+#endif /* CONFIG_MATH_EMULATION */
+
+static struct bb_name_state bb_special_cases[] = {
+
+ /* First the cases that pass data only in memory. We do not check any
+ * register state for these cases.
+ */
+
+ /* Simple cases, no exceptions */
+ NS_MEM("check_userspace", full_pt_regs, 0),
+ NS_MEM("device_not_available_emulate", full_pt_regs, 0),
+ NS_MEM("ldt_ss", full_pt_regs, 0),
+ NS_MEM("no_singlestep", full_pt_regs, 0),
+ NS_MEM("restore_all", full_pt_regs, 0),
+ NS_MEM("restore_nocheck", full_pt_regs, 0),
+ NS_MEM("restore_nocheck_notrace", full_pt_regs, 0),
+ NS_MEM("ret_from_exception", full_pt_regs, 0),
+ NS_MEM("ret_from_fork", full_pt_regs, 0),
+ NS_MEM("ret_from_intr", full_pt_regs, 0),
+ NS_MEM("work_notifysig", full_pt_regs, 0),
+ NS_MEM("work_pending", full_pt_regs, 0),
+
+#ifdef CONFIG_PREEMPT
+ NS_MEM("resume_kernel", full_pt_regs, 0),
+#endif /* CONFIG_PREEMPT */
+
+ NS_MEM("common_interrupt", error_code, 0),
+ NS_MEM("error_code", error_code, 0),
+
+ NS_MEM("bad_put_user", rbx_pushed, 0),
+
+ NS_MEM_FROM("resume_userspace", "syscall_badsys",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_fault",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM_FROM("resume_userspace", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ /* Too difficult to trace through the various vm86 functions for now.
+ * They are C functions that start off with some memory state, fiddle
+ * the registers then jmp directly to resume_userspace. For the
+ * moment, just assume that they are valid and do no checks.
+ */
+ NS_FROM("resume_userspace", "do_int",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "do_sys_vm86",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_fault",
+ no_memory, no_regs, 0, 0, 0),
+ NS_FROM("resume_userspace", "handle_vm86_trap",
+ no_memory, no_regs, 0, 0, 0),
+ NS_MEM("resume_userspace", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_badsys", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM("syscall_badsys", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_call", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_call", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit", "syscall_trace_entry",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_exit_work", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RAX) | BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_exit_work", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_exit_work", full_pt_regs, 0),
+
+ NS_MEM_FROM("syscall_trace_entry", "ia32_sysenter_target",
+ full_pt_regs, BB_SKIP(RBP)),
+ NS_MEM_FROM("syscall_trace_entry", "system_call",
+ full_pt_regs, BB_SKIP(RAX)),
+ NS_MEM("syscall_trace_entry", full_pt_regs, 0),
+
+ /* Now the cases that pass data in registers. We do not check any
+ * memory state for these cases.
+ */
+
+ NS_REG("syscall_fault", all_regs, 0),
+
+ NS_REG("bad_get_user", all_regs,
+ BB_SKIP(RAX) | BB_SKIP(RDX)),
+
+ /* Finally the cases that pass data in both registers and memory.
+ */
+
+ /* This entry is redundant now because bb_fixup_switch_to() hides the
+	 * jmp __switch_to case; however, the entry is left here as
+ * documentation.
+ *
+ * NS("__switch_to", no_memory, no_regs, 0, 0, 0),
+ */
+
+ NS("iret_exc", no_memory, all_regs, 0, 0, 0x20),
+
+#ifdef CONFIG_MATH_EMULATION
+ NS("fpu_reg_round", mem_fpu_reg_round, reg_fpu_reg_round, 0, 0, 0),
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_spurious[] = {
+ /* ret_from_exception */
+ "ret_from_intr",
+ "check_userspace",
+ "resume_userspace",
+ /* resume_kernel */
+#ifdef CONFIG_PREEMPT
+ "need_resched",
+#endif /* CONFIG_PREEMPT */
+ /* ia32_sysenter_target */
+ "sysenter_past_esp",
+ /* system_call */
+ "no_singlestep",
+ "syscall_call",
+ "syscall_exit",
+ "restore_all",
+ "restore_nocheck",
+ "restore_nocheck_notrace",
+ "ldt_ss",
+ /* do not include iret_exc, it is in a .fixup section */
+ /* work_pending */
+ "work_resched",
+ "work_notifysig",
+#ifdef CONFIG_VM86
+ "work_notifysig_v86",
+#endif /* CONFIG_VM86 */
+ /* page_fault */
+ "error_code",
+ /* device_not_available */
+ "device_not_available_emulate",
+ /* debug */
+ "debug_esp_fix_insn",
+ "debug_stack_correct",
+ /* nmi */
+ "nmi_stack_correct",
+ "nmi_stack_fixup",
+ "nmi_debug_stack_check",
+ "nmi_espfix_stack",
+#ifdef CONFIG_HIBERNATION
+ /* restore_image */
+ "copy_loop",
+ "done",
+#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_KPROBES
+ /* jprobe_return */
+ "jprobe_return_end",
+#endif /* CONFIG_KPROBES */
+#ifdef CONFIG_KEXEC
+ /* relocate_kernel */
+ "relocate_new_kernel",
+#endif /* CONFIG_KEXEC */
+#ifdef CONFIG_MATH_EMULATION
+ /* assorted *.S files in arch/i386/math_emu */
+ "Denorm_done",
+ "Denorm_shift_more_than_32",
+ "Denorm_shift_more_than_63",
+ "Denorm_shift_more_than_64",
+ "Do_unmasked_underflow",
+ "Exp_not_underflow",
+ "fpu_Arith_exit",
+ "fpu_reg_round",
+ "fpu_reg_round_signed_special_exit",
+ "fpu_reg_round_special_exit",
+ "L_accum_done",
+ "L_accum_loaded",
+ "L_accum_loop",
+ "L_arg1_larger",
+ "L_bugged",
+ "L_bugged_1",
+ "L_bugged_2",
+ "L_bugged_3",
+ "L_bugged_4",
+ "L_bugged_denorm_486",
+ "L_bugged_round24",
+ "L_bugged_round53",
+ "L_bugged_round64",
+ "LCheck_24_round_up",
+ "LCheck_53_round_up",
+ "LCheck_Round_Overflow",
+ "LCheck_truncate_24",
+ "LCheck_truncate_53",
+ "LCheck_truncate_64",
+ "LDenormal_adj_exponent",
+ "L_deNormalised",
+ "LDo_24_round_up",
+ "LDo_2nd_32_bits",
+ "LDo_2nd_div",
+ "LDo_3rd_32_bits",
+ "LDo_3rd_div",
+ "LDo_53_round_up",
+ "LDo_64_round_up",
+ "L_done",
+ "LDo_truncate_24",
+ "LDown_24",
+ "LDown_53",
+ "LDown_64",
+ "L_entry_bugged",
+ "L_error_exit",
+ "L_exactly_32",
+ "L_exception_exit",
+ "L_exit",
+ "L_exit_nuo_valid",
+ "L_exit_nuo_zero",
+ "L_exit_valid",
+ "L_extent_zero",
+ "LFirst_div_done",
+ "LFirst_div_not_1",
+ "L_Full_Division",
+ "LGreater_Half_24",
+ "LGreater_Half_53",
+ "LGreater_than_1",
+ "LLess_than_1",
+ "L_Make_denorm",
+ "L_more_31_no_low",
+ "L_more_63_no_low",
+ "L_more_than_31",
+ "L_more_than_63",
+ "L_more_than_64",
+ "L_more_than_65",
+ "L_more_than_95",
+ "L_must_be_zero",
+ "L_n_exit",
+ "L_no_adjust",
+ "L_no_bit_lost",
+ "L_no_overflow",
+ "L_no_precision_loss",
+ "L_Normalised",
+ "L_norm_bugged",
+ "L_n_shift_1",
+ "L_nuo_shift_1",
+ "L_overflow",
+ "L_precision_lost_down",
+ "L_precision_lost_up",
+ "LPrevent_2nd_overflow",
+ "LPrevent_3rd_overflow",
+ "LPseudoDenormal",
+ "L_Re_normalise",
+ "LResult_Normalised",
+ "L_round",
+ "LRound_large",
+ "LRound_nearest_24",
+ "LRound_nearest_53",
+ "LRound_nearest_64",
+ "LRound_not_small",
+ "LRound_ovfl",
+ "LRound_precision",
+ "LRound_prep",
+ "L_round_the_result",
+ "LRound_To_24",
+ "LRound_To_53",
+ "LRound_To_64",
+ "LSecond_div_done",
+ "LSecond_div_not_1",
+ "L_shift_1",
+ "L_shift_32",
+ "L_shift_65_nc",
+ "L_shift_done",
+ "Ls_less_than_32",
+ "Ls_more_than_63",
+ "Ls_more_than_95",
+ "L_Store_significand",
+ "L_subtr",
+ "LTest_over",
+ "LTruncate_53",
+ "LTruncate_64",
+ "L_underflow",
+ "L_underflow_to_zero",
+ "LUp_24",
+ "LUp_53",
+ "LUp_64",
+ "L_zero",
+ "Normalise_result",
+ "Signal_underflow",
+ "sqrt_arg_ge_2",
+ "sqrt_get_more_precision",
+ "sqrt_more_prec_large",
+ "sqrt_more_prec_ok",
+ "sqrt_more_prec_small",
+ "sqrt_near_exact",
+ "sqrt_near_exact_large",
+ "sqrt_near_exact_ok",
+ "sqrt_near_exact_small",
+ "sqrt_near_exact_x",
+ "sqrt_prelim_no_adjust",
+ "sqrt_round_result",
+ "sqrt_stage_2_done",
+ "sqrt_stage_2_error",
+ "sqrt_stage_2_finish",
+ "sqrt_stage_2_positive",
+ "sqrt_stage_3_error",
+ "sqrt_stage_3_finished",
+ "sqrt_stage_3_no_error",
+ "sqrt_stage_3_positive",
+ "Unmasked_underflow",
+ "xExp_not_underflow",
+#endif /* CONFIG_MATH_EMULATION */
+};
+
+static const char *bb_hardware_handlers[] = {
+ "ret_from_exception",
+ "system_call",
+ "work_pending",
+ "syscall_fault",
+ "page_fault",
+ "coprocessor_error",
+ "simd_coprocessor_error",
+ "device_not_available",
+ "debug",
+ "nmi",
+ "int3",
+ "overflow",
+ "bounds",
+ "invalid_op",
+ "coprocessor_segment_overrun",
+ "invalid_TSS",
+ "segment_not_present",
+ "stack_segment",
+ "general_protection",
+ "alignment_check",
+ "kdb_call",
+ "divide_error",
+ "machine_check",
+ "spurious_interrupt_bug",
+};
+
+static int
+bb_hardware_pushed_arch(kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar)
+{
+ return (2 * KDB_WORD_SIZE);
+}
+
+static void
+bb_start_block0(void)
+{
+ bb_reg_code_set_value(BBRG_RAX, BBRG_RAX);
+ bb_reg_code_set_value(BBRG_RBX, BBRG_RBX);
+ bb_reg_code_set_value(BBRG_RCX, BBRG_RCX);
+ bb_reg_code_set_value(BBRG_RDX, BBRG_RDX);
+ bb_reg_code_set_value(BBRG_RDI, BBRG_RDI);
+ bb_reg_code_set_value(BBRG_RSI, BBRG_RSI);
+ bb_reg_code_set_value(BBRG_RBP, BBRG_RBP);
+ bb_reg_code_set_value(BBRG_RSP, BBRG_OSP);
+}
+
+/* The i386 code that switches stack in a context switch is an extremely
+ * special case. It saves the rip pointing to a label that is not otherwise
+ * referenced, saves the current rsp then pushes a word. The magic code that
+ * resumes the new task picks up the saved rip and rsp, effectively referencing
+ * a label that otherwise is not used and ignoring the pushed word.
+ *
+ * The simplest way to handle this very strange case is to recognise jmp
+ * address <__switch_to> and treat it as a popfl instruction. This avoids
+ * terminating the block on this jmp and removes one word from the stack state,
+ * which is the end effect of all the magic code.
+ *
+ * Called with the instruction line, starting after the first ':'.
+ */
+
+static void
+bb_fixup_switch_to(char *p)
+{
+ char *p1 = p;
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "jmp", 3))
+ return;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ p += strcspn(p, " \t"); /* end of address */
+ p += strspn(p, " \t"); /* start of comment */
+ if (strcmp(p, "<__switch_to>") == 0)
+ strcpy(p1, "popfl");
+}
+
+static int
+bb_asmlinkage_arch(void)
+{
+ return strcmp(bb_func_name, "ret_from_exception") == 0 ||
+ strcmp(bb_func_name, "syscall_trace_entry") == 0;
+}
+
+#endif /* CONFIG_X86_64 */
+
+
+/*============================================================================*/
+/* */
+/* Common code and data. */
+/* */
+/*============================================================================*/
+
+
+/* Tracking registers by decoding the instructions is quite a bit harder than
+ * doing the same tracking using compiler generated information. Register
+ * contents can remain in the same register, they can be copied to other
+ * registers, they can be stored on stack or they can be modified/overwritten.
+ * At any one time, there are 0 or more copies of the original value that was
+ * supplied in each register on input to the current function. If a register
+ * exists in multiple places, one copy of that register is the master version,
+ * the others are temporary copies which may or may not be destroyed before the
+ * end of the function.
+ *
+ * The compiler knows which copy of a register is the master and which are
+ * temporary copies, which makes it relatively easy to track register contents
+ * as they are saved and restored. Without that compiler based knowledge, this
+ * code has to track _every_ possible copy of each register, simply because we
+ * do not know which is the master copy and which are temporary copies which
+ * may be destroyed later.
+ *
+ * It gets worse: registers that contain parameters can be copied to other
+ * registers which are then saved on stack in a lower level function. Also the
+ * stack pointer may be held in multiple registers (typically RSP and RBP)
+ * which contain different offsets from the base of the stack on entry to this
+ * function. All of which means that we have to track _all_ register
+ * movements, or at least as much as possible.
+ *
+ * Start with the basic block that contains the start of the function, by
+ * definition all registers contain their initial value. Track each
+ * instruction's effect on register contents, this includes reading from a
+ * parameter register before any write to that register, IOW the register
+ * really does contain a parameter. The register state is represented by a
+ * dynamically sized array with each entry containing:
+ *
+ * Register name
+ * Location it is copied to (another register or stack + offset)
+ *
+ * Besides the register tracking array, we track which parameter registers are
+ * read before being written, to determine how many parameters are passed in
+ * registers. We also track which registers contain stack pointers, including
+ * their offset from the original stack pointer on entry to the function.
+ *
+ * At each exit from the current basic block (via JMP instruction or drop
+ * through), the register state is cloned to form the state on input to the
+ * target basic block and the target is marked for processing using this state.
+ * When there are multiple ways to enter a basic block (e.g. several JMP
+ * instructions referencing the same target) then there will be multiple sets
+ * of register state to form the "input" for that basic block; there is no
+ * guarantee that all paths to that block will have the same register state.
+ *
+ * As each target block is processed, all the known sets of register state are
+ * merged to form a suitable subset of the state which agrees with all the
+ * inputs. The most common case is where one path to this block copies a
+ * register to another register but another path does not; therefore the copy
+ * is only temporary and should not be propagated into this block.
+ *
+ * If the target block already has an input state from the current transfer
+ * point and the new input state is identical to the previous input state then
+ * we have reached a steady state for the arc from the current location to the
+ * target block. Therefore there is no need to process the target block again.
+ *
+ * The steps of "process a block, create state for target block(s), pick a new
+ * target block, merge state for target block, process target block" will
+ * continue until all the state changes have propagated all the way down the
+ * basic block tree, including round any cycles in the tree. The merge step
+ * only deletes tracking entries from the input state(s), it never adds a
+ * tracking entry. Therefore the overall algorithm is guaranteed to converge
+ * to a steady state; the worst possible case is that every tracking entry into
+ * a block is deleted, which will result in an empty output state.
+ *
+ * As each instruction is decoded, it is checked to see if this is the point at
+ * which execution left this function. This can be a call to another function
+ * (actually the return address to this function) or is the instruction which
+ * was about to be executed when an interrupt occurred (including an oops).
+ * Save the register state at this point.
+ *
+ * We always know what the registers contain when execution left this function.
+ * For an interrupt, the registers are in struct pt_regs. For a call to
+ * another function, we have already deduced the register state on entry to the
+ * other function by unwinding to the start of that function. Given the
+ * register state on exit from this function plus the known register contents
+ * on entry to the next function, we can determine the stack pointer value on
+ * input to this function. That in turn lets us calculate the address of input
+ * registers that have been stored on stack, giving us the input parameters.
+ * Finally the stack pointer gives us the return address which is the exit
+ * point from the calling function, repeat the unwind process on that function.
+ *
+ * The data that tracks which registers contain input parameters is function
+ * global, not local to any basic block. To determine which input registers
+ * contain parameters, we have to decode the entire function. Otherwise an
+ * exit early in the function might not have read any parameters yet.
+ */
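+
+/* A worked sketch of the tracking (hypothetical fragment, for illustration
+ * only):
+ *
+ *	push   %rbp		rsp = osp-0x8, *(osp-0x8) = rbp
+ *	mov    %rsp,%rbp	rbp = osp-0x8, a second copy of the stack pointer
+ *	mov    %rdi,%rbx	rbx = rdi, parameter 1 now has two copies
+ *	mov    %rbx,-0x10(%rbp)	*(osp-0x18) = rdi
+ *
+ * After these instructions the state records that parameter 1 (rdi) lives in
+ * rbx and at osp-0x18, and that rsp and rbp both hold stack pointers at
+ * offset -0x8 from the original stack pointer, so the unwinder can recover
+ * the parameter from any of those copies.
+ */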
+
+/* Record memory contents in terms of the values that were passed to this
+ * function, IOW track which memory locations contain an input value. A memory
+ * location's contents can be undefined, it can contain an input register value
+ * or it can contain an offset from the original stack pointer.
+ *
+ * This structure is used to record register contents that have been stored in
+ * memory. Location (BBRG_OSP + 'offset_address') contains the input value
+ * from register 'value'. When 'value' is BBRG_OSP then offset_value contains
+ * the offset from the original stack pointer that was stored in this memory
+ * location. When 'value' is not BBRG_OSP then the memory location contains
+ * the original contents of an input register and offset_value is ignored.
+ *
+ * An input register 'value' can be stored in more than one register and/or in
+ * more than one memory location.
+ */
+
+struct bb_memory_contains
+{
+ short offset_address;
+ enum bb_reg_code value: 8;
+ short offset_value;
+};
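+
+/* Example encodings (hypothetical, for illustration only): "push %rsi" at
+ * the start of a function stores the input value of rsi at osp-0x8, which
+ * is recorded as { -0x8, BBRG_RSI, 0 }.  A later "mov %rsp,-0x10(%rsp)"
+ * stores a stack pointer whose value is osp-0x8 at osp-0x18, recorded as
+ * { -0x18, BBRG_OSP, -0x8 }.
+ */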
+
+/* Track the register state in each basic block. */
+
+struct bb_reg_state
+{
+ /* Indexed by register value 'reg - BBRG_RAX' */
+ struct bb_reg_contains contains[KDB_INT_REGISTERS];
+ int ref_count;
+ int mem_count;
+ /* dynamic size for memory locations, see mem_count */
+ struct bb_memory_contains memory[0];
+};
+
+static struct bb_reg_state *bb_reg_state, *bb_exit_state;
+static int bb_reg_state_max, bb_reg_params, bb_memory_params;
+
+struct bb_actual
+{
+ bfd_vma value;
+ int valid;
+};
+
+/* Contains the actual hex value of a register, plus a valid bit. Indexed by
+ * register value 'reg - BBRG_RAX'
+ */
+static struct bb_actual bb_actual[KDB_INT_REGISTERS];
+
+static bfd_vma bb_func_start, bb_func_end;
+static bfd_vma bb_common_interrupt, bb_error_entry, bb_ret_from_intr,
+ bb_thread_return, bb_sync_regs, bb_save_v86_state,
+ bb__sched_text_start, bb__sched_text_end;
+
+/* Record jmp instructions, both conditional and unconditional. These form the
+ * arcs between the basic blocks. This is also used to record the state when
+ * one block drops through into the next.
+ *
+ * A bb can have multiple associated bb_jmp entries, one for each jcc
+ * instruction plus at most one bb_jmp for the drop through case. If a bb
+ * drops through to the next bb then the drop through bb_jmp entry will be the
+ * last entry in the set of bb_jmp's that are associated with the bb. This is
+ * enforced by the fact that jcc entries are added during the disassembly phase
+ * of pass 1, the drop through entries are added near the end of pass 1.
+ *
+ * At address 'from' in this block, we have a jump to address 'to'. The
+ * register state at 'from' is copied to the target block.
+ */
+
+struct bb_jmp
+{
+ bfd_vma from;
+ bfd_vma to;
+ struct bb_reg_state *state;
+ unsigned int drop_through: 1;
+};
+
+struct bb
+{
+ bfd_vma start;
+ /* The end address of a basic block is sloppy. It can be the first
+ * byte of the last instruction in the block or it can be the last byte
+ * of the block.
+ */
+ bfd_vma end;
+ unsigned int changed: 1;
+ unsigned int drop_through: 1;
+};
+
+static struct bb **bb_list, *bb_curr;
+static int bb_max, bb_count;
+
+static struct bb_jmp *bb_jmp_list;
+static int bb_jmp_max, bb_jmp_count;
+
+/* Add a new bb entry to the list. This does an insert sort. */
+
+static struct bb *
+bb_new(bfd_vma order)
+{
+ int i, j;
+ struct bb *bb, *p;
+ if (bb_giveup)
+ return NULL;
+ if (bb_count == bb_max) {
+ struct bb **bb_list_new;
+ bb_max += 10;
+ bb_list_new = debug_kmalloc(bb_max*sizeof(*bb_list_new),
+ GFP_ATOMIC);
+ if (!bb_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_list_new, bb_list, bb_count*sizeof(*bb_list));
+ debug_kfree(bb_list);
+ bb_list = bb_list_new;
+ }
+ bb = debug_kmalloc(sizeof(*bb), GFP_ATOMIC);
+ if (!bb) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memset(bb, 0, sizeof(*bb));
+ for (i = 0; i < bb_count; ++i) {
+ p = bb_list[i];
+ if ((p->start && p->start > order) ||
+ (p->end && p->end > order))
+ break;
+ }
+ for (j = bb_count-1; j >= i; --j)
+ bb_list[j+1] = bb_list[j];
+ bb_list[i] = bb;
+ ++bb_count;
+ return bb;
+}
+
+/* Add a new bb_jmp entry to the list. This list is not sorted. */
+
+static struct bb_jmp *
+bb_jmp_new(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ struct bb_jmp *bb_jmp;
+ if (bb_giveup)
+ return NULL;
+ if (bb_jmp_count == bb_jmp_max) {
+ struct bb_jmp *bb_jmp_list_new;
+ bb_jmp_max += 10;
+ bb_jmp_list_new =
+ debug_kmalloc(bb_jmp_max*sizeof(*bb_jmp_list_new),
+ GFP_ATOMIC);
+ if (!bb_jmp_list_new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return NULL;
+ }
+ memcpy(bb_jmp_list_new, bb_jmp_list,
+ bb_jmp_count*sizeof(*bb_jmp_list));
+ debug_kfree(bb_jmp_list);
+ bb_jmp_list = bb_jmp_list_new;
+ }
+ bb_jmp = bb_jmp_list + bb_jmp_count++;
+ bb_jmp->from = from;
+ bb_jmp->to = to;
+ bb_jmp->drop_through = drop_through;
+ bb_jmp->state = NULL;
+ return bb_jmp;
+}
+
+static void
+bb_delete(int i)
+{
+ struct bb *bb = bb_list[i];
+ memcpy(bb_list+i, bb_list+i+1, (bb_count-i-1)*sizeof(*bb_list));
+ bb_list[--bb_count] = NULL;
+ debug_kfree(bb);
+}
+
+static struct bb *
+bb_add(bfd_vma start, bfd_vma end)
+{
+ int i;
+ struct bb *bb;
+ /* Ignore basic blocks whose start address is outside the current
+ * function. These occur for call instructions and for tail recursion.
+ */
+ if (start &&
+ (start < bb_func_start || start >= bb_func_end))
+ return NULL;
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ if ((start && bb->start == start) ||
+ (end && bb->end == end))
+ return bb;
+ }
+ bb = bb_new(start ? start : end);
+ if (bb) {
+ bb->start = start;
+ bb->end = end;
+ }
+ return bb;
+}
+
+static struct bb_jmp *
+bb_jmp_add(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ int i;
+ struct bb_jmp *bb_jmp;
+ for (i = 0, bb_jmp = bb_jmp_list; i < bb_jmp_count; ++i, ++bb_jmp) {
+ if (bb_jmp->from == from &&
+ bb_jmp->to == to &&
+ bb_jmp->drop_through == drop_through)
+ return bb_jmp;
+ }
+ bb_jmp = bb_jmp_new(from, to, drop_through);
+ return bb_jmp;
+}
+
+static unsigned long bb_curr_addr, bb_exit_addr;
+static char bb_buffer[256]; /* A bit too big to go on stack */
+
+/* Computed jmp uses 'jmp *addr(,%reg,[48])' where 'addr' is the start of a
+ * table of addresses that point into the current function. Run the table and
+ * generate bb starts for each target address plus a bb_jmp from this address
+ * to the target address.
+ *
+ * Only called for 'jmp' instructions, with the pointer starting at 'jmp'.
+ */
+
+static void
+bb_pass1_computed_jmp(char *p)
+{
+ unsigned long table, scale;
+ kdb_machreg_t addr;
+	struct bb *bb;
+ p += strcspn(p, " \t"); /* end of instruction */
+ p += strspn(p, " \t"); /* start of address */
+ if (*p++ != '*')
+ return;
+ table = simple_strtoul(p, &p, 0);
+ if (strncmp(p, "(,%", 3) != 0)
+ return;
+ p += 3;
+ p += strcspn(p, ","); /* end of reg */
+ if (*p++ != ',')
+ return;
+ scale = simple_strtoul(p, &p, 0);
+ if (scale != KDB_WORD_SIZE || strcmp(p, ")"))
+ return;
+ while (!bb_giveup) {
+ if (kdb_getword(&addr, table, sizeof(addr)))
+ return;
+ if (addr < bb_func_start || addr >= bb_func_end)
+ return;
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+ table += KDB_WORD_SIZE;
+ }
+}
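+
+/* For example, a switch statement compiled to the (hypothetical) computed
+ * jmp "jmp *0x12345678(,%rax,8)" is parsed as table 0x12345678 with scale
+ * KDB_WORD_SIZE (8 on x86_64); the loop above then reads table entries until
+ * one falls outside the function, adding a bb start and a bb_jmp arc for
+ * each entry that lands inside the function.
+ */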
+
+/* Pass 1, identify the start and end of each basic block */
+
+static int
+bb_dis_pass1(PTR file, const char *fmt, ...)
+{
+ int l = strlen(bb_buffer);
+ char *p;
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
+ va_end(ap);
+ if ((p = strchr(bb_buffer, '\n'))) {
+ *p = '\0';
+		/* ret[q], iret[q], sysexit, sysret, ud2a or jmp[q] end a
+		 * block, as does a call to a function marked noret.
+ */
+ p = bb_buffer;
+ p += strcspn(p, ":");
+ if (*p++ == ':') {
+ bb_fixup_switch_to(p);
+ p += strspn(p, " \t"); /* start of instruction */
+ if (strncmp(p, "ret", 3) == 0 ||
+ strncmp(p, "iret", 4) == 0 ||
+ strncmp(p, "sysexit", 7) == 0 ||
+ strncmp(p, "sysret", 6) == 0 ||
+ strncmp(p, "ud2a", 4) == 0 ||
+ strncmp(p, "jmp", 3) == 0) {
+ if (strncmp(p, "jmp", 3) == 0)
+ bb_pass1_computed_jmp(p);
+ bb_add(0, bb_curr_addr);
+			}
+ if (strncmp(p, "call", 4) == 0) {
+ strsep(&p, " \t"); /* end of opcode */
+ if (p)
+ p += strspn(p, " \t"); /* operand(s) */
+ if (p && strchr(p, '<')) {
+ p = strchr(p, '<') + 1;
+ *strchr(p, '>') = '\0';
+ if (bb_noret(p))
+ bb_add(0, bb_curr_addr);
+ }
+			}
+ }
+ bb_buffer[0] = '\0';
+ }
+ return 0;
+}
+
+static void
+bb_printaddr_pass1(bfd_vma addr, disassemble_info *dip)
+{
+ kdb_symtab_t symtab;
+ unsigned int offset;
+	struct bb *bb;
+ /* disasm only calls the printaddr routine for the target of jmp, loop
+ * or call instructions, i.e. the start of a basic block. call is
+ * ignored by bb_add because the target address is outside the current
+ * function.
+ */
+ dip->fprintf_func(dip->stream, "0x%lx", addr);
+ kdbnearsym(addr, &symtab);
+ if (symtab.sym_name) {
+ dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
+ if ((offset = addr - symtab.sym_start))
+ dip->fprintf_func(dip->stream, "+0x%x", offset);
+ dip->fprintf_func(dip->stream, ">");
+ }
+ bb = bb_add(addr, 0);
+ if (bb)
+ bb_jmp_add(bb_curr_addr, addr, 0);
+}
+
+static void
+bb_pass1(void)
+{
+ int i;
+ unsigned long addr;
+ struct bb *bb;
+ struct bb_jmp *bb_jmp;
+
+	if (KDB_DEBUG(BB) || KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: func_name %s func_start " kdb_bfd_vma_fmt0
+ " func_end " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__,
+ bb_func_name,
+ bb_func_start,
+ bb_func_end);
+ kdb_di.fprintf_func = bb_dis_pass1;
+ kdb_di.print_address_func = bb_printaddr_pass1;
+
+ bb_add(bb_func_start, 0);
+ for (bb_curr_addr = bb_func_start;
+ bb_curr_addr < bb_func_end;
+ ++bb_curr_addr) {
+ unsigned char c;
+ if (kdb_getarea(c, bb_curr_addr)) {
+ kdb_printf("%s: unreadable function code at ",
+ __FUNCTION__);
+ kdb_symbol_print(bb_curr_addr, NULL, KDB_SP_DEFAULT);
+ kdb_printf(", giving up\n");
+ bb_giveup = 1;
+ return;
+ }
+ }
+ for (addr = bb_func_start; addr < bb_func_end; ) {
+ bb_curr_addr = addr;
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ }
+ if (bb_giveup)
+ goto out;
+
+	/* Special case: a block consisting of a single instruction which is
+	 * both the target of a jmp and an ending instruction, so we add two
+	 * blocks using the same address, one as a start and one as an
+ * end, in no guaranteed order. The end must be ordered after the
+ * start.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end && bb1->end == bb2->start) {
+ bb = bb_list[i+1];
+ bb_list[i+1] = bb_list[i];
+ bb_list[i] = bb;
+ }
+ }
+
+ /* Some bb have a start address, some have an end address. Collapse
+ * them into entries that have both start and end addresses. The first
+ * entry is guaranteed to have a start address.
+ */
+ for (i = 0; i < bb_count-1; ++i) {
+ struct bb *bb1 = bb_list[i], *bb2 = bb_list[i+1];
+ if (bb1->end)
+ continue;
+ if (bb2->start) {
+ bb1->end = bb2->start - 1;
+ bb1->drop_through = 1;
+ bb_jmp_add(bb1->end, bb2->start, 1);
+ } else {
+ bb1->end = bb2->end;
+ bb_delete(i+1);
+ }
+ }
+ bb = bb_list[bb_count-1];
+ if (!bb->end)
+ bb->end = bb_func_end - 1;
+
+ /* It would be nice to check that all bb have a valid start and end
+ * address but there is just too much garbage code in the kernel to do
+ * that check. Aligned functions in assembler code mean that there is
+ * space between the end of one function and the start of the next and
+ * that space contains previous code from the assembler's buffers. It
+ * looks like dead code with nothing that branches to it, so no start
+ * address. do_sys_vm86() ends with 'jmp resume_userspace' which the C
+	 * compiler does not know about, so gcc appends the normal exit code;
+	 * again, nothing branches to this dangling code.
+ *
+ * The best we can do is delete bb entries with no start address.
+ */
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->start)
+ bb_delete(i--);
+ }
+ for (i = 0; i < bb_count; ++i) {
+ struct bb *bb = bb_list[i];
+ if (!bb->end) {
+ kdb_printf("%s: incomplete bb state\n", __FUNCTION__);
+ bb_giveup = 1;
+ goto debug;
+ }
+ }
+
+out:
+ if (!KDB_DEBUG(BB))
+ return;
+debug:
+ kdb_printf("%s: end\n", __FUNCTION__);
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ kdb_printf(" bb[%d] start "
+ kdb_bfd_vma_fmt0
+ " end " kdb_bfd_vma_fmt0
+ " drop_through %d",
+ i, bb->start, bb->end, bb->drop_through);
+ kdb_printf("\n");
+ }
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ kdb_printf(" bb_jmp[%d] from "
+ kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d\n",
+ i, bb_jmp->from, bb_jmp->to, bb_jmp->drop_through);
+ }
+}
+
+/* Pass 2, record register changes in each basic block */
+
+/* For each opcode that we care about, indicate how it uses its operands. Most
+ * opcodes can be handled generically because they completely specify their
+ * operands in the instruction, however many opcodes have side effects such as
+ * reading or writing rax or updating rsp. Instructions that change registers
+ * that are not listed in the operands must be handled as special cases. In
+ * addition, instructions that copy registers while preserving their contents
+ * (push, pop, mov) or change the contents in a well defined way (add with an
+ * immediate, lea) must be handled as special cases in order to track the
+ * register contents.
+ *
+ * The tables below only list opcodes that are actually used in the Linux
+ * kernel, so they omit most of the floating point and all of the SSE type
+ * instructions. The operand usage entries only cater for accesses to memory
+ * and to the integer registers, accesses to floating point registers and flags
+ * are not relevant for kernel backtraces.
+ */
+
+enum bb_operand_usage {
+ BBOU_UNKNOWN = 0,
+	/* Generic entries. Because xchg can do any combination of
+	 * read src, write src, read dst and write dst, we need to
+ * define all 16 possibilities. These are ordered by rs = 1,
+ * rd = 2, ws = 4, wd = 8, bb_usage_x*() functions rely on this
+ * order.
+ */
+ BBOU_RS = 1, /* read src */ /* 1 */
+ BBOU_RD, /* read dst */ /* 2 */
+ BBOU_RSRD, /* 3 */
+ BBOU_WS, /* write src */ /* 4 */
+ BBOU_RSWS, /* 5 */
+ BBOU_RDWS, /* 6 */
+ BBOU_RSRDWS, /* 7 */
+ BBOU_WD, /* write dst */ /* 8 */
+ BBOU_RSWD, /* 9 */
+ BBOU_RDWD, /* 10 */
+ BBOU_RSRDWD, /* 11 */
+ BBOU_WSWD, /* 12 */
+ BBOU_RSWSWD, /* 13 */
+ BBOU_RDWSWD, /* 14 */
+ BBOU_RSRDWSWD, /* 15 */
+ /* opcode specific entries */
+ BBOU_ADD,
+ BBOU_CALL,
+ BBOU_CBW,
+ BBOU_CMOV,
+ BBOU_CMPXCHG,
+ BBOU_CMPXCHGD,
+ BBOU_CPUID,
+ BBOU_CWD,
+ BBOU_DIV,
+ BBOU_IDIV,
+ BBOU_IMUL,
+ BBOU_IRET,
+ BBOU_JMP,
+ BBOU_LAHF,
+ BBOU_LEA,
+ BBOU_LEAVE,
+ BBOU_LODS,
+ BBOU_LOOP,
+ BBOU_LSS,
+ BBOU_MONITOR,
+ BBOU_MOV,
+ BBOU_MOVS,
+ BBOU_MUL,
+ BBOU_MWAIT,
+ BBOU_NOP,
+ BBOU_OUTS,
+ BBOU_POP,
+ BBOU_POPF,
+ BBOU_PUSH,
+ BBOU_PUSHF,
+ BBOU_RDMSR,
+ BBOU_RDTSC,
+ BBOU_RET,
+ BBOU_SAHF,
+ BBOU_SCAS,
+ BBOU_SUB,
+ BBOU_SYSEXIT,
+ BBOU_SYSRET,
+ BBOU_WRMSR,
+ BBOU_XADD,
+ BBOU_XCHG,
+ BBOU_XOR,
+};
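+
+/* For example, BBOU_RSWD (9) is BBOU_RS (1) plus BBOU_WD (8): the
+ * instruction reads its source and overwrites its destination, which is how
+ * generic two operand instructions such as bsf are described in the table
+ * below.
+ */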
+
+struct bb_opcode_usage {
+ int length;
+ enum bb_operand_usage usage;
+ const char *opcode;
+};
+
+/* This table is sorted in alphabetical order of opcode, except that the
+ * trailing '"' is treated as a high value. For example, 'in' sorts after
+ * 'inc', 'bt' after 'btc'. This modified sort order ensures that shorter
+ * opcodes come after long ones. A normal sort would put 'in' first, so 'in'
+ * would match both 'inc' and 'in'. When adding any new entries to this table,
+ * be careful to put shorter entries last in their group.
+ *
+ * To automatically sort the table (in vi)
+ * Mark the first and last opcode line with 'a and 'b
+ * 'a
+ * !'bsed -e 's/"}/}}/' | LANG=C sort -t '"' -k2 | sed -e 's/}}/"}/'
+ *
+ * If a new instruction has to be added, first consider if it affects registers
+ * other than those listed in the operands. Also consider if you want to track
+ * the results of issuing the instruction, IOW can you extract useful
+ * information by looking in detail at the modified registers or memory. If
+ * either test is true then you need a special case to handle the instruction.
+ *
+ * The generic entries at the start of enum bb_operand_usage all have one thing
+ * in common, if a register or memory location is updated then that location
+ * becomes undefined, i.e. we lose track of anything that was previously saved
+ * in that location. So only use a generic BBOU_* value when the result of the
+ * instruction cannot be calculated exactly _and_ when all the affected
+ * registers are listed in the operands.
+ *
+ * Examples:
+ *
+ * 'call' does not generate a known result, but as a side effect of call,
+ * several scratch registers become undefined, so it needs a special BBOU_CALL
+ * entry.
+ *
+ * 'adc' generates a variable result, it depends on the carry flag, so 'adc'
+ * gets a generic entry. 'add' can generate an exact result (add with
+ * immediate on a register that points to the stack) or it can generate an
+ * unknown result (add a variable, or add immediate to a register that does not
+ * contain a stack pointer) so 'add' has its own BBOU_ADD entry.
+ */
+
+static const struct bb_opcode_usage
+bb_opcode_usage_all[] = {
+ {3, BBOU_RSRDWD, "adc"},
+ {3, BBOU_ADD, "add"},
+ {3, BBOU_RSRDWD, "and"},
+ {3, BBOU_RSWD, "bsf"},
+ {3, BBOU_RSWD, "bsr"},
+ {5, BBOU_RSWS, "bswap"},
+ {3, BBOU_RSRDWD, "btc"},
+ {3, BBOU_RSRDWD, "btr"},
+ {3, BBOU_RSRDWD, "bts"},
+ {2, BBOU_RSRD, "bt"},
+ {4, BBOU_CALL, "call"},
+ {4, BBOU_CBW, "cbtw"}, /* Intel cbw */
+ {3, BBOU_NOP, "clc"},
+ {3, BBOU_NOP, "cld"},
+ {7, BBOU_RS, "clflush"},
+ {4, BBOU_NOP, "clgi"},
+ {3, BBOU_NOP, "cli"},
+ {4, BBOU_CWD, "cltd"}, /* Intel cdq */
+ {4, BBOU_CBW, "cltq"}, /* Intel cdqe */
+ {4, BBOU_NOP, "clts"},
+ {4, BBOU_CMOV, "cmov"},
+ {9, BBOU_CMPXCHGD,"cmpxchg16"},
+ {8, BBOU_CMPXCHGD,"cmpxchg8"},
+ {7, BBOU_CMPXCHG, "cmpxchg"},
+ {3, BBOU_RSRD, "cmp"},
+ {5, BBOU_CPUID, "cpuid"},
+	{4, BBOU_CWD,     "cqto"},     /* Intel cqo */
+ {4, BBOU_CWD, "cwtd"}, /* Intel cwd */
+ {4, BBOU_CBW, "cwtl"}, /* Intel cwde */
+ {4, BBOU_NOP, "data"}, /* alternative ASM_NOP<n> generates data16 on x86_64 */
+ {3, BBOU_RSWS, "dec"},
+ {3, BBOU_DIV, "div"},
+ {5, BBOU_RS, "fdivl"},
+ {5, BBOU_NOP, "finit"},
+ {6, BBOU_RS, "fistpl"},
+ {4, BBOU_RS, "fldl"},
+ {4, BBOU_RS, "fmul"},
+ {6, BBOU_NOP, "fnclex"},
+ {6, BBOU_NOP, "fninit"},
+ {6, BBOU_RS, "fnsave"},
+ {7, BBOU_NOP, "fnsetpm"},
+ {6, BBOU_RS, "frstor"},
+ {5, BBOU_WS, "fstsw"},
+ {5, BBOU_RS, "fsubp"},
+ {5, BBOU_NOP, "fwait"},
+ {7, BBOU_RS, "fxrstor"},
+ {6, BBOU_RS, "fxsave"},
+ {3, BBOU_NOP, "hlt"},
+ {4, BBOU_IDIV, "idiv"},
+ {4, BBOU_IMUL, "imul"},
+ {3, BBOU_RSWS, "inc"},
+ {3, BBOU_NOP, "int"},
+ {7, BBOU_RSRD, "invlpga"},
+ {6, BBOU_RS, "invlpg"},
+ {2, BBOU_RSWD, "in"},
+ {4, BBOU_IRET, "iret"},
+ {1, BBOU_JMP, "j"},
+ {4, BBOU_LAHF, "lahf"},
+ {3, BBOU_RSWD, "lar"},
+ {5, BBOU_RS, "lcall"},
+ {5, BBOU_LEAVE, "leave"},
+ {3, BBOU_LEA, "lea"},
+ {6, BBOU_NOP, "lfence"},
+ {4, BBOU_RS, "lgdt"},
+ {4, BBOU_RS, "lidt"},
+ {4, BBOU_RS, "ljmp"},
+ {4, BBOU_RS, "lldt"},
+ {4, BBOU_RS, "lmsw"},
+ {4, BBOU_LODS, "lods"},
+ {4, BBOU_LOOP, "loop"},
+ {4, BBOU_NOP, "lret"},
+ {3, BBOU_RSWD, "lsl"},
+ {3, BBOU_LSS, "lss"},
+ {3, BBOU_RS, "ltr"},
+ {6, BBOU_NOP, "mfence"},
+ {7, BBOU_MONITOR, "monitor"},
+ {4, BBOU_MOVS, "movs"},
+ {3, BBOU_MOV, "mov"},
+ {3, BBOU_MUL, "mul"},
+ {5, BBOU_MWAIT, "mwait"},
+ {3, BBOU_RSWS, "neg"},
+ {3, BBOU_NOP, "nop"},
+ {3, BBOU_RSWS, "not"},
+ {2, BBOU_RSRDWD, "or"},
+ {4, BBOU_OUTS, "outs"},
+ {3, BBOU_RSRD, "out"},
+ {5, BBOU_NOP, "pause"},
+ {4, BBOU_POPF, "popf"},
+ {3, BBOU_POP, "pop"},
+ {8, BBOU_RS, "prefetch"},
+ {5, BBOU_PUSHF, "pushf"},
+ {4, BBOU_PUSH, "push"},
+ {3, BBOU_RSRDWD, "rcl"},
+ {3, BBOU_RSRDWD, "rcr"},
+ {5, BBOU_RDMSR, "rdmsr"},
+ {5, BBOU_RDMSR, "rdpmc"}, /* same side effects as rdmsr */
+ {5, BBOU_RDTSC, "rdtsc"},
+ {3, BBOU_RET, "ret"},
+ {3, BBOU_RSRDWD, "rol"},
+ {3, BBOU_RSRDWD, "ror"},
+ {4, BBOU_SAHF, "sahf"},
+ {3, BBOU_RSRDWD, "sar"},
+ {3, BBOU_RSRDWD, "sbb"},
+ {4, BBOU_SCAS, "scas"},
+ {3, BBOU_WS, "set"},
+ {6, BBOU_NOP, "sfence"},
+ {4, BBOU_WS, "sgdt"},
+ {3, BBOU_RSRDWD, "shl"},
+ {3, BBOU_RSRDWD, "shr"},
+ {4, BBOU_WS, "sidt"},
+ {4, BBOU_WS, "sldt"},
+ {3, BBOU_NOP, "stc"},
+ {3, BBOU_NOP, "std"},
+ {4, BBOU_NOP, "stgi"},
+ {3, BBOU_NOP, "sti"},
+ {4, BBOU_SCAS, "stos"},
+ {4, BBOU_WS, "strl"},
+ {3, BBOU_WS, "str"},
+ {3, BBOU_SUB, "sub"},
+ {6, BBOU_NOP, "swapgs"},
+ {7, BBOU_SYSEXIT, "sysexit"},
+ {6, BBOU_SYSRET, "sysret"},
+ {4, BBOU_NOP, "test"},
+ {4, BBOU_NOP, "ud2a"},
+ {7, BBOU_RS, "vmclear"},
+ {8, BBOU_NOP, "vmlaunch"},
+ {6, BBOU_RS, "vmload"},
+ {7, BBOU_RS, "vmptrld"},
+ {6, BBOU_WD, "vmread"}, /* vmread src is an encoding, not a register */
+ {8, BBOU_NOP, "vmresume"},
+ {5, BBOU_RS, "vmrun"},
+ {6, BBOU_RS, "vmsave"},
+ {7, BBOU_WD, "vmwrite"}, /* vmwrite src is an encoding, not a register */
+ {6, BBOU_NOP, "wbinvd"},
+ {5, BBOU_WRMSR, "wrmsr"},
+ {4, BBOU_XADD, "xadd"},
+ {4, BBOU_XCHG, "xchg"},
+ {3, BBOU_XOR, "xor"},
+ {10, BBOU_WS, "xstore-rng"},
+};
+
+/* To speed up searching, index bb_opcode_usage_all by the first letter of each
+ * opcode.
+ */
+static struct {
+ const struct bb_opcode_usage *opcode;
+ int size;
+} bb_opcode_usage[26];
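+
+/* For example, the opcodes beginning with 'm' ("mfence" through "mwait")
+ * form one contiguous run in bb_opcode_usage_all[]; bb_opcode_usage['m' - 'a']
+ * points at the first entry of that run and .size counts its length, so
+ * bb_parse_opcode() only scans the few entries that share the first letter.
+ */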
+
+struct bb_operand {
+ char *base;
+ char *index;
+ char *segment;
+ long disp;
+ unsigned int scale;
+ enum bb_reg_code base_rc; /* UNDEFINED or RAX through R15 */
+ enum bb_reg_code index_rc; /* UNDEFINED or RAX through R15 */
+ unsigned int present :1;
+ unsigned int disp_present :1;
+ unsigned int indirect :1; /* must be combined with reg or memory */
+ unsigned int immediate :1; /* exactly one of these 3 must be set */
+ unsigned int reg :1;
+ unsigned int memory :1;
+};
+
+struct bb_decode {
+ char *prefix;
+ char *opcode;
+ const struct bb_opcode_usage *match;
+ struct bb_operand src;
+ struct bb_operand dst;
+ struct bb_operand dst2;
+};
+
+static struct bb_decode bb_decode;
+
+static enum bb_reg_code
+bb_reg_map(const char *reg)
+{
+ int lo, hi, c;
+ const struct bb_reg_code_map *p;
+ lo = 0;
+ hi = ARRAY_SIZE(bb_reg_code_map) - 1;
+ while (lo <= hi) {
+ int mid = (hi + lo) / 2;
+ p = bb_reg_code_map + mid;
+ c = strcmp(p->name, reg+1);
+ if (c == 0)
+ return p->reg;
+ else if (c > 0)
+ hi = mid - 1;
+ else
+ lo = mid + 1;
+ }
+ return BBRG_UNDEFINED;
+}
+
+static void
+bb_parse_operand(char *str, struct bb_operand *operand)
+{
+ char *p = str;
+ int sign = 1;
+ operand->present = 1;
+ /* extract any segment prefix */
+ if (p[0] == '%' && p[1] && p[2] == 's' && p[3] == ':') {
+ operand->memory = 1;
+ operand->segment = p;
+ p[3] = '\0';
+ p += 4;
+ }
+ /* extract displacement, base, index, scale */
+ if (*p == '*') {
+ /* jmp/call *disp(%reg), *%reg or *0xnnn */
+ operand->indirect = 1;
+ ++p;
+ }
+ if (*p == '-') {
+ sign = -1;
+ ++p;
+ }
+ if (*p == '$') {
+ operand->immediate = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p+1, &p, 0);
+ } else if (isdigit(*p)) {
+ operand->memory = 1;
+ operand->disp_present = 1;
+ operand->disp = simple_strtoul(p, &p, 0) * sign;
+ }
+ if (*p == '%') {
+ operand->reg = 1;
+ operand->base = p;
+ } else if (*p == '(') {
+ operand->memory = 1;
+ operand->base = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->base)
+ operand->base = NULL;
+ if (*p == ',') {
+ *p = '\0';
+ operand->index = ++p;
+ p += strcspn(p, ",)");
+ if (p == operand->index)
+ operand->index = NULL;
+ }
+ if (*p == ',') {
+ *p = '\0';
+ operand->scale = simple_strtoul(p+1, &p, 0);
+ }
+ *p = '\0';
+ } else if (*p) {
+ kdb_printf("%s: unexpected token '%c' after disp '%s'\n",
+ __FUNCTION__, *p, str);
+ bb_giveup = 1;
+ }
+ if ((operand->immediate + operand->reg + operand->memory != 1) ||
+ (operand->indirect && operand->immediate)) {
+ kdb_printf("%s: incorrect decode '%s' N %d I %d R %d M %d\n",
+ __FUNCTION__, str,
+ operand->indirect, operand->immediate, operand->reg,
+ operand->memory);
+ bb_giveup = 1;
+ }
+ if (operand->base)
+ operand->base_rc = bb_reg_map(operand->base);
+ if (operand->index)
+ operand->index_rc = bb_reg_map(operand->index);
+}
+
+static void
+bb_print_operand(const char *type, const struct bb_operand *operand)
+{
+ if (!operand->present)
+ return;
+ kdb_printf(" %s %c%c: ",
+ type,
+ operand->indirect ? 'N' : ' ',
+ operand->immediate ? 'I' :
+ operand->reg ? 'R' :
+ operand->memory ? 'M' :
+ '?'
+ );
+ if (operand->segment)
+ kdb_printf("%s:", operand->segment);
+ if (operand->immediate) {
+ kdb_printf("$0x%lx", operand->disp);
+ } else if (operand->reg) {
+ if (operand->indirect)
+ kdb_printf("*");
+ kdb_printf("%s", operand->base);
+ } else if (operand->memory) {
+ if (operand->indirect && (operand->base || operand->index))
+ kdb_printf("*");
+ if (operand->disp_present) {
+ kdb_printf("0x%lx", operand->disp);
+ }
+ if (operand->base || operand->index || operand->scale) {
+ kdb_printf("(");
+ if (operand->base)
+ kdb_printf("%s", operand->base);
+ if (operand->index || operand->scale)
+ kdb_printf(",");
+ if (operand->index)
+ kdb_printf("%s", operand->index);
+ if (operand->scale)
+ kdb_printf(",%d", operand->scale);
+ kdb_printf(")");
+ }
+ }
+ if (operand->base_rc)
+ kdb_printf(" base_rc %d (%s)",
+ operand->base_rc, bbrg_name[operand->base_rc]);
+ if (operand->index_rc)
+ kdb_printf(" index_rc %d (%s)",
+ operand->index_rc,
+ bbrg_name[operand->index_rc]);
+ kdb_printf("\n");
+}
+
+static void
+bb_print_opcode(void)
+{
+ const struct bb_opcode_usage *o = bb_decode.match;
+ kdb_printf(" ");
+ if (bb_decode.prefix)
+ kdb_printf("%s ", bb_decode.prefix);
+ kdb_printf("opcode '%s' matched by '%s', usage %d\n",
+ bb_decode.opcode, o->opcode, o->usage);
+}
+
+static int
+bb_parse_opcode(void)
+{
+ int c, i;
+ const struct bb_opcode_usage *o;
+ static int bb_parse_opcode_error_limit = 5;
+ c = bb_decode.opcode[0] - 'a';
+ if (c < 0 || c >= ARRAY_SIZE(bb_opcode_usage))
+ goto nomatch;
+ o = bb_opcode_usage[c].opcode;
+ if (!o)
+ goto nomatch;
+ for (i = 0; i < bb_opcode_usage[c].size; ++i, ++o) {
+ if (strncmp(bb_decode.opcode, o->opcode, o->length) == 0) {
+ bb_decode.match = o;
+ if (KDB_DEBUG(BB))
+ bb_print_opcode();
+ return 0;
+ }
+ }
+nomatch:
+ if (!bb_parse_opcode_error_limit)
+ return 1;
+ --bb_parse_opcode_error_limit;
+ kdb_printf("%s: no match at [%s]%s " kdb_bfd_vma_fmt0 " - '%s'\n",
+ __FUNCTION__,
+ bb_mod_name, bb_func_name, bb_curr_addr,
+ bb_decode.opcode);
+ return 1;
+}
+
+static bool
+bb_is_int_reg(enum bb_reg_code reg)
+{
+ return reg >= BBRG_RAX && reg < (BBRG_RAX + KDB_INT_REGISTERS);
+}
+
+static bool
+bb_is_simple_memory(const struct bb_operand *operand)
+{
+ return operand->memory &&
+ bb_is_int_reg(operand->base_rc) &&
+ !operand->index_rc &&
+ operand->scale == 0 &&
+ !operand->segment;
+}
+
+static bool
+bb_is_static_disp(const struct bb_operand *operand)
+{
+ return operand->memory &&
+ !operand->base_rc &&
+ !operand->index_rc &&
+ operand->scale == 0 &&
+ !operand->segment &&
+ !operand->indirect;
+}
+
+static enum bb_reg_code
+bb_reg_code_value(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_reg_state->contains[reg - BBRG_RAX].value;
+}
+
+static short
+bb_reg_code_offset(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_reg_state->contains[reg - BBRG_RAX].offset;
+}
+
+static void
+bb_reg_code_set_value(enum bb_reg_code dst, enum bb_reg_code src)
+{
+ BB_CHECK(!bb_is_int_reg(dst), dst, );
+ bb_reg_state->contains[dst - BBRG_RAX].value = src;
+}
+
+static void
+bb_reg_code_set_offset(enum bb_reg_code dst, short offset)
+{
+ BB_CHECK(!bb_is_int_reg(dst), dst, );
+ bb_reg_state->contains[dst - BBRG_RAX].offset = offset;
+}
+
+static bool
+bb_is_osp_defined(enum bb_reg_code reg)
+{
+ if (bb_is_int_reg(reg))
+ return bb_reg_code_value(reg) == BBRG_OSP;
+ else
+ return 0;
+}
+
+static bfd_vma
+bb_actual_value(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_actual[reg - BBRG_RAX].value;
+}
+
+static int
+bb_actual_valid(enum bb_reg_code reg)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, 0);
+ return bb_actual[reg - BBRG_RAX].valid;
+}
+
+static void
+bb_actual_set_value(enum bb_reg_code reg, bfd_vma value)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, );
+ bb_actual[reg - BBRG_RAX].value = value;
+}
+
+static void
+bb_actual_set_valid(enum bb_reg_code reg, int valid)
+{
+ BB_CHECK(!bb_is_int_reg(reg), reg, );
+ bb_actual[reg - BBRG_RAX].valid = valid;
+}
+
+/* The scheduler code switches RSP then does PUSH, it is not an error for RSP
+ * to be undefined in this area of the code.
+ */
+static bool
+bb_is_scheduler_address(void)
+{
+ return bb_curr_addr >= bb__sched_text_start &&
+ bb_curr_addr < bb__sched_text_end;
+}
+
+static void
+bb_reg_read(enum bb_reg_code reg)
+{
+ int i, r = 0;
+ if (!bb_is_int_reg(reg) ||
+ bb_reg_code_value(reg) != reg)
+ return;
+ for (i = 0;
+ i < min_t(unsigned int, REGPARM, ARRAY_SIZE(bb_param_reg));
+ ++i) {
+ if (reg == bb_param_reg[i]) {
+ r = i + 1;
+ break;
+ }
+ }
+ bb_reg_params = max(bb_reg_params, r);
+}
+
+static void
+bb_do_reg_state_print(const struct bb_reg_state *s)
+{
+ int i, offset_address, offset_value;
+ const struct bb_memory_contains *c;
+ enum bb_reg_code value;
+ kdb_printf(" bb_reg_state %p\n", s);
+ for (i = 0; i < ARRAY_SIZE(s->contains); ++i) {
+ value = s->contains[i].value;
+ offset_value = s->contains[i].offset;
+ kdb_printf(" %s = %s",
+ bbrg_name[i + BBRG_RAX], bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
+ kdb_printf("\n");
+ }
+ for (i = 0, c = s->memory; i < s->mem_count; ++i, ++c) {
+ offset_address = c->offset_address;
+ value = c->value;
+ offset_value = c->offset_value;
+ kdb_printf(" slot %d offset_address %c0x%x %s",
+ i,
+ offset_address >= 0 ? '+' : '-',
+ offset_address >= 0 ? offset_address : -offset_address,
+ bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_value, "", "");
+ kdb_printf("\n");
+ }
+}
+
+static void
+bb_reg_state_print(const struct bb_reg_state *s)
+{
+ if (KDB_DEBUG(BB))
+ bb_do_reg_state_print(s);
+}
+
+/* Set register 'dst' to contain the value from 'src'. This includes reading
+ * from 'src' and writing to 'dst'. The offset value is copied iff 'src'
+ * contains a stack pointer.
+ *
+ * Be very careful about the context here. 'dst' and 'src' reflect integer
+ * registers by name, _not_ by the value of their contents. "mov %rax,%rsi"
+ * will call this function as bb_reg_set_reg(BBRG_RSI, BBRG_RAX), which
+ * reflects what the assembler code is doing. However we need to track the
+ * _values_ in the registers, not their names. IOW, we really care about "what
+ * value does rax contain when it is copied into rsi?", so we can record the
+ * fact that we now have two copies of that value, one in rax and one in rsi.
+ */
+
+static void
+bb_reg_set_reg(enum bb_reg_code dst, enum bb_reg_code src)
+{
+ enum bb_reg_code src_value = BBRG_UNDEFINED;
+ short offset_value = 0;
+ KDB_DEBUG_BB(" %s = %s", bbrg_name[dst], bbrg_name[src]);
+ if (bb_is_int_reg(src)) {
+ bb_reg_read(src);
+ src_value = bb_reg_code_value(src);
+ KDB_DEBUG_BB(" (%s", bbrg_name[src_value]);
+ if (bb_is_osp_defined(src)) {
+ offset_value = bb_reg_code_offset(src);
+ KDB_DEBUG_BB_OFFSET(offset_value, "", "");
+ }
+ KDB_DEBUG_BB(")");
+ }
+ if (bb_is_int_reg(dst)) {
+ bb_reg_code_set_value(dst, src_value);
+ bb_reg_code_set_offset(dst, offset_value);
+ }
+ KDB_DEBUG_BB("\n");
+}
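+
+/* Illustrative example: after the common prologue "push %rbp; mov %rsp,%rbp",
+ * the mov is tracked as bb_reg_set_reg(BBRG_RBP, BBRG_RSP), so RBP is
+ * recorded as containing BBRG_OSP plus the current offset and a later
+ * "mov %rbp,%rsp" restores a well defined stack pointer.
+ */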
+
+static void
+bb_reg_set_undef(enum bb_reg_code dst)
+{
+ bb_reg_set_reg(dst, BBRG_UNDEFINED);
+}
+
+/* Delete any record of a stored register held in osp + 'offset' */
+
+static void
+bb_delete_memory(short offset)
+{
+ int i;
+ struct bb_memory_contains *c;
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++c) {
+ if (c->offset_address == offset &&
+ c->value != BBRG_UNDEFINED) {
+ KDB_DEBUG_BB(" delete %s from ",
+ bbrg_name[c->value]);
+ KDB_DEBUG_BB_OFFSET(offset, "osp", "");
+ KDB_DEBUG_BB(" slot %d\n",
+ (int)(c - bb_reg_state->memory));
+ memset(c, BBRG_UNDEFINED, sizeof(*c));
+ if (i == bb_reg_state->mem_count - 1)
+ --bb_reg_state->mem_count;
+ }
+ }
+}
+
+/* Set memory location *('dst' + 'offset_address') to contain the supplied
+ * value and offset. 'dst' is assumed to be a register that contains a stack
+ * pointer.
+ */
+
+static void
+bb_memory_set_reg_value(enum bb_reg_code dst, short offset_address,
+ enum bb_reg_code value, short offset_value)
+{
+ int i;
+ struct bb_memory_contains *c, *free = NULL;
+ BB_CHECK(!bb_is_osp_defined(dst), dst, );
+ KDB_DEBUG_BB(" *(%s", bbrg_name[dst]);
+ KDB_DEBUG_BB_OFFSET(offset_address, "", "");
+ offset_address += bb_reg_code_offset(dst);
+ KDB_DEBUG_BB_OFFSET(offset_address, " osp", ") = ");
+ KDB_DEBUG_BB("%s", bbrg_name[value]);
+ if (value == BBRG_OSP)
+ KDB_DEBUG_BB_OFFSET(offset_value, "", "");
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state_max;
+ ++i, ++c) {
+ if (c->offset_address == offset_address)
+ free = c;
+ else if (c->value == BBRG_UNDEFINED && !free)
+ free = c;
+ }
+ if (!free) {
+ struct bb_reg_state *new, *old = bb_reg_state;
+ size_t old_size, new_size;
+ int slot;
+ old_size = sizeof(*old) + bb_reg_state_max *
+ sizeof(old->memory[0]);
+ slot = bb_reg_state_max;
+ bb_reg_state_max += 5;
+ new_size = sizeof(*new) + bb_reg_state_max *
+ sizeof(new->memory[0]);
+ new = debug_kmalloc(new_size, GFP_ATOMIC);
+ if (!new) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ } else {
+ memcpy(new, old, old_size);
+ memset((char *)new + old_size, BBRG_UNDEFINED,
+ new_size - old_size);
+ bb_reg_state = new;
+ debug_kfree(old);
+ free = bb_reg_state->memory + slot;
+ }
+ }
+ if (free) {
+ int slot = free - bb_reg_state->memory;
+ free->offset_address = offset_address;
+ free->value = value;
+ free->offset_value = offset_value;
+ KDB_DEBUG_BB(" slot %d", slot);
+ bb_reg_state->mem_count = max(bb_reg_state->mem_count, slot+1);
+ }
+ KDB_DEBUG_BB("\n");
+}
+
+/* Set memory location *('dst' + 'offset') to contain the value from register
+ * 'src'. 'dst' is assumed to be a register that contains a stack pointer.
+ * This differs from bb_memory_set_reg_value because it takes a src register
+ * which contains a value and possibly an offset, whereas
+ * bb_memory_set_reg_value is passed the value and offset directly.
+ */
+
+static void
+bb_memory_set_reg(enum bb_reg_code dst, enum bb_reg_code src,
+ short offset_address)
+{
+ int offset_value;
+ enum bb_reg_code value;
+ BB_CHECK(!bb_is_osp_defined(dst), dst, );
+ if (!bb_is_int_reg(src))
+ return;
+ value = bb_reg_code_value(src);
+ if (value == BBRG_UNDEFINED) {
+ bb_delete_memory(offset_address + bb_reg_code_offset(dst));
+ return;
+ }
+ offset_value = bb_reg_code_offset(src);
+ bb_reg_read(src);
+ bb_memory_set_reg_value(dst, offset_address, value, offset_value);
+}
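+
+/* Illustrative example, assuming KDB_WORD_SIZE == 8: for
+ * "mov %rbx,-0x8(%rsp)" with RSP mapping osp-0x10, the value currently
+ * attributed to RBX is recorded at stack slot osp-0x18; if RBX is
+ * undefined then the store just deletes any stale entry for that slot.
+ */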
+
+/* Set register 'dst' to contain the value from memory *('src' + offset_address).
+ * 'src' is assumed to be a register that contains a stack pointer.
+ */
+
+static void
+bb_reg_set_memory(enum bb_reg_code dst, enum bb_reg_code src, short offset_address)
+{
+ int i, defined = 0;
+ struct bb_memory_contains *s;
+ BB_CHECK(!bb_is_osp_defined(src), src, );
+ KDB_DEBUG_BB(" %s = *(%s",
+ bbrg_name[dst], bbrg_name[src]);
+ KDB_DEBUG_BB_OFFSET(offset_address, "", ")");
+ offset_address += bb_reg_code_offset(src);
+ KDB_DEBUG_BB_OFFSET(offset_address, " (osp", ")");
+ for (i = 0, s = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++s) {
+ if (s->offset_address == offset_address && bb_is_int_reg(dst)) {
+ bb_reg_code_set_value(dst, s->value);
+ KDB_DEBUG_BB(" value %s", bbrg_name[s->value]);
+ if (s->value == BBRG_OSP) {
+ bb_reg_code_set_offset(dst, s->offset_value);
+ KDB_DEBUG_BB_OFFSET(s->offset_value, "", "");
+ } else {
+ bb_reg_code_set_offset(dst, 0);
+ }
+ defined = 1;
+ }
+ }
+ if (!defined)
+ bb_reg_set_reg(dst, BBRG_UNDEFINED);
+ else
+ KDB_DEBUG_BB("\n");
+}
+
+/* A generic read from an operand. */
+
+static void
+bb_read_operand(const struct bb_operand *operand)
+{
+ int m = 0;
+ if (operand->base_rc)
+ bb_reg_read(operand->base_rc);
+ if (operand->index_rc)
+ bb_reg_read(operand->index_rc);
+ if (bb_is_simple_memory(operand) &&
+ bb_is_osp_defined(operand->base_rc) &&
+ bb_decode.match->usage != BBOU_LEA) {
+ m = (bb_reg_code_offset(operand->base_rc) + operand->disp +
+ KDB_WORD_SIZE - 1) / KDB_WORD_SIZE;
+ bb_memory_params = max(bb_memory_params, m);
+ }
+}
+
+/* A generic write to an operand, resulting in an undefined value in that
+ * location. All well defined operands are handled separately; this function
+ * only handles the opcodes where the result is undefined.
+ */
+
+static void
+bb_write_operand(const struct bb_operand *operand)
+{
+ enum bb_reg_code base_rc = operand->base_rc;
+ if (operand->memory) {
+ if (base_rc)
+ bb_reg_read(base_rc);
+ if (operand->index_rc)
+ bb_reg_read(operand->index_rc);
+ } else if (operand->reg && base_rc) {
+ bb_reg_set_undef(base_rc);
+ }
+ if (bb_is_simple_memory(operand) && bb_is_osp_defined(base_rc)) {
+ int offset;
+ offset = bb_reg_code_offset(base_rc) + operand->disp;
+ offset = ALIGN(offset - KDB_WORD_SIZE + 1, KDB_WORD_SIZE);
+ bb_delete_memory(offset);
+ }
+}
+
+/* Adjust a register that contains a stack pointer */
+
+static void
+bb_adjust_osp(enum bb_reg_code reg, int adjust)
+{
+ int offset = bb_reg_code_offset(reg), old_offset = offset;
+ KDB_DEBUG_BB(" %s osp offset ", bbrg_name[reg]);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", " -> ");
+ offset += adjust;
+ bb_reg_code_set_offset(reg, offset);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(reg), "", "\n");
+ /* When RSP is adjusted upwards, it invalidates any memory
+ * stored between the old and current stack offsets.
+ */
+ if (reg == BBRG_RSP) {
+ while (old_offset < bb_reg_code_offset(reg)) {
+ bb_delete_memory(old_offset);
+ old_offset += KDB_WORD_SIZE;
+ }
+ }
+}
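+
+/* Worked example, assuming KDB_WORD_SIZE == 8: with RSP at osp-0x28,
+ * "add $0x18,%rsp" results in bb_adjust_osp(BBRG_RSP, 0x18), moving the
+ * offset to osp-0x10 and deleting any saved registers recorded at
+ * osp-0x28, osp-0x20 and osp-0x18, which now lie below the stack pointer.
+ */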
+
+/* The current instruction adjusts a register that contains a stack pointer.
+ * Direction is 1 or -1, depending on whether the instruction is add/lea or
+ * sub.
+ */
+
+static void
+bb_adjust_osp_instruction(int direction)
+{
+ enum bb_reg_code dst_reg = bb_decode.dst.base_rc;
+ if (bb_decode.src.immediate ||
+ bb_decode.match->usage == BBOU_LEA /* lea has its own checks */) {
+ int adjust = direction * bb_decode.src.disp;
+ bb_adjust_osp(dst_reg, adjust);
+ } else {
+ /* variable stack adjustment, osp offset is not well defined */
+ KDB_DEBUG_BB(" %s osp offset ", bbrg_name[dst_reg]);
+ KDB_DEBUG_BB_OFFSET(bb_reg_code_offset(dst_reg), "", " -> undefined\n");
+ bb_reg_code_set_value(dst_reg, BBRG_UNDEFINED);
+ bb_reg_code_set_offset(dst_reg, 0);
+ }
+}
+
+/* Some instructions using memory have an explicit length suffix (b, w, l, q).
+ * The equivalent instructions using a register imply the length from the
+ * register name. Deduce the operand length.
+ */
+
+static int
+bb_operand_length(const struct bb_operand *operand, char opcode_suffix)
+{
+ int l = 0;
+ switch (opcode_suffix) {
+ case 'b':
+ l = 8;
+ break;
+ case 'w':
+ l = 16;
+ break;
+ case 'l':
+ l = 32;
+ break;
+ case 'q':
+ l = 64;
+ break;
+ }
+ if (l == 0 && operand->reg) {
+ switch (strlen(operand->base)) {
+ case 3:
+ switch (operand->base[2]) {
+ case 'h':
+ case 'l':
+ l = 8;
+ break;
+ default:
+ l = 16;
+ break;
+ }
+ break;
+ case 4:
+ if (operand->base[1] == 'r')
+ l = 64;
+ else
+ l = 32;
+ break;
+ }
+ }
+ return l;
+}
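+
+/* For example, "%al" and "%ah" map to 8, "%ax" to 16, "%eax" to 32 and
+ * "%rax" to 64; an explicit opcode suffix such as the 'l' in "movl" takes
+ * precedence over the register name.
+ */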
+
+static int
+bb_reg_state_size(const struct bb_reg_state *state)
+{
+ return sizeof(*state) +
+ state->mem_count * sizeof(state->memory[0]);
+}
+
+/* Canonicalize the current bb_reg_state so it can be compared against
+ * previously created states. Sort the memory entries in descending order of
+ * offset_address (stack grows down). Empty slots are moved to the end of the
+ * list and trimmed.
+ */
+
+static void
+bb_reg_state_canonicalize(void)
+{
+ int i, order, changed;
+ struct bb_memory_contains *p1, *p2, temp;
+ do {
+ changed = 0;
+ for (i = 0, p1 = bb_reg_state->memory;
+ i < bb_reg_state->mem_count-1;
+ ++i, ++p1) {
+ p2 = p1 + 1;
+ if (p2->value == BBRG_UNDEFINED) {
+ order = 0;
+ } else if (p1->value == BBRG_UNDEFINED) {
+ order = 1;
+ } else if (p1->offset_address < p2->offset_address) {
+ order = 1;
+ } else if (p1->offset_address > p2->offset_address) {
+ order = -1;
+ } else {
+ order = 0;
+ }
+ if (order > 0) {
+ temp = *p2;
+ *p2 = *p1;
+ *p1 = temp;
+ changed = 1;
+ }
+ }
+ } while (changed);
+ for (i = 0, p1 = bb_reg_state->memory;
+ i < bb_reg_state_max;
+ ++i, ++p1) {
+ if (p1->value != BBRG_UNDEFINED)
+ bb_reg_state->mem_count = i + 1;
+ }
+ bb_reg_state_print(bb_reg_state);
+}
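+
+/* For example, slots recorded at osp-0x18, osp-0x8, osp-0x10 plus an
+ * undefined slot canonicalize to osp-0x8, osp-0x10, osp-0x18 with
+ * mem_count trimmed to 3, so two states can be compared with a single
+ * memcmp().
+ */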
+
+static int
+bb_special_case(bfd_vma to)
+{
+ int i, j, rsp_offset, expect_offset, offset, errors = 0, max_errors = 40;
+ enum bb_reg_code reg, expect_value, value;
+ struct bb_name_state *r;
+
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (to == r->address &&
+ (r->fname == NULL || strcmp(bb_func_name, r->fname) == 0))
+ goto match;
+ }
+ /* Some inline assembler code has jumps to .fixup sections which result
+ * in out of line transfers with undefined state; ignore them.
+ */
+ if (strcmp(bb_func_name, "strnlen_user") == 0 ||
+ strcmp(bb_func_name, "copy_from_user") == 0)
+ return 1;
+ return 0;
+
+match:
+ /* Check the running registers match */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ expect_value = r->regs[reg].value;
+ if (test_bit(expect_value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ if (expect_value == BBRG_UNDEFINED)
+ continue;
+ expect_offset = r->regs[reg].offset;
+ value = bb_reg_code_value(reg);
+ offset = bb_reg_code_offset(reg);
+ if (expect_value == value &&
+ (value != BBRG_OSP || r->osp_offset == offset))
+ continue;
+ kdb_printf("%s: Expected %s to contain %s",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[expect_value]);
+ if (r->osp_offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(r->osp_offset, "", "");
+ kdb_printf(". It actually contains %s", bbrg_name[value]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ /* Check that any memory data on stack matches */
+ i = j = 0;
+ while (i < bb_reg_state->mem_count &&
+ j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED) {
+ /* this memory slot is not defined for this label */
+ ++j;
+ continue;
+ }
+ rsp_offset = bb_reg_state->memory[i].offset_address -
+ bb_reg_code_offset(BBRG_RSP);
+ if (rsp_offset >
+ r->mem[j].offset_address) {
+ /* extra slots in memory are OK */
+ ++i;
+ } else if (rsp_offset <
+ r->mem[j].offset_address) {
+ /* Required memory slot is missing */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing memory entry[%d] %s\n",
+ __FUNCTION__, j, bbrg_name[expect_value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ ++j;
+ } else {
+ if (bb_reg_state->memory[i].offset_value ||
+ bb_reg_state->memory[i].value != expect_value) {
+ /* memory slot is present but contains wrong
+ * value.
+ */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "wrong value in slot %d, "
+ "should be %s, it is %s\n",
+ __FUNCTION__, i,
+ bbrg_name[expect_value],
+ bbrg_name[bb_reg_state->memory[i].value]);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ ++i;
+ ++j;
+ }
+ }
+ while (j < r->mem_size) {
+ expect_value = r->mem[j].value;
+ if (test_bit(expect_value, r->skip_mem.bits) ||
+ expect_value == BBRG_UNDEFINED)
+ ++j;
+ else
+ break;
+ }
+ if (j != r->mem_size) {
+ /* Hit end of memory before testing all the pt_regs slots */
+ kdb_printf("%s: Invalid bb_reg_state.memory, "
+ "missing trailing entries\n",
+ __FUNCTION__);
+ ++errors;
+ if (max_errors-- == 0)
+ goto fail;
+ }
+ if (errors)
+ goto fail;
+ return 1;
+fail:
+ kdb_printf("%s: on transfer to %s\n", __FUNCTION__, r->name);
+ bb_giveup = 1;
+ return 1;
+}
+
+/* Transfer of control to a label outside the current function. If the
+ * transfer is to a known common code path then do a sanity check on the state
+ * at this point.
+ */
+
+static void
+bb_sanity_check(int type)
+{
+ enum bb_reg_code expect, actual;
+ int i, offset, error = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bb_preserved_reg); ++i) {
+ expect = bb_preserved_reg[i];
+ actual = bb_reg_code_value(expect);
+ offset = bb_reg_code_offset(expect);
+ if (expect == actual)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RSP */
+ if (type && expect == BBRG_RSP)
+ continue;
+ /* type == 1 is sysret/sysexit, ignore RBP for i386 */
+ /* We used to have "#ifndef CONFIG_X86_64" for the type=1 RBP
+ * test; however, x86_64 can run ia32 compatible mode and
+ * hit this problem. Perform the following test anyway!
+ */
+ if (type && expect == BBRG_RBP)
+ continue;
+ /* RSP should contain OSP+0. The exceptions are ptregscall_common
+ * and ia32_ptregs_common: they get a partial pt_regs, fudge the
+ * stack to make it a full pt_regs, then reverse the effect on
+ * exit, so the offset is -0x50 on exit.
+ */
+ if (expect == BBRG_RSP &&
+ bb_is_osp_defined(expect) &&
+ (offset == 0 ||
+ (offset == -0x50 &&
+ (strcmp(bb_func_name, "ptregscall_common") == 0 ||
+ strcmp(bb_func_name, "ia32_ptregs_common") == 0))))
+ continue;
+ kdb_printf("%s: Expected %s, got %s",
+ __FUNCTION__,
+ bbrg_name[expect], bbrg_name[actual]);
+ if (offset)
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset, "", "");
+ kdb_printf("\n");
+ error = 1;
+ }
+ BB_CHECK(error, error, );
+}
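+
+/* Note: bb_preserved_reg is expected to list the callee-saved registers
+ * (RBX, RBP, RSP and R12-R15 on x86_64); only those registers must still
+ * hold their entry values when control leaves the function.
+ */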
+
+/* Transfer of control. Follow the arc and save the current state as input to
+ * another basic block.
+ */
+
+static void
+bb_transfer(bfd_vma from, bfd_vma to, unsigned int drop_through)
+{
+ int i, found;
+ size_t size;
+ struct bb *bb = NULL; /* stupid gcc */
+ struct bb_jmp *bb_jmp;
+ struct bb_reg_state *state;
+ bb_reg_state_canonicalize();
+ found = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ if (bb_jmp->from == from &&
+ bb_jmp->to == to &&
+ bb_jmp->drop_through == drop_through) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ /* Transfer outside the current function. Check the special
+ * cases (mainly in entry.S) first. If it is not a known
+ * special case then check if the target address is the start
+ * of a function or not. If it is the start of a function then
+ * assume tail recursion and require that the state be the same
+ * as on entry. Otherwise assume out of line code (e.g.
+ * spinlock contention path) and ignore it; the state can be
+ * anything.
+ */
+ kdb_symtab_t symtab;
+ if (bb_special_case(to))
+ return;
+ kdbnearsym(to, &symtab);
+ if (symtab.sym_start != to)
+ return;
+ bb_sanity_check(0);
+ if (bb_giveup)
+ return;
+#ifdef NO_SIBLINGS
+ /* Only print this message when the kernel is compiled with
+ * -fno-optimize-sibling-calls. Otherwise it would print a
+ * message for every tail recursion call. If you see the
+ * message below then you probably have an assembler label that
+ * is not listed in the special cases.
+ */
+ kdb_printf(" not matched: from "
+ kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d bb_jmp[%d]\n",
+ from, to, drop_through, i);
+#endif /* NO_SIBLINGS */
+ return;
+ }
+ KDB_DEBUG_BB(" matched: from " kdb_bfd_vma_fmt0
+ " to " kdb_bfd_vma_fmt0
+ " drop_through %d bb_jmp[%d]\n",
+ from, to, drop_through, i);
+ found = 0;
+ for (i = 0; i < bb_count; ++i) {
+ bb = bb_list[i];
+ if (bb->start == to) {
+ found = 1;
+ break;
+ }
+ }
+ BB_CHECK(!found, to, );
+ /* If the register state for this arc has already been set (we are
+ * rescanning the block that originates the arc) and the state is the
+ * same as the previous state for this arc then this input to the
+ * target block is the same as last time, so there is no need to rescan
+ * the target block.
+ */
+ state = bb_jmp->state;
+ size = bb_reg_state_size(bb_reg_state);
+ if (state) {
+ bb_reg_state->ref_count = state->ref_count;
+ if (memcmp(state, bb_reg_state, size) == 0) {
+ KDB_DEBUG_BB(" no state change\n");
+ return;
+ }
+ if (--state->ref_count == 0)
+ debug_kfree(state);
+ bb_jmp->state = NULL;
+ }
+ /* New input state is required. To save space, check if any other arcs
+ * have the same state and reuse them where possible. The overall set
+ * of inputs to the target block is now different so the target block
+ * must be rescanned.
+ */
+ bb->changed = 1;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ state = bb_jmp_list[i].state;
+ if (!state)
+ continue;
+ bb_reg_state->ref_count = state->ref_count;
+ if (memcmp(state, bb_reg_state, size) == 0) {
+ KDB_DEBUG_BB(" reuse bb_jmp[%d]\n", i);
+ bb_jmp->state = state;
+ ++state->ref_count;
+ return;
+ }
+ }
+ state = debug_kmalloc(size, GFP_ATOMIC);
+ if (!state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(state, bb_reg_state, size);
+ state->ref_count = 1;
+ bb_jmp->state = state;
+ KDB_DEBUG_BB(" new state %p\n", state);
+}
+
+/* Isolate the processing for 'mov' so it can be used for 'xadd'/'xchg' as
+ * well.
+ *
+ * xadd/xchg expect this function to return BBOU_NOP for special cases,
+ * otherwise it returns BBOU_RSWD. All special cases must be handled entirely
+ * within this function, including doing bb_read_operand or bb_write_operand
+ * where necessary.
+ */
+
+static enum bb_operand_usage
+bb_usage_mov(const struct bb_operand *src, const struct bb_operand *dst, int l)
+{
+ int full_register_src, full_register_dst;
+ full_register_src = bb_operand_length(src, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ full_register_dst = bb_operand_length(dst, bb_decode.opcode[l])
+ == KDB_WORD_SIZE * 8;
+ /* If both src and dst are full integer registers then record the
+ * register change.
+ */
+ if (src->reg &&
+ bb_is_int_reg(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_src &&
+ full_register_dst) {
+ /* Special case for the code that switches stacks in
+ * jprobe_return. That code must modify RSP but it does it in
+ * a well defined manner. Do not invalidate RSP.
+ */
+ if (src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "jprobe_return") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* math_abort takes the equivalent of a longjmp structure and
+ * resets the stack. Ignore this, it leaves RSP well defined.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ strcmp(bb_func_name, "math_abort") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ return BBOU_NOP;
+ }
+ /* If the move is from a full integer register to stack then record it.
+ */
+ if (src->reg &&
+ bb_is_simple_memory(dst) &&
+ bb_is_osp_defined(dst->base_rc) &&
+ full_register_src) {
+ /* Ugly special case. Initializing list heads on stack causes
+ * false references to stack variables when the list head is
+ * used. Static code analysis cannot detect that the list head
+ * has been changed by a previous execution loop and that a
+ * basic block is only executed after the list head has been
+ * changed.
+ *
+ * These false references can result in valid stack variables
+ * being incorrectly cleared on some logic paths. Ignore
+ * stores to stack variables which point to themselves or to
+ * the previous word so the list head initialization is not
+ * recorded.
+ */
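+ /* Illustrative example, assuming KDB_WORD_SIZE == 8 and RSP
+ * mapping osp+0: INIT_LIST_HEAD of a list_head at 0x10(%rsp)
+ * compiles to roughly "lea 0x10(%rsp),%rax;
+ * mov %rax,0x10(%rsp); mov %rax,0x18(%rsp)". Both stores
+ * satisfy the checks below (stack1 == stack2 and
+ * stack1 == stack2 - KDB_WORD_SIZE) and are not recorded.
+ */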
+ if (bb_is_osp_defined(src->base_rc)) {
+ int stack1 = bb_reg_code_offset(src->base_rc);
+ int stack2 = bb_reg_code_offset(dst->base_rc) +
+ dst->disp;
+ if (stack1 == stack2 ||
+ stack1 == stack2 - KDB_WORD_SIZE)
+ return BBOU_NOP;
+ }
+ bb_memory_set_reg(dst->base_rc, src->base_rc, dst->disp);
+ return BBOU_NOP;
+ }
+ /* If the move is from stack to a full integer register then record it.
+ */
+ if (bb_is_simple_memory(src) &&
+ bb_is_osp_defined(src->base_rc) &&
+ dst->reg &&
+ bb_is_int_reg(dst->base_rc) &&
+ full_register_dst) {
+#ifdef CONFIG_X86_32
- #ifndef TSS_sysenter_sp0
- #define TSS_sysenter_sp0 SYSENTER_stack_sp0
- #endif
+ /* A mov from TSS_sysenter_sp0+offset to esp fixes up the
+ * sysenter stack and leaves esp well defined. mov
+ * TSS_sysenter_sp0+offset(%esp),%esp is followed by up to 5
+ * push instructions to mimic the hardware stack push. If the
+ * mov reads from a non-zero offset past TSS_sysenter_sp0 then
+ * only 3 words will be pushed.
+ */
+ if (dst->base_rc == BBRG_RSP &&
+ src->disp >= TSS_sysenter_sp0 &&
+ bb_is_osp_defined(BBRG_RSP)) {
+ int pushes;
+ pushes = src->disp == TSS_sysenter_sp0 ? 5 : 3;
+ bb_reg_code_set_offset(BBRG_RSP,
+ bb_reg_code_offset(BBRG_RSP) +
+ pushes * KDB_WORD_SIZE);
+ KDB_DEBUG_BB_OFFSET(
+ bb_reg_code_offset(BBRG_RSP),
+ " sysenter fixup, RSP",
+ "\n");
+ return BBOU_NOP;
+ }
+#endif /* CONFIG_X86_32 */
+ bb_read_operand(src);
+ bb_reg_set_memory(dst->base_rc, src->base_rc, src->disp);
+ return BBOU_NOP;
+ }
+ /* mov %gs:0x<nn>,%rsp is used to unconditionally switch to another
+ * stack. Ignore this special case, it is handled by the stack
+ * unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ return BBOU_NOP;
+ /* mov %reg,%reg is a nop */
+ if (src->reg &&
+ dst->reg &&
+ !src->segment &&
+ !dst->segment &&
+ strcmp(src->base, dst->base) == 0)
+ return BBOU_NOP;
+ /* Special case for the code that switches stacks in the scheduler
+ * (switch_to()). That code must modify RSP but it does it in a well
+ * defined manner. Do not invalidate RSP.
+ */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ bb_is_scheduler_address()) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ /* Special case for the code that switches stacks in resume from
+ * hibernation code. That code must modify RSP but it does it in a
+ * well defined manner. Do not invalidate RSP.
+ */
+ if (src->memory &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ full_register_dst &&
+ strcmp(bb_func_name, "restore_image") == 0) {
+ bb_read_operand(src);
+ return BBOU_NOP;
+ }
+ return BBOU_RSWD;
+}
+
+static enum bb_operand_usage
+bb_usage_xadd(const struct bb_operand *src, const struct bb_operand *dst)
+{
+ /* Simulate xadd as a series of instructions including mov, that way we
+ * get the benefit of all the special cases already handled by
+ * BBOU_MOV.
+ *
+ * tmp = src + dst, src = dst, dst = tmp.
+ *
+ * For tmp, pick a register that is undefined. If all registers are
+ * defined then pick one that is not being used by xadd.
+ */
+ enum bb_reg_code reg = BBRG_UNDEFINED;
+ struct bb_operand tmp;
+ struct bb_reg_contains save_tmp;
+ enum bb_operand_usage usage;
+ int undefined = 0;
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
+ undefined = 1;
+ break;
+ }
+ }
+ if (!undefined) {
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (reg != src->base_rc &&
+ reg != src->index_rc &&
+ reg != dst->base_rc &&
+ reg != dst->index_rc &&
+ reg != BBRG_RSP)
+ break;
+ }
+ }
+ KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+ bb_reg_set_undef(reg);
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.present = 1;
+ tmp.reg = 1;
+ tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+ if (tmp.base) {
+ tmp.base[0] = '%';
+ strcpy(tmp.base + 1, bbrg_name[reg]);
+ }
+ tmp.base_rc = reg;
+ bb_read_operand(src);
+ bb_read_operand(dst);
+ if (bb_usage_mov(src, dst, sizeof("xadd")-1) == BBOU_NOP)
+ usage = BBOU_RSRD;
+ else
+ usage = BBOU_RSRDWS;
+ bb_usage_mov(&tmp, dst, sizeof("xadd")-1);
+ KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+ debug_kfree(tmp.base);
+ return usage;
+}
+
+static enum bb_operand_usage
+bb_usage_xchg(const struct bb_operand *src, const struct bb_operand *dst)
+{
+ /* Simulate xchg as a series of mov instructions, that way we get the
+ * benefit of all the special cases already handled by BBOU_MOV.
+ *
+ * mov dst,tmp; mov src,dst; mov tmp,src;
+ *
+ * For tmp, pick a register that is undefined. If all registers are
+ * defined then pick one that is not being used by xchg.
+ */
+ enum bb_reg_code reg = BBRG_UNDEFINED;
+ int rs = BBOU_RS, rd = BBOU_RD, ws = BBOU_WS, wd = BBOU_WD;
+ struct bb_operand tmp;
+ struct bb_reg_contains save_tmp;
+ int undefined = 0;
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (bb_reg_code_value(reg) == BBRG_UNDEFINED) {
+ undefined = 1;
+ break;
+ }
+ }
+ if (!undefined) {
+ for (reg = BBRG_RAX; reg < BBRG_RAX + KDB_INT_REGISTERS; ++reg) {
+ if (reg != src->base_rc &&
+ reg != src->index_rc &&
+ reg != dst->base_rc &&
+ reg != dst->index_rc &&
+ reg != BBRG_RSP)
+ break;
+ }
+ }
+ KDB_DEBUG_BB(" %s saving tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ save_tmp = bb_reg_state->contains[reg - BBRG_RAX];
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.present = 1;
+ tmp.reg = 1;
+ tmp.base = debug_kmalloc(strlen(bbrg_name[reg]) + 2, GFP_ATOMIC);
+ if (tmp.base) {
+ tmp.base[0] = '%';
+ strcpy(tmp.base + 1, bbrg_name[reg]);
+ }
+ tmp.base_rc = reg;
+ if (bb_usage_mov(dst, &tmp, sizeof("xchg")-1) == BBOU_NOP)
+ rd = 0;
+ if (bb_usage_mov(src, dst, sizeof("xchg")-1) == BBOU_NOP) {
+ rs = 0;
+ wd = 0;
+ }
+ if (bb_usage_mov(&tmp, src, sizeof("xchg")-1) == BBOU_NOP)
+ ws = 0;
+ KDB_DEBUG_BB(" %s restoring tmp %s\n", __FUNCTION__, bbrg_name[reg]);
+ bb_reg_state->contains[reg - BBRG_RAX] = save_tmp;
+ debug_kfree(tmp.base);
+ return rs | rd | ws | wd;
+}
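+
+/* Note on the simulations above: both xadd and xchg are decomposed into
+ * calls to bb_usage_mov() through a scratch register so that all of the
+ * mov special cases (stack switches, list head stores, etc.) are reused.
+ * The scratch register's tracked state is saved and restored around the
+ * simulation, so only the real operands are affected.
+ */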
+
+/* Invalidate all the scratch registers */
+
+static void
+bb_invalidate_scratch_reg(void)
+{
+ int i, j;
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ for (j = 0; j < ARRAY_SIZE(bb_preserved_reg); ++j) {
+ if (i == bb_preserved_reg[j])
+ goto preserved;
+ }
+ bb_reg_set_undef(i);
+preserved:
+ continue;
+ }
+}
+
+static void
+bb_pass2_computed_jmp(const struct bb_operand *src)
+{
+ unsigned long table = src->disp;
+ kdb_machreg_t addr;
+ while (!bb_giveup) {
+ if (kdb_getword(&addr, table, sizeof(addr)))
+ return;
+ if (addr < bb_func_start || addr >= bb_func_end)
+ return;
+ bb_transfer(bb_curr_addr, addr, 0);
+ table += KDB_WORD_SIZE;
+ }
+}
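+
+/* Illustrative example: a switch statement compiled as
+ * "jmpq *0x<table>(,%rax,8)" is handled by reading successive words from
+ * the table and treating each address that falls inside the current
+ * function as a transfer, stopping at the first word outside the function.
+ */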
+
+/* The current instruction has been decoded and all the information is in
+ * bb_decode. Based on the opcode, track any operand usage that we care about.
+ */
+
+static void
+bb_usage(void)
+{
+ enum bb_operand_usage usage = bb_decode.match->usage;
+ struct bb_operand *src = &bb_decode.src;
+ struct bb_operand *dst = &bb_decode.dst;
+ struct bb_operand *dst2 = &bb_decode.dst2;
+ int opcode_suffix, operand_length;
+
+ /* First handle all the special usage cases, and map them to a generic
+ * case after catering for the side effects.
+ */
+
+ if (usage == BBOU_IMUL &&
+ src->present && !dst->present && !dst2->present) {
+ /* single operand imul, same effects as mul */
+ usage = BBOU_MUL;
+ }
+
+ /* AT&T syntax uses movs<l1><l2> for move with sign extension, instead
+ * of the Intel movsx. The AT&T syntax causes problems for the opcode
+ * mapping; movs with sign extension needs to be treated as a generic
+ * read src, write dst, but instead it falls under the movs I/O
+ * instruction. Fix it.
+ */
+ if (usage == BBOU_MOVS && strlen(bb_decode.opcode) > 5)
+ usage = BBOU_RSWD;
+
+ /* This switch statement deliberately does not use 'default' at the top
+ * level. That way the compiler will complain if a new BBOU_ enum is
+ * added above and not explicitly handled here.
+ */
+ switch (usage) {
+ case BBOU_UNKNOWN: /* drop through */
+ case BBOU_RS: /* drop through */
+ case BBOU_RD: /* drop through */
+ case BBOU_RSRD: /* drop through */
+ case BBOU_WS: /* drop through */
+ case BBOU_RSWS: /* drop through */
+ case BBOU_RDWS: /* drop through */
+ case BBOU_RSRDWS: /* drop through */
+ case BBOU_WD: /* drop through */
+ case BBOU_RSWD: /* drop through */
+ case BBOU_RDWD: /* drop through */
+ case BBOU_RSRDWD: /* drop through */
+ case BBOU_WSWD: /* drop through */
+ case BBOU_RSWSWD: /* drop through */
+ case BBOU_RDWSWD: /* drop through */
+ case BBOU_RSRDWSWD:
+ break; /* ignore generic usage for now */
+ case BBOU_ADD:
+ /* Special case for add instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_CALL:
+ /* Invalidate the scratch registers. Functions sync_regs and
+ * save_v86_state are special, their return value is the new
+ * stack pointer.
+ */
+ bb_reg_state_print(bb_reg_state);
+ bb_invalidate_scratch_reg();
+ if (bb_is_static_disp(src)) {
+ if (src->disp == bb_sync_regs) {
+ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
+ } else if (src->disp == bb_save_v86_state) {
+ bb_reg_set_reg(BBRG_RAX, BBRG_RSP);
+ bb_adjust_osp(BBRG_RAX, +KDB_WORD_SIZE);
+ }
+ }
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CBW:
+ /* Convert word in RAX. Read RAX, write RAX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CMOV:
+ /* cmove %gs:0x<nn>,%rsp is used to conditionally switch to
+ * another stack. Ignore this special case, it is handled by
+ * the stack unwinding code.
+ */
+ if (src->segment &&
+ strcmp(src->segment, "%gs") == 0 &&
+ dst->reg &&
+ dst->base_rc == BBRG_RSP)
+ usage = BBOU_NOP;
+ else
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHG:
+ /* Read RAX, write RAX plus src read, dst write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_RSWD;
+ break;
+ case BBOU_CMPXCHGD:
+ /* Read RAX, RBX, RCX, RDX, write RAX, RDX plus src read/write */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RBX);
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_RSWS;
+ break;
+ case BBOU_CPUID:
+ /* Read RAX, write RAX, RBX, RCX, RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RBX);
+ bb_reg_set_undef(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_CWD:
+ /* Convert word in RAX, RDX. Read RAX, write RDX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_DIV: /* drop through */
+ case BBOU_IDIV:
+ /* The 8 bit variants only affect RAX, the 16, 32 and 64 bit
+ * variants affect RDX as well.
+ */
+ switch (usage) {
+ case BBOU_DIV:
+ opcode_suffix = bb_decode.opcode[3];
+ break;
+ case BBOU_IDIV:
+ opcode_suffix = bb_decode.opcode[4];
+ break;
+ default:
+ opcode_suffix = 'q';
+ break;
+ }
+ operand_length = bb_operand_length(src, opcode_suffix);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RAX);
+ if (operand_length != 8) {
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RDX);
+ }
+ usage = BBOU_RS;
+ break;
+ case BBOU_IMUL:
+ /* Only the two and three operand forms get here. The one
+ * operand form is treated as mul.
+ */
+ if (dst2->present) {
+ /* The three operand form is a special case: read the first
+ * two operands, write the third.
+ */
+ bb_read_operand(src);
+ bb_read_operand(dst);
+ bb_write_operand(dst2);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_IRET:
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_JMP:
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ else if (src->indirect &&
+ src->disp &&
+ src->base == NULL &&
+ src->index &&
+ src->scale == KDB_WORD_SIZE)
+ bb_pass2_computed_jmp(src);
+ usage = BBOU_RS;
+ break;
+ case BBOU_LAHF:
+ /* Write RAX */
+ bb_reg_set_undef(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LEA:
+ /* dst = src + disp. Often used to calculate offsets into the
+ * stack, so check if it uses a stack pointer.
+ */
+ usage = BBOU_RSWD;
+ if (bb_is_simple_memory(src)) {
+ if (bb_is_osp_defined(src->base_rc)) {
+ bb_reg_set_reg(dst->base_rc, src->base_rc);
+ bb_adjust_osp_instruction(1);
+ usage = BBOU_RS;
+ } else if (src->disp == 0 &&
+ src->base_rc == dst->base_rc) {
+ /* lea 0(%reg),%reg is generated by i386
+ * GENERIC_NOP7.
+ */
+ usage = BBOU_NOP;
+ } else if (src->disp == 4096 &&
+ (src->base_rc == BBRG_R8 ||
+ src->base_rc == BBRG_RDI) &&
+ strcmp(bb_func_name, "relocate_kernel") == 0) {
+ /* relocate_kernel: setup a new stack at the
+ * end of the physical control page, using
+ * (x86_64) lea 4096(%r8),%rsp or (i386) lea
+ * 4096(%edi),%esp
+ */
+ usage = BBOU_NOP;
+ }
+ }
+ break;
+ case BBOU_LEAVE:
+ /* RSP = RBP; RBP = *(RSP); RSP += KDB_WORD_SIZE; */
+ bb_reg_set_reg(BBRG_RSP, BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_reg_set_memory(BBRG_RBP, BBRG_RSP, 0);
+ else
+ bb_reg_set_undef(BBRG_RBP);
+ if (bb_is_osp_defined(BBRG_RSP))
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ /* common_interrupt uses leave in a non-standard manner */
+ if (strcmp(bb_func_name, "common_interrupt") != 0)
+ bb_sanity_check(0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LODS:
+ /* Read RSI, write RAX, RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LOOP:
+ /* Read and write RCX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RCX);
+ if (bb_is_static_disp(src))
+ bb_transfer(bb_curr_addr, src->disp, 0);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_LSS:
+ /* lss offset(%esp),%esp leaves esp well defined */
+ if (dst->reg &&
+ dst->base_rc == BBRG_RSP &&
+ bb_is_simple_memory(src) &&
+ src->base_rc == BBRG_RSP) {
+ bb_adjust_osp(BBRG_RSP, 2*KDB_WORD_SIZE + src->disp);
+ usage = BBOU_NOP;
+ } else {
+ usage = BBOU_RSWD;
+ }
+ break;
+ case BBOU_MONITOR:
+ /* Read RAX, RCX, RDX */
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MOV:
+ usage = bb_usage_mov(src, dst, sizeof("mov")-1);
+ break;
+ case BBOU_MOVS:
+ /* Read RSI, RDI, write RSI, RDI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RSI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MUL:
+ /* imul (one operand form only) or mul. Read RAX. If the
+ * operand length is not 8 then write RDX.
+ */
+ if (bb_decode.opcode[0] == 'i')
+ opcode_suffix = bb_decode.opcode[4];
+ else
+ opcode_suffix = bb_decode.opcode[3];
+ operand_length = bb_operand_length(src, opcode_suffix);
+ bb_reg_read(BBRG_RAX);
+ if (operand_length != 8)
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_MWAIT:
+ /* Read RAX, RCX */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RCX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_NOP:
+ break;
+ case BBOU_OUTS:
+ /* Read RSI, RDX, write RSI */
+ bb_reg_read(BBRG_RSI);
+ bb_reg_read(BBRG_RDX);
+ bb_reg_set_undef(BBRG_RSI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_POP:
+ /* Complicated by the fact that you can pop from top of stack
+ * to a stack location; in that case the destination location
+ * is calculated after adjusting RSP. Analysis of the kernel
+ * code shows that gcc only uses this strange format to get the
+ * flags into a local variable, e.g. pushf; popl 0x10(%esp); so
+ * I am going to ignore this special case.
+ */
+ usage = BBOU_WS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pop when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ if (src->reg) {
+ bb_reg_set_memory(src->base_rc, BBRG_RSP, 0);
+ usage = BBOU_NOP;
+ }
+ /* pop %rsp does not adjust rsp */
+ if (!src->reg ||
+ src->base_rc != BBRG_RSP)
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ break;
+ case BBOU_POPF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("popf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_PUSH:
+ /* Complicated by the fact that you can push from a stack
+ * location to top of stack; the source location is calculated
+ * before adjusting RSP. Analysis of the kernel code shows
+ * that gcc only uses this strange format to restore the flags
+ * from a local variable, e.g. pushl 0x10(%esp); popf; so I am
+ * going to ignore this special case.
+ */
+ usage = BBOU_RS;
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("push when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ if (src->reg &&
+ bb_reg_code_offset(BBRG_RSP) <= 0)
+ bb_memory_set_reg(BBRG_RSP, src->base_rc, 0);
+ }
+ break;
+ case BBOU_PUSHF:
+ /* Do not care about flags, just adjust RSP */
+ if (!bb_is_osp_defined(BBRG_RSP)) {
+ if (!bb_is_scheduler_address()) {
+ kdb_printf("pushf when BBRG_RSP is undefined?\n");
+ bb_giveup = 1;
+ }
+ } else {
+ bb_adjust_osp(BBRG_RSP, -KDB_WORD_SIZE);
+ }
+ usage = BBOU_WS;
+ break;
+ case BBOU_RDMSR:
+ /* Read RCX, write RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RDTSC:
+ /* Write RAX, RDX */
+ bb_reg_set_undef(BBRG_RAX);
+ bb_reg_set_undef(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_RET:
+ usage = BBOU_NOP;
+ /* Functions that restore state which was saved by another
+ * function or build new kernel stacks. We cannot verify what
+ * is being restored so skip the sanity check.
+ */
+ if (strcmp(bb_func_name, "restore_image") == 0 ||
+ strcmp(bb_func_name, "relocate_kernel") == 0 ||
+ strcmp(bb_func_name, "identity_mapped") == 0 ||
+ strcmp(bb_func_name, "xen_iret_crit_fixup") == 0 ||
+ strcmp(bb_func_name, "math_abort") == 0)
+ break;
+ bb_sanity_check(0);
+ break;
+ case BBOU_SAHF:
+ /* Read RAX */
+ bb_reg_read(BBRG_RAX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SCAS:
+ /* Read RAX, RDI, write RDI */
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDI);
+ bb_reg_set_undef(BBRG_RDI);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SUB:
+ /* Special case for sub instructions that adjust registers
+ * which are mapping the stack.
+ */
+ if (dst->reg && bb_is_osp_defined(dst->base_rc)) {
+ bb_adjust_osp_instruction(-1);
+ usage = BBOU_RS;
+ } else {
+ usage = BBOU_RSRDWD;
+ }
+ break;
+ case BBOU_SYSEXIT:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_SYSRET:
+ bb_sanity_check(1);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_WRMSR:
+ /* Read RCX, RAX, RDX */
+ bb_reg_read(BBRG_RCX);
+ bb_reg_read(BBRG_RAX);
+ bb_reg_read(BBRG_RDX);
+ usage = BBOU_NOP;
+ break;
+ case BBOU_XADD:
+ usage = bb_usage_xadd(src, dst);
+ break;
+ case BBOU_XCHG:
+ /* i386 do_IRQ with 4K stacks does xchg %ebx,%esp; call
+ * irq_handler; mov %ebx,%esp; to switch stacks. Ignore this
+ * stack switch when tracking registers, it is handled by
+ * higher level backtrace code. Convert xchg %ebx,%esp to mov
+ * %esp,%ebx so the later mov %ebx,%esp becomes a NOP and the
+ * stack remains defined so we can backtrace through do_IRQ's
+ * stack switch.
+ *
+ * Ditto for do_softirq.
+ */
+ if (src->reg &&
+ dst->reg &&
+ src->base_rc == BBRG_RBX &&
+ dst->base_rc == BBRG_RSP &&
+ (strcmp(bb_func_name, "do_IRQ") == 0 ||
+ strcmp(bb_func_name, "do_softirq") == 0)) {
+ strcpy(bb_decode.opcode, "mov");
+ usage = bb_usage_mov(dst, src, sizeof("mov")-1);
+ } else {
+ usage = bb_usage_xchg(src, dst);
+ }
+ break;
+ case BBOU_XOR:
+ /* xor %reg,%reg only counts as a register write, the original
+ * contents of reg are irrelevant.
+ */
+ if (src->reg && dst->reg && src->base_rc == dst->base_rc)
+ usage = BBOU_WS;
+ else
+ usage = BBOU_RSRDWD;
+ break;
+ }
+
+ /* The switch statement above handled all the special cases. Every
+ * opcode should now have a usage of NOP or one of the generic cases.
+ */
+ if (usage == BBOU_UNKNOWN || usage == BBOU_NOP) {
+ /* nothing to do */
+ } else if (usage >= BBOU_RS && usage <= BBOU_RSRDWSWD) {
+ if (usage & BBOU_RS)
+ bb_read_operand(src);
+ if (usage & BBOU_RD)
+ bb_read_operand(dst);
+ if (usage & BBOU_WS)
+ bb_write_operand(src);
+ if (usage & BBOU_WD)
+ bb_write_operand(dst);
+ } else {
+ kdb_printf("%s: opcode not fully handled\n", __FUNCTION__);
+ if (!KDB_DEBUG(BB)) {
+ bb_print_opcode();
+ if (bb_decode.src.present)
+ bb_print_operand("src", &bb_decode.src);
+ if (bb_decode.dst.present)
+ bb_print_operand("dst", &bb_decode.dst);
+ if (bb_decode.dst2.present)
+ bb_print_operand("dst2", &bb_decode.dst2);
+ }
+ bb_giveup = 1;
+ }
+}
+
+static void
+bb_parse_buffer(void)
+{
+ char *p, *src, *dst = NULL, *dst2 = NULL;
+ int paren = 0;
+ p = bb_buffer;
+ memset(&bb_decode, 0, sizeof(bb_decode));
+ KDB_DEBUG_BB(" '%s'\n", p);
+ p += strcspn(p, ":"); /* skip address and function name+offset: */
+ if (*p++ != ':') {
+ kdb_printf("%s: cannot find ':' in buffer '%s'\n",
+ __FUNCTION__, bb_buffer);
+ bb_giveup = 1;
+ return;
+ }
+ p += strspn(p, " \t"); /* step to opcode */
+ if (strncmp(p, "(bad)", 5) == 0)
+ strcpy(p, "nop");
+ /* separate any opcode prefix */
+ if (strncmp(p, "lock", 4) == 0 ||
+ strncmp(p, "rep", 3) == 0 ||
+ strncmp(p, "rex", 3) == 0 ||
+ strncmp(p, "addr", 4) == 0) {
+ bb_decode.prefix = p;
+ p += strcspn(p, " \t");
+ *p++ = '\0';
+ p += strspn(p, " \t");
+ }
+ bb_decode.opcode = p;
+ strsep(&p, " \t"); /* step to end of opcode */
+ if (bb_parse_opcode())
+ return;
+ if (!p)
+ goto no_operands;
+ p += strspn(p, " \t"); /* step to operand(s) */
+ if (!*p)
+ goto no_operands;
+ src = p;
+ p = strsep(&p, " \t"); /* strip comments after operands */
+ /* split 'src','dst' but ignore ',' inside '(' ')' */
+ while (*p) {
+ if (*p == '(') {
+ ++paren;
+ } else if (*p == ')') {
+ --paren;
+ } else if (*p == ',' && paren == 0) {
+ *p = '\0';
+ if (dst)
+ dst2 = p+1;
+ else
+ dst = p+1;
+ }
+ ++p;
+ }
+ bb_parse_operand(src, &bb_decode.src);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("src", &bb_decode.src);
+ if (dst && !bb_giveup) {
+ bb_parse_operand(dst, &bb_decode.dst);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("dst", &bb_decode.dst);
+ }
+ if (dst2 && !bb_giveup) {
+ bb_parse_operand(dst2, &bb_decode.dst2);
+ if (KDB_DEBUG(BB))
+ bb_print_operand("dst2", &bb_decode.dst2);
+ }
+no_operands:
+ if (!bb_giveup)
+ bb_usage();
+}
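+
+/* Illustrative example (hypothetical line): a disassembly line such as
+ * "0x<addr> <sys_write+0x10>: mov %rsp,%rbp" is split at the ':' into
+ * opcode "mov", src "%rsp" and dst "%rbp"; a "lock" or "rep" prefix would
+ * first be separated into bb_decode.prefix.
+ */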
+
+static int
+bb_dis_pass2(PTR file, const char *fmt, ...)
+{
+ char *p;
+ int l = strlen(bb_buffer);
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(bb_buffer + l, sizeof(bb_buffer) - l, fmt, ap);
+ va_end(ap);
+ if ((p = strchr(bb_buffer, '\n'))) {
+ *p = '\0';
+ p = bb_buffer;
+ p += strcspn(p, ":");
+ if (*p++ == ':')
+ bb_fixup_switch_to(p);
+ bb_parse_buffer();
+ bb_buffer[0] = '\0';
+ }
+ return 0;
+}
+
+static void
+bb_printaddr_pass2(bfd_vma addr, disassemble_info *dip)
+{
+ kdb_symtab_t symtab;
+ unsigned int offset;
+ dip->fprintf_func(dip->stream, "0x%lx", addr);
+ kdbnearsym(addr, &symtab);
+ if (symtab.sym_name) {
+ dip->fprintf_func(dip->stream, " <%s", symtab.sym_name);
+ if ((offset = addr - symtab.sym_start))
+ dip->fprintf_func(dip->stream, "+0x%x", offset);
+ dip->fprintf_func(dip->stream, ">");
+ }
+}
+
+/* Set the starting register and memory state for the current bb */
+
+static void
+bb_start_block0_special(void)
+{
+ int i;
+ short offset_address;
+ enum bb_reg_code reg, value;
+ struct bb_name_state *r;
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (bb_func_start == r->address && r->fname == NULL)
+ goto match;
+ }
+ return;
+match:
+ /* Set the running registers */
+ for (reg = BBRG_RAX; reg < r->regs_size; ++reg) {
+ value = r->regs[reg].value;
+ if (test_bit(value, r->skip_regs.bits)) {
+ /* this regs entry is not defined for this label */
+ continue;
+ }
+ bb_reg_code_set_value(reg, value);
+ bb_reg_code_set_offset(reg, r->regs[reg].offset);
+ }
+ /* Set any memory contents, e.g. pt_regs. Adjust RSP as required. */
+ offset_address = 0;
+ for (i = 0; i < r->mem_size; ++i) {
+ offset_address = max_t(int,
+ r->mem[i].offset_address + KDB_WORD_SIZE,
+ offset_address);
+ }
+ if (bb_reg_code_offset(BBRG_RSP) > -offset_address)
+ bb_adjust_osp(BBRG_RSP, -offset_address - bb_reg_code_offset(BBRG_RSP));
+ for (i = 0; i < r->mem_size; ++i) {
+ value = r->mem[i].value;
+ if (test_bit(value, r->skip_mem.bits)) {
+ /* this memory entry is not defined for this label */
+ continue;
+ }
+ bb_memory_set_reg_value(BBRG_RSP, r->mem[i].offset_address,
+ value, 0);
+ bb_reg_set_undef(value);
+ }
+ return;
+}
+
+static void
+bb_pass2_start_block(int number)
+{
+ int i, j, k, first, changed;
+ size_t size;
+ struct bb_jmp *bb_jmp;
+ struct bb_reg_state *state;
+ struct bb_memory_contains *c1, *c2;
+ bb_reg_state->mem_count = bb_reg_state_max;
+ size = bb_reg_state_size(bb_reg_state);
+ memset(bb_reg_state, 0, size);
+
+ if (number == 0) {
+ /* The first block is assumed to have well defined inputs */
+ bb_start_block0();
+ /* Some assembler labels have non-standard entry
+ * states.
+ */
+ bb_start_block0_special();
+ bb_reg_state_print(bb_reg_state);
+ return;
+ }
+
+ /* Merge all the input states for the current bb together */
+ first = 1;
+ changed = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ bb_jmp = bb_jmp_list + i;
+ if (bb_jmp->to != bb_curr->start)
+ continue;
+ state = bb_jmp->state;
+ if (!state)
+ continue;
+ if (first) {
+ size = bb_reg_state_size(state);
+ memcpy(bb_reg_state, state, size);
+ KDB_DEBUG_BB(" first state %p\n", state);
+ bb_reg_state_print(bb_reg_state);
+ first = 0;
+ continue;
+ }
+
+ KDB_DEBUG_BB(" merging state %p\n", state);
+ /* Merge the register states */
+ for (j = 0; j < ARRAY_SIZE(state->contains); ++j) {
+ if (memcmp(bb_reg_state->contains + j,
+ state->contains + j,
+ sizeof(bb_reg_state->contains[0]))) {
+ /* Different states for this register from two
+ * or more inputs, make it undefined.
+ */
+ if (bb_reg_state->contains[j].value ==
+ BBRG_UNDEFINED) {
+ KDB_DEBUG_BB(" ignoring %s\n",
+ bbrg_name[j + BBRG_RAX]);
+ } else {
+ bb_reg_set_undef(BBRG_RAX + j);
+ changed = 1;
+ }
+ }
+ }
+
+ /* Merge the memory states. This relies on both
+ * bb_reg_state->memory and state->memory being sorted in
+ * descending order, with undefined entries at the end.
+ */
+ c1 = bb_reg_state->memory;
+ c2 = state->memory;
+ j = k = 0;
+ while (j < bb_reg_state->mem_count &&
+ k < state->mem_count) {
+ if (c1->offset_address < c2->offset_address) {
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ ++c2;
+ ++k;
+ continue;
+ }
+ if (c1->offset_address > c2->offset_address) {
+ /* Memory location is not in all input states,
+ * delete the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ continue;
+ }
+ if (memcmp(c1, c2, sizeof(*c1))) {
+ /* Same location, different contents, delete
+ * the memory location.
+ */
+ bb_delete_memory(c1->offset_address);
+ KDB_DEBUG_BB_OFFSET(c2->offset_address,
+ " ignoring c2->offset_address ",
+ "\n");
+ changed = 1;
+ }
+ ++c1;
+ ++c2;
+ ++j;
+ ++k;
+ }
+ while (j < bb_reg_state->mem_count) {
+ bb_delete_memory(c1->offset_address);
+ changed = 1;
+ ++c1;
+ ++j;
+ }
+ }
+ if (changed) {
+ KDB_DEBUG_BB(" final state\n");
+ bb_reg_state_print(bb_reg_state);
+ }
+}
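+
+/* Illustrative example of the merge: if one input state has RBX containing
+ * RBX and another has RBX undefined, the merged state makes RBX undefined;
+ * a stack slot that is missing from an input, or that holds different
+ * contents across inputs, is deleted from the merged state.
+ */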
+
+/* We have reached the exit point from the current function, either a call to
+ * the next function or the instruction that was about to be executed when an
+ * interrupt occurred. Save the current register state in bb_exit_state.
+ */
+
+static void
+bb_save_exit_state(void)
+{
+ size_t size;
+ debug_kfree(bb_exit_state);
+ bb_exit_state = NULL;
+ bb_reg_state_canonicalize();
+ size = bb_reg_state_size(bb_reg_state);
+ bb_exit_state = debug_kmalloc(size, GFP_ATOMIC);
+ if (!bb_exit_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_exit_state, bb_reg_state, size);
+}
+
+static int
+bb_pass2_do_changed_blocks(int allow_missing)
+{
+ int i, j, missing, changed, maxloops;
+ unsigned long addr;
+ struct bb_jmp *bb_jmp;
+ KDB_DEBUG_BB("\n %s: allow_missing %d\n", __FUNCTION__, allow_missing);
+ /* Absolute worst case is we have to iterate over all the basic blocks
+ * in an "out of order" state, each iteration losing one register or
+ * memory state. Any more loops than that is a bug. "out of order"
+ * means that the layout of blocks in memory does not match the logic
+ * flow through those blocks so (for example) block 27 comes before
+ * block 2. To allow for out of order blocks, multiply maxloops by the
+ * number of blocks.
+ */
+ maxloops = (KDB_INT_REGISTERS + bb_reg_state_max) * bb_count;
+ changed = 1;
+ do {
+ changed = 0;
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (!bb_curr->changed)
+ continue;
+ missing = 0;
+ for (j = 0, bb_jmp = bb_jmp_list;
+ j < bb_jmp_count;
+ ++j, ++bb_jmp) {
+ if (bb_jmp->to == bb_curr->start &&
+ !bb_jmp->state)
+ ++missing;
+ }
+ if (missing > allow_missing)
+ continue;
+ bb_curr->changed = 0;
+ changed = 1;
+ KDB_DEBUG_BB("\n bb[%d]\n", i);
+ bb_pass2_start_block(i);
+ for (addr = bb_curr->start;
+ addr <= bb_curr->end; ) {
+ bb_curr_addr = addr;
+ if (addr == bb_exit_addr)
+ bb_save_exit_state();
+ addr += kdba_id_printinsn(addr, &kdb_di);
+ kdb_di.fprintf_func(NULL, "\n");
+ if (bb_giveup)
+ goto done;
+ }
+ if (!bb_exit_state) {
+ /* ATTRIB_NORET functions are a problem with
+ * the current gcc. Allow the trailing address
+ * a bit of leeway.
+ */
+ if (addr == bb_exit_addr ||
+ addr == bb_exit_addr + 1)
+ bb_save_exit_state();
+ }
+ if (bb_curr->drop_through)
+ bb_transfer(bb_curr->end,
+ bb_list[i+1]->start, 1);
+ }
+ if (maxloops-- == 0) {
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ goto done;
+ }
+ } while (changed);
+done:
+ for (i = 0; i < bb_count; ++i) {
+ bb_curr = bb_list[i];
+ if (bb_curr->changed)
+ return 1; /* more to do, increase allow_missing */
+ }
+ return 0; /* all blocks done */
+}
+
+/* Assume that the current function is a pass through function that does not
+ * refer to its register parameters. Exclude known asmlinkage functions and
+ * assume the other functions actually use their registers.
+ */
+
+static void
+bb_assume_pass_through(void)
+{
+ static int first_time = 1;
+ if (strncmp(bb_func_name, "sys_", 4) == 0 ||
+ strncmp(bb_func_name, "compat_sys_", 11) == 0 ||
+ strcmp(bb_func_name, "schedule") == 0 ||
+ strcmp(bb_func_name, "do_softirq") == 0 ||
+ strcmp(bb_func_name, "printk") == 0 ||
+ strcmp(bb_func_name, "vprintk") == 0 ||
+ strcmp(bb_func_name, "preempt_schedule") == 0 ||
+ strcmp(bb_func_name, "start_kernel") == 0 ||
+ strcmp(bb_func_name, "csum_partial") == 0 ||
+ strcmp(bb_func_name, "csum_partial_copy_generic") == 0 ||
+ strcmp(bb_func_name, "math_state_restore") == 0 ||
+ strcmp(bb_func_name, "panic") == 0 ||
+ strcmp(bb_func_name, "kdb_printf") == 0 ||
+ strcmp(bb_func_name, "kdb_interrupt") == 0)
+ return;
+ if (bb_asmlinkage_arch())
+ return;
+ bb_reg_params = REGPARM;
+ if (first_time) {
+ kdb_printf(" %s has memory parameters but no register "
+ "parameters.\n Assuming it is a 'pass "
+ "through' function that does not refer to "
+ "its register\n parameters and setting %d "
+ "register parameters\n",
+ bb_func_name, REGPARM);
+ first_time = 0;
+ return;
+ }
+ kdb_printf(" Assuming %s is 'pass through' with %d register "
+ "parameters\n",
+ bb_func_name, REGPARM);
+}
+
+static void
+bb_pass2(void)
+{
+ int allow_missing;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: start\n", __FUNCTION__);
+
+ kdb_di.fprintf_func = bb_dis_pass2;
+ kdb_di.print_address_func = bb_printaddr_pass2;
+
+ bb_reg_state = debug_kmalloc(sizeof(*bb_reg_state), GFP_ATOMIC);
+ if (!bb_reg_state) {
+ kdb_printf("\n\n%s: out of debug_kmalloc\n", __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ bb_list[0]->changed = 1;
+
+ /* If a block does not have all its input states available then it is
+ * possible for a register to initially appear to hold a known value,
+ * but when other inputs are available then it becomes a variable
+ * value. The initial false state of "known" can generate false values
+ * for other registers and can even make it look like stack locations
+ * are being changed.
+ *
+ * To avoid these false positives, only process blocks which have all
+ * their inputs defined. That gives a clean depth first traversal of
+ * the tree, except for loops. If there are any loops, then start
+ * processing blocks with one missing input, then two missing inputs
+ * etc.
+ *
+ * Absolute worst case is we have to iterate over all the jmp entries,
+ * each iteration allowing one more missing input. Any more loops than
+ * that is a bug. Watch out for the corner case of 0 jmp entries.
+ */
+ for (allow_missing = 0; allow_missing <= bb_jmp_count; ++allow_missing) {
+ if (!bb_pass2_do_changed_blocks(allow_missing))
+ break;
+ if (bb_giveup)
+ break;
+ }
+ if (allow_missing > bb_jmp_count) {
+ kdb_printf("\n\n%s maxloops reached\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ if (bb_memory_params && bb_reg_params)
+ bb_reg_params = REGPARM;
+ if (REGPARM &&
+ bb_memory_params &&
+ !bb_reg_params)
+ bb_assume_pass_through();
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: end bb_reg_params %d bb_memory_params %d\n",
+ __FUNCTION__, bb_reg_params, bb_memory_params);
+ if (bb_exit_state) {
+ kdb_printf("%s: bb_exit_state at " kdb_bfd_vma_fmt0 "\n",
+ __FUNCTION__, bb_exit_addr);
+ bb_do_reg_state_print(bb_exit_state);
+ }
+ }
+}
+
+static void
+bb_cleanup(void)
+{
+ int i;
+ struct bb* bb;
+ struct bb_reg_state *state;
+ while (bb_count) {
+ bb = bb_list[0];
+ bb_delete(0);
+ }
+ debug_kfree(bb_list);
+ bb_list = NULL;
+ bb_count = bb_max = 0;
+ for (i = 0; i < bb_jmp_count; ++i) {
+ state = bb_jmp_list[i].state;
+ if (state && --state->ref_count == 0)
+ debug_kfree(state);
+ }
+ debug_kfree(bb_jmp_list);
+ bb_jmp_list = NULL;
+ bb_jmp_count = bb_jmp_max = 0;
+ debug_kfree(bb_reg_state);
+ bb_reg_state = NULL;
+ bb_reg_state_max = 0;
+ debug_kfree(bb_exit_state);
+ bb_exit_state = NULL;
+ bb_reg_params = bb_memory_params = 0;
+ bb_giveup = 0;
+}
+
+static int
+bb_spurious_global_label(const char *func_name)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
+ if (strcmp(bb_spurious[i], func_name) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+/* Given the current actual register contents plus the exit state deduced from
+ * a basic block analysis of the current function, rollback the actual register
+ * contents to the values they had on entry to this function.
+ */
+
+static void
+bb_actual_rollback(const struct kdb_activation_record *ar)
+{
+ int i, offset_address;
+ struct bb_memory_contains *c;
+ enum bb_reg_code reg;
+ unsigned long address, osp = 0;
+ struct bb_actual new[ARRAY_SIZE(bb_actual)];
+
+ if (!bb_exit_state) {
+ kdb_printf("%s: no bb_exit_state, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ memcpy(bb_reg_state, bb_exit_state, bb_reg_state_size(bb_exit_state));
+ memset(new, 0, sizeof(new));
+
+ /* The most important register for obtaining saved state is rsp so get
+ * its new value first. Prefer rsp if it is valid, then other
+ * registers. Saved values of rsp in memory are unusable without a
+ * register that points to memory.
+ */
+ if (!bb_actual_valid(BBRG_RSP)) {
+ kdb_printf("%s: no starting value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: rsp " kdb_bfd_vma_fmt0,
+ __FUNCTION__, bb_actual_value(BBRG_RSP));
+ i = BBRG_RSP;
+ if (!bb_is_osp_defined(i)) {
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ if (bb_is_osp_defined(i) && bb_actual_valid(i))
+ break;
+ }
+ }
+ if (bb_is_osp_defined(i) && bb_actual_valid(i)) {
+ osp = new[BBRG_RSP - BBRG_RAX].value =
+ bb_actual_value(i) - bb_reg_code_offset(i);
+ new[BBRG_RSP - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> osp " kdb_bfd_vma_fmt0 "\n", osp);
+ } else {
+ bb_actual_set_valid(BBRG_RSP, 0);
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(" -> undefined\n");
+ kdb_printf("%s: no ending value for RSP, cannot rollback\n",
+ __FUNCTION__);
+ bb_giveup = 1;
+ return;
+ }
+
+ /* Now the other registers. First look at register values that have
+ * been copied to other registers.
+ */
+ for (i = BBRG_RAX; i < BBRG_RAX + KDB_INT_REGISTERS; ++i) {
+ reg = bb_reg_code_value(i);
+ if (bb_is_int_reg(reg)) {
+ new[reg - BBRG_RAX] = bb_actual[i - BBRG_RAX];
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s is in %s ",
+ __FUNCTION__,
+ bbrg_name[reg],
+ bbrg_name[i]);
+ if (bb_actual_valid(i))
+ kdb_printf(" -> " kdb_bfd_vma_fmt0 "\n",
+ bb_actual_value(i));
+ else
+ kdb_printf("(invalid)\n");
+ }
+ }
+ }
+
+ /* Finally register values that have been saved on stack */
+ for (i = 0, c = bb_reg_state->memory;
+ i < bb_reg_state->mem_count;
+ ++i, ++c) {
+ offset_address = c->offset_address;
+ reg = c->value;
+ if (!bb_is_int_reg(reg))
+ continue;
+ address = osp + offset_address;
+ if (address < ar->stack.logical_start ||
+ address >= ar->stack.logical_end) {
+ new[reg - BBRG_RAX].value = 0;
+ new[reg - BBRG_RAX].valid = 0;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf("%s: %s -> undefined\n",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ } else {
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM)) {
+ kdb_printf("%s: %s -> *(osp",
+ __FUNCTION__,
+ bbrg_name[reg]);
+ KDB_DEBUG_BB_OFFSET_PRINTF(offset_address, "", " ");
+ kdb_printf(kdb_bfd_vma_fmt0, address);
+ }
+ new[reg - BBRG_RAX].value = *(bfd_vma *)address;
+ new[reg - BBRG_RAX].valid = 1;
+ if (KDB_DEBUG(BB) | KDB_DEBUG(BB_SUMM))
+ kdb_printf(") = " kdb_bfd_vma_fmt0 "\n",
+ new[reg - BBRG_RAX].value);
+ }
+ }
+
+ memcpy(bb_actual, new, sizeof(bb_actual));
+}
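+
+/* Editorial sketch of the arithmetic used above to recover the
+ * original stack pointer (osp).  If the basic block analysis proved
+ * that a register holds "osp + offset" and kdb knows that register's
+ * actual value, the entry-time stack pointer follows by subtraction.
+ * The function name is illustrative, not part of kdb.
+ */
+static unsigned long __maybe_unused
+recover_osp(unsigned long actual_value, long offset)
+{
+	/* actual_value == osp + offset  =>  osp == actual_value - offset */
+	return actual_value - offset;
+}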
+
+/* Return true if the current function is an interrupt handler */
+
+static bool
+bb_interrupt_handler(kdb_machreg_t rip)
+{
+ unsigned long disp8, disp32, target, addr = (unsigned long)rip;
+ unsigned char code[5];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bb_hardware_handlers); ++i)
+ if (strcmp(bb_func_name, bb_hardware_handlers[i]) == 0)
+ return 1;
+
+ /* Given the large number of interrupt handlers, it is easiest to look
+ * at the next instruction and see if it is a jmp to the common exit
+ * routines.
+ */
+ if (kdb_getarea(code, addr) ||
+ kdb_getword(&disp32, addr+1, 4) ||
+ kdb_getword(&disp8, addr+1, 1))
+ return 0; /* not a valid code address */
+ if (code[0] == 0xe9) {
+ target = addr + (s32) disp32 + 5; /* jmp disp32 */
+ if (target == bb_ret_from_intr ||
+ target == bb_common_interrupt ||
+ target == bb_error_entry)
+ return 1;
+ }
+ if (code[0] == 0xeb) {
+ target = addr + (s8) disp8 + 2; /* jmp disp8 */
+ if (target == bb_ret_from_intr ||
+ target == bb_common_interrupt ||
+ target == bb_error_entry)
+ return 1;
+ }
+
+ return 0;
+}
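+
+/* Editorial sketch of the jmp target arithmetic used above, assuming
+ * little-endian byte order.  Both displacements are sign extended and
+ * are relative to the address of the *next* instruction: 2 bytes past
+ * a rel8 jmp (0xeb), 5 bytes past a rel32 jmp (0xe9).  The function
+ * is illustrative, not part of kdb.
+ */
+static unsigned long __maybe_unused
+jmp_target(unsigned long addr, const unsigned char *code)
+{
+	unsigned int d32;
+	if (code[0] == 0xeb)		/* jmp disp8, 2 byte instruction */
+		return addr + 2 + (signed char)code[1];
+	if (code[0] == 0xe9) {		/* jmp disp32, 5 byte instruction */
+		d32 = code[1] | code[2] << 8 | code[3] << 16 |
+		      (unsigned int)code[4] << 24;
+		return addr + 5 + (int)d32;
+	}
+	return 0;			/* not a relative jmp */
+}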
+
+/* Copy argument information that was deduced by the basic block analysis and
+ * rollback into the kdb stack activation record.
+ */
+
+static void
+bb_arguments(struct kdb_activation_record *ar)
+{
+ int i;
+ enum bb_reg_code reg;
+ kdb_machreg_t rsp;
+ ar->args = bb_reg_params + bb_memory_params;
+ bitmap_zero(ar->valid.bits, KDBA_MAXARGS);
+ for (i = 0; i < bb_reg_params; ++i) {
+ reg = bb_param_reg[i];
+ if (bb_actual_valid(reg)) {
+ ar->arg[i] = bb_actual_value(reg);
+ set_bit(i, ar->valid.bits);
+ }
+ }
+ if (!bb_actual_valid(BBRG_RSP))
+ return;
+ rsp = bb_actual_value(BBRG_RSP);
+ for (i = bb_reg_params; i < ar->args; ++i) {
+ rsp += KDB_WORD_SIZE;
+ if (kdb_getarea(ar->arg[i], rsp) == 0)
+ set_bit(i, ar->valid.bits);
+ }
+}
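+
+/* Editorial sketch of where argument i lives under the convention
+ * assumed above: the first nr_reg_args arguments are in the registers
+ * listed in bb_param_reg[], the rest sit on the stack one word each,
+ * starting just above the return address that rsp points at.  The
+ * function and its parameters are illustrative, not part of kdb.
+ */
+static unsigned long __maybe_unused
+stack_arg_address(unsigned long rsp, int i, int nr_reg_args, int word_size)
+{
+	/* memory argument 0 is the word immediately above the return
+	 * address, hence the +1
+	 */
+	return rsp + (i - nr_reg_args + 1) * word_size;
+}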
+
+/* Given an exit address from a function, decompose the entire function into
+ * basic blocks and determine the register state at the exit point.
+ */
+
+static void
+kdb_bb(unsigned long exit)
+{
+ kdb_symtab_t symtab;
+ if (!kdbnearsym(exit, &symtab)) {
+ kdb_printf("%s: address " kdb_bfd_vma_fmt0 " not recognised\n",
+ __FUNCTION__, exit);
+ bb_giveup = 1;
+ return;
+ }
+ bb_exit_addr = exit;
+ bb_mod_name = symtab.mod_name;
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ bb_func_end = symtab.sym_end;
+ /* Various global labels exist in the middle of assembler code and have
+ * a non-standard state. Ignore these labels and use the start of the
+ * previous label instead.
+ */
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ if (!kdbnearsym(symtab.sym_start - 1, &symtab))
+ break;
+ bb_func_start = symtab.sym_start;
+ }
+ bb_mod_name = symtab.mod_name;
+ bb_func_name = symtab.sym_name;
+ bb_func_start = symtab.sym_start;
+ /* Ignore spurious labels past this point and use the next non-spurious
+ * label as the end point.
+ */
+ if (kdbnearsym(bb_func_end, &symtab)) {
+ while (bb_spurious_global_label(symtab.sym_name)) {
+ bb_func_end = symtab.sym_end;
+ if (!kdbnearsym(symtab.sym_end + 1, &symtab))
+ break;
+ }
+ }
+ bb_pass1();
+ if (!bb_giveup)
+ bb_pass2();
+ if (bb_giveup)
+ kdb_printf("%s: " kdb_bfd_vma_fmt0
+ " [%s]%s failed at " kdb_bfd_vma_fmt0 "\n\n",
+ __FUNCTION__, exit,
+ bb_mod_name, bb_func_name, bb_curr_addr);
+}
+
+static int
+kdb_bb1(int argc, const char **argv)
+{
+ int diag;
+ unsigned long addr;
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdba_id_init(&kdb_di);
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ if ((diag = kdbgetularg((char *)argv[1], &addr)))
+ return diag;
+ kdb_save_flags();
+ kdb_flags |= KDB_DEBUG_FLAG_BB << KDB_DEBUG_FLAG_SHIFT;
+ kdb_bb(addr);
+ bb_cleanup();
+ kdb_restore_flags();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/* Run a basic block analysis on every function in the base kernel. Used as a
+ * global sanity check to find errors in the basic block code.
+ */
+
+static int
+kdb_bb_all(int argc, const char **argv)
+{
+ loff_t pos = 0;
+ const char *symname;
+ unsigned long addr;
+ int i, max_errors = 20;
+ struct bb_name_state *r;
+ kdb_printf("%s: build variables:"
+ " CCVERSION \"" __stringify(CCVERSION) "\""
+#ifdef CONFIG_X86_64
+ " CONFIG_X86_64"
+#endif
+#ifdef CONFIG_4KSTACKS
+ " CONFIG_4KSTACKS"
+#endif
+#ifdef CONFIG_PREEMPT
+ " CONFIG_PREEMPT"
+#endif
+#ifdef CONFIG_VM86
+ " CONFIG_VM86"
+#endif
+#ifdef CONFIG_FRAME_POINTER
+ " CONFIG_FRAME_POINTER"
+#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ " CONFIG_TRACE_IRQFLAGS"
+#endif
+#ifdef CONFIG_HIBERNATION
+ " CONFIG_HIBERNATION"
+#endif
+#ifdef CONFIG_KPROBES
+ " CONFIG_KPROBES"
+#endif
+#ifdef CONFIG_KEXEC
+ " CONFIG_KEXEC"
+#endif
+#ifdef CONFIG_MATH_EMULATION
+ " CONFIG_MATH_EMULATION"
+#endif
+#ifdef CONFIG_XEN
+ " CONFIG_XEN"
+#endif
+#ifdef CONFIG_DEBUG_INFO
+ " CONFIG_DEBUG_INFO"
+#endif
+#ifdef NO_SIBLINGS
+ " NO_SIBLINGS"
+#endif
+ " REGPARM=" __stringify(REGPARM)
+ "\n\n", __FUNCTION__);
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ if (!r->address)
+ kdb_printf("%s: cannot find special_case name %s\n",
+ __FUNCTION__, r->name);
+ }
+ for (i = 0; i < ARRAY_SIZE(bb_spurious); ++i) {
+ if (!kallsyms_lookup_name(bb_spurious[i]))
+ kdb_printf("%s: cannot find spurious label %s\n",
+ __FUNCTION__, bb_spurious[i]);
+ }
+ while ((symname = kdb_walk_kallsyms(&pos))) {
+ if (strcmp(symname, "_stext") == 0 ||
+ strcmp(symname, "stext") == 0)
+ break;
+ }
+ if (!symname) {
+ kdb_printf("%s: cannot find _stext\n", __FUNCTION__);
+ return 0;
+ }
+ kdba_id_init(&kdb_di);
+ i = 0;
+ while ((symname = kdb_walk_kallsyms(&pos))) {
+ if (strcmp(symname, "_etext") == 0)
+ break;
+ if (i++ % 100 == 0)
+ kdb_printf(".");
+ /* x86_64 has some 16 bit functions that appear between stext
+ * and _etext. Skip them.
+ */
+ if (strcmp(symname, "verify_cpu") == 0 ||
+ strcmp(symname, "verify_cpu_noamd") == 0 ||
+ strcmp(symname, "verify_cpu_sse_test") == 0 ||
+ strcmp(symname, "verify_cpu_no_longmode") == 0 ||
+ strcmp(symname, "verify_cpu_sse_ok") == 0 ||
+ strcmp(symname, "mode_seta") == 0 ||
+ strcmp(symname, "bad_address") == 0 ||
+ strcmp(symname, "wakeup_code") == 0 ||
+ strcmp(symname, "wakeup_code_start") == 0 ||
+ strcmp(symname, "wakeup_start") == 0 ||
+ strcmp(symname, "wakeup_32_vector") == 0 ||
+ strcmp(symname, "wakeup_32") == 0 ||
+ strcmp(symname, "wakeup_long64_vector") == 0 ||
+ strcmp(symname, "wakeup_long64") == 0 ||
+ strcmp(symname, "gdta") == 0 ||
+ strcmp(symname, "idt_48a") == 0 ||
+ strcmp(symname, "gdt_48a") == 0 ||
+ strcmp(symname, "bogus_real_magic") == 0 ||
+ strcmp(symname, "bogus_64_magic") == 0 ||
+ strcmp(symname, "no_longmode") == 0 ||
+ strcmp(symname, "mode_set") == 0 ||
+ strcmp(symname, "mode_seta") == 0 ||
+ strcmp(symname, "setbada") == 0 ||
+ strcmp(symname, "check_vesa") == 0 ||
+ strcmp(symname, "check_vesaa") == 0 ||
+ strcmp(symname, "_setbada") == 0 ||
+ strcmp(symname, "wakeup_stack_begin") == 0 ||
+ strcmp(symname, "wakeup_stack") == 0 ||
+ strcmp(symname, "wakeup_level4_pgt") == 0 ||
+ strcmp(symname, "acpi_copy_wakeup_routine") == 0 ||
+ strcmp(symname, "wakeup_end") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel_s4bios") == 0 ||
+ strcmp(symname, "do_suspend_lowlevel") == 0 ||
+ strcmp(symname, "wakeup_pmode_return") == 0 ||
+ strcmp(symname, "restore_registers") == 0)
+ continue;
+ /* __kprobes_text_end contains branches to the middle of code,
+ * with undefined states.
+ */
+ if (strcmp(symname, "__kprobes_text_end") == 0)
+ continue;
+ /* Data in the middle of the text segment :( */
+ if (strcmp(symname, "level2_kernel_pgt") == 0 ||
+ strcmp(symname, "level3_kernel_pgt") == 0)
+ continue;
+ if (bb_spurious_global_label(symname))
+ continue;
+ if ((addr = kallsyms_lookup_name(symname)) == 0)
+ continue;
+ // kdb_printf("BB " kdb_bfd_vma_fmt0 " %s\n", addr, symname);
+ bb_cleanup(); /* in case previous command was interrupted */
+ kdbnearsym_cleanup();
+ kdb_bb(addr);
+ touch_nmi_watchdog();
+ if (bb_giveup) {
+ if (max_errors-- == 0) {
+ kdb_printf("%s: max_errors reached, giving up\n",
+ __FUNCTION__);
+ break;
+ } else {
+ bb_giveup = 0;
+ }
+ }
+ }
+ kdb_printf("\n");
+ bb_cleanup();
+ kdbnearsym_cleanup();
+ return 0;
+}
+
+/*
+ *=============================================================================
+ *
+ * Everything above this line is doing basic block analysis, function by
+ * function. Everything below this line uses the basic block data to do a
+ * complete backtrace over all functions that are used by a process.
+ *
+ *=============================================================================
+ */
+
+
+/*============================================================================*/
+/* */
+/* Most of the backtrace code and data is common to x86_64 and i386. This */
+/* large ifdef contains all of the differences between the two architectures. */
+/* */
+/* Make sure you update the correct section of this ifdef. */
+/* */
+/*============================================================================*/
+#define XCS "cs"
+#define RSP "sp"
+#define RIP "ip"
+#define ARCH_RSP sp
+#define ARCH_RIP ip
+
+#ifdef CONFIG_X86_64
+
+#define ARCH_NORMAL_PADDING (16 * 8)
+
+/* x86_64 has multiple alternate stacks, with different sizes and different
+ * offsets to get the link from one stack to the next. Some of the stacks are
+ * referenced via cpu_pda, some via per_cpu orig_ist. Debug events can even
+ * have multiple nested stacks within the single physical stack, each nested
+ * stack has its own link and some of those links are wrong.
+ *
+ * Consistent it's not!
+ *
+ * Do not assume that these stacks are aligned on their size.
+ */
+#define INTERRUPT_STACK (N_EXCEPTION_STACKS + 1)
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{
+ static struct {
+ const char *id;
+ unsigned int total_size;
+ unsigned int nested_size;
+ unsigned int next;
+ } *sdp, stack_data[] = {
+ [STACKFAULT_STACK - 1] = { "stackfault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DOUBLEFAULT_STACK - 1] = { "doublefault", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [NMI_STACK - 1] = { "nmi", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [DEBUG_STACK - 1] = { "debug", DEBUG_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [MCE_STACK - 1] = { "machine check", EXCEPTION_STKSZ, EXCEPTION_STKSZ, EXCEPTION_STKSZ - 2*sizeof(void *) },
+ [INTERRUPT_STACK - 1] = { "interrupt", IRQSTACKSIZE, IRQSTACKSIZE, IRQSTACKSIZE - sizeof(void *) },
+ };
+ unsigned long total_start = 0, total_size, total_end;
+ int sd, found = 0;
+ extern unsigned long kdba_orig_ist(int, int);
+
+ for (sd = 0, sdp = stack_data;
+ sd < ARRAY_SIZE(stack_data);
+ ++sd, ++sdp) {
+ total_size = sdp->total_size;
+ if (!total_size)
+ continue; /* in case stack_data[] has any holes */
+ if (cpu < 0) {
+ /* Arbitrary address which can be on any cpu, see if it
+ * falls within any of the alternate stacks
+ */
+ int c;
+ for_each_online_cpu(c) {
+ if (sd == INTERRUPT_STACK - 1)
+ total_end = (unsigned long)cpu_pda(c)->irqstackptr;
+ else
+ total_end = per_cpu(orig_ist, c).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ cpu = c;
+ break;
+ }
+ }
+ if (!found)
+ continue;
+ }
+ /* Only check the supplied or found cpu */
+ if (sd == INTERRUPT_STACK - 1)
+ total_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
+ else
+ total_end = per_cpu(orig_ist, cpu).ist[sd];
+ total_start = total_end - total_size;
+ if (addr >= total_start && addr < total_end) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ /* find which nested stack the address is in */
+ while (addr > total_start + sdp->nested_size)
+ total_start += sdp->nested_size;
+ ar->stack.physical_start = total_start;
+ ar->stack.physical_end = total_start + sdp->nested_size;
+ ar->stack.logical_start = total_start;
+ ar->stack.logical_end = total_start + sdp->next;
+ ar->stack.next = *(unsigned long *)ar->stack.logical_end;
+ ar->stack.id = sdp->id;
+
+ /* Nasty: when switching to the interrupt stack, the stack state of the
+ * caller is split over two stacks, the original stack and the
+ * interrupt stack. One word (the previous frame pointer) is stored on
+ * the interrupt stack, the rest of the interrupt data is in the old
+ * frame. To make the interrupted stack state look as though it is
+ * contiguous, copy the missing word from the interrupt stack to the
+ * original stack and adjust the new stack pointer accordingly.
+ */
+
+ if (sd == INTERRUPT_STACK - 1) {
+ *(unsigned long *)(ar->stack.next - KDB_WORD_SIZE) =
+ ar->stack.next;
+ ar->stack.next -= KDB_WORD_SIZE;
+ }
+}
+
+/* rip is not in the thread struct for x86_64. We know that the stack value
+ * was saved in schedule near the label thread_return. Setting rip to
+ * thread_return lets the stack trace find that we are in schedule and
+ * correctly decode its prologue.
+ */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return bb_thread_return;
+}
+
+#else /* !CONFIG_X86_64 */
+
+#define ARCH_NORMAL_PADDING (19 * 4)
+
+#ifdef CONFIG_4KSTACKS
+static struct thread_info **kdba_hardirq_ctx, **kdba_softirq_ctx;
+#endif /* CONFIG_4KSTACKS */
+
+/* On a 4K stack kernel, hardirq_ctx and softirq_ctx are [NR_CPUS] arrays. The
+ * first element of each per-cpu stack is a struct thread_info.
+ */
+void
+kdba_get_stack_info_alternate(kdb_machreg_t addr, int cpu,
+ struct kdb_activation_record *ar)
+{
+#ifdef CONFIG_4KSTACKS
+ struct thread_info *tinfo;
+ tinfo = (struct thread_info *)(addr & -THREAD_SIZE);
+ if (cpu < 0) {
+ /* Arbitrary address, see if it falls within any of the irq
+ * stacks
+ */
+ int found = 0;
+ for_each_online_cpu(cpu) {
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ }
+ if (tinfo == kdba_hardirq_ctx[cpu] ||
+ tinfo == kdba_softirq_ctx[cpu]) {
+ ar->stack.physical_start = (kdb_machreg_t)tinfo;
+ ar->stack.physical_end = ar->stack.physical_start + THREAD_SIZE;
+ ar->stack.logical_start = ar->stack.physical_start +
+ sizeof(struct thread_info);
+ ar->stack.logical_end = ar->stack.physical_end;
+ ar->stack.next = tinfo->previous_esp;
+ if (tinfo == kdba_hardirq_ctx[cpu])
+ ar->stack.id = "hardirq_ctx";
+ else
+ ar->stack.id = "softirq_ctx";
+ }
+#endif /* CONFIG_4KSTACKS */
+}
+
+/* rip is in the thread struct for i386 */
+
+static kdb_machreg_t
+kdba_bt_stack_rip(const struct task_struct *p)
+{
+ return p->thread.ip;
+}
+
+#endif /* CONFIG_X86_64 */
+
+/* Given an address which claims to be on a stack, an optional cpu number and
+ * an optional task address, get information about the stack.
+ *
+ * t == NULL, cpu < 0 indicates an arbitrary stack address with no associated
+ * struct task, the address can be in an alternate stack or any task's normal
+ * stack.
+ *
+ * t != NULL, cpu >= 0 indicates a running task, the address can be in an
+ * alternate stack or that task's normal stack.
+ *
+ * t != NULL, cpu < 0 indicates a blocked task, the address can only be in that
+ * task's normal stack.
+ *
+ * t == NULL, cpu >= 0 is not a valid combination.
+ */
+
+static void
+kdba_get_stack_info(kdb_machreg_t rsp, int cpu,
+ struct kdb_activation_record *ar,
+ const struct task_struct *t)
+{
+ struct thread_info *tinfo;
+ struct task_struct *g, *p;
+ memset(&ar->stack, 0, sizeof(ar->stack));
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP "=0x%lx cpu=%d task=%p\n",
+ __FUNCTION__, rsp, cpu, t);
+ if (t == NULL || cpu >= 0) {
+ kdba_get_stack_info_alternate(rsp, cpu, ar);
+ if (ar->stack.logical_start)
+ goto out;
+ }
+ rsp &= -THREAD_SIZE;
+ tinfo = (struct thread_info *)rsp;
+ if (t == NULL) {
+ /* Arbitrary stack address without an associated task, see if
+ * it falls within any normal process stack, including the idle
+ * tasks.
+ */
+ kdb_do_each_thread(g, p) {
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ } kdb_while_each_thread(g, p);
+ for_each_online_cpu(cpu) {
+ p = idle_task(cpu);
+ if (tinfo == task_thread_info(p)) {
+ t = p;
+ goto found;
+ }
+ }
+ found:
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: found task %p\n", __FUNCTION__, t);
+ } else if (cpu >= 0) {
+ /* running task */
+ struct kdb_running_process *krp = kdb_running_process + cpu;
+ if (krp->p != t || tinfo != task_thread_info(t))
+ t = NULL;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: running task %p\n", __FUNCTION__, t);
+ } else {
+ /* blocked task */
+ if (tinfo != task_thread_info(t))
+ t = NULL;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: blocked task %p\n", __FUNCTION__, t);
+ }
+ if (t) {
+ ar->stack.physical_start = rsp;
+ ar->stack.physical_end = rsp + THREAD_SIZE;
+ ar->stack.logical_start = rsp + sizeof(struct thread_info);
+ ar->stack.logical_end = ar->stack.physical_end - ARCH_NORMAL_PADDING;
+ ar->stack.next = 0;
+ ar->stack.id = "normal";
+ }
+out:
+ if (ar->stack.physical_start && KDB_DEBUG(ARA)) {
+ kdb_printf("%s: ar->stack\n", __FUNCTION__);
+ kdb_printf(" physical_start=0x%lx\n", ar->stack.physical_start);
+ kdb_printf(" physical_end=0x%lx\n", ar->stack.physical_end);
+ kdb_printf(" logical_start=0x%lx\n", ar->stack.logical_start);
+ kdb_printf(" logical_end=0x%lx\n", ar->stack.logical_end);
+ kdb_printf(" next=0x%lx\n", ar->stack.next);
+ kdb_printf(" id=%s\n", ar->stack.id);
+ kdb_printf(" set MDCOUNT %ld\n",
+ (ar->stack.physical_end - ar->stack.physical_start) /
+ KDB_WORD_SIZE);
+ kdb_printf(" mds " kdb_machreg_fmt0 "\n",
+ ar->stack.physical_start);
+ }
+}
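+
+/* Editorial usage sketch for the t/cpu combinations documented above.
+ * Passing t == NULL and cpu == -1 resolves an arbitrary address; on
+ * return a zero physical_start means no known stack contains it.  The
+ * wrapper is illustrative, not part of kdb.
+ */
+static int __maybe_unused
+stack_is_known(kdb_machreg_t rsp, struct kdb_activation_record *ar)
+{
+	kdba_get_stack_info(rsp, -1, ar, NULL);
+	return ar->stack.physical_start != 0;
+}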
+
+static void
+bt_print_one(kdb_machreg_t rip, kdb_machreg_t rsp,
+ const struct kdb_activation_record *ar,
+ const kdb_symtab_t *symtab, int argcount)
+{
+ int btsymarg = 0;
+ int nosect = 0;
+
+ kdbgetintenv("BTSYMARG", &btsymarg);
+ kdbgetintenv("NOSECT", &nosect);
+
+ kdb_printf(kdb_machreg_fmt0, rsp);
+ kdb_symbol_print(rip, symtab,
+ KDB_SP_SPACEB|KDB_SP_VALUE);
+ if (argcount && ar->args) {
+ int i, argc = ar->args;
+ kdb_printf(" (");
+ if (argc > argcount)
+ argc = argcount;
+ for (i = 0; i < argc; i++) {
+ if (i)
+ kdb_printf(", ");
+ if (test_bit(i, ar->valid.bits))
+ kdb_printf("0x%lx", ar->arg[i]);
+ else
+ kdb_printf("invalid");
+ }
+ kdb_printf(")");
+ }
+ kdb_printf("\n");
+ if (symtab->sym_name) {
+ if (!nosect) {
+ kdb_printf(" %s",
+ symtab->mod_name);
+ if (symtab->sec_name && symtab->sec_start)
+ kdb_printf(" 0x%lx 0x%lx",
+ symtab->sec_start, symtab->sec_end);
+ kdb_printf(" 0x%lx 0x%lx\n",
+ symtab->sym_start, symtab->sym_end);
+ }
+ }
+ if (argcount && ar->args && btsymarg) {
+ int i, argc = ar->args;
+ kdb_symtab_t arg_symtab;
+ for (i = 0; i < argc; i++) {
+ kdb_machreg_t arg = ar->arg[i];
+ if (test_bit(i, ar->valid.bits) &&
+ kdbnearsym(arg, &arg_symtab)) {
+ kdb_printf(" ARG %2d ", i);
+ kdb_symbol_print(arg, &arg_symtab,
+ KDB_SP_DEFAULT|KDB_SP_NEWLINE);
+ }
+ }
+ }
+}
+
+static void
+kdba_bt_new_stack(struct kdb_activation_record *ar, kdb_machreg_t *rsp,
+ int *count, int *suppress)
+{
+ /* Nasty: common_interrupt builds a partial pt_regs, with r15 through
+ * rbx not being filled in. It passes struct pt_regs* to do_IRQ (in
+ * rdi) but the stack pointer is not adjusted to account for r15
+ * through rbx. This has two effects :-
+ *
+ * (1) struct pt_regs on an external interrupt actually overlaps with
+ * the local stack area used by do_IRQ. Not only are r15-rbx
+ * undefined, the area that claims to hold their values can even
+ * change as the irq is processed.
+ *
+ * (2) The back stack pointer saved for the new frame is not pointing
+ * at pt_regs, it is pointing at rbx within the pt_regs passed to
+ * do_IRQ.
+ *
+ * There is nothing that I can do about (1) but I have to fix (2)
+ * because kdb backtrace looks for the "start" address of pt_regs as it
+ * walks back through the stacks. When switching from the interrupt
+ * stack to another stack, we have to assume that pt_regs has been
+ * seen and turn off backtrace suppression.
+ */
+ int probable_pt_regs = strcmp(ar->stack.id, "interrupt") == 0;
+ *rsp = ar->stack.next;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("new " RSP "=" kdb_machreg_fmt0 "\n", *rsp);
+ bb_actual_set_value(BBRG_RSP, *rsp);
+ kdba_get_stack_info(*rsp, -1, ar, NULL);
+ if (!ar->stack.physical_start) {
+ kdb_printf("+++ Cannot resolve next stack\n");
+ } else if (!*suppress) {
+ kdb_printf(" ======================= <%s>\n",
+ ar->stack.id);
+ ++*count;
+ }
+ if (probable_pt_regs)
+ *suppress = 0;
+}
+
+/*
+ * kdba_bt_stack
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command, if any.
+ * argcount
+ * p Pointer to task for 'btp' command.
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Ultimately all the bt* commands come through this routine. If
+ * old_style is 0 then it uses the basic block analysis to get an accurate
+ * backtrace with arguments, otherwise it falls back to the old method of
+ * printing anything on stack that looks like a kernel address.
+ *
+ * Allowing for the stack data pushed by the hardware is tricky. We
+ * deduce the presence of hardware pushed data by looking for interrupt
+ * handlers, either by name or by the code that they contain. This
+ * information must be applied to the next function up the stack, because
+ * the hardware data is above the saved rip for the interrupted (next)
+ * function.
+ *
+ * To make things worse, the amount of data pushed is arch specific and
+ * may depend on the rsp for the next function, not the current function.
+ * The number of bytes pushed by hardware cannot be calculated until we
+ * are actually processing the stack for the interrupted function and have
+ * its rsp.
+ *
+ * It is also possible for an interrupt to occur in user space and for the
+ * interrupt handler to also be interrupted. Check the code selector
+ * whenever the previous function is an interrupt handler and stop
+ * backtracing if the interrupt was not in kernel space.
+ */
+
+static int
+kdba_bt_stack(kdb_machreg_t addr, int argcount, const struct task_struct *p,
+ int old_style)
+{
+ struct kdb_activation_record ar;
+ kdb_machreg_t rip = 0, rsp = 0, prev_rsp, cs;
+ kdb_symtab_t symtab;
+ int rip_at_rsp = 0, count = 0, btsp = 0, suppress,
+ interrupt_handler = 0, prev_interrupt_handler = 0, hardware_pushed,
+ prev_noret = 0;
+ struct pt_regs *regs = NULL;
+
+ kdbgetintenv("BTSP", &btsp);
+ suppress = !btsp;
+ memset(&ar, 0, sizeof(ar));
+ if (old_style)
+ kdb_printf("Using old style backtrace, unreliable with no arguments\n");
+
+ /*
+ * The caller may have supplied an address at which the stack traceback
+ * operation should begin. This address is assumed by this code to
+ * point to a return address on the stack to be traced back.
+ *
+ * Warning: type in the wrong address and you will get garbage in the
+ * backtrace.
+ */
+ if (addr) {
+ rsp = addr;
+ kdb_getword(&rip, rsp, sizeof(rip));
+ rip_at_rsp = 1;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, NULL);
+ } else {
+ if (task_curr(p)) {
+ struct kdb_running_process *krp =
+ kdb_running_process + task_cpu(p);
+ regs = krp->regs;
+ if (krp->seqno &&
+ krp->p == p &&
+ krp->seqno >= kdb_seqno - 1 &&
+ !KDB_NULL_REGS(regs)) {
+ /* valid saved state, continue processing */
+ } else {
+ kdb_printf
+ ("Process did not save state, cannot backtrace\n");
+ kdb_ps1(p);
+ return 0;
+ }
+ kdba_getregcontents(XCS, regs, &cs);
+ if ((cs & 0xffff) != __KERNEL_CS) {
+ kdb_printf("Stack is not in kernel space, backtrace not available\n");
+ return 0;
+ }
+ rip = krp->arch.ARCH_RIP;
+ rsp = krp->arch.ARCH_RSP;
+ kdba_get_stack_info(rsp, kdb_process_cpu(p), &ar, p);
+ } else {
+ /* Not on cpu, assume blocked. Blocked tasks do not
+ * have pt_regs. p->thread contains some data, alas
+ * what it contains differs between i386 and x86_64.
+ */
+ rip = kdba_bt_stack_rip(p);
+ rsp = p->thread.sp;
+ suppress = 0;
+ kdba_get_stack_info(rsp, -1, &ar, p);
+ }
+ }
+ if (!ar.stack.physical_start) {
+ kdb_printf(RSP "=0x%lx is not in a valid kernel stack, backtrace not available\n",
+ rsp);
+ return 0;
+ }
+ memset(&bb_actual, 0, sizeof(bb_actual));
+ bb_actual_set_value(BBRG_RSP, rsp);
+ bb_actual_set_valid(BBRG_RSP, 1);
+
+ kdb_printf(RSP "%*s" RIP "%*sFunction (args)\n",
+ 2*KDB_WORD_SIZE, " ",
+ 2*KDB_WORD_SIZE, " ");
+ if (ar.stack.next && !suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+
+ bb_cleanup();
+ /* Run through all the stacks */
+ while (ar.stack.physical_start) {
+ if (rip_at_rsp) {
+ rip = *(kdb_machreg_t *)rsp;
+ /* I wish that gcc was fixed to include a nop
+ * instruction after ATTRIB_NORET functions. The lack
+ * of a nop means that the return address points to the
+ * start of the next function, so fudge it to point one
+ * byte earlier.
+ *
+ * No, we cannot just decrement all rip values.
+ * Sometimes an rip legally points to the start of a
+ * function, e.g. interrupted code or hand crafted
+ * assembler.
+ */
+ if (prev_noret) {
+ kdbnearsym(rip, &symtab);
+ if (rip == symtab.sym_start) {
+ --rip;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("\tprev_noret, " RIP
+ "=0x%lx\n", rip);
+ }
+ }
+ }
+ kdbnearsym(rip, &symtab);
+ if (old_style) {
+ if (__kernel_text_address(rip) && !suppress) {
+ bt_print_one(rip, rsp, &ar, &symtab, 0);
+ ++count;
+ }
+ if (rsp == (unsigned long)regs) {
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ rsp += sizeof(rip);
+ rip_at_rsp = 1;
+ if (rsp >= ar.stack.logical_end) {
+ if (!ar.stack.next)
+ break;
+ kdba_bt_new_stack(&ar, &rsp, &count, &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ } else {
+ /* Start each analysis with no dynamic data from the
+ * previous kdb_bb() run.
+ */
+ bb_cleanup();
+ kdb_bb(rip);
+ if (bb_giveup)
+ break;
+ prev_interrupt_handler = interrupt_handler;
+ interrupt_handler = bb_interrupt_handler(rip);
+ prev_rsp = rsp;
+ if (rip_at_rsp) {
+ if (prev_interrupt_handler) {
+ cs = *((kdb_machreg_t *)rsp + 1) & 0xffff;
+ hardware_pushed =
+ bb_hardware_pushed_arch(rsp, &ar);
+ } else {
+ cs = __KERNEL_CS;
+ hardware_pushed = 0;
+ }
+ rsp += sizeof(rip) + hardware_pushed;
+ if (KDB_DEBUG(ARA))
+ kdb_printf("%s: " RSP " "
+ kdb_machreg_fmt0
+ " -> " kdb_machreg_fmt0
+ " hardware_pushed %d"
+ " prev_interrupt_handler %d"
+ " cs 0x%lx\n",
+ __FUNCTION__,
+ prev_rsp,
+ rsp,
+ hardware_pushed,
+ prev_interrupt_handler,
+ cs);
+ if (rsp >= ar.stack.logical_end &&
+ ar.stack.next) {
+ kdba_bt_new_stack(&ar, &rsp, &count,
+ &suppress);
+ rip_at_rsp = 0;
+ continue;
+ }
+ bb_actual_set_value(BBRG_RSP, rsp);
+ } else {
+ cs = __KERNEL_CS;
+ }
+ rip_at_rsp = 1;
+ bb_actual_rollback(&ar);
+ if (bb_giveup)
+ break;
+ if (bb_actual_value(BBRG_RSP) < rsp) {
+ kdb_printf("%s: " RSP " is going backwards, "
+ kdb_machreg_fmt0 " -> "
+ kdb_machreg_fmt0 "\n",
+ __FUNCTION__,
+ rsp,
+ bb_actual_value(BBRG_RSP));
+ bb_giveup = 1;
+ break;
+ }
+ bb_arguments(&ar);
+ if (!suppress) {
+ bt_print_one(rip, prev_rsp, &ar, &symtab, argcount);
+ ++count;
+ }
+ /* Functions that terminate the backtrace */
+ if (strcmp(bb_func_name, "cpu_idle") == 0 ||
+ strcmp(bb_func_name, "child_rip") == 0)
+ break;
+ if (rsp >= ar.stack.logical_end &&
+ !ar.stack.next)
+ break;
+ if (rsp <= (unsigned long)regs &&
+ bb_actual_value(BBRG_RSP) > (unsigned long)regs) {
+ if (ar.stack.next && suppress)
+ kdb_printf(" ======================= <%s>\n",
+ ar.stack.id);
+ ++count;
+ suppress = 0;
+ }
+ if (cs != __KERNEL_CS) {
+ kdb_printf("Reached user space\n");
+ break;
+ }
+ rsp = bb_actual_value(BBRG_RSP);
+ }
+ prev_noret = bb_noret(bb_func_name);
+ if (count > 200)
+ break;
+ }
+ if (bb_giveup)
+ return 1;
+ bb_cleanup();
+ kdbnearsym_cleanup();
+
+ if (count > 200) {
+ kdb_printf("bt truncated, count limit reached\n");
+ return 1;
+ } else if (suppress) {
+ kdb_printf
+ ("bt did not find pt_regs - no trace produced. Suggest 'set BTSP 1'\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * kdba_bt_address
+ *
+ * Do a backtrace starting at a specified stack address. Use this if the
+ * heuristics get the stack decode wrong.
+ *
+ * Inputs:
+ * addr Address provided to 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * mds %rsp comes in handy when examining the stack to do a manual
+ * traceback.
+ */
+
+int kdba_bt_address(kdb_machreg_t addr, int argcount)
+{
+ int ret;
+ kdba_id_init(&kdb_di); /* kdb_bb needs this done once */
+ ret = kdba_bt_stack(addr, argcount, NULL, 0);
+ if (ret == 1)
+ ret = kdba_bt_stack(addr, argcount, NULL, 1);
+ return ret;
+}
+
+/*
+ * kdba_bt_process
+ *
+ * Do a backtrace for a specified process.
+ *
+ * Inputs:
+ * p Struct task pointer extracted by 'bt' command.
+ * argcount
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ */
+
+int kdba_bt_process(const struct task_struct *p, int argcount)
+{
+ int ret;
+ kdba_id_init(&kdb_di); /* kdb_bb needs this done once */
+ ret = kdba_bt_stack(0, argcount, p, 0);
+ if (ret == 1)
+ ret = kdba_bt_stack(0, argcount, p, 1);
+ return ret;
+}
+
+static int __init kdba_bt_x86_init(void)
+{
+ int i, c, cp = -1;
+ struct bb_name_state *r;
+
+ kdb_register_repeat("bb1", kdb_bb1, "<vaddr>", "Analyse one basic block", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bb_all", kdb_bb_all, "", "Backtrace check on all built in functions", 0, KDB_REPEAT_NONE);
+
+ /* Split the opcode usage table by the first letter of each set of
+ * opcodes, for faster mapping of opcode to its operand usage.
+ */
+ for (i = 0; i < ARRAY_SIZE(bb_opcode_usage_all); ++i) {
+ c = bb_opcode_usage_all[i].opcode[0] - 'a';
+ if (c != cp) {
+ cp = c;
+ bb_opcode_usage[c].opcode = bb_opcode_usage_all + i;
+ }
+ ++bb_opcode_usage[c].size;
+ }
+
+ bb_common_interrupt = kallsyms_lookup_name("common_interrupt");
+ bb_error_entry = kallsyms_lookup_name("error_entry");
+ bb_ret_from_intr = kallsyms_lookup_name("ret_from_intr");
+ bb_thread_return = kallsyms_lookup_name("thread_return");
+ bb_sync_regs = kallsyms_lookup_name("sync_regs");
+ bb_save_v86_state = kallsyms_lookup_name("save_v86_state");
+ bb__sched_text_start = kallsyms_lookup_name("__sched_text_start");
+ bb__sched_text_end = kallsyms_lookup_name("__sched_text_end");
+ for (i = 0, r = bb_special_cases;
+ i < ARRAY_SIZE(bb_special_cases);
+ ++i, ++r) {
+ r->address = kallsyms_lookup_name(r->name);
+ }
+
+#ifdef CONFIG_4KSTACKS
+ kdba_hardirq_ctx = (struct thread_info **)kallsyms_lookup_name("hardirq_ctx");
+ kdba_softirq_ctx = (struct thread_info **)kallsyms_lookup_name("softirq_ctx");
+#endif /* CONFIG_4KSTACKS */
+
+ return 0;
+}
+
+static void __exit kdba_bt_x86_exit(void)
+{
+ kdb_unregister("bb1");
+ kdb_unregister("bb_all");
+}
+
+module_init(kdba_bt_x86_init)
+module_exit(kdba_bt_x86_exit)
--- /dev/null
+/*
+ * Kernel Debugger Architecture Independent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+
+static kdb_machreg_t
+kdba_getcr(int regnum)
+{
+ kdb_machreg_t contents = 0;
+ switch(regnum) {
+ case 0:
+ __asm__ ("movl %%cr0,%0\n\t":"=r"(contents));
+ break;
+ case 1:
+ break;
+ case 2:
+ __asm__ ("movl %%cr2,%0\n\t":"=r"(contents));
+ break;
+ case 3:
+ __asm__ ("movl %%cr3,%0\n\t":"=r"(contents));
+ break;
+ case 4:
+ __asm__ ("movl %%cr4,%0\n\t":"=r"(contents));
+ break;
+ default:
+ break;
+ }
+
+ return contents;
+}
+
+static void
+kdba_putdr(int regnum, kdb_machreg_t contents)
+{
+ switch(regnum) {
+ case 0:
+ __asm__ ("movl %0,%%db0\n\t"::"r"(contents));
+ break;
+ case 1:
+ __asm__ ("movl %0,%%db1\n\t"::"r"(contents));
+ break;
+ case 2:
+ __asm__ ("movl %0,%%db2\n\t"::"r"(contents));
+ break;
+ case 3:
+ __asm__ ("movl %0,%%db3\n\t"::"r"(contents));
+ break;
+ case 4:
+ case 5:
+ break;
+ case 6:
+ __asm__ ("movl %0,%%db6\n\t"::"r"(contents));
+ break;
+ case 7:
+ __asm__ ("movl %0,%%db7\n\t"::"r"(contents));
+ break;
+ default:
+ break;
+ }
+}
+
+kdb_machreg_t
+kdba_getdr(int regnum)
+{
+ kdb_machreg_t contents = 0;
+ switch(regnum) {
+ case 0:
+ __asm__ ("movl %%db0,%0\n\t":"=r"(contents));
+ break;
+ case 1:
+ __asm__ ("movl %%db1,%0\n\t":"=r"(contents));
+ break;
+ case 2:
+ __asm__ ("movl %%db2,%0\n\t":"=r"(contents));
+ break;
+ case 3:
+ __asm__ ("movl %%db3,%0\n\t":"=r"(contents));
+ break;
+ case 4:
+ case 5:
+ break;
+ case 6:
+ __asm__ ("movl %%db6,%0\n\t":"=r"(contents));
+ break;
+ case 7:
+ __asm__ ("movl %%db7,%0\n\t":"=r"(contents));
+ break;
+ default:
+ break;
+ }
+
+ return contents;
+}
+
+kdb_machreg_t
+kdba_getdr6(void)
+{
+ return kdba_getdr(6);
+}
+
+kdb_machreg_t
+kdba_getdr7(void)
+{
+ return kdba_getdr(7);
+}
+
+void
+kdba_putdr6(kdb_machreg_t contents)
+{
+ kdba_putdr(6, contents);
+}
+
+static void
+kdba_putdr7(kdb_machreg_t contents)
+{
+ kdba_putdr(7, contents);
+}
+
+void
+kdba_installdbreg(kdb_bp_t *bp)
+{
+ int cpu = smp_processor_id();
+
+ kdb_machreg_t dr7;
+
+ dr7 = kdba_getdr7();
+
+ kdba_putdr(bp->bp_hard[cpu]->bph_reg, bp->bp_addr);
+
+ dr7 |= DR7_GE;
+ if (cpu_has_de)
+ set_in_cr4(X86_CR4_DE);
+
+ switch (bp->bp_hard[cpu]->bph_reg){
+ case 0:
+ DR7_RW0SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN0SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G0SET(dr7);
+ break;
+ case 1:
+ DR7_RW1SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN1SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G1SET(dr7);
+ break;
+ case 2:
+ DR7_RW2SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN2SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G2SET(dr7);
+ break;
+ case 3:
+ DR7_RW3SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN3SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G3SET(dr7);
+ break;
+ default:
+ kdb_printf("kdb: Bad debug register!! %ld\n",
+ bp->bp_hard[cpu]->bph_reg);
+ break;
+ }
+
+ kdba_putdr7(dr7);
+ return;
+}
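+
+/* Editorial sketch of the architectural dr7 layout that the
+ * DR7_*SET/CLR macros above manipulate: breakpoint n has its global
+ * enable at bit 2n+1, its break condition (R/Wn) at bits 16+4n and
+ * its length (LENn) at bits 18+4n.  A generic encoder equivalent to
+ * the per-register switch above; illustrative, not part of kdb.
+ */
+static unsigned long __maybe_unused
+dr7_encode(int n, unsigned long mode, unsigned long len)
+{
+	unsigned long dr7 = 0;
+	dr7 |= 1UL << (2 * n + 1);		/* Gn: global enable */
+	dr7 |= (mode & 3) << (16 + 4 * n);	/* R/Wn: break condition */
+	dr7 |= (len & 3) << (18 + 4 * n);	/* LENn: 1, 2 or 4 bytes */
+	return dr7;
+}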
+
+void
+kdba_removedbreg(kdb_bp_t *bp)
+{
+ int regnum;
+ kdb_machreg_t dr7;
+ int cpu = smp_processor_id();
+
+ if (!bp->bp_hard[cpu])
+ return;
+
+ regnum = bp->bp_hard[cpu]->bph_reg;
+
+ dr7 = kdba_getdr7();
+
+ kdba_putdr(regnum, 0);
+
+ switch (regnum) {
+ case 0:
+ DR7_G0CLR(dr7);
+ DR7_L0CLR(dr7);
+ break;
+ case 1:
+ DR7_G1CLR(dr7);
+ DR7_L1CLR(dr7);
+ break;
+ case 2:
+ DR7_G2CLR(dr7);
+ DR7_L2CLR(dr7);
+ break;
+ case 3:
+ DR7_G3CLR(dr7);
+ DR7_L3CLR(dr7);
+ break;
+ default:
+ kdb_printf("kdb: Bad debug register!! %d\n", regnum);
+ break;
+ }
+
+ kdba_putdr7(dr7);
+}
+
+
+/*
+ * kdba_getregcontents
+ *
+ * Return the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * The following pseudo register names are supported:
+ * &regs - Prints address of exception frame
+ * kesp - Prints kernel stack pointer at time of fault
+ * cesp - Prints current kernel stack pointer, inside kdb
+ * ceflags - Prints current flags, inside kdb
+ * %<regname> - Uses the value of the registers at the
+ * last time the user process entered kernel
+ * mode, instead of the registers at the time
+ * kdb was entered.
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * Outputs:
+ * *contents Pointer to unsigned long to receive register contents
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ * If kdb was entered via an interrupt from the kernel itself then
+ * ss and sp are *not* on the stack.
+ */
+
+static struct kdbregs {
+ char *reg_name;
+ size_t reg_offset;
+} kdbreglist[] = {
+ { "ax", offsetof(struct pt_regs, ax) },
+ { "bx", offsetof(struct pt_regs, bx) },
+ { "cx", offsetof(struct pt_regs, cx) },
+ { "dx", offsetof(struct pt_regs, dx) },
+
+ { "si", offsetof(struct pt_regs, si) },
+ { "di", offsetof(struct pt_regs, di) },
+ { "sp", offsetof(struct pt_regs, sp) },
+ { "ip", offsetof(struct pt_regs, ip) },
+
+ { "bp", offsetof(struct pt_regs, bp) },
+ { "ss", offsetof(struct pt_regs, ss) },
+ { "cs", offsetof(struct pt_regs, cs) },
+ { "flags", offsetof(struct pt_regs, flags) },
+
+ { "ds", offsetof(struct pt_regs, ds) },
+ { "es", offsetof(struct pt_regs, es) },
+ { "origax", offsetof(struct pt_regs, orig_ax) },
+
+};
+
+static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs);
+
+static struct kdbregs dbreglist[] = {
+ { "dr0", 0 },
+ { "dr1", 1 },
+ { "dr2", 2 },
+ { "dr3", 3 },
+ { "dr6", 6 },
+ { "dr7", 7 },
+};
+
+static const int ndbreglist = sizeof(dbreglist) / sizeof(struct kdbregs);
+
+int
+kdba_getregcontents(const char *regname,
+ struct pt_regs *regs,
+ kdb_machreg_t *contents)
+{
+ int i;
+
+ if (strcmp(regname, "cesp") == 0) {
+ asm volatile("movl %%esp,%0":"=m" (*contents));
+ return 0;
+ }
+
+ if (strcmp(regname, "ceflags") == 0) {
+ unsigned long flags;
+ local_save_flags(flags);
+ *contents = flags;
+ return 0;
+ }
+
+ if (regname[0] == '%') {
+ /* User registers: %%e[a-c]x, etc */
+ regname++;
+ regs = (struct pt_regs *)
+ (kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ for (i=0; i<ndbreglist; i++) {
+ if (strnicmp(dbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < ndbreglist)
+ && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+ *contents = kdba_getdr(dbreglist[i].reg_offset);
+ return 0;
+ }
+
+ if (!regs) {
+ kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+ return KDB_BADREG;
+ }
+
+ if (strcmp(regname, "®s") == 0) {
+ *contents = (unsigned long)regs;
+ return 0;
+ }
+
+ if (strcmp(regname, "kesp") == 0) {
+ *contents = (unsigned long)regs + sizeof(struct pt_regs);
+ if ((regs->cs & 0xffff) == __KERNEL_CS) {
+ /* sp and ss are not on stack */
+ *contents -= 2*4;
+ }
+ return 0;
+ }
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strnicmp(kdbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < nkdbreglist)
+ && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+ if ((regs->cs & 0xffff) == __KERNEL_CS) {
+ /* No cpl switch, sp and ss are not on stack */
+ if (strcmp(kdbreglist[i].reg_name, "sp") == 0) {
+ *contents = (kdb_machreg_t)regs +
+ sizeof(struct pt_regs) - 2*4;
+ return(0);
+ }
+ if (strcmp(kdbreglist[i].reg_name, "xss") == 0) {
+ asm volatile(
+ "pushl %%ss\n"
+ "popl %0\n"
+ :"=m" (*contents));
+ return(0);
+ }
+ }
+ *contents = *(unsigned long *)((unsigned long)regs +
+ kdbreglist[i].reg_offset);
+ return(0);
+ }
+
+ return KDB_BADREG;
+}
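+
+/* Editorial usage sketch: reading the saved instruction pointer via
+ * the lookup above.  Returns 0 and fills *ip on success, otherwise a
+ * kdb diagnostic such as KDB_BADREG.  Illustrative, not part of kdb.
+ */
+static int __maybe_unused
+example_read_ip(struct pt_regs *regs, kdb_machreg_t *ip)
+{
+	return kdba_getregcontents("ip", regs, ip);
+}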
+
+/*
+ * kdba_setregcontents
+ *
+ * Set the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * Supports modification of user-mode registers via
+ * %<register-name>
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * contents Unsigned long containing new register contents
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdba_setregcontents(const char *regname,
+ struct pt_regs *regs,
+ unsigned long contents)
+{
+ int i;
+
+ if (regname[0] == '%') {
+ regname++;
+ regs = (struct pt_regs *)
+ (kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ for (i=0; i<ndbreglist; i++) {
+ if (strnicmp(dbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < ndbreglist)
+ && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+ kdba_putdr(dbreglist[i].reg_offset, contents);
+ return 0;
+ }
+
+ if (!regs) {
+ kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+ return KDB_BADREG;
+ }
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strnicmp(kdbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < nkdbreglist)
+ && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+ *(unsigned long *)((unsigned long)regs
+ + kdbreglist[i].reg_offset) = contents;
+ return 0;
+ }
+
+ return KDB_BADREG;
+}
+
+/*
+ * kdba_dumpregs
+ *
+ * Dump the specified register set to the display.
+ *
+ * Parameters:
+ * regs Pointer to structure containing registers.
+ * type Character string identifying register set to dump
+ * extra string further identifying register (optional)
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * Locking:
+ * None.
+ * Remarks:
+ * This function will dump the general register set if the type
+ * argument is NULL (struct pt_regs). The alternate register
+ * set types supported by this function:
+ *
+ * d Debug registers
+ * c Control registers
+ * u User registers at most recent entry to kernel
+ * for the process currently selected with "pid" command.
+ * Following not yet implemented:
+ * r Memory Type Range Registers (extra defines register)
+ *
+ * MSR on i386/x86_64 are handled by rdmsr/wrmsr commands.
+ */
+
+int
+kdba_dumpregs(struct pt_regs *regs,
+ const char *type,
+ const char *extra)
+{
+ int i;
+ int count = 0;
+
+ if (type
+ && (type[0] == 'u')) {
+ type = NULL;
+ regs = (struct pt_regs *)
+ (kdb_current_task->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ if (type == NULL) {
+ struct kdbregs *rlp;
+ kdb_machreg_t contents;
+
+ if (!regs) {
+ kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+ return KDB_BADREG;
+ }
+
+ for (i=0, rlp=kdbreglist; i<nkdbreglist; i++,rlp++) {
+ kdb_printf("%s = ", rlp->reg_name);
+ kdba_getregcontents(rlp->reg_name, regs, &contents);
+ kdb_printf("0x%08lx ", contents);
+ if ((++count % 4) == 0)
+ kdb_printf("\n");
+ }
+
+ kdb_printf("®s = 0x%p\n", regs);
+
+ return 0;
+ }
+
+ switch (type[0]) {
+ case 'd':
+ {
+ unsigned long dr[8];
+
+ for(i=0; i<8; i++) {
+ if ((i == 4) || (i == 5)) continue;
+ dr[i] = kdba_getdr(i);
+ }
+ kdb_printf("dr0 = 0x%08lx dr1 = 0x%08lx dr2 = 0x%08lx dr3 = 0x%08lx\n",
+ dr[0], dr[1], dr[2], dr[3]);
+ kdb_printf("dr6 = 0x%08lx dr7 = 0x%08lx\n",
+ dr[6], dr[7]);
+ return 0;
+ }
+ case 'c':
+ {
+ unsigned long cr[5];
+
+ for (i=0; i<5; i++) {
+ cr[i] = kdba_getcr(i);
+ }
+ kdb_printf("cr0 = 0x%08lx cr1 = 0x%08lx cr2 = 0x%08lx cr3 = 0x%08lx\ncr4 = 0x%08lx\n",
+ cr[0], cr[1], cr[2], cr[3], cr[4]);
+ return 0;
+ }
+ case 'r':
+ break;
+ default:
+ return KDB_BADREG;
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+EXPORT_SYMBOL(kdba_dumpregs);
+
+kdb_machreg_t
+kdba_getpc(struct pt_regs *regs)
+{
+ return regs ? regs->ip : 0;
+}
+
+int
+kdba_setpc(struct pt_regs *regs, kdb_machreg_t newpc)
+{
+ if (KDB_NULL_REGS(regs))
+ return KDB_BADREG;
+ regs->ip = newpc;
+ KDB_STATE_SET(IP_ADJUSTED);
+ return 0;
+}
+
+/*
+ * kdba_main_loop
+ *
+ * Do any architecture specific set up before entering the main kdb loop.
+ * The primary function of this routine is to make all processes look the
+ * same to kdb; kdb must be able to list a process without worrying if the
+ * process is running or blocked, so make all processes look as though they
+ * are blocked.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * error2 kdb's current reason code. Initially error but can change
+ * according to kdb state.
+ * db_result Result from break or debug point.
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it should
+ * always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event for which it was not responsible
+ * 1 KDB handled the event for which it was invoked.
+ * Outputs:
+ * Sets ip and sp in current->thread.
+ * Locking:
+ * None.
+ * Remarks:
+ * none.
+ */
+
+int
+kdba_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int ret;
+ ret = kdb_save_running(regs, reason, reason2, error, db_result);
+ kdb_unsave_running(regs);
+ return ret;
+}
+
+void
+kdba_disableint(kdb_intstate_t *state)
+{
+ unsigned long *fp = (unsigned long *)state;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ *fp = flags;
+}
+
+void
+kdba_restoreint(kdb_intstate_t *state)
+{
+ unsigned long flags = *(unsigned long *)state;
+ local_irq_restore(flags);
+}
+
+void
+kdba_setsinglestep(struct pt_regs *regs)
+{
+ if (KDB_NULL_REGS(regs))
+ return;
+ if (regs->flags & EF_IE)
+ KDB_STATE_SET(A_IF);
+ else
+ KDB_STATE_CLEAR(A_IF);
+ regs->flags = (regs->flags | EF_TF) & ~EF_IE;
+}
+
+void
+kdba_clearsinglestep(struct pt_regs *regs)
+{
+ if (KDB_NULL_REGS(regs))
+ return;
+ if (KDB_STATE(A_IF))
+ regs->flags |= EF_IE;
+ else
+ regs->flags &= ~EF_IE;
+}
+
+int asmlinkage
+kdba_setjmp(kdb_jmp_buf *jb)
+{
+#if defined(CONFIG_FRAME_POINTER)
+ __asm__ ("movl 8(%esp), %eax\n\t"
+ "movl %ebx, 0(%eax)\n\t"
+ "movl %esi, 4(%eax)\n\t"
+ "movl %edi, 8(%eax)\n\t"
+ "movl (%esp), %ecx\n\t"
+ "movl %ecx, 12(%eax)\n\t"
+ "leal 8(%esp), %ecx\n\t"
+ "movl %ecx, 16(%eax)\n\t"
+ "movl 4(%esp), %ecx\n\t"
+ "movl %ecx, 20(%eax)\n\t");
+#else /* CONFIG_FRAME_POINTER */
+ __asm__ ("movl 4(%esp), %eax\n\t"
+ "movl %ebx, 0(%eax)\n\t"
+ "movl %esi, 4(%eax)\n\t"
+ "movl %edi, 8(%eax)\n\t"
+ "movl %ebp, 12(%eax)\n\t"
+ "leal 4(%esp), %ecx\n\t"
+ "movl %ecx, 16(%eax)\n\t"
+ "movl 0(%esp), %ecx\n\t"
+ "movl %ecx, 20(%eax)\n\t");
+#endif /* CONFIG_FRAME_POINTER */
+ return 0;
+}
+
+void asmlinkage
+kdba_longjmp(kdb_jmp_buf *jb, int reason)
+{
+#if defined(CONFIG_FRAME_POINTER)
+ __asm__("movl 8(%esp), %ecx\n\t"
+ "movl 12(%esp), %eax\n\t"
+ "movl 20(%ecx), %edx\n\t"
+ "movl 0(%ecx), %ebx\n\t"
+ "movl 4(%ecx), %esi\n\t"
+ "movl 8(%ecx), %edi\n\t"
+ "movl 12(%ecx), %ebp\n\t"
+ "movl 16(%ecx), %esp\n\t"
+ "jmp *%edx\n");
+#else /* CONFIG_FRAME_POINTER */
+ __asm__("movl 4(%esp), %ecx\n\t"
+ "movl 8(%esp), %eax\n\t"
+ "movl 20(%ecx), %edx\n\t"
+ "movl 0(%ecx), %ebx\n\t"
+ "movl 4(%ecx), %esi\n\t"
+ "movl 8(%ecx), %edi\n\t"
+ "movl 12(%ecx), %ebp\n\t"
+ "movl 16(%ecx), %esp\n\t"
+ "jmp *%edx\n");
+#endif /* CONFIG_FRAME_POINTER */
+}
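+
+/* Editorial sketch of the jmp_buf layout implied by the two asm
+ * bodies above: six machine words, saved by kdba_setjmp() and
+ * restored by kdba_longjmp() in this order.  The struct is
+ * illustrative; the real kdb_jmp_buf is declared elsewhere.
+ */
+struct kdba_jmp_buf_layout {
+	unsigned long ebx;	/* offset  0 */
+	unsigned long esi;	/* offset  4 */
+	unsigned long edi;	/* offset  8 */
+	unsigned long ebp;	/* offset 12 */
+	unsigned long esp;	/* offset 16 */
+	unsigned long eip;	/* offset 20: address longjmp jumps to */
+};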
+
+/*
+ * kdba_pt_regs
+ *
+ * Format a struct pt_regs
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no address is supplied, it uses the last irq pt_regs.
+ */
+
+static int
+kdba_pt_regs(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ int nextarg;
+ struct pt_regs *p;
+ static const char *fmt = " %-11.11s 0x%lx\n";
+
+ if (argc == 0) {
+ addr = (kdb_machreg_t) get_irq_regs();
+ } else if (argc == 1) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ } else {
+ return KDB_ARGCOUNT;
+ }
+
+ p = (struct pt_regs *) addr;
+ kdb_printf("struct pt_regs 0x%p-0x%p\n", p, (unsigned char *)p + sizeof(*p) - 1);
+ kdb_print_nameval("bx", p->bx);
+ kdb_print_nameval("cx", p->cx);
+ kdb_print_nameval("dx", p->dx);
+ kdb_print_nameval("si", p->si);
+ kdb_print_nameval("di", p->di);
+ kdb_print_nameval("bp", p->bp);
+ kdb_print_nameval("ax", p->ax);
+ kdb_printf(fmt, "ds", p->ds);
+ kdb_printf(fmt, "es", p->es);
+ kdb_print_nameval("orig_ax", p->orig_ax);
+ kdb_print_nameval("ip", p->ip);
+ kdb_printf(fmt, "cs", p->cs);
+ kdb_printf(fmt, "flags", p->flags);
+ kdb_printf(fmt, "sp", p->sp);
+ kdb_printf(fmt, "ss", p->ss);
+ return 0;
+}
+
+/*
+ * kdba_stackdepth
+ *
+ * Print processes that are using more than a specific percentage of their
+ * stack.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no percentage is supplied, it uses 60.
+ */
+
+static void
+kdba_stackdepth1(struct task_struct *p, unsigned long sp)
+{
+ struct thread_info *tinfo;
+ int used;
+ const char *type;
+ kdb_ps1(p);
+ do {
+ tinfo = (struct thread_info *)(sp & -THREAD_SIZE);
+ used = sizeof(*tinfo) + THREAD_SIZE - (sp & (THREAD_SIZE-1));
+ type = NULL;
+ if (kdb_task_has_cpu(p)) {
+ struct kdb_activation_record ar;
+ memset(&ar, 0, sizeof(ar));
+ kdba_get_stack_info_alternate(sp, -1, &ar);
+ type = ar.stack.id;
+ }
+ if (!type)
+ type = "process";
+ kdb_printf(" %s stack %p sp %lx used %d\n", type, tinfo, sp, used);
+ sp = tinfo->previous_esp;
+ } while (sp);
+}
+
+static int
+kdba_stackdepth(int argc, const char **argv)
+{
+ int diag, cpu, threshold, used, over;
+ unsigned long percentage;
+ unsigned long esp;
+ long offset = 0;
+ int nextarg;
+ struct task_struct *p, *g;
+ struct kdb_running_process *krp;
+ struct thread_info *tinfo;
+
+ if (argc == 0) {
+ percentage = 60;
+ } else if (argc == 1) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &percentage, &offset, NULL);
+ if (diag)
+ return diag;
+ } else {
+ return KDB_ARGCOUNT;
+ }
+ percentage = max_t(int, percentage, 1);
+ percentage = min_t(int, percentage, 100);
+ threshold = ((2 * THREAD_SIZE * percentage) / 100 + 1) >> 1;
+ kdb_printf("stackdepth: processes using more than %ld%% (%d bytes) of stack\n",
+ percentage, threshold);
+
+ /* Run the active tasks first, they can have multiple stacks */
+ for (cpu = 0, krp = kdb_running_process; cpu < NR_CPUS; ++cpu, ++krp) {
+ if (!cpu_online(cpu))
+ continue;
+ p = krp->p;
+ esp = krp->arch.sp;
+ over = 0;
+ do {
+ tinfo = (struct thread_info *)(esp & -THREAD_SIZE);
+ used = sizeof(*tinfo) + THREAD_SIZE - (esp & (THREAD_SIZE-1));
+ if (used >= threshold)
+ over = 1;
+ esp = tinfo->previous_esp;
+ } while (esp);
+ if (over)
+ kdba_stackdepth1(p, krp->arch.sp);
+ }
+ /* Now the tasks that are not on cpus */
+ kdb_do_each_thread(g, p) {
+ if (kdb_task_has_cpu(p))
+ continue;
+ esp = p->thread.sp;
+ used = sizeof(*tinfo) + THREAD_SIZE - (esp & (THREAD_SIZE-1));
+ over = used >= threshold;
+ if (over)
+ kdba_stackdepth1(p, esp);
+ } kdb_while_each_thread(g, p);
+
+ return 0;
+}
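+
+/* Editorial sketch of the threshold rounding above: the expression
+ * ((2 * THREAD_SIZE * percentage) / 100 + 1) >> 1 computes
+ * THREAD_SIZE * percentage / 100 rounded to the nearest byte rather
+ * than truncated.  Worked example assuming a THREAD_SIZE of 8192:
+ */
+static int __maybe_unused
+example_threshold(void)
+{
+	int thread_size = 8192, percentage = 60;
+	/* (2 * 8192 * 60) / 100 = 9830; (9830 + 1) >> 1 = 4915 bytes */
+	return ((2 * thread_size * percentage) / 100 + 1) >> 1;
+}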
+
+asmlinkage int kdb_call(void);
+
+/* Executed once on each cpu at startup. */
+void
+kdba_cpu_up(void)
+{
+}
+
+static int __init
+kdba_arch_init(void)
+{
+ set_intr_gate(KDBENTER_VECTOR, kdb_call);
+ return 0;
+}
+
+arch_initcall(kdba_arch_init);
+
+/*
+ * kdba_init
+ *
+ * Architecture specific initialization.
+ *
+ * Parameters:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * None.
+ */
+
+void __init
+kdba_init(void)
+{
+ kdba_arch_init(); /* Need to register KDBENTER_VECTOR early */
+ kdb_register("pt_regs", kdba_pt_regs, "address", "Format struct pt_regs", 0);
+ kdb_register("stackdepth", kdba_stackdepth, "[percentage]", "Print processes using >= stack percentage", 0);
+
+ return;
+}
+
+/*
+ * kdba_adjust_ip
+ *
+ * Architecture specific adjustment of instruction pointer before leaving
+ * kdb.
+ *
+ * Parameters:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it should
+ * always be valid.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * noop on ix86.
+ */
+
+void
+kdba_adjust_ip(kdb_reason_t reason, int error, struct pt_regs *regs)
+{
+ return;
+}
+
+void
+kdba_set_current_task(const struct task_struct *p)
+{
+ kdb_current_task = p;
+ if (kdb_task_has_cpu(p)) {
+ struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);
+ kdb_current_regs = krp->regs;
+ return;
+ }
+ kdb_current_regs = NULL;
+}
+
+/*
+ * asm-i386 uaccess.h supplies __copy_to_user which relies on MMU to
+ * trap invalid addresses in the _xxx fields. Verify the other address
+ * of the pair is valid by accessing the first and last byte ourselves,
+ * then any access violations should only be caused by the _xxx
+ * addresses.
+ */
+
+int
+kdba_putarea_size(unsigned long to_xxx, void *from, size_t size)
+{
+ mm_segment_t oldfs = get_fs();
+ int r;
+ char c;
+ c = *((volatile char *)from);
+ c = *((volatile char *)from + size - 1);
+
+ if (to_xxx < PAGE_OFFSET) {
+ return kdb_putuserarea_size(to_xxx, from, size);
+ }
+
+ set_fs(KERNEL_DS);
+ r = __copy_to_user_inatomic((void __user *)to_xxx, from, size);
+ set_fs(oldfs);
+ return r;
+}
+
+int
+kdba_getarea_size(void *to, unsigned long from_xxx, size_t size)
+{
+ mm_segment_t oldfs = get_fs();
+ int r;
+ *((volatile char *)to) = '\0';
+ *((volatile char *)to + size - 1) = '\0';
+
+ if (from_xxx < PAGE_OFFSET) {
+ return kdb_getuserarea_size(to, from_xxx, size);
+ }
+
+ set_fs(KERNEL_DS);
+ switch (size) {
+ case 1:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 1);
+ break;
+ case 2:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 2);
+ break;
+ case 4:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 4);
+ break;
+ case 8:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, 8);
+ break;
+ default:
+ r = __copy_to_user_inatomic((void __user *)to, (void *)from_xxx, size);
+ break;
+ }
+ set_fs(oldfs);
+ return r;
+}
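+
+/* Editorial usage sketch: fetching a single kernel word with the
+ * fault-tolerant copy above.  Returns 0 on success, non-zero if the
+ * address could not be read.  Illustrative, not part of kdb.
+ */
+static int __maybe_unused
+example_read_word(unsigned long addr, unsigned long *word)
+{
+	return kdba_getarea_size(word, addr, sizeof(*word));
+}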
+
+int
+kdba_verify_rw(unsigned long addr, size_t size)
+{
+ unsigned char data[size];
+ return(kdba_getarea_size(data, addr, size) || kdba_putarea_size(addr, data, size));
+}
+
+#ifdef CONFIG_SMP
+
+#include <mach_ipi.h>
+
+gate_desc save_idt[NR_VECTORS];
+
+void kdba_takeover_vector(int vector)
+{
+ memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
+ set_intr_gate(KDB_VECTOR, kdb_interrupt);
+ return;
+}
+
+void kdba_giveback_vector(int vector)
+{
+ native_write_idt_entry(idt_table, vector, &save_idt[vector]);
+ return;
+}
+
+/* When first entering KDB, try a normal IPI. That reduces backtrace problems
+ * on the other cpus.
+ */
+void
+smp_kdb_stop(void)
+{
+ if (!KDB_FLAG(NOIPI)) {
+ kdba_takeover_vector(KDB_VECTOR);
+ send_IPI_allbutself(KDB_VECTOR);
+ }
+}
+
+/* The normal KDB IPI handler */
+void
+smp_kdb_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ ack_APIC_irq();
+ irq_enter();
+ kdb_ipi(regs, NULL);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/* Invoked once from kdb_wait_for_cpus when waiting for cpus. For those cpus
+ * that have not responded to the normal KDB interrupt yet, hit them with an
+ * NMI event.
+ */
+void
+kdba_wait_for_cpus(void)
+{
+ int c;
+ if (KDB_FLAG(CATASTROPHIC))
+ return;
+ kdb_printf(" Sending NMI to non-responding cpu(s): ");
+ for_each_online_cpu(c) {
+ if (kdb_running_process[c].seqno < kdb_seqno - 1) {
+ kdb_printf(" %d", c);
- send_IPI_mask(&cpumask_of_cpu(c), NMI_VECTOR);
++ send_IPI_mask(get_cpu_mask(c), NMI_VECTOR);
+ }
+ }
+ kdb_printf(".\n");
+}
+
+#endif /* CONFIG_SMP */
--- /dev/null
+/*
+ * Kernel Debugger Architecture Dependent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/cpumask.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
+
+kdb_machreg_t
+kdba_getdr6(void)
+{
+ return kdba_getdr(6);
+}
+
+kdb_machreg_t
+kdba_getdr7(void)
+{
+ return kdba_getdr(7);
+}
+
+void
+kdba_putdr6(kdb_machreg_t contents)
+{
+ kdba_putdr(6, contents);
+}
+
+static void
+kdba_putdr7(kdb_machreg_t contents)
+{
+ kdba_putdr(7, contents);
+}
+
+void
+kdba_installdbreg(kdb_bp_t *bp)
+{
+ int cpu = smp_processor_id();
+
+ kdb_machreg_t dr7;
+
+ dr7 = kdba_getdr7();
+
+ kdba_putdr(bp->bp_hard[cpu]->bph_reg, bp->bp_addr);
+
+ dr7 |= DR7_GE;
+ if (cpu_has_de)
+ set_in_cr4(X86_CR4_DE);
+
+ switch (bp->bp_hard[cpu]->bph_reg){
+ case 0:
+ DR7_RW0SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN0SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G0SET(dr7);
+ break;
+ case 1:
+ DR7_RW1SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN1SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G1SET(dr7);
+ break;
+ case 2:
+ DR7_RW2SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN2SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G2SET(dr7);
+ break;
+ case 3:
+ DR7_RW3SET(dr7,bp->bp_hard[cpu]->bph_mode);
+ DR7_LEN3SET(dr7,bp->bp_hard[cpu]->bph_length);
+ DR7_G3SET(dr7);
+ break;
+ default:
+ kdb_printf("kdb: Bad debug register!! %ld\n",
+ bp->bp_hard[cpu]->bph_reg);
+ break;
+ }
+
+ kdba_putdr7(dr7);
+ return;
+}
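+
+/*
+ * Example (restating the code above): a breakpoint assigned to debug
+ * register 1 programs bp_addr into dr1, sets the RW1/LEN1 fields and the
+ * G1 global-enable bit in dr7, and sets DR7_GE so the processor reports
+ * exact data breakpoint matches.
+ */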
+
+void
+kdba_removedbreg(kdb_bp_t *bp)
+{
+ int regnum;
+ kdb_machreg_t dr7;
+ int cpu = smp_processor_id();
+
+ if (!bp->bp_hard[cpu])
+ return;
+
+ regnum = bp->bp_hard[cpu]->bph_reg;
+
+ dr7 = kdba_getdr7();
+
+ kdba_putdr(regnum, 0);
+
+ switch (regnum) {
+ case 0:
+ DR7_G0CLR(dr7);
+ DR7_L0CLR(dr7);
+ break;
+ case 1:
+ DR7_G1CLR(dr7);
+ DR7_L1CLR(dr7);
+ break;
+ case 2:
+ DR7_G2CLR(dr7);
+ DR7_L2CLR(dr7);
+ break;
+ case 3:
+ DR7_G3CLR(dr7);
+ DR7_L3CLR(dr7);
+ break;
+ default:
+ kdb_printf("kdb: Bad debug register!! %d\n", regnum);
+ break;
+ }
+
+ kdba_putdr7(dr7);
+}
+
+kdb_machreg_t
+kdba_getdr(int regnum)
+{
+ kdb_machreg_t contents = 0;
+ switch(regnum) {
+ case 0:
+ __asm__ ("movq %%db0,%0\n\t":"=r"(contents));
+ break;
+ case 1:
+ __asm__ ("movq %%db1,%0\n\t":"=r"(contents));
+ break;
+ case 2:
+ __asm__ ("movq %%db2,%0\n\t":"=r"(contents));
+ break;
+ case 3:
+ __asm__ ("movq %%db3,%0\n\t":"=r"(contents));
+ break;
+ case 4:
+ case 5:
+ break;
+ case 6:
+ __asm__ ("movq %%db6,%0\n\t":"=r"(contents));
+ break;
+ case 7:
+ __asm__ ("movq %%db7,%0\n\t":"=r"(contents));
+ break;
+ default:
+ break;
+ }
+
+ return contents;
+}
+
+
+kdb_machreg_t
+kdb_getcr(int regnum)
+{
+ kdb_machreg_t contents = 0;
+ switch(regnum) {
+ case 0:
+ __asm__ ("movq %%cr0,%0\n\t":"=r"(contents));
+ break;
+ case 1:
+ break;
+ case 2:
+ __asm__ ("movq %%cr2,%0\n\t":"=r"(contents));
+ break;
+ case 3:
+ __asm__ ("movq %%cr3,%0\n\t":"=r"(contents));
+ break;
+ case 4:
+ __asm__ ("movq %%cr4,%0\n\t":"=r"(contents));
+ break;
+ default:
+ break;
+ }
+
+ return contents;
+}
+
+void
+kdba_putdr(int regnum, kdb_machreg_t contents)
+{
+ switch(regnum) {
+ case 0:
+ __asm__ ("movq %0,%%db0\n\t"::"r"(contents));
+ break;
+ case 1:
+ __asm__ ("movq %0,%%db1\n\t"::"r"(contents));
+ break;
+ case 2:
+ __asm__ ("movq %0,%%db2\n\t"::"r"(contents));
+ break;
+ case 3:
+ __asm__ ("movq %0,%%db3\n\t"::"r"(contents));
+ break;
+ case 4:
+ case 5:
+ break;
+ case 6:
+ __asm__ ("movq %0,%%db6\n\t"::"r"(contents));
+ break;
+ case 7:
+ __asm__ ("movq %0,%%db7\n\t"::"r"(contents));
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * kdba_getregcontents
+ *
+ * Return the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * The following pseudo register names are supported:
+ * &regs - Prints address of exception frame
+ * krsp - Prints kernel stack pointer at time of fault
+ * crsp - Prints current kernel stack pointer, inside kdb
+ * ceflags - Prints current flags, inside kdb
+ * %<regname> - Uses the value of the registers at the
+ * last time the user process entered kernel
+ * mode, instead of the registers at the time
+ * kdb was entered.
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * Outputs:
+ * *contents Pointer to unsigned long to receive register contents
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ * If kdb was entered via an interrupt from the kernel itself then
+ * ss and sp are *not* on the stack.
+ */
+
+static struct kdbregs {
+ char *reg_name;
+ size_t reg_offset;
+} kdbreglist[] = {
+ { "r15", offsetof(struct pt_regs, r15) },
+ { "r14", offsetof(struct pt_regs, r14) },
+ { "r13", offsetof(struct pt_regs, r13) },
+ { "r12", offsetof(struct pt_regs, r12) },
+ { "bp", offsetof(struct pt_regs, bp) },
+ { "bx", offsetof(struct pt_regs, bx) },
+ { "r11", offsetof(struct pt_regs, r11) },
+ { "r10", offsetof(struct pt_regs, r10) },
+ { "r9", offsetof(struct pt_regs, r9) },
+ { "r8", offsetof(struct pt_regs, r8) },
+ { "ax", offsetof(struct pt_regs, ax) },
+ { "cx", offsetof(struct pt_regs, cx) },
+ { "dx", offsetof(struct pt_regs, dx) },
+ { "si", offsetof(struct pt_regs, si) },
+ { "di", offsetof(struct pt_regs, di) },
+ { "orig_ax", offsetof(struct pt_regs, orig_ax) },
+ { "ip", offsetof(struct pt_regs, ip) },
+ { "cs", offsetof(struct pt_regs, cs) },
+ { "flags", offsetof(struct pt_regs, flags) },
+ { "sp", offsetof(struct pt_regs, sp) },
+ { "ss", offsetof(struct pt_regs, ss) },
+};
+
+static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs);
+
+static struct kdbregs dbreglist[] = {
+ { "dr0", 0 },
+ { "dr1", 1 },
+ { "dr2", 2 },
+ { "dr3", 3 },
+ { "dr6", 6 },
+ { "dr7", 7 },
+};
+
+static const int ndbreglist = sizeof(dbreglist) / sizeof(struct kdbregs);
+
+int
+kdba_getregcontents(const char *regname,
+ struct pt_regs *regs,
+ kdb_machreg_t *contents)
+{
+ int i;
+
+ if (strcmp(regname, "&regs") == 0) {
+ *contents = (unsigned long)regs;
+ return 0;
+ }
+
+ if (strcmp(regname, "krsp") == 0) {
+ *contents = (unsigned long)regs + sizeof(struct pt_regs);
+ if ((regs->cs & 0xffff) == __KERNEL_CS) {
+ /* sp and ss are not on stack */
+ *contents -= 2*8;
+ }
+ return 0;
+ }
+
+ if (strcmp(regname, "crsp") == 0) {
+ asm volatile("movq %%rsp,%0":"=m" (*contents));
+ return 0;
+ }
+
+ if (strcmp(regname, "ceflags") == 0) {
+ unsigned long flags;
+ local_save_flags(flags);
+ *contents = flags;
+ return 0;
+ }
+
+ if (regname[0] == '%') {
+ /* User registers: %%r[a-c]x, etc */
+ regname++;
+ regs = (struct pt_regs *)
+ (current->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strnicmp(kdbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < nkdbreglist)
+ && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+ if ((regs->cs & 0xffff) == __KERNEL_CS) {
+ /* No cpl switch, sp is not on stack */
+ if (strcmp(kdbreglist[i].reg_name, "sp") == 0) {
+ *contents = (kdb_machreg_t)regs +
+ sizeof(struct pt_regs) - 2*8;
+ return(0);
+ }
+#if 0 /* FIXME */
+ if (strcmp(kdbreglist[i].reg_name, "ss") == 0) {
+ kdb_machreg_t r;
+
+ r = (kdb_machreg_t)regs +
+ sizeof(struct pt_regs) - 2*8;
+ *contents = (kdb_machreg_t)SS(r); /* XXX */
+ return(0);
+ }
+#endif
+ }
+ *contents = *(unsigned long *)((unsigned long)regs +
+ kdbreglist[i].reg_offset);
+ return(0);
+ }
+
+ for (i=0; i<ndbreglist; i++) {
+ if (strnicmp(dbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < ndbreglist)
+ && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+ *contents = kdba_getdr(dbreglist[i].reg_offset);
+ return 0;
+ }
+ return KDB_BADREG;
+}
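+
+/*
+ * Illustrative sketch (not part of the original code): reading a saved
+ * register and a pseudo register through the interface above.
+ *
+ *	kdb_machreg_t val;
+ *	if (kdba_getregcontents("ip", regs, &val) == 0)
+ *		kdb_printf("ip = 0x%lx\n", val);
+ *	if (kdba_getregcontents("&regs", regs, &val) == 0)
+ *		kdb_printf("exception frame at 0x%lx\n", val);
+ */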
+
+/*
+ * kdba_setregcontents
+ *
+ * Set the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * Supports modification of user-mode registers via
+ * %<register-name>
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * contents Unsigned long containing new register contents
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdba_setregcontents(const char *regname,
+ struct pt_regs *regs,
+ unsigned long contents)
+{
+ int i;
+
+ if (regname[0] == '%') {
+ regname++;
+ regs = (struct pt_regs *)
+ (current->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strnicmp(kdbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < nkdbreglist)
+ && (strlen(kdbreglist[i].reg_name) == strlen(regname))) {
+ *(unsigned long *)((unsigned long)regs
+ + kdbreglist[i].reg_offset) = contents;
+ return 0;
+ }
+
+ for (i=0; i<ndbreglist; i++) {
+ if (strnicmp(dbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i < ndbreglist)
+ && (strlen(dbreglist[i].reg_name) == strlen(regname))) {
+ kdba_putdr(dbreglist[i].reg_offset, contents);
+ return 0;
+ }
+
+ return KDB_BADREG;
+}
+
+/*
+ * kdba_dumpregs
+ *
+ * Dump the specified register set to the display.
+ *
+ * Parameters:
+ * regs Pointer to structure containing registers.
+ * type Character string identifying register set to dump
+ * extra string further identifying register (optional)
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * Locking:
+ * None.
+ * Remarks:
+ * This function will dump the general register set if the type
+ * argument is NULL (struct pt_regs). The alternate register
+ * set types supported by this function:
+ *
+ * d Debug registers
+ * c Control registers
+ * u User registers at most recent entry to kernel
+ * Following not yet implemented:
+ * r Memory Type Range Registers (extra defines register)
+ *
+ * MSR on i386/x86_64 are handled by rdmsr/wrmsr commands.
+ */
+
+int
+kdba_dumpregs(struct pt_regs *regs,
+ const char *type,
+ const char *extra)
+{
+ int i;
+ int count = 0;
+
+ if (type
+ && (type[0] == 'u')) {
+ type = NULL;
+ regs = (struct pt_regs *)
+ (current->thread.sp0 - sizeof(struct pt_regs));
+ }
+
+ if (type == NULL) {
+ struct kdbregs *rlp;
+ kdb_machreg_t contents;
+
+ if (!regs) {
+ kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__);
+ return KDB_BADREG;
+ }
+
+ for (i=0, rlp=kdbreglist; i<nkdbreglist; i++,rlp++) {
+ kdb_printf("%8s = ", rlp->reg_name);
+ kdba_getregcontents(rlp->reg_name, regs, &contents);
+ kdb_printf("0x%016lx ", contents);
+ if ((++count % 2) == 0)
+ kdb_printf("\n");
+ }
+
+ kdb_printf("&regs = 0x%p\n", regs);
+
+ return 0;
+ }
+
+ switch (type[0]) {
+ case 'd':
+ {
+ unsigned long dr[8];
+
+ for(i=0; i<8; i++) {
+ if ((i == 4) || (i == 5)) continue;
+ dr[i] = kdba_getdr(i);
+ }
+ kdb_printf("dr0 = 0x%08lx dr1 = 0x%08lx dr2 = 0x%08lx dr3 = 0x%08lx\n",
+ dr[0], dr[1], dr[2], dr[3]);
+ kdb_printf("dr6 = 0x%08lx dr7 = 0x%08lx\n",
+ dr[6], dr[7]);
+ return 0;
+ }
+ case 'c':
+ {
+ unsigned long cr[5];
+
+ for (i=0; i<5; i++) {
+ cr[i] = kdb_getcr(i);
+ }
+ kdb_printf("cr0 = 0x%08lx cr1 = 0x%08lx cr2 = 0x%08lx cr3 = 0x%08lx\ncr4 = 0x%08lx\n",
+ cr[0], cr[1], cr[2], cr[3], cr[4]);
+ return 0;
+ }
+ case 'r':
+ break;
+ default:
+ return KDB_BADREG;
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+EXPORT_SYMBOL(kdba_dumpregs);
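+
+/*
+ * Examples (restating the dispatch above): kdba_dumpregs(regs, NULL, NULL)
+ * prints the general register set two per line followed by the &regs
+ * pseudo register; type "d" prints dr0-dr3, dr6 and dr7; type "c" prints
+ * cr0-cr4; type "u" re-points regs at the most recent user-mode entry
+ * and then prints the general set.
+ */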
+
+kdb_machreg_t
+kdba_getpc(struct pt_regs *regs)
+{
+ return regs ? regs->ip : 0;
+}
+
+int
+kdba_setpc(struct pt_regs *regs, kdb_machreg_t newpc)
+{
+ if (KDB_NULL_REGS(regs))
+ return KDB_BADREG;
+ regs->ip = newpc;
+ KDB_STATE_SET(IP_ADJUSTED);
+ return 0;
+}
+
+/*
+ * kdba_main_loop
+ *
+ * Do any architecture specific set up before entering the main kdb loop.
+ * The primary function of this routine is to make all processes look the
+ * same to kdb; kdb must be able to list a process without worrying
+ * whether it is running or blocked, so make all processes look as though
+ * they are blocked.
+ *
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * reason2 kdb's current reason code. Initially error but can change
+ * according to kdb state.
+ * db_result Result from break or debug point.
+ * regs The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then regs is NULL, otherwise it should
+ * always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event for which it was not responsible.
+ * 1 KDB handled the event for which it was invoked.
+ * Outputs:
+ * Sets ip and sp in current->thread.
+ * Locking:
+ * None.
+ * Remarks:
+ * none.
+ */
+
+int
+kdba_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int ret;
+
+ if (regs)
+ kdba_getregcontents("sp", regs, &(current->thread.sp));
+ ret = kdb_save_running(regs, reason, reason2, error, db_result);
+ kdb_unsave_running(regs);
+ return ret;
+}
+
+void
+kdba_disableint(kdb_intstate_t *state)
+{
+ unsigned long *fp = (unsigned long *)state;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ *fp = flags;
+}
+
+void
+kdba_restoreint(kdb_intstate_t *state)
+{
+ unsigned long flags = *(unsigned long *)state;
+ local_irq_restore(flags);
+}
+
+void
+kdba_setsinglestep(struct pt_regs *regs)
+{
+ if (KDB_NULL_REGS(regs))
+ return;
+ if (regs->flags & X86_EFLAGS_IF)
+ KDB_STATE_SET(A_IF);
+ else
+ KDB_STATE_CLEAR(A_IF);
+ regs->flags = (regs->flags | X86_EFLAGS_TF) & ~X86_EFLAGS_IF;
+}
+
+void
+kdba_clearsinglestep(struct pt_regs *regs)
+{
+ if (KDB_NULL_REGS(regs))
+ return;
+ if (KDB_STATE(A_IF))
+ regs->flags |= X86_EFLAGS_IF;
+ else
+ regs->flags &= ~X86_EFLAGS_IF;
+}
+
+int asmlinkage
+kdba_setjmp(kdb_jmp_buf *jb)
+{
+#ifdef CONFIG_FRAME_POINTER
+ __asm__ __volatile__
+ ("movq %%rbx, (0*8)(%%rdi);"
+ "movq %%rcx, (1*8)(%%rdi);"
+ "movq %%r12, (2*8)(%%rdi);"
+ "movq %%r13, (3*8)(%%rdi);"
+ "movq %%r14, (4*8)(%%rdi);"
+ "movq %%r15, (5*8)(%%rdi);"
+ "leaq 16(%%rsp), %%rdx;"
+ "movq %%rdx, (6*8)(%%rdi);"
+ "movq %%rax, (7*8)(%%rdi)"
+ :
+ : "a" (__builtin_return_address(0)),
+ "c" (__builtin_frame_address(1))
+ );
+#else /* !CONFIG_FRAME_POINTER */
+ __asm__ __volatile__
+ ("movq %%rbx, (0*8)(%%rdi);"
+ "movq %%rbp, (1*8)(%%rdi);"
+ "movq %%r12, (2*8)(%%rdi);"
+ "movq %%r13, (3*8)(%%rdi);"
+ "movq %%r14, (4*8)(%%rdi);"
+ "movq %%r15, (5*8)(%%rdi);"
+ "leaq 8(%%rsp), %%rdx;"
+ "movq %%rdx, (6*8)(%%rdi);"
+ "movq %%rax, (7*8)(%%rdi)"
+ :
+ : "a" (__builtin_return_address(0))
+ );
+#endif /* CONFIG_FRAME_POINTER */
+ return 0;
+}
+
+void asmlinkage
+kdba_longjmp(kdb_jmp_buf *jb, int reason)
+{
+ __asm__("movq (0*8)(%rdi),%rbx;"
+ "movq (1*8)(%rdi),%rbp;"
+ "movq (2*8)(%rdi),%r12;"
+ "movq (3*8)(%rdi),%r13;"
+ "movq (4*8)(%rdi),%r14;"
+ "movq (5*8)(%rdi),%r15;"
+ "movq (7*8)(%rdi),%rdx;"
+ "movq (6*8)(%rdi),%rsp;"
+ "mov %rsi, %rax;"
+ "jmpq *%rdx");
+}
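+
+/*
+ * For reference (derived from the two routines above): the kdb_jmp_buf is
+ * eight 8-byte slots -- rbx, rbp (or the caller's frame address when frame
+ * pointers are enabled), r12, r13, r14, r15, the saved stack pointer, and
+ * the return address that kdba_longjmp() jumps back to. kdba_longjmp()
+ * loads its 'reason' argument into %rax, so the original kdba_setjmp()
+ * call site appears to return a second time with that value.
+ */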
+
+/*
+ * kdba_pt_regs
+ *
+ * Format a struct pt_regs
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no address is supplied, it uses the current irq pt_regs.
+ */
+
+static int
+kdba_pt_regs(int argc, const char **argv)
+{
+ int diag;
+ kdb_machreg_t addr;
+ long offset = 0;
+ int nextarg;
+ struct pt_regs *p;
+ static const char *fmt = " %-11.11s 0x%lx\n";
+ static int first_time = 1;
+
+ if (argc == 0) {
+ addr = (kdb_machreg_t) get_irq_regs();
+ } else if (argc == 1) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ } else {
+ return KDB_ARGCOUNT;
+ }
+
+ p = (struct pt_regs *) addr;
+ if (first_time) {
+ first_time = 0;
+ kdb_printf("\n+++ Warning: x86_64 pt_regs are not always "
+ "completely defined, r15-bx may be invalid\n\n");
+ }
+ kdb_printf("struct pt_regs 0x%p-0x%p\n", p, (unsigned char *)p + sizeof(*p) - 1);
+ kdb_print_nameval("r15", p->r15);
+ kdb_print_nameval("r14", p->r14);
+ kdb_print_nameval("r13", p->r13);
+ kdb_print_nameval("r12", p->r12);
+ kdb_print_nameval("bp", p->bp);
+ kdb_print_nameval("bx", p->bx);
+ kdb_print_nameval("r11", p->r11);
+ kdb_print_nameval("r10", p->r10);
+ kdb_print_nameval("r9", p->r9);
+ kdb_print_nameval("r8", p->r8);
+ kdb_print_nameval("ax", p->ax);
+ kdb_print_nameval("cx", p->cx);
+ kdb_print_nameval("dx", p->dx);
+ kdb_print_nameval("si", p->si);
+ kdb_print_nameval("di", p->di);
+ kdb_print_nameval("orig_ax", p->orig_ax);
+ kdb_print_nameval("ip", p->ip);
+ kdb_printf(fmt, "cs", p->cs);
+ kdb_printf(fmt, "flags", p->flags);
+ kdb_printf(fmt, "sp", p->sp);
+ kdb_printf(fmt, "ss", p->ss);
+ return 0;
+}
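+
+/*
+ * Illustrative kdb session for the command above (addresses made up):
+ *
+ *	kdb> pt_regs 0xffff880012345f58
+ *	struct pt_regs 0xffff880012345f58-0xffff880012345fff
+ *	   r15          0x0000000000000000
+ *	   ...
+ *
+ * With no argument the command formats the current irq pt_regs.
+ */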
+
+/*
+ * kdba_cpu_pda
+ *
+ * Format a struct cpu_pda
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * If no cpu is supplied, it prints the current cpu. If the cpu is '*'
+ * then it prints all cpus.
+ */
+
+static int
+kdba_cpu_pda(int argc, const char **argv)
+{
+ int diag, nextarg, all_cpus = 0;
+ long offset = 0;
+ unsigned long cpu;
+ struct x8664_pda *c;
+ static const char *fmtl = " %-17.17s 0x%lx\n";
+ static const char *fmtd = " %-17.17s %d\n";
+ static const char *fmtp = " %-17.17s 0x%p\n";
+
+ if (argc == 0) {
+ cpu = smp_processor_id();
+ } else if (argc == 1) {
+ if (strcmp(argv[1], "*") == 0) {
+ all_cpus = 1;
+ cpu = 0;
+ } else {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &cpu, &offset, NULL);
+ if (diag)
+ return diag;
+ }
+ } else {
+ return KDB_ARGCOUNT;
+ }
+
+ for (; cpu < NR_CPUS; ++cpu) {
+ if (cpu_online(cpu)) {
+ c = cpu_pda(cpu);
+ kdb_printf("struct cpu_pda 0x%p-0x%p\n", c, (unsigned char *)c + sizeof(*c) - 1);
+ kdb_printf(fmtp, "pcurrent", c->pcurrent);
+ kdb_printf(fmtl, "data_offset", c->data_offset);
+ kdb_printf(fmtl, "kernelstack", c->kernelstack);
+ kdb_printf(fmtl, "oldrsp", c->oldrsp);
+ kdb_printf(fmtd, "irqcount", c->irqcount);
+ kdb_printf(fmtd, "cpunumber", c->cpunumber);
+ kdb_printf(fmtp, "irqstackptr", c->irqstackptr);
+ kdb_printf(fmtp, "nodenumber", cpu_to_node(cpu));
+ kdb_printf(fmtd, "__softirq_pending", c->__softirq_pending);
+ kdb_printf(fmtd, "__nmi_count", c->__nmi_count);
+ kdb_printf(fmtd, "mmu_state", c->mmu_state);
+ kdb_printf(fmtp, "active_mm", c->active_mm);
+ kdb_printf(fmtd, "apic_timer_irqs", c->apic_timer_irqs);
+ }
+ if (!all_cpus)
+ break;
+ }
+ return 0;
+}
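+
+/*
+ * Illustrative usage of the command above: "cpu_pda" with no argument
+ * dumps the current cpu's pda; "cpu_pda *" walks every online cpu, since
+ * the loop above stops after the first hit unless all_cpus is set.
+ */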
+
+/*
+ * kdba_entry
+ *
+ * This is the interface routine between
+ * the notifier die_chain and kdb
+ */
+static int kdba_entry( struct notifier_block *b, unsigned long val, void *v)
+{
+ struct die_args *args = v;
+ int err, trap, ret = 0;
+ struct pt_regs *regs;
+
+ regs = args->regs;
+ err = args->err;
+ trap = args->trapnr;
+ switch (val){
+#ifdef CONFIG_SMP
+ case DIE_NMI_IPI:
+ ret = kdb_ipi(regs, NULL);
+ break;
+#endif /* CONFIG_SMP */
+ case DIE_OOPS:
+ ret = kdb(KDB_REASON_OOPS, err, regs);
+ break;
+ case DIE_CALL:
+ ret = kdb(KDB_REASON_ENTER, err, regs);
+ break;
+ case DIE_DEBUG:
+ ret = kdb(KDB_REASON_DEBUG, err, regs);
+ break;
+ case DIE_NMIWATCHDOG:
+ ret = kdb(KDB_REASON_NMI, err, regs);
+ break;
+ case DIE_INT3:
+ ret = kdb(KDB_REASON_BREAK, err, regs);
+ /* fall through */
+ default:
+ break;
+ }
+ return (ret ? NOTIFY_STOP : NOTIFY_DONE);
+}
+
+/*
+ * notifier block for kdb entry
+ */
+static struct notifier_block kdba_notifier = {
+ .notifier_call = kdba_entry
+};
+
+asmlinkage int kdb_call(void);
+
+/* Executed once on each cpu at startup. */
+void
+kdba_cpu_up(void)
+{
+}
+
+static int __init
+kdba_arch_init(void)
+{
+ set_intr_gate(KDBENTER_VECTOR, kdb_call);
+ return 0;
+}
+
+arch_initcall(kdba_arch_init);
+
+/*
+ * kdba_init
+ *
+ * Architecture specific initialization.
+ *
+ * Parameters:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * None.
+ */
+
+void __init
+kdba_init(void)
+{
+ kdba_arch_init(); /* Need to register KDBENTER_VECTOR early */
+ kdb_register("pt_regs", kdba_pt_regs, "address", "Format struct pt_regs", 0);
+ kdb_register("cpu_pda", kdba_cpu_pda, "<cpu>", "Format struct cpu_pda", 0);
+ register_die_notifier(&kdba_notifier);
+ return;
+}
+
+/*
+ * kdba_adjust_ip
+ *
+ * Architecture specific adjustment of instruction pointer before leaving
+ * kdb.
+ *
+ * Parameters:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * ef The exception frame at time of fault/breakpoint. If reason
+ * is SILENT or CPU_UP then ef is NULL, otherwise it should
+ * always be valid.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ * no-op on x86_64.
+ */
+
+void
+kdba_adjust_ip(kdb_reason_t reason, int error, struct pt_regs *ef)
+{
+ return;
+}
+
+void
+kdba_set_current_task(const struct task_struct *p)
+{
+ kdb_current_task = p;
+ if (kdb_task_has_cpu(p)) {
+ struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);
+ kdb_current_regs = krp->regs;
+ return;
+ }
+ kdb_current_regs = NULL;
+}
+
+#ifdef CONFIG_SMP
+
+#include <mach_ipi.h>
+
+gate_desc save_idt[NR_VECTORS];
+
+void kdba_takeover_vector(int vector)
+{
+ memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
+ set_intr_gate(KDB_VECTOR, kdb_interrupt);
+ return;
+}
+
+void kdba_giveback_vector(int vector)
+{
+ native_write_idt_entry(idt_table, vector, &save_idt[vector]);
+ return;
+}
+
+/* When first entering KDB, try a normal IPI. That reduces backtrace problems
+ * on the other cpus.
+ */
+void
+smp_kdb_stop(void)
+{
+ if (!KDB_FLAG(NOIPI)) {
+ kdba_takeover_vector(KDB_VECTOR);
+ send_IPI_allbutself(KDB_VECTOR);
+ }
+}
+
+/* The normal KDB IPI handler */
+extern asmlinkage void smp_kdb_interrupt(struct pt_regs *regs); /* for sparse */
+asmlinkage void
+smp_kdb_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ ack_APIC_irq();
+ irq_enter();
+ kdb_ipi(regs, NULL);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/* Invoked once from kdb_wait_for_cpus when waiting for cpus. For those cpus
+ * that have not responded to the normal KDB interrupt yet, hit them with an
+ * NMI event.
+ */
+void
+kdba_wait_for_cpus(void)
+{
+ int c;
+ if (KDB_FLAG(CATASTROPHIC))
+ return;
+ kdb_printf(" Sending NMI to non-responding cpus: ");
+ for_each_online_cpu(c) {
+ if (kdb_running_process[c].seqno < kdb_seqno - 1) {
+ kdb_printf(" %d", c);
- send_IPI_mask(&cpumask_of_cpu(c), NMI_VECTOR);
++ send_IPI_mask(get_cpu_mask(c), NMI_VECTOR);
+ }
+ }
+ kdb_printf(".\n");
+}
+
+#endif /* CONFIG_SMP */
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
- acpi_skip_timer_override = 1;
+ /*
+ * The ati_ixp4x0_rev() early PCI quirk should have set
+ * the acpi_skip_timer_override flag already:
+ */
+ if (!acpi_skip_timer_override) {
+ WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
+ pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+ d->ident);
+ acpi_skip_timer_override = 1;
+ }
+ return 0;
+ }
+
++static int __init force_acpi_rsdt(const struct dmi_system_id *d)
++{
++ if (!acpi_force) {
++ printk(KERN_NOTICE "%s detected: force use of acpi=rsdt\n",
++ d->ident);
++ acpi_rsdt_forced = 1;
++ } else {
++ printk(KERN_NOTICE
++ "Warning: acpi=force overrules DMI blacklist: "
++ "acpi=rsdt\n");
++ }
+ return 0;
++
+}
+
/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact acpi-devel@sourceforge.net
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
},
},
++
++ /*
++ * Boxes that need RSDT as ACPI root table
++ */
++ {
++ .callback = force_acpi_rsdt,
++ .ident = "ThinkPad ", /* R40e, broken C-states */
++ .matches = {
++ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
++ DMI_MATCH(DMI_BIOS_VERSION, "1SET")},
++ },
++ {
++ .callback = force_acpi_rsdt,
++ .ident = "ThinkPad ", /* R50e, slow booting */
++ .matches = {
++ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
++ DMI_MATCH(DMI_BIOS_VERSION, "1WET")},
++ },
++ {
++ .callback = force_acpi_rsdt,
++ .ident = "ThinkPad ", /* T40, T40p, T41, T41p, T42, T42p
++ R50, R50p */
++ .matches = {
++ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
++ DMI_MATCH(DMI_BIOS_VERSION, "1RET")},
++ },
{}
};
}
early_param("acpi", parse_acpi);
++/* Alias for acpi=rsdt for compatibility with openSUSE 11.1 and SLE11 */
++static int __init parse_acpi_root_table(char *opt)
++{
++ if (!strcmp(opt, "rsdt")) {
++ acpi_rsdt_forced = 1;
++ printk(KERN_WARNING "acpi_root_table=rsdt is deprecated. "
++ "Please use acpi=rsdt instead.\n");
++ }
++ return 0;
++}
++early_param("acpi_root_table", parse_acpi_root_table);
++
/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
#include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
#include <linux/string.h>
+ #include <linux/bootmem.h>
+ #include <linux/bitops.h>
+ #include <linux/module.h>
+ #include <linux/kgdb.h>
+ #include <linux/topology.h>
#include <linux/delay.h>
#include <linux/smp.h>
- #include <linux/module.h>
#include <linux/percpu.h>
- #include <linux/bootmem.h>
+#include <linux/perfmon_kern.h>
- #include <asm/processor.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/io.h>
clear_used_math();
mxcsr_feature_mask_init();
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (smp_processor_id() == boot_cpu_id)
+ init_thread_xstate();
+
+ xsave_init();
++
+ pfm_init_percpu();
}
- #ifdef CONFIG_HOTPLUG_CPU
- void __cpuinit cpu_uninit(void)
- {
- int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
- /* lazy TLB state */
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
- }
#endif
{
if (data->acpi_data.state_count)
acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ free_cpumask_var(data->acpi_data.shared_cpu_map);
}
+static int get_transition_latency(struct powernow_k8_data *data)
+{
+ int max_latency = 0;
+ int i;
+ for (i = 0; i < data->acpi_data.state_count; i++) {
+ int cur_latency = data->acpi_data.states[i].transition_latency
+ + data->acpi_data.states[i].bus_master_latency;
+ if (cur_latency > max_latency)
+ max_latency = cur_latency;
+ }
+ return max_latency;
+}
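+
+/*
+ * Worked example with made-up numbers: two ACPI states with
+ * transition_latency/bus_master_latency pairs (10, 5) and (20, 2) give
+ * per-state totals of 15 and 22, so get_transition_latency() returns 22.
+ */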
#else
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
}
rc = find_psb_table(data);
if (rc) {
- kfree(data);
- return -ENODEV;
+ goto err_out;
}
++
+ /* Take a crude guess here.
+ * The guess is in microseconds, so multiply by 1000 */
+ pol->cpuinfo.transition_latency = (
+ ( (data->rvo + 8) * data->vstable * VST_UNITS_20US) +
+ ( (1 << data->irt) * 30)
+ ) * 1000;
}
+ else /* ACPI _PSS objects available */
+ pol->cpuinfo.transition_latency =
+ get_transition_latency(data) * 1000;
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
set_cpus_allowed_ptr(current, &oldmask);
if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = cpumask_of_cpu(pol->cpu);
+ cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
else
- pol->cpus = per_cpu(cpu_core_map, pol->cpu);
- data->available_cores = &(pol->cpus);
+ cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
+ data->available_cores = pol->cpus;
- /* Take a crude guess here.
- * That guess was in microseconds, so multiply with 1000 */
- pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
- + (3 * (1 << data->irt) * 10)) * 1000;
-
if (cpu_family == CPU_HW_PSTATE)
pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
else
--- /dev/null
+ /*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+ #include <linux/kallsyms.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
+ #include <linux/utsname.h>
+ #include <linux/hardirq.h>
+ #include <linux/kdebug.h>
+ #include <linux/module.h>
+ #include <linux/ptrace.h>
+ #include <linux/kexec.h>
+ #include <linux/bug.h>
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
+
++#ifdef CONFIG_KDB
++#include <linux/kdb.h>
++#endif /* CONFIG_KDB */
++
+ #include <asm/stacktrace.h>
++#include <linux/unwind.h>
+
+ #include "dumpstack.h"
+
+ int panic_on_unrecovered_nmi;
++int panic_on_io_nmi;
+ unsigned int code_bytes = 64;
+ int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+ static int die_counter;
+
+ void printk_address(unsigned long address, int reliable)
+ {
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+ }
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static void
+ print_ftrace_graph_addr(unsigned long addr, void *data,
+ const struct stacktrace_ops *ops,
+ struct thread_info *tinfo, int *graph)
+ {
+ struct task_struct *task = tinfo->task;
+ unsigned long ret_addr;
+ int index = task->curr_ret_stack;
+
+ if (addr != (unsigned long)return_to_handler)
+ return;
+
+ if (!task->ret_stack || index < *graph)
+ return;
+
+ index -= *graph;
+ ret_addr = task->ret_stack[index].ret;
+
+ ops->address(data, ret_addr, 1);
+
+ (*graph)++;
+ }
+ #else
+ static inline void
+ print_ftrace_graph_addr(unsigned long addr, void *data,
+ const struct stacktrace_ops *ops,
+ struct thread_info *tinfo, int *graph)
+ { }
+ #endif
+
++int asmlinkage dump_trace_unwind(struct unwind_frame_info *info,
++ const struct stacktrace_ops *ops, void *data)
++{
++ int n = 0;
++#ifdef CONFIG_UNWIND_INFO
++ unsigned long sp = UNW_SP(info);
++
++ if (arch_unw_user_mode(info))
++ return -1;
++ while (unwind(info) == 0 && UNW_PC(info)) {
++ n++;
++ ops->address(data, UNW_PC(info), 1);
++ if (arch_unw_user_mode(info))
++ break;
++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++ && sp > UNW_SP(info))
++ break;
++ sp = UNW_SP(info);
++ }
++#endif
++ return n;
++}
++
++int try_stack_unwind(struct task_struct *task, struct pt_regs *regs,
++ unsigned long **stack, unsigned long *bp,
++ const struct stacktrace_ops *ops, void *data)
++{
++#ifdef CONFIG_UNWIND_INFO
++ int unw_ret = 0;
++ struct unwind_frame_info info;
++ if (call_trace < 0)
++ return 0;
++
++ if (regs) {
++ if (unwind_init_frame_info(&info, task, regs) == 0)
++ unw_ret = dump_trace_unwind(&info, ops, data);
++ } else if (task == current)
++ unw_ret = unwind_init_running(&info, dump_trace_unwind, ops, data);
++ else {
++ if (unwind_init_blocked(&info, task) == 0)
++ unw_ret = dump_trace_unwind(&info, ops, data);
++ }
++ if (unw_ret > 0) {
++ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
++ UNW_PC(&info));
++ if ((long)UNW_SP(&info) < 0) {
++ ops->warning(data, "Leftover inexact backtrace:\n");
++ *stack = (unsigned long *)UNW_SP(&info);
++ if (!stack) {
++ *bp = UNW_FP(&info);
++ return -1;
++ }
++ } else
++ ops->warning(data, "Full inexact backtrace again:\n");
++ } else if (call_trace >= 1) {
++ return -1;
++ } else
++ ops->warning(data, "Full inexact backtrace again:\n");
++ } else
++ ops->warning(data, "Inexact backtrace:\n");
++#endif
++ return 0;
++}
++
+ /*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+ static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+ {
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+ }
+
+ unsigned long
+ print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph)
+ {
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+ }
+ stack++;
+ }
+ return bp;
+ }
+
+
+ static void
+ print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+ {
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+ }
+
+ static void print_trace_warning(void *data, char *msg)
+ {
+ printk("%s%s\n", (char *)data, msg);
+ }
+
+ static int print_trace_stack(void *data, char *name)
+ {
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+ }
+
+ /*
+ * Print one address/symbol entry per line.
+ */
+ static void print_trace_address(void *data, unsigned long addr, int reliable)
+ {
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+ }
+
+ static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+ };
+
+ void
+ show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+ {
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+ }
+
+ void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+ {
+ show_trace_log_lvl(task, regs, stack, bp, "");
+ }
+
+ void show_stack(struct task_struct *task, unsigned long *sp)
+ {
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+ }
+
+ /*
+ * The architecture-independent dump_stack generator
+ */
+ void dump_stack(void)
+ {
+ unsigned long bp = 0;
+ unsigned long stack;
+
+ #ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+ #endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+ }
+ EXPORT_SYMBOL(dump_stack);
+
+ static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static int die_owner = -1;
+ static unsigned int die_nest_count;
+
+ unsigned __kprobes long oops_begin(void)
+ {
+ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!__raw_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ __raw_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ return flags;
+ }
+
+ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+ {
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
++#ifdef CONFIG_KDB
++ kdb(KDB_REASON_OOPS, signr, regs);
++#endif /* CONFIG_KDB */
+ oops_exit();
+
+ if (!signr)
+ return;
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ do_exit(signr);
+ }
+
+ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+ {
+ #ifdef CONFIG_X86_32
+ unsigned short ss;
+ unsigned long sp;
+ #endif
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+ #ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+ #endif
+ #ifdef CONFIG_SMP
+ printk("SMP ");
+ #endif
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+ #endif
+ printk("\n");
+ sysfs_printk_last_file();
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+ #ifdef CONFIG_X86_32
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ #else
+ /* Executive summary in case the oops scrolled away */
+ printk(KERN_ALERT "RIP ");
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
+ #endif
+ return 0;
+ }
+
+ /*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+ void die(const char *str, struct pt_regs *regs, long err)
+ {
+ unsigned long flags = oops_begin();
+ int sig = SIGSEGV;
+
+ if (!user_mode_vm(regs))
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ sig = 0;
++#ifdef CONFIG_KDB
++ kdb_diemsg = str;
++#endif /* CONFIG_KDB */
+ oops_end(flags, regs, sig);
+ }
+
+ void notrace __kprobes
+ die_nmi(char *str, struct pt_regs *regs, int do_panic)
+ {
+ unsigned long flags;
+
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ flags = oops_begin();
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
++ if (strncmp(str, "NMI Watchdog", 12) == 0)
++ notify_die(DIE_NMIWATCHDOG, "nmi_watchdog", regs, 0, 2, SIGINT);
++#ifdef CONFIG_KDB
++ kdb(KDB_REASON_NMI, 0, regs);
++#endif /* CONFIG_KDB */
+ oops_end(flags, regs, 0);
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
+ nmi_exit();
+ local_irq_enable();
+ do_exit(SIGBUS);
+ }
+
+ static int __init oops_setup(char *s)
+ {
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+ }
+ early_param("oops", oops_setup);
+
+ static int __init kstack_setup(char *s)
+ {
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+ }
+ early_param("kstack", kstack_setup);
+
+ static int __init code_bytes_setup(char *s)
+ {
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+ }
+ __setup("code_bytes=", code_bytes_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (!s)
++ return -EINVAL;
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 0;
++}
++early_param("call_trace", call_trace_setup);
++#endif
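++
++/*
++ * Illustrative examples of the boot parameter handled above (semantics
++ * paraphrased from try_stack_unwind): "call_trace=old" skips the DWARF2
++ * unwinder entirely, "call_trace=both" prints the unwinder trace and then
++ * the inexact frame walk as well, "call_trace=newfallback" falls back to
++ * the frame walk only when the unwinder gets stuck, and "call_trace=new"
++ * trusts the unwinder alone.
++ */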
--- /dev/null
+ /*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+ #include <linux/kallsyms.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
+ #include <linux/utsname.h>
+ #include <linux/hardirq.h>
+ #include <linux/kdebug.h>
+ #include <linux/module.h>
+ #include <linux/ptrace.h>
+ #include <linux/kexec.h>
+ #include <linux/bug.h>
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
+
++#include <linux/unwind.h>
+ #include <asm/stacktrace.h>
+
+ #include "dumpstack.h"
+
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
+ {
+ int graph = 0;
+
+ if (!task)
+ task = current;
+
- if (!stack) {
- unsigned long dummy;
- stack = &dummy;
- if (task && task != current)
- stack = (unsigned long *)task->thread.sp;
- }
-
+ #ifdef CONFIG_FRAME_POINTER
+ if (!bp) {
+ if (task == current) {
+ /* Grab bp right from our regs */
+ get_bp(bp);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
+ }
+ }
+ #endif
+
++ if (try_stack_unwind(task, regs, &stack, &bp, ops, data))
++ return;
++
++ if (!stack) {
++ unsigned long dummy;
++ stack = &dummy;
++ if (task && task != current)
++ stack = (unsigned long *)task->thread.sp;
++ }
++
+ for (;;) {
+ struct thread_info *context;
+
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+ bp = print_context_stack(context, stack, bp, ops,
+ data, NULL, &graph);
+
+ stack = (unsigned long *)context->previous_esp;
+ if (!stack)
+ break;
+ if (ops->stack(data, "IRQ") < 0)
+ break;
+ touch_nmi_watchdog();
+ }
+ }
+ EXPORT_SYMBOL(dump_trace);
+
+ void
+ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
+ {
+ unsigned long *stack;
+ int i;
+
+ if (sp == NULL) {
+ if (task)
+ sp = (unsigned long *)task->thread.sp;
+ else
+ sp = (unsigned long *)&sp;
+ }
+
+ stack = sp;
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (kstack_end(stack))
+ break;
+ if (i && ((i % STACKSLOTS_PER_LINE) == 0))
+ printk("\n%s", log_lvl);
+ printk(" %08lx", *stack++);
+ touch_nmi_watchdog();
+ }
+ printk("\n");
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ }
+
+
+ void show_registers(struct pt_regs *regs)
+ {
+ int i;
+
+ print_modules();
+ __show_regs(regs, 0);
+
+ printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
+ current_thread_info(), current, task_thread_info(current));
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+ */
+ if (!user_mode_vm(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
+ u8 *ip;
+
+ printk(KERN_EMERG "Stack:\n");
+ show_stack_log_lvl(NULL, regs, &regs->sp,
+ 0, KERN_EMERG);
+
+ printk(KERN_EMERG "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at IP */
+ ip = (u8 *)regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
+ printk(" Bad EIP value.");
+ break;
+ }
+ if (ip == (u8 *)regs->ip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
+ }
+ }
+ printk("\n");
+ }
+
+ int is_valid_bugaddr(unsigned long ip)
+ {
+ unsigned short ud2;
+
+ if (ip < PAGE_OFFSET)
+ return 0;
+ if (probe_kernel_address((unsigned short *)ip, ud2))
+ return 0;
+
+ return ud2 == 0x0b0f;
+ }
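+
+ /*
+ * Note: 0x0b0f is the two-byte ud2 instruction (opcode 0f 0b) read as a
+ * little-endian 16-bit word; this matches the ud2 that BUG() plants at
+ * the bug address.
+ */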
+
--- /dev/null
+ /*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+ #include <linux/kallsyms.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
+ #include <linux/utsname.h>
+ #include <linux/hardirq.h>
+ #include <linux/kdebug.h>
+ #include <linux/module.h>
+ #include <linux/ptrace.h>
+ #include <linux/kexec.h>
+ #include <linux/bug.h>
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
+
++#include <linux/unwind.h>
+ #include <asm/stacktrace.h>
+
+ #include "dumpstack.h"
+
+ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+ unsigned *usedp, char **idp)
+ {
+ static char ids[][8] = {
+ [DEBUG_STACK - 1] = "#DB",
+ [NMI_STACK - 1] = "NMI",
+ [DOUBLEFAULT_STACK - 1] = "#DF",
+ [STACKFAULT_STACK - 1] = "#SS",
+ [MCE_STACK - 1] = "#MC",
+ #if DEBUG_STKSZ > EXCEPTION_STKSZ
+ [N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ #endif
+ };
+ unsigned k;
+
+ /*
+ * Iterate over all exception stacks, and figure out whether
+ * 'stack' is in one of them:
+ */
+ for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+ unsigned long end = per_cpu(orig_ist, cpu).ist[k];
+ /*
+ * Is 'stack' above this exception frame's end?
+ * If yes then skip to the next frame.
+ */
+ if (stack >= end)
+ continue;
+ /*
+ * Is 'stack' above this exception frame's start address?
+ * If yes then we found the right frame.
+ */
+ if (stack >= end - EXCEPTION_STKSZ) {
+ /*
+ * Make sure we only iterate through an exception
+ * stack once. If it comes up for the second time
+ * then there's something wrong going on - just
+ * break out and return NULL:
+ */
+ if (*usedp & (1U << k))
+ break;
+ *usedp |= 1U << k;
+ *idp = ids[k];
+ return (unsigned long *)end;
+ }
+ /*
+ * If this is a debug stack, and if it has a larger size than
+ * the usual exception stacks, then 'stack' might still
+ * be within the lower portion of the debug stack:
+ */
+ #if DEBUG_STKSZ > EXCEPTION_STKSZ
+ if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
+ unsigned j = N_EXCEPTION_STACKS - 1;
+
+ /*
+ * Black magic. A large debug stack is composed of
+ * multiple exception stack entries, which we
+ * iterate through now. Don't look:
+ */
+ do {
+ ++j;
+ end -= EXCEPTION_STKSZ;
+ ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+ } while (stack < end - EXCEPTION_STKSZ);
+ if (*usedp & (1U << j))
+ break;
+ *usedp |= 1U << j;
+ *idp = ids[j];
+ return (unsigned long *)end;
+ }
+ #endif
+ }
+ return NULL;
+ }
+
+ /*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
+ {
+ const unsigned cpu = get_cpu();
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned used = 0;
+ struct thread_info *tinfo;
+ int graph = 0;
+
+ if (!task)
+ task = current;
+
- if (!stack) {
- unsigned long dummy;
- stack = &dummy;
- if (task && task != current)
- stack = (unsigned long *)task->thread.sp;
- }
-
+ #ifdef CONFIG_FRAME_POINTER
+ if (!bp) {
+ if (task == current) {
+ /* Grab bp right from our regs */
+ get_bp(bp);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
+ }
+ }
+ #endif
+
++ if (try_stack_unwind(task, regs, &stack, &bp, ops, data)) {
++ put_cpu();
++ return;
++ }
++
++ if (!stack) {
++ unsigned long dummy;
++ stack = &dummy;
++ if (task && task != current)
++ stack = (unsigned long *)task->thread.sp;
++ }
++
+ /*
+ * Print function call entries in all stacks, starting at the
+ * current stack address. If the stacks consist of nested
+ * exceptions, follow the links from one stack to the next until
+ * the process stack is reached.
+ */
+ tinfo = task_thread_info(task);
+ for (;;) {
+ char *id;
+ unsigned long *estack_end;
+ estack_end = in_exception_stack(cpu, (unsigned long)stack,
+ &used, &id);
+
+ if (estack_end) {
+ if (ops->stack(data, id) < 0)
+ break;
+
+ bp = print_context_stack(tinfo, stack, bp, ops,
+ data, estack_end, &graph);
+ ops->stack(data, "<EOE>");
+ /*
+ * We link to the next stack via the
+ * second-to-last pointer (index -2 to end) in the
+ * exception stack:
+ */
+ stack = (unsigned long *) estack_end[-2];
+ continue;
+ }
+ if (irqstack_end) {
+ unsigned long *irqstack;
+ irqstack = irqstack_end -
+ (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+
+ if (stack >= irqstack && stack < irqstack_end) {
+ if (ops->stack(data, "IRQ") < 0)
+ break;
+ bp = print_context_stack(tinfo, stack, bp,
+ ops, data, irqstack_end, &graph);
+ /*
+ * We link to the next stack (which would be
+ * the process stack normally) the last
+ * pointer (index -1 to end) in the IRQ stack:
+ */
+ stack = (unsigned long *) (irqstack_end[-1]);
+ irqstack_end = NULL;
+ ops->stack(data, "EOI");
+ continue;
+ }
+ }
+ break;
+ }
+
+ /*
+ * This handles the process stack:
+ */
+ bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
+ put_cpu();
+ }
+ EXPORT_SYMBOL(dump_trace);
+
+ void
+ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
+ {
+ unsigned long *stack;
+ int i;
+ const int cpu = smp_processor_id();
+ unsigned long *irqstack_end =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+
+ /*
+ * debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu.
+ */
+
+ if (sp == NULL) {
+ if (task)
+ sp = (unsigned long *)task->thread.sp;
+ else
+ sp = (unsigned long *)&sp;
+ }
+
+ stack = sp;
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (stack >= irqstack && stack <= irqstack_end) {
+ if (stack == irqstack_end) {
+ stack = (unsigned long *) (irqstack_end[-1]);
+ printk(" <EOI> ");
+ }
+ } else {
+ if (((long) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ }
+ if (i && ((i % STACKSLOTS_PER_LINE) == 0))
+ printk("\n%s", log_lvl);
+ printk(" %016lx", *stack++);
+ touch_nmi_watchdog();
+ }
+ printk("\n");
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ }
+
+ void show_registers(struct pt_regs *regs)
+ {
+ int i;
+ unsigned long sp;
+ const int cpu = smp_processor_id();
+ struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+
+ sp = regs->sp;
+ printk("CPU %d ", cpu);
+ __show_regs(regs, 1);
+ printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+ cur->comm, cur->pid, task_thread_info(cur), cur);
+
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+ */
+ if (!user_mode(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
+ u8 *ip;
+
+ printk(KERN_EMERG "Stack:\n");
+ show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
+ regs->bp, KERN_EMERG);
+
+ printk(KERN_EMERG "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at IP */
+ ip = (u8 *)regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
+ printk(" Bad RIP value.");
+ break;
+ }
+ if (ip == (u8 *)regs->ip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
+ }
+ }
+ printk("\n");
+ }
+
+ int is_valid_bugaddr(unsigned long ip)
+ {
+ unsigned short ud2;
+
+ if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
+ return 0;
+
+ return ud2 == 0x0b0f;
+ }
+
CFI_ENDPROC
END(spurious_interrupt_bug)
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movl 4(%esp), %edx
+ movl (%esp), %ecx
+ leal 4(%esp), %eax
+ movl %ebx, PT_EBX(%edx)
+ xorl %ebx, %ebx
+ movl %ebx, PT_ECX(%edx)
+ movl %ebx, PT_EDX(%edx)
+ movl %esi, PT_ESI(%edx)
+ movl %edi, PT_EDI(%edx)
+ movl %ebp, PT_EBP(%edx)
+ movl %ebx, PT_EAX(%edx)
+ movl $__USER_DS, PT_DS(%edx)
+ movl $__USER_DS, PT_ES(%edx)
+ movl $__KERNEL_PERCPU, PT_FS(%edx)
+ movl %ebx, PT_ORIG_EAX(%edx)
+ movl %ecx, PT_EIP(%edx)
++ movl 16(%esp), %eax
+ movl 12(%esp), %ecx
+ movl $__KERNEL_CS, PT_CS(%edx)
+ movl %ebx, PT_EFLAGS(%edx)
++ movl %eax, 12(%esp)
+ movl %eax, PT_OLDESP(%edx)
+ movl 8(%esp), %eax
+ movl %ecx, 8(%esp)
+ movl PT_EBX(%edx), %ebx
+ movl $__KERNEL_DS, PT_OLDSS(%edx)
+ jmpl *%eax
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
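+
+/*
+ * Note (paraphrasing the code above): arch_unwind_init_running fills the
+ * pt_regs area passed as its first argument with a snapshot of the
+ * current context -- EIP from the return address, callee-saved registers
+ * copied, scratch registers zeroed, OLDESP taken from the stack -- and
+ * then tail-calls the callback passed as its second argument, so a DWARF2
+ * unwind can start from the running frame.
+ */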
+
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
CFI_STARTPROC
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
+ interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
- .endm
-
- ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
- END(thermal_interrupt)
-
- ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
- END(threshold_interrupt)
+ END(\sym)
+ .endm
- #ifdef CONFIG_SMP
- ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
- END(reschedule_interrupt)
+ #ifdef CONFIG_SMP
+ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+ #endif
- .macro INVALIDATE_ENTRY num
- ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
- END(invalidate_interrupt\num)
- .endm
+ apicinterrupt UV_BAU_MESSAGE \
+ uv_bau_message_intr1 uv_bau_message_interrupt
+ apicinterrupt LOCAL_TIMER_VECTOR \
+ apic_timer_interrupt smp_apic_timer_interrupt
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
- ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
- END(call_function_interrupt)
- ENTRY(call_function_single_interrupt)
- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
- END(call_function_single_interrupt)
- ENTRY(irq_move_cleanup_interrupt)
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
- END(irq_move_cleanup_interrupt)
+ #ifdef CONFIG_SMP
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+ invalidate_interrupt0 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+ invalidate_interrupt1 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+ invalidate_interrupt2 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+ invalidate_interrupt3 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+ invalidate_interrupt4 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+ invalidate_interrupt5 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+ invalidate_interrupt6 smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+ invalidate_interrupt7 smp_invalidate_interrupt
#endif
- ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
- END(apic_timer_interrupt)
+ apicinterrupt THRESHOLD_APIC_VECTOR \
+ threshold_interrupt mce_threshold_interrupt
+ apicinterrupt THERMAL_APIC_VECTOR \
+ thermal_interrupt smp_thermal_interrupt
- ENTRY(uv_bau_message_intr1)
- apicinterrupt 220,uv_bau_message_interrupt
- END(uv_bau_message_intr1)
-
- ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
- END(error_interrupt)
+ #ifdef CONFIG_SMP
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+ call_function_single_interrupt smp_call_function_single_interrupt
+ apicinterrupt CALL_FUNCTION_VECTOR \
+ call_function_interrupt smp_call_function_interrupt
+ apicinterrupt RESCHEDULE_VECTOR \
+ reschedule_interrupt smp_reschedule_interrupt
+ #endif
- ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
- END(spurious_interrupt)
+ apicinterrupt ERROR_APIC_VECTOR \
+ error_interrupt smp_error_interrupt
+ apicinterrupt SPURIOUS_APIC_VECTOR \
+ spurious_interrupt smp_spurious_interrupt
+#ifdef CONFIG_PERFMON
- ENTRY(pmu_interrupt)
- apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt
- END(pmu_interrupt)
++apicinterrupt LOCAL_PERFMON_VECTOR pmu_interrupt smp_pmu_interrupt
+#endif
+
/*
* Exception entry points.
- */
- .macro zeroentry sym
+ */
+ .macro zeroentry sym do_sym
+ ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0 /* push error code/oldrax */
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+ END(\sym)
+ .endm
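+
+ /*
+ * Note (summarizing the macro above): zeroentry is for exceptions that
+ * push no hardware error code. It stores -1 in ORIG_RAX so signal code
+ * never mistakes the frame for an interrupted syscall, saves the full
+ * register file via error_entry, and calls \do_sym with the pt_regs
+ * pointer in %rdi and a zero error code in %esi.
+ */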
- .macro errorentry sym
- XCPT_FRAME
+ .macro paranoidzeroentry sym do_sym
+ ENTRY(\sym)
+ INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq %rax
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ subq $15*8, %rsp
+ call save_paranoid
+ TRACE_IRQS_OFF
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+ END(\sym)
+ .endm
- /* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- SAVE_ALL
- cld
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- SWAPGS
- xorl %ebx,%ebx
- 1:
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \irqtrace
+ .macro paranoidzeroentry_ist sym do_sym ist
+ ENTRY(\sym)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+ CFI_ADJUST_CFA_OFFSET 8
+ subq $15*8, %rsp
+ call save_paranoid
TRACE_IRQS_OFF
- .endif
- .endm
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ movq %gs:pda_data_offset, %rbp
+ subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ call \do_sym
+ addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ jmp paranoid_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+ END(\sym)
+ .endm
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
- */
- .macro paranoidexit trace=1
- /* ebx: no swapgs flag */
- paranoid_exit\trace:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
- testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
- paranoid_swapgs\trace:
- .if \trace
- TRACE_IRQS_IRETQ 0
- .endif
- SWAPGS_UNSAFE_STACK
- paranoid_restore\trace:
- RESTORE_ALL 8
- jmp irq_return
- paranoid_userspace\trace:
- GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
- movl %ebx,%edx /* arg3: thread flags */
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
- paranoid_schedule\trace:
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
- DISABLE_INTERRUPTS(CLBR_ANY)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
+ .macro errorentry sym do_sym
+ ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+ END(\sym)
+ .endm
- /*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
- KPROBE_ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
- testl $3,CS(%rsp)
- je error_kernelspace
- error_swapgs:
- SWAPGS
- error_sti:
- movq %rdi,RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
- error_exit:
- movl %ebx,%eax
- RESTORE_REST
- DISABLE_INTERRUPTS(CLBR_NONE)
+ /* error code is on the stack already */
+ .macro paranoiderrorentry sym do_sym
+ ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testl %eax,%eax
- jne retint_kernel
- LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- jmp retint_swapgs
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
+ END(\sym)
+ .endm
- error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
- usergs. Handle them here. The exception handlers after
- iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
- exceptions returning to compat mode. Check for these here too. */
- leaq irq_return(%rip),%rcx
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
- KPROBE_END(error_entry)
-
- /* Reload gs selector with exception handling */
- /* edi: new selector */
+ zeroentry divide_error do_divide_error
+ zeroentry overflow do_overflow
+ zeroentry bounds do_bounds
+ zeroentry invalid_op do_invalid_op
+ zeroentry device_not_available do_device_not_available
+ paranoiderrorentry double_fault do_double_fault
+ zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+ errorentry invalid_TSS do_invalid_TSS
+ errorentry segment_not_present do_segment_not_present
+ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+ zeroentry coprocessor_error do_coprocessor_error
+ errorentry alignment_check do_alignment_check
+ zeroentry simd_coprocessor_error do_simd_coprocessor_error
+
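+ /*
+  * A sketch of the C side these stubs dispatch to: %rdi carries the
+  * pt_regs pointer and %rsi the error code (zero for the zeroentry
+  * variants), so each do_sym handler is assumed to have the form:
+  *
+  *	dotraplinkage void do_divide_error(struct pt_regs *regs,
+  *					   long error_code);
+  */
+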
+ /* Reload gs selector with exception handling */
+ /* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
decl %gs:pda_irqcount
ret
CFI_ENDPROC
- ENDPROC(call_softirq)
-
- KPROBE_ENTRY(ignore_sysret)
- CFI_STARTPROC
- mov $-ENOSYS,%eax
- sysret
- CFI_ENDPROC
- ENDPROC(ignore_sysret)
+ END(call_softirq)
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movq %r15, R15(%rdi)
+ movq %r14, R14(%rdi)
+ xchgq %rsi, %rdx
+ movq %r13, R13(%rdi)
+ movq %r12, R12(%rdi)
+ xorl %eax, %eax
+ movq %rbp, RBP(%rdi)
+ movq %rbx, RBX(%rdi)
- movq (%rsp), %rcx
++ movq (%rsp), %r9
++ xchgq %rdx, %rcx
+ movq %rax, R11(%rdi)
+ movq %rax, R10(%rdi)
+ movq %rax, R9(%rdi)
+ movq %rax, R8(%rdi)
+ movq %rax, RAX(%rdi)
+ movq %rax, RCX(%rdi)
+ movq %rax, RDX(%rdi)
+ movq %rax, RSI(%rdi)
+ movq %rax, RDI(%rdi)
+ movq %rax, ORIG_RAX(%rdi)
- movq %rcx, RIP(%rdi)
- leaq 8(%rsp), %rcx
++ movq %r9, RIP(%rdi)
++ leaq 8(%rsp), %r9
+ movq $__KERNEL_CS, CS(%rdi)
+ movq %rax, EFLAGS(%rdi)
- movq %rcx, RSP(%rdi)
++ movq %r9, RSP(%rdi)
+ movq $__KERNEL_DS, SS(%rdi)
- jmpq *%rdx
++ jmpq *%rcx
+ CFI_ENDPROC
- ENDPROC(arch_unwind_init_running)
++END(arch_unwind_init_running)
+#endif
+
- #ifdef CONFIG_PARAVIRT_XEN
- ENTRY(xen_hypervisor_callback)
- zeroentry xen_do_hypervisor_callback
- END(xen_hypervisor_callback)
+ #ifdef CONFIG_XEN
+ zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
/*
- # A note on the "critical region" in our callback handler.
- # We want to avoid stacking callback handlers due to events occurring
- # during handling of the last event. To do this, we keep events disabled
- # until we've done all processing. HOWEVER, we must enable events before
- # popping the stack frame (can't be done atomically) and so it would still
- # be possible to get enough handler activations to overflow the stack.
- # Although unlikely, bugs of that kind are hard to track down, so we'd
- # like to avoid the possibility.
- # So, on entry to the handler we detect whether we interrupted an
- # existing activation in its critical region -- if so, we pop the current
- # activation and restart the handler using the previous one.
- */
+ * A note on the "critical region" in our callback handler.
+ * We want to avoid stacking callback handlers due to events occurring
+ * during handling of the last event. To do this, we keep events disabled
+ * until we've done all processing. HOWEVER, we must enable events before
+ * popping the stack frame (can't be done atomically) and so it would still
+ * be possible to get enough handler activations to overflow the stack.
+ * Although unlikely, bugs of that kind are hard to track down, so we'd
+ * like to avoid the possibility.
+ * So, on entry to the handler we detect whether we interrupted an
+ * existing activation in its critical region -- if so, we pop the current
+ * activation and restart the handler using the previous one.
+ */
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
CFI_STARTPROC
- /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
- see the correct pointer to the pt_regs */
+ /*
+	 * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
+ * see the correct pointer to the pt_regs
+ */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
CFI_ENDPROC
END(xen_failsafe_callback)
- #endif /* CONFIG_PARAVIRT_XEN */
+ #endif /* CONFIG_XEN */
+#ifdef CONFIG_KDB
+
+#ifdef CONFIG_SMP
- ENTRY(kdb_interrupt)
- apicinterrupt KDB_VECTOR,smp_kdb_interrupt
- END(kdb_interrupt)
++ apicinterrupt KDB_VECTOR kdb_interrupt smp_kdb_interrupt
+#endif /* CONFIG_SMP */
+
+ENTRY(kdb_call)
+ INTR_FRAME
+ cld
+	pushq $-1			# orig_rax
+ CFI_ADJUST_CFA_OFFSET 8
+ SAVE_ALL
+ movq $1,%rdi # KDB_REASON_ENTER
+ movq $0,%rsi # error_code
+ movq %rsp,%rdx # struct pt_regs
+ call kdb
+ RESTORE_ALL
+	addq $8,%rsp			# forget orig_rax
+ CFI_ADJUST_CFA_OFFSET -8
+ iretq
+ CFI_ENDPROC
+END(kdb_call)
+
+#endif /* CONFIG_KDB */
++
+ /*
+ * Some functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+
+ paranoidzeroentry_ist debug do_debug DEBUG_STACK
+ paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+ paranoiderrorentry stack_segment do_stack_segment
+ errorentry general_protection do_general_protection
+ errorentry page_fault do_page_fault
+ #ifdef CONFIG_X86_MCE
+ paranoidzeroentry machine_check do_machine_check
+ #endif
+
+ /*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ *
+ * "trace" is 0 for the NMI handler only, because irq-tracing
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+
+ /* ebx: no swapgs flag */
+ ENTRY(paranoid_exit)
+ INTR_FRAME
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz paranoid_restore
+ testl $3,CS(%rsp)
+ jnz paranoid_userspace
+ paranoid_swapgs:
+ TRACE_IRQS_IRETQ 0
+ SWAPGS_UNSAFE_STACK
+ paranoid_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+ paranoid_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz paranoid_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz paranoid_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+ paranoid_schedule:
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+ CFI_ENDPROC
+ END(paranoid_exit)
+
+ /*
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * Returns the "no swapgs flag" in %ebx.
+ */
+ ENTRY(error_entry)
+ XCPT_FRAME
+ CFI_ADJUST_CFA_OFFSET 15*8
+ /* oldrax contains error code */
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+ error_swapgs:
+ SWAPGS
+ error_sti:
+ TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+
+ /*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report a truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+ error_kernelspace:
+ incl %ebx
+ leaq irq_return(%rip),%rcx
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ movl %ecx,%ecx /* zero extend */
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ cmpq $gs_change,RIP+8(%rsp)
+ je error_swapgs
+ jmp error_sti
+ END(error_entry)
+
+
+ /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+ ENTRY(error_exit)
+ DEFAULT_FRAME
+ movl %ebx,%eax
+ RESTORE_REST
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ GET_THREAD_INFO(%rcx)
+ testl %eax,%eax
+ jne retint_kernel
+ LOCKDEP_SYS_EXIT_IRQ
+ movl TI_flags(%rcx),%edx
+ movl $_TIF_WORK_MASK,%edi
+ andl %edi,%edx
+ jnz retint_careful
+ jmp retint_swapgs
+ CFI_ENDPROC
+ END(error_exit)
+
+
+ /* runs on exception stack */
+ ENTRY(nmi)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq_cfi $-1
+ subq $15*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp,%rdi
+ movq $-1,%rsi
+ call do_nmi
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ /* paranoidexit; without TRACE_IRQS_OFF */
+ /* ebx: no swapgs flag */
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz nmi_restore
+ testl $3,CS(%rsp)
+ jnz nmi_userspace
+ nmi_swapgs:
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+ nmi_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz nmi_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz nmi_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ jmp nmi_userspace
+ nmi_schedule:
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ jmp nmi_userspace
+ CFI_ENDPROC
+ #else
+ jmp paranoid_exit
+ CFI_ENDPROC
+ #endif
+ END(nmi)
+
+ ENTRY(ignore_sysret)
+ CFI_STARTPROC
+ mov $-ENOSYS,%eax
+ sysret
+ CFI_ENDPROC
+ END(ignore_sysret)
+
+ /*
+ * End of kprobes section
+ */
+ .popsection
/*
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
- unsigned long hpet_address;
- static void __iomem *hpet_virt_address;
+ unsigned long hpet_address;
+ #ifdef CONFIG_PCI_MSI
+ static unsigned long hpet_num_timers;
+ #endif
+ static void __iomem *hpet_virt_address;
+static int hpet_legacy_use_64_bits;
+ struct hpet_dev {
+ struct clock_event_device evt;
+ unsigned int num;
+ int cpu;
+ unsigned int irq;
+ unsigned int flags;
+ char name[10];
+ };
+
unsigned long hpet_readl(unsigned long a)
{
return readl(hpet_virt_address + a);
}
__setup("nohpet", disable_hpet);
+#ifdef CONFIG_X86_64
+static int hpet64 = 0;
+static int __init hpet64_setup(char *str)
+{
+ hpet64 = 1;
+ return 1;
+}
+__setup("hpet64", hpet64_setup);
+#endif
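+
+/*
+ * Usage sketch: 64-bit programming of timer0 stays off unless the new
+ * "hpet64" switch is appended to the kernel command line, e.g. (the
+ * root= argument below is only illustrative, bootloader syntax varies):
+ *
+ *	linux /boot/vmlinuz root=/dev/sda1 hpet64
+ */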
+
+
static inline int is_hpet_capable(void)
{
- return (!boot_hpet_disable && hpet_address);
+ return !boot_hpet_disable && hpet_address;
}
/*
hpet_legacy_int_enabled = 1;
}
+static int timer0_use_64_bits(void)
+{
+#ifndef CONFIG_X86_64
+ /* using the HPET in 64-bit mode without atomic 64-bit
+ * accesses is too inefficient
+ */
+ return 0;
+#else
+
+ if (unlikely(hpet64)) {
+ u32 id, t0_cfg;
+ id = hpet_readl(HPET_ID);
- t0_cfg = hpet_readl(HPET_T0_CFG);
++ t0_cfg = hpet_readl(HPET_Tn_CFG(0));
+
+ if ((id & HPET_ID_64BIT) && (t0_cfg & HPET_TN_64BIT_CAP)) {
+ printk(KERN_DEBUG "hpet timer0 configured in 64-bit mode\n");
+ return 1;
+		} else {
+			printk(KERN_DEBUG "hpet timer0 does not support 64-bit mode\n");
+			return 0;
+		}
+	}
+	return 0;
+#endif
+}
+
static void hpet_legacy_clockevent_register(void)
{
/* Start HPET legacy interrupts */
unsigned long cfg, cmp, now;
uint64_t delta;
- switch(mode) {
+ switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
- delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
- delta >>= hpet_clockevent.shift;
+ delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+ delta >>= evt->shift;
- now = hpet_readl(HPET_COUNTER);
+ now = hpet_read_value(HPET_COUNTER);
cmp = now + (unsigned long) delta;
- cfg = hpet_readl(HPET_T0_CFG);
+ cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
- HPET_TN_SETVAL | HPET_TN_32BIT;
+ HPET_TN_SETVAL |
+ (hpet_legacy_use_64_bits ? 0 : HPET_TN_32BIT);
- hpet_writel(cfg, HPET_T0_CFG);
+ hpet_writel(cfg, HPET_Tn_CFG(timer));
/*
* The first write after writing TN_SETVAL to the
* config register sets the counter value, the second
* write sets the period.
*/
- hpet_write_value(cmp, HPET_T0_CMP);
- hpet_writel(cmp, HPET_Tn_CMP(timer));
++ hpet_write_value(cmp, HPET_Tn_CMP(timer));
udelay(1);
- hpet_write_value((unsigned long) delta, HPET_T0_CMP);
- hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
++ hpet_write_value((unsigned long) delta, HPET_Tn_CMP(timer));
break;
case CLOCK_EVT_MODE_ONESHOT:
- cfg = hpet_readl(HPET_T0_CFG);
+ cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ cfg |= HPET_TN_ENABLE |
+ (hpet_legacy_use_64_bits ? 0 : HPET_TN_32BIT);
- hpet_writel(cfg, HPET_T0_CFG);
+ hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
case CLOCK_EVT_MODE_UNUSED:
}
}
- static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
+ static int hpet_next_event(unsigned long delta,
+ struct clock_event_device *evt, int timer)
{
- u32 cnt;
+ unsigned long cnt;
- cnt = hpet_readl(HPET_COUNTER);
+ cnt = hpet_read_value(HPET_COUNTER);
cnt += (u32) delta;
- hpet_write_value(cnt, HPET_T0_CMP);
- hpet_writel(cnt, HPET_Tn_CMP(timer));
++ hpet_write_value(cnt, HPET_Tn_CMP(timer));
- hpet_readl(HPET_T0_CMP); /* pre-read for bnc#433746 */
++ hpet_readl(HPET_Tn_CMP(timer)); /* pre-read for bnc#433746 */
/*
* We need to read back the CMP register to make sure that
* what we wrote hit the chip before we compare it to the
* counter.
*/
- WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != (u32)cnt);
- WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
++ WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != (u32)cnt);
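+	/*
+	 * The signed 32-bit difference below is wraparound-safe: e.g. with
+	 * cnt = 0xfffffffe and a counter that has wrapped to 0x00000002,
+	 * (s32)(0x00000002 - 0xfffffffe) = 4 >= 0, so the comparator was
+	 * already missed and -ETIME is returned; a counter still behind
+	 * cnt gives a negative difference and the call returns 0.
+	 */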
- return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
+ return (s32)((u32)hpet_readl(HPET_COUNTER) - (u32)cnt) >= 0 ? -ETIME : 0;
}
+ static void hpet_legacy_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+ {
+ hpet_set_mode(mode, evt, 0);
+ }
+
+ static int hpet_legacy_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+ {
+ return hpet_next_event(delta, evt, 0);
+ }
+
+ /*
+ * HPET MSI Support
+ */
+ #ifdef CONFIG_PCI_MSI
+
+ static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+ static struct hpet_dev *hpet_devs;
+
+ void hpet_msi_unmask(unsigned int irq)
+ {
+ struct hpet_dev *hdev = get_irq_data(irq);
+ unsigned long cfg;
+
+ /* unmask it */
+ cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg |= HPET_TN_FSB;
+ hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+ }
+
+ void hpet_msi_mask(unsigned int irq)
+ {
+ unsigned long cfg;
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ /* mask it */
+ cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg &= ~HPET_TN_FSB;
+ hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+ }
+
+ void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+ {
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+ hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+ }
+
+ void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+ {
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+ msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+ msg->address_hi = 0;
+ }
+
+ static void hpet_msi_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+ {
+ struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ hpet_set_mode(mode, evt, hdev->num);
+ }
+
+ static int hpet_msi_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+ {
+ struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ return hpet_next_event(delta, evt, hdev->num);
+ }
+
+ static int hpet_setup_msi_irq(unsigned int irq)
+ {
+ if (arch_setup_hpet_msi(irq)) {
+ destroy_irq(irq);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ static int hpet_assign_irq(struct hpet_dev *dev)
+ {
+ unsigned int irq;
+
+ irq = create_irq();
+ if (!irq)
+ return -EINVAL;
+
+ set_irq_data(irq, dev);
+
+ if (hpet_setup_msi_irq(irq))
+ return -EINVAL;
+
+ dev->irq = irq;
+ return 0;
+ }
+
+ static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+ {
+ struct hpet_dev *dev = (struct hpet_dev *)data;
+ struct clock_event_device *hevt = &dev->evt;
+
+ if (!hevt->event_handler) {
+ printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+ dev->num);
+ return IRQ_HANDLED;
+ }
+
+ hevt->event_handler(hevt);
+ return IRQ_HANDLED;
+ }
+
+ static int hpet_setup_irq(struct hpet_dev *dev)
+ {
+
+ if (request_irq(dev->irq, hpet_interrupt_handler,
+ IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
+ return -1;
+
+ disable_irq(dev->irq);
+ irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
+ enable_irq(dev->irq);
+
+ printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+ dev->name, dev->irq);
+
+ return 0;
+ }
+
+ /* This should be called on the specified @cpu */
+ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+ {
+ struct clock_event_device *evt = &hdev->evt;
+ uint64_t hpet_freq;
+
+ WARN_ON(cpu != smp_processor_id());
+ if (!(hdev->flags & HPET_DEV_VALID))
+ return;
+
+ if (hpet_setup_msi_irq(hdev->irq))
+ return;
+
+ hdev->cpu = cpu;
+ per_cpu(cpu_hpet_dev, cpu) = hdev;
+ evt->name = hdev->name;
+ hpet_setup_irq(hdev);
+ evt->irq = hdev->irq;
+
+ evt->rating = 110;
+ evt->features = CLOCK_EVT_FEAT_ONESHOT;
+ if (hdev->flags & HPET_DEV_PERI_CAP)
+ evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+ evt->set_mode = hpet_msi_set_mode;
+ evt->set_next_event = hpet_msi_next_event;
+ evt->shift = 32;
+
+ /*
+	 * The period is a femtoseconds value. We need to calculate the
+	 * scaled math multiplication factor for nanosecond to HPET tick
+ * conversion.
+ */
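+	/*
+	 * Worked example (hypothetical HPET): a 14.31818 MHz timer reports
+	 * hpet_period = 69841279 fs, so hpet_freq = 10^15 / 69841279
+	 * ~= 14318180 Hz, which div_sc() then scales against NSEC_PER_SEC
+	 * using the 32-bit shift chosen above.
+	 */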
+ hpet_freq = 1000000000000000ULL;
+ do_div(hpet_freq, hpet_period);
+ evt->mult = div_sc((unsigned long) hpet_freq,
+ NSEC_PER_SEC, evt->shift);
+ /* Calculate the max delta */
+ evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+ /* 5 usec minimum reprogramming delta. */
+ evt->min_delta_ns = 5000;
+
+ evt->cpumask = cpumask_of(hdev->cpu);
+ clockevents_register_device(evt);
+ }
+
+ #ifdef CONFIG_HPET
+ /* Reserve at least one timer for userspace (/dev/hpet) */
+ #define RESERVE_TIMERS 1
+ #else
+ #define RESERVE_TIMERS 0
+ #endif
+
+ static void hpet_msi_capability_lookup(unsigned int start_timer)
+ {
+ unsigned int id;
+ unsigned int num_timers;
+ unsigned int num_timers_used = 0;
+ int i;
+
+ id = hpet_readl(HPET_ID);
+
+ num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ num_timers++; /* Value read out starts from 0 */
+
+ hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+ if (!hpet_devs)
+ return;
+
+ hpet_num_timers = num_timers;
+
+ for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+ struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+ unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+ /* Only consider HPET timer with MSI support */
+ if (!(cfg & HPET_TN_FSB_CAP))
+ continue;
+
+ hdev->flags = 0;
+ if (cfg & HPET_TN_PERIODIC_CAP)
+ hdev->flags |= HPET_DEV_PERI_CAP;
+ hdev->num = i;
+
+ sprintf(hdev->name, "hpet%d", i);
+ if (hpet_assign_irq(hdev))
+ continue;
+
+ hdev->flags |= HPET_DEV_FSB_CAP;
+ hdev->flags |= HPET_DEV_VALID;
+ num_timers_used++;
+ if (num_timers_used == num_possible_cpus())
+ break;
+ }
+
+ printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+ num_timers, num_timers_used);
+ }
+
+ #ifdef CONFIG_HPET
+ static void hpet_reserve_msi_timers(struct hpet_data *hd)
+ {
+ int i;
+
+ if (!hpet_devs)
+ return;
+
+ for (i = 0; i < hpet_num_timers; i++) {
+ struct hpet_dev *hdev = &hpet_devs[i];
+
+ if (!(hdev->flags & HPET_DEV_VALID))
+ continue;
+
+ hd->hd_irq[hdev->num] = hdev->irq;
+ hpet_reserve_timer(hd, hdev->num);
+ }
+ }
+ #endif
+
+ static struct hpet_dev *hpet_get_unused_timer(void)
+ {
+ int i;
+
+ if (!hpet_devs)
+ return NULL;
+
+ for (i = 0; i < hpet_num_timers; i++) {
+ struct hpet_dev *hdev = &hpet_devs[i];
+
+ if (!(hdev->flags & HPET_DEV_VALID))
+ continue;
+ if (test_and_set_bit(HPET_DEV_USED_BIT,
+ (unsigned long *)&hdev->flags))
+ continue;
+ return hdev;
+ }
+ return NULL;
+ }
+
+ struct hpet_work_struct {
+ struct delayed_work work;
+ struct completion complete;
+ };
+
+ static void hpet_work(struct work_struct *w)
+ {
+ struct hpet_dev *hdev;
+ int cpu = smp_processor_id();
+ struct hpet_work_struct *hpet_work;
+
+ hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+ hdev = hpet_get_unused_timer();
+ if (hdev)
+ init_one_hpet_msi_clockevent(hdev, cpu);
+
+ complete(&hpet_work->complete);
+ }
+
+ static int hpet_cpuhp_notify(struct notifier_block *n,
+ unsigned long action, void *hcpu)
+ {
+ unsigned long cpu = (unsigned long)hcpu;
+ struct hpet_work_struct work;
+ struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+ switch (action & 0xf) {
+ case CPU_ONLINE:
+ INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work);
+ init_completion(&work.complete);
+ /* FIXME: add schedule_work_on() */
+ schedule_delayed_work_on(cpu, &work.work, 0);
+ wait_for_completion(&work.complete);
+ destroy_timer_on_stack(&work.work.timer);
+ break;
+ case CPU_DEAD:
+ if (hdev) {
+ free_irq(hdev->irq, hdev);
+ hdev->flags &= ~HPET_DEV_USED;
+ per_cpu(cpu_hpet_dev, cpu) = NULL;
+ }
+ break;
+ }
+ return NOTIFY_OK;
+ }
+ #else
+
+ static int hpet_setup_msi_irq(unsigned int irq)
+ {
+ return 0;
+ }
+ static void hpet_msi_capability_lookup(unsigned int start_timer)
+ {
+ return;
+ }
+
+ #ifdef CONFIG_HPET
+ static void hpet_reserve_msi_timers(struct hpet_data *hd)
+ {
+ return;
+ }
+ #endif
+
+ static int hpet_cpuhp_notify(struct notifier_block *n,
+ unsigned long action, void *hcpu)
+ {
+ return NOTIFY_OK;
+ }
+
+ #endif
+
/*
* Clock source related code
*/
--- /dev/null
+ /*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
+ #include <linux/delay.h>
+ #include <linux/sched.h>
+ #include <linux/pci.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/compiler.h>
+ #include <linux/acpi.h>
+ #include <linux/module.h>
+ #include <linux/sysdev.h>
+ #include <linux/msi.h>
+ #include <linux/htirq.h>
+ #include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/jiffies.h> /* time_after() */
+ #ifdef CONFIG_ACPI
+ #include <acpi/acpi_bus.h>
+ #endif
+ #include <linux/bootmem.h>
+ #include <linux/dmar.h>
+ #include <linux/hpet.h>
+
++#ifdef CONFIG_KDB
++#include <linux/kdb.h>
++#endif /* CONFIG_KDB */
++
+ #include <asm/idle.h>
+ #include <asm/io.h>
+ #include <asm/smp.h>
+ #include <asm/desc.h>
+ #include <asm/proto.h>
+ #include <asm/acpi.h>
+ #include <asm/dma.h>
+ #include <asm/timer.h>
+ #include <asm/i8259.h>
+ #include <asm/nmi.h>
+ #include <asm/msidef.h>
+ #include <asm/hypertransport.h>
+ #include <asm/setup.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/hpet.h>
+ #include <asm/uv/uv_hub.h>
+ #include <asm/uv/uv_irq.h>
+
+ #include <mach_ipi.h>
+ #include <mach_apic.h>
+ #include <mach_apicdef.h>
+
+ #define __apicdebuginit(type) static type __init
+
+ /*
+ * Is the SiS APIC rmw bug present?
+ * -1 = don't know, 0 = no, 1 = yes
+ */
+ int sis_apic_bug = -1;
+
+ static DEFINE_SPINLOCK(ioapic_lock);
+ static DEFINE_SPINLOCK(vector_lock);
+
+ /*
+ * # of IRQ routing registers
+ */
+ int nr_ioapic_registers[MAX_IO_APICS];
+
+ /* I/O APIC entries */
+ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+ int nr_ioapics;
+
+ /* MP IRQ source entries */
+ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+ /* # of MP IRQ source entries */
+ int mp_irq_entries;
+
+ #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+ int mp_bus_id_to_type[MAX_MP_BUSSES];
+ #endif
+
+ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+ int skip_ioapic_setup;
+
+ static int __init parse_noapic(char *str)
+ {
+ /* disable IO-APIC */
+ disable_ioapic_setup();
+ return 0;
+ }
+ early_param("noapic", parse_noapic);
+
+ struct irq_pin_list;
+
+ /*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+ struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+ };
+
+ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+ {
+ struct irq_pin_list *pin;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+
+ return pin;
+ }
+
+ struct irq_cfg {
+ struct irq_pin_list *irq_2_pin;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
+ unsigned move_cleanup_count;
+ u8 vector;
+ u8 move_in_progress : 1;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ u8 move_desc_pending : 1;
+ #endif
+ };
+
+ /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg irq_cfgx[] = {
+ #else
+ static struct irq_cfg irq_cfgx[NR_IRQS] = {
+ #endif
+ [0] = { .vector = IRQ0_VECTOR, },
+ [1] = { .vector = IRQ1_VECTOR, },
+ [2] = { .vector = IRQ2_VECTOR, },
+ [3] = { .vector = IRQ3_VECTOR, },
+ [4] = { .vector = IRQ4_VECTOR, },
+ [5] = { .vector = IRQ5_VECTOR, },
+ [6] = { .vector = IRQ6_VECTOR, },
+ [7] = { .vector = IRQ7_VECTOR, },
+ [8] = { .vector = IRQ8_VECTOR, },
+ [9] = { .vector = IRQ9_VECTOR, },
+ [10] = { .vector = IRQ10_VECTOR, },
+ [11] = { .vector = IRQ11_VECTOR, },
+ [12] = { .vector = IRQ12_VECTOR, },
+ [13] = { .vector = IRQ13_VECTOR, },
+ [14] = { .vector = IRQ14_VECTOR, },
+ [15] = { .vector = IRQ15_VECTOR, },
+ };
+
+ int __init arch_early_irq_init(void)
+ {
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ int count;
+ int i;
+
+ cfg = irq_cfgx;
+ count = ARRAY_SIZE(irq_cfgx);
+
+ for (i = 0; i < count; i++) {
+ desc = irq_to_desc(i);
+ desc->chip_data = &cfg[i];
+ alloc_bootmem_cpumask_var(&cfg[i].domain);
+ alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+ if (i < NR_IRQS_LEGACY)
+ cpumask_setall(cfg[i].domain);
+ }
+
+ return 0;
+ }
+
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg *irq_cfg(unsigned int irq)
+ {
+ struct irq_cfg *cfg = NULL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ if (desc)
+ cfg = desc->chip_data;
+
+ return cfg;
+ }
+
+ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+ {
+ struct irq_cfg *cfg;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+ if (cfg) {
+ if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ kfree(cfg);
+ cfg = NULL;
+ } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ GFP_ATOMIC, node)) {
+ free_cpumask_var(cfg->domain);
+ kfree(cfg);
+ cfg = NULL;
+ } else {
+ cpumask_clear(cfg->domain);
+ cpumask_clear(cfg->old_domain);
+ }
+ }
+
+ return cfg;
+ }
+
+ int arch_init_chip_data(struct irq_desc *desc, int cpu)
+ {
+ struct irq_cfg *cfg;
+
+ cfg = desc->chip_data;
+ if (!cfg) {
+ desc->chip_data = get_one_free_irq_cfg(cpu);
+ if (!desc->chip_data) {
+ printk(KERN_ERR "can not alloc irq_cfg\n");
+ BUG_ON(1);
+ }
+ }
+
+ return 0;
+ }
+
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+ static void
+ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+ {
+ struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+ cfg->irq_2_pin = NULL;
+ old_entry = old_cfg->irq_2_pin;
+ if (!old_entry)
+ return;
+
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry)
+ return;
+
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ head = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ while (old_entry) {
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ entry = head;
+ while (entry) {
+ head = entry->next;
+ kfree(entry);
+ entry = head;
+ }
+ /* still use the old one */
+ return;
+ }
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ tail->next = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ }
+
+ tail->next = NULL;
+ cfg->irq_2_pin = head;
+ }
+
+ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+ {
+ struct irq_pin_list *entry, *next;
+
+ if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+ return;
+
+ entry = old_cfg->irq_2_pin;
+
+ while (entry) {
+ next = entry->next;
+ kfree(entry);
+ entry = next;
+ }
+ old_cfg->irq_2_pin = NULL;
+ }
+
+ void arch_init_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
+ {
+ struct irq_cfg *cfg;
+ struct irq_cfg *old_cfg;
+
+ cfg = get_one_free_irq_cfg(cpu);
+
+ if (!cfg)
+ return;
+
+ desc->chip_data = cfg;
+
+ old_cfg = old_desc->chip_data;
+
+ memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+ init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ }
+
+ static void free_irq_cfg(struct irq_cfg *old_cfg)
+ {
+ kfree(old_cfg);
+ }
+
+ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+ {
+ struct irq_cfg *old_cfg, *cfg;
+
+ old_cfg = old_desc->chip_data;
+ cfg = desc->chip_data;
+
+ if (old_cfg == cfg)
+ return;
+
+ if (old_cfg) {
+ free_irq_2_pin(old_cfg, cfg);
+ free_irq_cfg(old_cfg);
+ old_desc->chip_data = NULL;
+ }
+ }
+
+ static void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ struct irq_cfg *cfg = desc->chip_data;
+
+ if (!cfg->move_in_progress) {
+		/* i.e. the domain has not changed */
+ if (!cpumask_intersects(&desc->affinity, mask))
+ cfg->move_desc_pending = 1;
+ }
+ }
+ #endif
+
+ #else
+ static struct irq_cfg *irq_cfg(unsigned int irq)
+ {
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ }
+
+ #endif
+
+ #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static inline void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ }
+ #endif
+
+ struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+ };
+
+ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ {
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+ }
+
+ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+ {
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+ }
+
+ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+ {
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+ }
+
+ /*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+ {
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+ if (sis_apic_bug)
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+ }
+
+ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+ {
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+ int pin;
+
+ if (!entry)
+ break;
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return true;
+ }
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return false;
+ }
+
+ union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+ };
+
+ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+ {
+ union entry_union eu;
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+ }
+
+ /*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+ static void
+ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+ {
+ union entry_union eu;
+ eu.entry = e;
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ }
+
+ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+ {
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(apic, pin, e);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ /*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+ static void ioapic_mask_entry(int apic, int pin)
+ {
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ #ifdef CONFIG_SMP
+ static void send_cleanup_vector(struct irq_cfg *cfg)
+ {
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+ }
+
+ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+ {
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ #ifdef CONFIG_INTR_REMAP
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ #else
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ #endif
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+ }
+
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+ /*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+ static unsigned int
+ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return BAD_APICID;
+
+ cpumask_and(&desc->affinity, cfg->domain, mask);
+ set_extra_move_desc(desc, mask);
+ return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+ }
+
+ static void
+ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ static void
+ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ set_ioapic_affinity_irq_desc(desc, mask);
+ }
+ #endif /* CONFIG_SMP */
+
+ /*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+ {
+ struct irq_pin_list *entry;
+
+ entry = cfg->irq_2_pin;
+ if (!entry) {
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+ apic, pin);
+ return;
+ }
+ cfg->irq_2_pin = entry;
+ entry->apic = apic;
+ entry->pin = pin;
+ return;
+ }
+
+ while (entry->next) {
+ /* not again, please */
+ if (entry->apic == apic && entry->pin == pin)
+ return;
+
+ entry = entry->next;
+ }
+
+ entry->next = get_one_free_irq_2_pin(cpu);
+ entry = entry->next;
+ entry->apic = apic;
+ entry->pin = pin;
+ }
+
+ /*
+ * Reroute an IRQ to a different pin.
+ */
+ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+ {
+ struct irq_pin_list *entry = cfg->irq_2_pin;
+ int replaced = 0;
+
+ while (entry) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ replaced = 1;
+ /* every one is different, right? */
+ break;
+ }
+ entry = entry->next;
+ }
+
+ /* why? call replace before add? */
+ if (!replaced)
+ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+ }
+
+ static inline void io_apic_modify_irq(struct irq_cfg *cfg,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+ {
+ int pin;
+ struct irq_pin_list *entry;
+
+ for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+ unsigned int reg;
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
+ }
+ }
+
+ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
+ {
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ }
+
+ #ifdef CONFIG_X86_64
+ static void io_apic_sync(struct irq_pin_list *entry)
+ {
+ /*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+ struct io_apic __iomem *io_apic;
+ io_apic = io_apic_base(entry->apic);
+ readl(&io_apic->data);
+ }
+
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+ {
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ }
+ #else /* CONFIG_X86_32 */
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+ {
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
+ }
+
+ static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
+ {
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ IO_APIC_REDIR_MASKED, NULL);
+ }
+
+ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
+ {
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+ }
+ #endif /* CONFIG_X86_32 */
+
+ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
+ {
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned long flags;
+
+ BUG_ON(!cfg);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
+ {
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ static void mask_IO_APIC_irq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ mask_IO_APIC_irq_desc(desc);
+ }
+ static void unmask_IO_APIC_irq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ unmask_IO_APIC_irq_desc(desc);
+ }
+
+ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+ {
+ struct IO_APIC_route_entry entry;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ entry = ioapic_read_entry(apic, pin);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ ioapic_mask_entry(apic, pin);
+ }
+
+ static void clear_IO_APIC (void)
+ {
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+ }
+
+ #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+ void send_IPI_self(int vector)
+ {
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write(APIC_ICR, cfg);
+ }
+ #endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+ #ifdef CONFIG_X86_32
+ /*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+ #define MAX_PIRQS 8
+ static int pirq_entries [MAX_PIRQS];
+ static int pirqs_enabled;
+
+ static int __init ioapic_pirq_setup(char *str)
+ {
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+ }
+
+ __setup("pirq=", ioapic_pirq_setup);
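+
+ /*
+  * Worked example: booting with "pirq=5,11" stores pirq_entries[7] = 5
+  * and pirq_entries[6] = 11 (the list fills the table from the top down),
+  * which pin_2_irq() below uses to redirect IO-APIC pins 23 and 22 to
+  * IRQs 5 and 11 respectively.
+  */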
+ #endif /* CONFIG_X86_32 */
+
+ #ifdef CONFIG_INTR_REMAP
+ /* I/O APIC RTE contents at the OS boot up */
+ static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
+ /*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+ int save_mask_IO_APIC_setup(void)
+ {
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+ int apic, pin;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ early_ioapic_entries[apic] =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ nr_ioapic_registers[apic], GFP_KERNEL);
+ if (!early_ioapic_entries[apic])
+ goto nomem;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = early_ioapic_entries[apic][pin] =
+ ioapic_read_entry(apic, pin);
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+
+ return 0;
+
+ nomem:
+ while (apic >= 0)
+ kfree(early_ioapic_entries[apic--]);
+ memset(early_ioapic_entries, 0,
+ ARRAY_SIZE(early_ioapic_entries));
+
+ return -ENOMEM;
+ }
+
+ void restore_IO_APIC_setup(void)
+ {
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!early_ioapic_entries[apic])
+ break;
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ ioapic_write_entry(apic, pin,
+ early_ioapic_entries[apic][pin]);
+ kfree(early_ioapic_entries[apic]);
+ early_ioapic_entries[apic] = NULL;
+ }
+ }
+
+ void reinit_intr_remapped_IO_APIC(int intr_remapping)
+ {
+ /*
+ * for now plain restore of previous settings.
+ * TBD: In the case of OS enabling interrupt-remapping,
+ * IO-APIC RTE's need to be setup to point to interrupt-remapping
+ * table entries. for now, do a plain restore, and wait for
+ * the setup_IO_APIC_irqs() to do proper initialization.
+ */
+ restore_IO_APIC_setup();
+ }
+ #endif
+
+ /*
+ * Find the IRQ entry number of a certain pin.
+ */
+ static int find_irq_entry(int apic, int pin, int type)
+ {
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mp_irqtype == type &&
+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mp_dstirq == pin)
+ return i;
+
+ return -1;
+ }
+
+ /*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+ static int __init find_isa_irq_pin(int irq, int type)
+ {
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mp_srcbus;
+
+ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mp_irqtype == type) &&
+ (mp_irqs[i].mp_srcbusirq == irq))
+
+ return mp_irqs[i].mp_dstirq;
+ }
+ return -1;
+ }
+
+ static int __init find_isa_irq_apic(int irq, int type)
+ {
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mp_srcbus;
+
+ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mp_irqtype == type) &&
+ (mp_irqs[i].mp_srcbusirq == irq))
+ break;
+ }
+ if (i < mp_irq_entries) {
+ int apic;
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+ return apic;
+ }
+ }
+
+ return -1;
+ }
+
+ /*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+ static int pin_2_irq(int idx, int apic, int pin);
+
+ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+ {
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mp_srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+ break;
+
+ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].mp_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+ }
+
+ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+ #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ /*
+ * EISA Edge/Level control register, ELCR
+ */
+ static int EISA_ELCR(unsigned int irq)
+ {
+ if (irq < NR_IRQS_LEGACY) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+ }
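+
+ /*
+  * Worked example: for ISA IRQ 10 this reads port 0x4d0 + (10 >> 3) =
+  * 0x4d1 and tests bit (10 & 7) = 2; a set ELCR bit means the line is
+  * level triggered, a clear bit means edge triggered.
+  */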
+
+ #endif
+
+ /* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+ #define default_ISA_trigger(idx) (0)
+ #define default_ISA_polarity(idx) (0)
+
+ /* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+ #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+ #define default_EISA_polarity(idx) default_ISA_polarity(idx)
+
+ /* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+ #define default_PCI_trigger(idx) (1)
+ #define default_PCI_polarity(idx) (1)
+
+ /* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+ #define default_MCA_trigger(idx) (1)
+ #define default_MCA_polarity(idx) default_ISA_polarity(idx)
+
+ static int MPBIOS_polarity(int idx)
+ {
+ int bus = mp_irqs[idx].mp_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mp_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ if (test_bit(bus, mp_bus_not_pci))
+ polarity = default_ISA_polarity(idx);
+ else
+ polarity = default_PCI_polarity(idx);
+ break;
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+ }
+
+ static int MPBIOS_trigger(int idx)
+ {
+ int bus = mp_irqs[idx].mp_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ if (test_bit(bus, mp_bus_not_pci))
+ trigger = default_ISA_trigger(idx);
+ else
+ trigger = default_PCI_trigger(idx);
+ #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ switch (mp_bus_id_to_type[bus]) {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ #endif
+ break;
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+ }
+
+ static inline int irq_polarity(int idx)
+ {
+ return MPBIOS_polarity(idx);
+ }
+
+ static inline int irq_trigger(int idx)
+ {
+ return MPBIOS_trigger(idx);
+ }
+
+ int (*ioapic_renumber_irq)(int ioapic, int irq);
+ static int pin_2_irq(int idx, int apic, int pin)
+ {
+ int irq, i;
+ int bus = mp_irqs[idx].mp_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mp_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ if (test_bit(bus, mp_bus_not_pci)) {
+ irq = mp_irqs[idx].mp_srcbusirq;
+ } else {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ /*
+ * For MPS mode, so far only needed by ES7000 platform
+ */
+ if (ioapic_renumber_irq)
+ irq = ioapic_renumber_irq(apic, irq);
+ }
+
+ #ifdef CONFIG_X86_32
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ #endif
+
+ return irq;
+ }
+
+ void lock_vector_lock(void)
+ {
+ /* Used so that the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+ }
+
+ void unlock_vector_lock(void)
+ {
+ spin_unlock(&vector_lock);
+ }
+
+ static int
+ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+ {
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
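+ /* The search below steps candidate vectors by 8 and rotates the
+ * starting offset when it wraps past first_system_vector, so
+ * consecutive allocations tend to land in different priority levels. */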
+ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ unsigned int old_vector;
+ int cpu, err;
+ cpumask_var_t tmp_mask;
+
+ if (cfg->move_in_progress || cfg->move_cleanup_count)
+ return -EBUSY;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ old_vector = cfg->vector;
+ if (old_vector) {
+ cpumask_and(tmp_mask, mask, cpu_online_mask);
+ cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+ if (!cpumask_empty(tmp_mask)) {
+ free_cpumask_var(tmp_mask);
+ return 0;
+ }
+ }
+
+ /* Only try and allocate irqs on cpus that are present */
+ err = -ENOSPC;
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
+ int new_cpu;
+ int vector, offset;
+
+ vector_allocation_domain(cpu, tmp_mask);
+
+ vector = current_vector;
+ offset = current_offset;
+ next:
+ vector += 8;
+ if (vector >= first_system_vector) {
+ /* If out of vectors on large boxen, must share them. */
+ offset = (offset + 1) % 8;
+ vector = FIRST_DEVICE_VECTOR + offset;
+ }
+ if (unlikely(current_vector == vector))
+ continue;
+
+ if (test_bit(vector, used_vectors))
+ goto next;
+
++#ifdef CONFIG_KDB
++ if (vector == KDBENTER_VECTOR)
++ goto next;
++#endif /* CONFIG_KDB */
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+ if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+ goto next;
+ /* Found one! */
+ current_vector = vector;
+ current_offset = offset;
+ if (old_vector) {
+ cfg->move_in_progress = 1;
+ cpumask_copy(cfg->old_domain, cfg->domain);
+ }
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+ per_cpu(vector_irq, new_cpu)[vector] = irq;
+ cfg->vector = vector;
+ cpumask_copy(cfg->domain, tmp_mask);
+ err = 0;
+ break;
+ }
+ free_cpumask_var(tmp_mask);
+ return err;
+ }
+
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+ {
+ int err;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ err = __assign_irq_vector(irq, cfg, mask);
+ spin_unlock_irqrestore(&vector_lock, flags);
+ return err;
+ }
+
+ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
+ {
+ int cpu, vector;
+
+ BUG_ON(!cfg->vector);
+
+ vector = cfg->vector;
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+ per_cpu(vector_irq, cpu)[vector] = -1;
+
+ cfg->vector = 0;
+ cpumask_clear(cfg->domain);
+
+ if (likely(!cfg->move_in_progress))
+ return;
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+ vector++) {
+ if (per_cpu(vector_irq, cpu)[vector] != irq)
+ continue;
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ break;
+ }
+ }
+ cfg->move_in_progress = 0;
+ }
+
+ void __setup_vector_irq(int cpu)
+ {
+ /* Initialize vector_irq on a new cpu */
+ /* This function must be called with vector_lock held */
+ int irq, vector;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+
+ /* Mark the inuse vectors */
+ for_each_irq_desc(irq, desc) {
+ cfg = desc->chip_data;
+ if (!cpumask_test_cpu(cpu, cfg->domain))
+ continue;
+ vector = cfg->vector;
+ per_cpu(vector_irq, cpu)[vector] = irq;
+ }
+ /* Mark the free vectors */
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
+ irq = per_cpu(vector_irq, cpu)[vector];
+ if (irq < 0)
+ continue;
+
+ cfg = irq_cfg(irq);
+ if (!cpumask_test_cpu(cpu, cfg->domain))
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ }
+ }
+
+ static struct irq_chip ioapic_chip;
+ #ifdef CONFIG_INTR_REMAP
+ static struct irq_chip ir_ioapic_chip;
+ #endif
+
+ #define IOAPIC_AUTO -1
+ #define IOAPIC_EDGE 0
+ #define IOAPIC_LEVEL 1
+
+ #ifdef CONFIG_X86_32
+ static inline int IO_APIC_irq_trigger(int irq)
+ {
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+ }
+ #else
+ static inline int IO_APIC_irq_trigger(int irq)
+ {
+ return 1;
+ }
+ #endif
+
+ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
+ {
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ desc->status |= IRQ_LEVEL;
+ else
+ desc->status &= ~IRQ_LEVEL;
+
+ #ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ desc->status |= IRQ_MOVE_PCNTXT;
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_edge_irq, "edge");
+ return;
+ }
+ #endif
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_edge_irq, "edge");
+ }
+
+ static int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
+ {
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry, 0, sizeof(*entry));
+
+ #ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+ struct irte irte;
+ struct IR_IO_APIC_route_entry *ir_entry =
+ (struct IR_IO_APIC_route_entry *) entry;
+ int index;
+
+ if (!iommu)
+ panic("No mapping iommu for ioapic %d\n", apic);
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = trigger;
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = vector;
+ irte.dest_id = IRTE_DEST(destination);
+
+ modify_irte(irq, &irte);
+
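+ /* In remappable format (format = 1) the RTE carries only a 16-bit
+ * IRTE handle, split into index (low 15 bits) and index2 (bit 15). */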
+ ir_entry->index2 = (index >> 15) & 0x1;
+ ir_entry->zero = 0;
+ ir_entry->format = 1;
+ ir_entry->index = (index & 0x7fff);
+ } else
+ #endif
+ {
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->dest = destination;
+ }
+
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = trigger;
+ entry->polarity = polarity;
+ entry->vector = vector;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (trigger)
+ entry->mask = 1;
+ return 0;
+ }
+
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
+ int trigger, int polarity)
+ {
+ struct irq_cfg *cfg;
+ struct IO_APIC_route_entry entry;
+ unsigned int dest;
+
+ if (!IO_APIC_IRQ(irq))
+ return;
+
+ cfg = desc->chip_data;
+
+ if (assign_irq_vector(irq, cfg, TARGET_CPUS))
+ return;
+
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i)\n",
+ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+ irq, trigger, polarity);
+
+
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ dest, trigger, polarity, cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+ __clear_irq_vector(irq, cfg);
+ return;
+ }
+
+ ioapic_register_intr(irq, desc, trigger);
+ if (irq < NR_IRQS_LEGACY)
+ disable_8259A_irq(irq);
+
+ ioapic_write_entry(apic, pin, entry);
+ }
+
+ static void __init setup_IO_APIC_irqs(void)
+ {
+ int apic, pin, idx, irq;
+ int notcon = 0;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if (idx == -1) {
+ if (!notcon) {
+ notcon = 1;
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " %d-%d",
+ mp_ioapics[apic].mp_apicid,
+ pin);
+ } else
+ apic_printk(APIC_VERBOSE, " %d-%d",
+ mp_ioapics[apic].mp_apicid,
+ pin);
+ continue;
+ }
+ if (notcon) {
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ notcon = 0;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ #ifdef CONFIG_X86_32
+ if (multi_timer_check(apic, irq))
+ continue;
+ #endif
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
+ }
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, apic, pin);
+
+ setup_IO_APIC_irq(apic, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
+ }
+ }
+
+ if (notcon)
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ }
+
+ /*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+ int vector)
+ {
+ struct IO_APIC_route_entry entry;
+
+ #ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ return;
+ #endif
+
+ memset(&entry, 0, sizeof(entry));
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 1; /* mask IRQ now */
+ entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scenes we may have an 8259A-master in AEOI mode ...
+ */
+ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ ioapic_write_entry(apic, pin, entry);
+ }
+
+
+ __apicdebuginit(void) print_IO_APIC(void)
+ {
+ int apic, i;
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+ union IO_APIC_reg_03 reg_03;
+ unsigned long flags;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ unsigned int irq;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ reg_01.raw = io_apic_read(apic, 1);
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(apic, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(apic, 3);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+ * but the value of reg_02 is read as the previous read register
+ * value, so ignore it if reg_02 == reg_01.
+ */
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+ }
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+ * or reg_03, but the value of reg_0[23] is read as the previous read
+ * register value, so ignore it if reg_03 == reg_0[12].
+ */
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+ " Stat Dmod Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ printk(KERN_DEBUG " %02x %03X ",
+ i,
+ entry.dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for_each_irq_desc(irq, desc) {
+ struct irq_pin_list *entry;
+
+ cfg = desc->chip_data;
+ entry = cfg->irq_2_pin;
+ if (!entry)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", irq);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+ printk("\n");
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+
+ return;
+ }
+
+ __apicdebuginit(void) print_APIC_bitfield(int base)
+ {
+ unsigned int v;
+ int i, j;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+ }
+
+ __apicdebuginit(void) print_local_APIC(void *dummy)
+ {
+ unsigned int v, ver, maxlvt;
+ u64 icr;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = lapic_get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (!APIC_XAPIC(ver)) {
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ }
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ /*
+ * Remote read supported only in the 82489DX and local APIC for
+ * Pentium processors.
+ */
+ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ if (!x2apic_enabled()) {
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ }
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+ }
+
+ __apicdebuginit(void) print_all_local_APICs(void)
+ {
+ int cpu;
+
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ preempt_enable();
+ }
+
+ __apicdebuginit(void) print_PIC(void)
+ {
+ unsigned int v;
+ unsigned long flags;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
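+ /* OCW3 0x0b selects the ISR for the next read; 0x0a switches the
+ * read-back register back to the IRR. */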
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+ }
+
+ __apicdebuginit(int) print_all_ICs(void)
+ {
+ print_PIC();
+ print_all_local_APICs();
+ print_IO_APIC();
+
+ return 0;
+ }
+
+ fs_initcall(print_all_ICs);
+
+
+ /* Where, if anywhere, the i8259 is connected in external int mode */
+ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+ void __init enable_IO_APIC(void)
+ {
+ union IO_APIC_reg_01 reg_01;
+ int i8259_apic, i8259_pin;
+ int apic;
+ unsigned long flags;
+
+ #ifdef CONFIG_X86_32
+ int i;
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+ #endif
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ int pin;
+ /* See if any of the pins is in ExtINT mode */
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+ entry = ioapic_read_entry(apic, pin);
+
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
+ }
+ }
+ }
+ found_i8259:
+ /* Look to see if the MP table has reported the ExtINT */
+ /* If we could not find the appropriate pin by looking at the ioapic
+ * the i8259 probably is not connected to the ioapic but give the
+ * mptable a chance anyway.
+ */
+ i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
+ i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+ /* Trust the MP table if nothing is setup in the hardware */
+ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+ ioapic_i8259.pin = i8259_pin;
+ ioapic_i8259.apic = i8259_apic;
+ }
+ /* Complain if the MP table and the hardware disagree */
+ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+ {
+ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+ }
+
+ /*
+ * Not an __init, needed by the reboot code
+ */
+ void disable_IO_APIC(void)
+ {
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ /*
+ * If the i8259 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrupts can be delivered.
+ */
+ if (ioapic_i8259.pin != -1) {
+ struct IO_APIC_route_entry entry;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
+ entry.vector = 0;
+ entry.dest = read_apic_id();
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+ }
+
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+ }
+
+ #ifdef CONFIG_X86_32
+ /*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+
+ static void __init setup_ioapic_ids_from_mpc(void)
+ {
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+ return;
+
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mp_apicid;
+
+ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mp_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.bits.ID);
+ mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic].mp_apicid)) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mp_apicid);
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ mp_ioapics[apic].mp_apicid = i;
+ } else {
+ physid_mask_t tmp;
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mp_ioapics[apic].mp_apicid);
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mp_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mp_dstapic == old_id)
+ mp_irqs[i].mp_dstapic
+ = mp_ioapics[apic].mp_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mp_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
+ }
+ #endif
+
+ int no_timer_check __initdata;
+
+ static int __init notimercheck(char *s)
+ {
+ no_timer_check = 1;
+ return 1;
+ }
+ __setup("no_timer_check", notimercheck);
+
+ /*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+ static int __init timer_irq_works(void)
+ {
+ unsigned long t1 = jiffies;
+ unsigned long flags;
+
+ if (no_timer_check)
+ return 1;
+
+ local_save_flags(flags);
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+ local_irq_restore(flags);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+
+ /* jiffies wrap? */
+ if (time_after(jiffies, t1 + 4))
+ return 1;
+ return 0;
+ }
+
+ /*
+ * In the SMP+IOAPIC case it might happen that an unspecified
+ * number of pending IRQ events are left unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+ /*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+ /*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * to return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+ static unsigned int startup_ioapic_irq(unsigned int irq)
+ {
+ int was_pending = 0;
+ unsigned long flags;
+ struct irq_cfg *cfg;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < NR_IRQS_LEGACY) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ cfg = irq_cfg(irq);
+ __unmask_IO_APIC_irq(cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+ }
+
+ #ifdef CONFIG_X86_64
+ static int ioapic_retrigger_irq(unsigned int irq)
+ {
+
+ struct irq_cfg *cfg = irq_cfg(irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return 1;
+ }
+ #else
+ static int ioapic_retrigger_irq(unsigned int irq)
+ {
+ send_IPI_self(irq_cfg(irq)->vector);
+
+ return 1;
+ }
+ #endif
+
+ /*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+ #ifdef CONFIG_SMP
+
+ #ifdef CONFIG_INTR_REMAP
+ static void ir_irq_migration(struct work_struct *work);
+
+ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+ /*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE as well with the updated
+ * vector information, along with modifying the IRTE with vector and destination.
+ * So irq migration for level triggered is a little more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In the future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+ static void
+ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ struct irq_cfg *cfg;
+ struct irte irte;
+ int modify_ioapic_rte;
+ unsigned int dest;
+ unsigned long flags;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return;
+
+ irq = desc->irq;
+ if (get_irte(irq, &irte))
+ return;
+
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return;
+
+ set_extra_move_desc(desc, mask);
+
+ dest = cpu_mask_to_apicid_and(cfg->domain, mask);
+
+ modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ if (modify_ioapic_rte) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * Modify the IRTE and flush the interrupt entry cache.
+ */
+ modify_irte(irq, &irte);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ cpumask_copy(&desc->affinity, mask);
+ }
+
+ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
+ {
+ int ret = -1;
+ struct irq_cfg *cfg = desc->chip_data;
+
+ mask_IO_APIC_irq_desc(desc);
+
+ if (io_apic_level_ack_pending(cfg)) {
+ /*
+ * Interrupt in progress. Migrating irq now will change the
+ * vector information in the IO-APIC RTE and that will confuse
+ * the EOI broadcast performed by cpu.
+ * So, delay the irq migration to the next instance.
+ */
+ schedule_delayed_work(&ir_migration_work, 1);
+ goto unmask;
+ }
+
+ /* everything is clear. we have right of way */
+ migrate_ioapic_irq_desc(desc, &desc->pending_mask);
+
+ ret = 0;
+ desc->status &= ~IRQ_MOVE_PENDING;
+ cpumask_clear(&desc->pending_mask);
+
+ unmask:
+ unmask_IO_APIC_irq_desc(desc);
+
+ return ret;
+ }
+
+ static void ir_irq_migration(struct work_struct *work)
+ {
+ unsigned int irq;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(irq, desc) {
+ if (desc->status & IRQ_MOVE_PENDING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->chip->set_affinity ||
+ !(desc->status & IRQ_MOVE_PENDING)) {
+ desc->status &= ~IRQ_MOVE_PENDING;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ continue;
+ }
+
+ desc->chip->set_affinity(irq, &desc->pending_mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ }
+ }
+
+ /*
+ * Migrates the IRQ destination in the process context.
+ */
+ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+ const struct cpumask *mask)
+ {
+ if (desc->status & IRQ_LEVEL) {
+ desc->status |= IRQ_MOVE_PENDING;
+ cpumask_copy(&desc->pending_mask, mask);
+ migrate_irq_remapped_level_desc(desc);
+ return;
+ }
+
+ migrate_ioapic_irq_desc(desc, mask);
+ }
+ static void set_ir_ioapic_affinity_irq(unsigned int irq,
+ const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
+ }
+ #endif
+
+ asmlinkage void smp_irq_move_cleanup_interrupt(void)
+ {
+ unsigned vector, me;
+
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+
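+ /* Scan this CPU's vector table and release vectors left over from a
+ * completed irq migration, leaving the currently assigned vector alone. */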
+ me = smp_processor_id();
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ irq = __get_cpu_var(vector_irq)[vector];
+
+ if (irq == -1)
+ continue;
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
+ cfg = irq_cfg(irq);
+ spin_lock(&desc->lock);
+ if (!cfg->move_cleanup_count)
+ goto unlock;
+
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ goto unlock;
+
+ __get_cpu_var(vector_irq)[vector] = -1;
+ cfg->move_cleanup_count--;
+ unlock:
+ spin_unlock(&desc->lock);
+ }
+
+ irq_exit();
+ }
+
+ static void irq_complete_move(struct irq_desc **descp)
+ {
+ struct irq_desc *desc = *descp;
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned vector, me;
+
+ if (likely(!cfg->move_in_progress)) {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ if (likely(!cfg->move_desc_pending))
+ return;
+
+ /* domain has not changed, but affinity did */
+ me = smp_processor_id();
+ if (cpu_isset(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ cfg->move_desc_pending = 0;
+ }
+ #endif
+ return;
+ }
+
+ vector = ~get_irq_regs()->orig_ax;
+ me = smp_processor_id();
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ #endif
+
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ send_cleanup_vector(cfg);
+ }
+ #else
+ static inline void irq_complete_move(struct irq_desc **descp) {}
+ #endif
+
+ #ifdef CONFIG_INTR_REMAP
+ static void ack_x2apic_level(unsigned int irq)
+ {
+ ack_x2APIC_irq();
+ }
+
+ static void ack_x2apic_edge(unsigned int irq)
+ {
+ ack_x2APIC_irq();
+ }
+
+ #endif
+
+ static void ack_apic_edge(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ irq_complete_move(&desc);
+ move_native_irq(irq);
+ ack_APIC_irq();
+ }
+
+ atomic_t irq_mis_count;
+
+ static void ack_apic_level(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ #ifdef CONFIG_X86_32
+ unsigned long v;
+ int i;
+ #endif
+ struct irq_cfg *cfg;
+ int do_unmask_irq = 0;
+
+ irq_complete_move(&desc);
+ #ifdef CONFIG_GENERIC_PENDING_IRQ
+ /* If we are moving the irq we need to mask it */
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+ do_unmask_irq = 1;
+ mask_IO_APIC_irq_desc(desc);
+ }
+ #endif
+
+ #ifdef CONFIG_X86_32
+ /*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+ cfg = desc->chip_data;
+ i = cfg->vector;
+
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+ #endif
+
+ /*
+ * We must acknowledge the irq before we move it or the acknowledge will
+ * not propagate properly.
+ */
+ ack_APIC_irq();
+
+ /* Now we can move and re-enable the irq */
+ if (unlikely(do_unmask_irq)) {
+ /* Only migrate the irq if the ack has been received.
+ *
+ * On rare occasions the broadcast level triggered ack gets
+ * delayed going to ioapics, and if we reprogram the
+ * vector while Remote IRR is still set the irq will never
+ * fire again.
+ *
+ * To prevent this scenario we read the Remote IRR bit
+ * of the ioapic. This has two effects.
+ * - On any sane system the read of the ioapic will
+ * flush writes (and acks) going to the ioapic from
+ * this cpu.
+ * - We get to see if the ACK has actually been delivered.
+ *
+ * Based on failed experiments of reprogramming the
+ * ioapic entry from outside of irq context starting
+ * with masking the ioapic entry and then polling until
+ * Remote IRR was clear before reprogramming the
+ * ioapic I don't trust the Remote IRR bit to be
+ * completely accurate.
+ *
+ * However there appears to be no other way to plug
+ * this race, so if the Remote IRR bit is not
+ * accurate and is causing problems then it is a hardware bug
+ * and you can go talk to the chipset vendor about it.
+ */
+ cfg = desc->chip_data;
+ if (!io_apic_level_ack_pending(cfg))
+ move_masked_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
+ }
+
+ #ifdef CONFIG_X86_32
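+ /* A clear TMR bit for this level-triggered irq means the erratum
+ * described above has hit: simulate the lost EOI by briefly switching
+ * the masked RTE to edge and back to level. */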
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(cfg);
+ __unmask_and_level_IO_APIC_irq(cfg);
+ spin_unlock(&ioapic_lock);
+ }
+ #endif
+ }
+
+ static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_apic_edge,
+ .eoi = ack_apic_level,
+ #ifdef CONFIG_SMP
+ .set_affinity = set_ioapic_affinity_irq,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ #ifdef CONFIG_INTR_REMAP
+ static struct irq_chip ir_ioapic_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_x2apic_edge,
+ .eoi = ack_x2apic_level,
+ #ifdef CONFIG_SMP
+ .set_affinity = set_ir_ioapic_affinity_irq,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+ #endif
+
+ static inline void init_IO_APIC_traps(void)
+ {
+ int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for_each_irq_desc(irq, desc) {
+ cfg = desc->chip_data;
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < NR_IRQS_LEGACY)
+ make_8259A_irq(irq);
+ else
+ /* Strange. Oh, well.. */
+ desc->chip = &no_irq_chip;
+ }
+ }
+ }
+
+ /*
+ * The local APIC irq-chip implementation:
+ */
+
+ static void mask_lapic_irq(unsigned int irq)
+ {
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+ }
+
+ static void unmask_lapic_irq(unsigned int irq)
+ {
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+ }
+
+ static void ack_lapic_irq(unsigned int irq)
+ {
+ ack_APIC_irq();
+ }
+
+ static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC",
+ .mask = mask_lapic_irq,
+ .unmask = unmask_lapic_irq,
+ .ack = ack_lapic_irq,
+ };
+
+ static void lapic_register_intr(int irq, struct irq_desc *desc)
+ {
+ desc->status &= ~IRQ_LEVEL;
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ "edge");
+ }
+
+ static void __init setup_nmi(void)
+ {
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+ enable_NMI_through_LVT0();
+
+ apic_printk(APIC_VERBOSE, " done.\n");
+ }
+
+ /*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+ static inline void __init unlock_ExtINT_logic(void)
+ {
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (apic == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ entry0 = ioapic_read_entry(apic, pin);
+ clear_IO_APIC_pin(apic, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ ioapic_write_entry(apic, pin, entry1);
+
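+ /* Program the RTC for periodic interrupts on IRQ8; servicing them
+ * produces the INTA cycles that release the 8259A glue logic. */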
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(apic, pin);
+
+ ioapic_write_entry(apic, pin, entry0);
+ }
+
+ static int disable_timer_pin_1 __initdata;
+ /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+ static int __init disable_timer_pin_setup(char *arg)
+ {
+ disable_timer_pin_1 = 1;
+ return 0;
+ }
+ early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+ int timer_through_8259 __initdata;
+
+ /*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
+ */
+ static inline void __init check_timer(void)
+ {
+ struct irq_desc *desc = irq_to_desc(0);
+ struct irq_cfg *cfg = desc->chip_data;
+ int cpu = boot_cpu_id;
+ int apic1, pin1, apic2, pin2;
+ unsigned long flags;
+ unsigned int ver;
+ int no_pin1 = 0;
+
+ local_irq_save(flags);
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ assign_irq_vector(0, cfg, TARGET_CPUS);
+
+ /*
+ * As IRQ0 is to be enabled in the 8259A, the virtual
+ * wire has to be disabled in the local APIC. Also
+ * timer interrupts need to be acknowledged manually in
+ * the 8259A for the i82489DX when using the NMI
+ * watchdog as that APIC treats NMIs as level-triggered.
+ * The AEOI mode will finish them in the 8259A
+ * automatically.
+ */
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ #ifdef CONFIG_X86_32
+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+ #endif
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ apic1 = find_isa_irq_apic(0, mp_INT);
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
+
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ cfg->vector, apic1, pin1, apic2, pin2);
+
+ /*
+ * Some BIOS writers are clueless and report the ExtINTA
+ * I/O APIC input from the cascaded 8259A as the timer
+ * interrupt input. So just in case, if only one pin
+ * was found above, try it both directly and through the
+ * 8259A.
+ */
+ if (pin1 == -1) {
+ #ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
+ #endif
+ pin1 = pin2;
+ apic1 = apic2;
+ no_pin1 = 1;
+ } else if (pin2 == -1) {
+ pin2 = pin1;
+ apic2 = apic1;
+ }
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ if (no_pin1) {
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ }
+ unmask_IO_APIC_irq_desc(desc);
+ if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ enable_8259A_irq(0);
+ }
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
+ goto out;
+ }
+ #ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
+ #endif
+ clear_IO_APIC_pin(apic1, pin1);
+ if (!no_pin1)
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+ "8254 timer not connected to IO-APIC\n");
+
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+ "(IRQ0) through the 8259A ...\n");
+ apic_printk(APIC_QUIET, KERN_INFO
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+ unmask_IO_APIC_irq_desc(desc);
+ enable_8259A_irq(0);
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+ timer_through_8259 = 1;
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ }
+ goto out;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ disable_8259A_irq(0);
+ clear_IO_APIC_pin(apic2, pin2);
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+ }
+
+ if (nmi_watchdog == NMI_IO_APIC) {
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
+ "through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = NMI_NONE;
+ }
+ #ifdef CONFIG_X86_32
+ timer_ack = 0;
+ #endif
+
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as Virtual Wire IRQ...\n");
+
+ lapic_register_intr(0, desc);
+ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ goto out;
+ }
+ disable_8259A_irq(0);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+
+ apic_printk(APIC_QUIET, KERN_INFO
+ "...trying to set up timer as ExtINT IRQ...\n");
+
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+ goto out;
+ }
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option.\n");
+ out:
+ local_irq_restore(flags);
+ }
+
+ /*
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices. However there may be an I/O APIC pin available for
+ * this interrupt regardless. The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A. In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table. With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default. We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now. No actual device should request
+ * it anyway. --macro
+ */
+ #define PIC_IRQS (1 << PIC_CASCADE_IR)
+
+ void __init setup_IO_APIC(void)
+ {
+
+ #ifdef CONFIG_X86_32
+ enable_IO_APIC();
+ #else
+ /*
+ * the call to enable_IO_APIC() is moved to setup_local_APIC() for the BP
+ */
+ #endif
+
+ io_apic_irqs = ~PIC_IRQS;
+
+ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ #ifdef CONFIG_X86_32
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+ #endif
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ }
+
+ /*
+ * Called after all the initialization is done. If we didn't find any
+ * APIC bugs then we can allow the modify fast path
+ */
+
+ static int __init io_apic_bug_finalize(void)
+ {
+ if (sis_apic_bug == -1)
+ sis_apic_bug = 0;
+ return 0;
+ }
+
+ late_initcall(io_apic_bug_finalize);
+
+ struct sysfs_ioapic_data {
+ struct sys_device dev;
+ struct IO_APIC_route_entry entry[0];
+ };
+ static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
+
+ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+ {
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++)
+ *entry = ioapic_read_entry(dev->id, i);
+
+ return 0;
+ }
+
+ static int ioapic_resume(struct sys_device *dev)
+ {
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ union IO_APIC_reg_00 reg_00;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+
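+ /* Restore the IO-APIC ID if it was lost across suspend, then replay
+ * the saved routing entries. */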
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(dev->id, 0);
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
+ ioapic_write_entry(dev->id, i, entry[i]);
+
+ return 0;
+ }
+
+ static struct sysdev_class ioapic_sysdev_class = {
+ .name = "ioapic",
+ .suspend = ioapic_suspend,
+ .resume = ioapic_resume,
+ };
+
+ static int __init ioapic_init_sysfs(void)
+ {
+ struct sys_device *dev;
+ int i, size, error;
+
+ error = sysdev_class_register(&ioapic_sysdev_class);
+ if (error)
+ return error;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ * sizeof(struct IO_APIC_route_entry);
+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
+ if (!mp_ioapic_data[i]) {
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ dev = &mp_ioapic_data[i]->dev;
+ dev->id = i;
+ dev->cls = &ioapic_sysdev_class;
+ error = sysdev_register(dev);
+ if (error) {
+ kfree(mp_ioapic_data[i]);
+ mp_ioapic_data[i] = NULL;
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ }
+
+ return 0;
+ }
+
+ device_initcall(ioapic_init_sysfs);
+
+ /*
+ * Dynamic irq allocation and deallocation
+ */
+ unsigned int create_irq_nr(unsigned int irq_want)
+ {
+ /* Allocate an unused irq */
+ unsigned int irq;
+ unsigned int new;
+ unsigned long flags;
+ struct irq_cfg *cfg_new = NULL;
+ int cpu = boot_cpu_id;
+ struct irq_desc *desc_new = NULL;
+
+ irq = 0;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = irq_want; new < NR_IRQS; new++) {
+ if (platform_legacy_irq(new))
+ continue;
+
+ desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ if (!desc_new) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", new);
+ continue;
+ }
+ cfg_new = desc_new->chip_data;
+
+ if (cfg_new->vector != 0)
+ continue;
+ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
+ irq = new;
+ break;
+ }
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq > 0) {
+ dynamic_irq_init(irq);
+ /* restore it, in case dynamic_irq_init cleared it */
+ if (desc_new)
+ desc_new->chip_data = cfg_new;
+ }
+ return irq;
+ }
+
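+ /* create_irq() scans upward from nr_irqs_gsi, so dynamically created
+ * irqs sit above the boot-time GSIs. */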
+ static int nr_irqs_gsi = NR_IRQS_LEGACY;
+ int create_irq(void)
+ {
+ unsigned int irq_want;
+ int irq;
+
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want);
+
+ if (irq == 0)
+ irq = -1;
+
+ return irq;
+ }
+
+ void destroy_irq(unsigned int irq)
+ {
+ unsigned long flags;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+
+ /* store it, in case dynamic_irq_cleanup clears it */
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
+ dynamic_irq_cleanup(irq);
+ /* connect back irq_cfg */
+ if (desc)
+ desc->chip_data = cfg;
+
+ #ifdef CONFIG_INTR_REMAP
+ free_irte(irq);
+ #endif
+ spin_lock_irqsave(&vector_lock, flags);
+ __clear_irq_vector(irq, cfg);
+ spin_unlock_irqrestore(&vector_lock, flags);
+ }
+
+ /*
+ * MSI message composition
+ */
+ #ifdef CONFIG_PCI_MSI
+ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+ {
+ struct irq_cfg *cfg;
+ int err;
+ unsigned dest;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
+ if (err)
+ return err;
+
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+
+ #ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irte irte;
+ int ir_index;
+ u16 sub_handle;
+
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+ BUG_ON(ir_index == -1);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = 0; /* edge */
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ modify_irte(irq, &irte);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(ir_index) |
+ MSI_ADDR_IR_INDEX2(ir_index);
+ } else
+ #endif
+ {
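+ /* Classic (non-remapped) MSI: the address selects dest mode and
+ * destination APIC, the data selects trigger, delivery mode and vector. */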
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(cfg->vector);
+ }
+ return err;
+ }
+
+ #ifdef CONFIG_SMP
+ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ struct msi_msg msg;
+ unsigned int dest;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return;
+
+ cfg = desc->chip_data;
+
+ read_msi_msg_desc(desc, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ write_msi_msg_desc(desc, &msg);
+ }
+ #ifdef CONFIG_INTR_REMAP
+ /*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+ static void
+ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ struct irte irte;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return;
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * atomically update the IRTE with the new destination and vector.
+ */
+ modify_irte(irq, &irte);
+
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+ }
+
+ #endif
+ #endif /* CONFIG_SMP */
+
+ /*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+ static struct irq_chip msi_chip = {
+ .name = "PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_apic_edge,
+ #ifdef CONFIG_SMP
+ .set_affinity = set_msi_irq_affinity,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ #ifdef CONFIG_INTR_REMAP
+ static struct irq_chip msi_ir_chip = {
+ .name = "IR-PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_x2apic_edge,
+ #ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ /*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+ {
+ struct intel_iommu *iommu;
+ int index;
+
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ printk(KERN_ERR
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
+ return -ENOENT;
+ }
+
+ index = alloc_irte(iommu, irq, nvec);
+ if (index < 0) {
+ printk(KERN_ERR
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
+ pci_name(dev));
+ return -ENOSPC;
+ }
+ return index;
+ }
+ #endif
+
+ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+ {
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ set_irq_msi(irq, msidesc);
+ write_msi_msg(irq, &msg);
+
+ #ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irq_desc *desc = irq_to_desc(irq);
+ /*
+ * irq migration in process context
+ */
+ desc->status |= IRQ_MOVE_PCNTXT;
+ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+ } else
+ #endif
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+ return 0;
+ }
+
+ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
+ {
+ unsigned int irq;
+ int ret;
+ unsigned int irq_want;
+
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want);
+ if (irq == 0)
+ return -1;
+
+ #ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ ret = msi_alloc_irte(dev, irq, 1);
+ if (ret < 0)
+ goto error;
+ no_ir:
+ #endif
+ ret = setup_msi_irq(dev, msidesc, irq);
+ if (ret < 0) {
+ destroy_irq(irq);
+ return ret;
+ }
+ return 0;
+
+ #ifdef CONFIG_INTR_REMAP
+ error:
+ destroy_irq(irq);
+ return ret;
+ #endif
+ }
+
+ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+ {
+ unsigned int irq;
+ int ret, sub_handle;
+ struct msi_desc *msidesc;
+ unsigned int irq_want;
+
+ #ifdef CONFIG_INTR_REMAP
+ struct intel_iommu *iommu = NULL;
+ int index = 0;
+ #endif
+
+ irq_want = nr_irqs_gsi;
+ sub_handle = 0;
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = create_irq_nr(irq_want);
+ irq_want++;
+ if (irq == 0)
+ return -1;
+ #ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ if (!sub_handle) {
+ /*
+ * allocate the consecutive block of IRTEs
+ * for 'nvec'
+ */
+ index = msi_alloc_irte(dev, irq, nvec);
+ if (index < 0) {
+ ret = index;
+ goto error;
+ }
+ } else {
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ ret = -ENOENT;
+ goto error;
+ }
+ /*
+ * set up the mapping between the irq and the IRTE
+ * base index, with the sub_handle pointing to the
+ * appropriate interrupt remap table entry.
+ */
+ set_irte_irq(irq, iommu, index, sub_handle);
+ }
+ no_ir:
+ #endif
+ ret = setup_msi_irq(dev, msidesc, irq);
+ if (ret < 0)
+ goto error;
+ sub_handle++;
+ }
+ return 0;
+
+ error:
+ destroy_irq(irq);
+ return ret;
+ }
+
+ void arch_teardown_msi_irq(unsigned int irq)
+ {
+ destroy_irq(irq);
+ }
+
+ #ifdef CONFIG_DMAR
+ #ifdef CONFIG_SMP
+ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ struct msi_msg msg;
+ unsigned int dest;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return;
+
+ cfg = desc->chip_data;
+
+ dmar_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ dmar_msi_write(irq, &msg);
+ }
+
+ #endif /* CONFIG_SMP */
+
+ struct irq_chip dmar_msi_type = {
+ .name = "DMAR_MSI",
+ .unmask = dmar_msi_unmask,
+ .mask = dmar_msi_mask,
+ .ack = ack_apic_edge,
+ #ifdef CONFIG_SMP
+ .set_affinity = dmar_msi_set_affinity,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ int arch_setup_dmar_msi(unsigned int irq)
+ {
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg);
+ if (ret < 0)
+ return ret;
+ dmar_msi_write(irq, &msg);
+ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
+ return 0;
+ }
+ #endif
+
+ #ifdef CONFIG_HPET_TIMER
+
+ #ifdef CONFIG_SMP
+ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ struct msi_msg msg;
+ unsigned int dest;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return;
+
+ cfg = desc->chip_data;
+
+ hpet_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ hpet_msi_write(irq, &msg);
+ }
+
+ #endif /* CONFIG_SMP */
+
+ struct irq_chip hpet_msi_type = {
+ .name = "HPET_MSI",
+ .unmask = hpet_msi_unmask,
+ .mask = hpet_msi_mask,
+ .ack = ack_apic_edge,
+ #ifdef CONFIG_SMP
+ .set_affinity = hpet_msi_set_affinity,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ int arch_setup_hpet_msi(unsigned int irq)
+ {
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ hpet_msi_write(irq, &msg);
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+ "edge");
+
+ return 0;
+ }
+ #endif
+
+ #endif /* CONFIG_PCI_MSI */
+ /*
+ * Hypertransport interrupt support
+ */
+ #ifdef CONFIG_HT_IRQ
+
+ #ifdef CONFIG_SMP
+
+ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+ {
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
+
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+ write_ht_irq_msg(irq, &msg);
+ }
+
+ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ unsigned int dest;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return;
+
+ cfg = desc->chip_data;
+
+ target_ht_irq(irq, dest, cfg->vector);
+ }
+
+ #endif
+
+ static struct irq_chip ht_irq_chip = {
+ .name = "PCI-HT",
+ .mask = mask_ht_irq,
+ .unmask = unmask_ht_irq,
+ .ack = ack_apic_edge,
+ #ifdef CONFIG_SMP
+ .set_affinity = set_ht_irq_affinity,
+ #endif
+ .retrigger = ioapic_retrigger_irq,
+ };
+
+ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+ {
+ struct irq_cfg *cfg;
+ int err;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
+ if (!err) {
+ struct ht_irq_msg msg;
+ unsigned dest;
+
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
+ HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_VECTOR(cfg->vector) |
+ ((INT_DEST_MODE == 0) ?
+ HT_IRQ_LOW_DM_PHYSICAL :
+ HT_IRQ_LOW_DM_LOGICAL) |
+ HT_IRQ_LOW_RQEOI_EDGE |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ HT_IRQ_LOW_MT_FIXED :
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
+
+ write_ht_irq_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ handle_edge_irq, "edge");
+
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
+ }
+ return err;
+ }
+ #endif /* CONFIG_HT_IRQ */
+
+ #ifdef CONFIG_X86_64
+ /*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset)
+ {
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_cfg *cfg;
+ int mmr_pnode;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long flags;
+ int err;
+
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
+ if (err != 0)
+ return err;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = cpu_mask_to_apicid(eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ return irq;
+ }
+
+ /*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+ {
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int mmr_pnode;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+ entry->mask = 1;
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+ }
+ #endif /* CONFIG_X86_64 */
+
+ int __init io_apic_get_redir_entries (int ioapic)
+ {
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.entries;
+ }
+
+ void __init probe_nr_irqs_gsi(void)
+ {
+ int idx;
+ int nr = 0;
+
+ for (idx = 0; idx < nr_ioapics; idx++)
+ nr += io_apic_get_redir_entries(idx) + 1;
+
+ if (nr > nr_irqs_gsi)
+ nr_irqs_gsi = nr;
+ }
+
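
Since reg_01.bits.entries holds the highest redirection-entry index rather than a count, the "+ 1" in probe_nr_irqs_gsi() matters. A small worked example, with all values assumed for illustration (example_gsi_count is not part of the patch):

    /* Illustration only: a standard 24-pin IO-APIC reports entries == 23,
     * so it contributes 23 + 1 = 24 GSIs; two such IO-APICs give 48. */
    static unsigned int example_gsi_count(void)
    {
            unsigned int reported_entries = 23;     /* assumed: highest index */
            unsigned int example_nr_ioapics = 2;    /* assumed: two IO-APICs */

            return example_nr_ioapics * (reported_entries + 1);     /* == 48 */
    }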
+ /* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+
+ #ifdef CONFIG_ACPI
+
+ #ifdef CONFIG_X86_32
+ int __init io_apic_get_unique_id(int ioapic, int apic_id)
+ {
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * support up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(apic_id_map, apic_id)) {
+
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!check_apicid_used(apic_id_map, i))
+ break;
+ }
+
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ tmp = apicid_to_cpu_present(apic_id);
+ physids_or(apic_id_map, apic_id_map, tmp);
+
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id) {
+ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+ return -1;
+ }
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+ return apic_id;
+ }
+
+ int __init io_apic_get_version(int ioapic)
+ {
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+ }
+ #endif
+
+ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
+ {
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
+
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+ }
+
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
+
+ return 0;
+ }
+
+
+ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+ {
+ int i;
+
+ if (skip_ioapic_setup)
+ return -1;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mp_irqtype == mp_INT &&
+ mp_irqs[i].mp_srcbusirq == bus_irq)
+ break;
+ if (i >= mp_irq_entries)
+ return -1;
+
+ *trigger = irq_trigger(i);
+ *polarity = irq_polarity(i);
+ return 0;
+ }
+
+ #endif /* CONFIG_ACPI */
+
+ /*
+ * This function is currently only a helper for the i386 smp boot process, where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online,
+ * so the mask in all cases should simply be TARGET_CPUS.
+ */
+ #ifdef CONFIG_SMP
+ void __init setup_ioapic_dest(void)
+ {
+ int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ const struct cpumask *mask;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+
+ /* setup_IO_APIC_irqs could fail to get a vector for some device
+ * when you have too many devices, because at that time only the boot
+ * cpu is online.
+ */
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
+ if (!cfg->vector) {
+ setup_IO_APIC_irq(ioapic, pin, irq, desc,
+ irq_trigger(irq_entry),
+ irq_polarity(irq_entry));
+ continue;
+
+ }
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = &desc->affinity;
+ else
+ mask = TARGET_CPUS;
+
+ #ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
+ else
+ #endif
+ set_ioapic_affinity_irq_desc(desc, mask);
+ }
+
+ }
+ }
+ #endif
+
+ #define IOAPIC_RESOURCE_NAME_SIZE 11
+
+ static struct resource *ioapic_resources;
+
+ static struct resource * __init ioapic_setup_resources(void)
+ {
+ unsigned long n;
+ struct resource *res;
+ char *mem;
+ int i;
+
+ if (nr_ioapics <= 0)
+ return NULL;
+
+ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+ n *= nr_ioapics;
+
+ mem = alloc_bootmem(n);
+ res = (void *)mem;
+
+ if (mem != NULL) {
+ mem += sizeof(struct resource) * nr_ioapics;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ sprintf(mem, "IOAPIC %u", i);
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
+ }
+ }
+
+ ioapic_resources = res;
+
+ return res;
+ }
+
+ void __init ioapic_init_mappings(void)
+ {
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+ struct resource *ioapic_res;
+ int i;
+
+ ioapic_res = ioapic_setup_resources();
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config) {
+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
+ #ifdef CONFIG_X86_32
+ if (!ioapic_phys) {
+ printk(KERN_ERR
+ "WARNING: bogus zero IO-APIC "
+ "address found in MPTABLE, "
+ "disabling IO/APIC support!\n");
+ smp_found_config = 0;
+ skip_ioapic_setup = 1;
+ goto fake_ioapic_page;
+ }
+ #endif
+ } else {
+ #ifdef CONFIG_X86_32
+ fake_ioapic_page:
+ #endif
+ ioapic_phys = (unsigned long)
+ alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = __pa(ioapic_phys);
+ }
+ set_fixmap_nocache(idx, ioapic_phys);
+ apic_printk(APIC_VERBOSE,
+ "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+
+ if (ioapic_res != NULL) {
+ ioapic_res->start = ioapic_phys;
+ ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res++;
+ }
+ }
+ }
+
+ static int __init ioapic_insert_resources(void)
+ {
+ int i;
+ struct resource *r = ioapic_resources;
+
+ if (!r) {
+ printk(KERN_ERR
+ "IO APIC resources could be not be allocated.\n");
+ return -1;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ insert_resource(&iomem_resource, r);
+ r++;
+ }
+
+ return 0;
+ }
+
+ /* Insert the IO APIC resources after PCI initialization has occurred to handle
+ * IO APICs that are mapped in on a BAR in PCI space. */
+ late_initcall(ioapic_insert_resources);
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/bitops.h>
+ #include <linux/io.h>
+ #include <linux/delay.h>
++#include <linux/perfmon_kern.h>
#include <asm/atomic.h>
#include <asm/system.h>
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (i >= NR_IRQS)
- break;
+ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
/* SYSCALL_VECTOR was reserved in trap_init. */
- if (!test_bit(vector, used_vectors))
- set_intr_gate(vector, interrupt[i]);
+ if (i != SYSCALL_VECTOR)
+ set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
+
+ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
+
+ /* Low priority IPI to cleanup after moving an irq */
+ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+ set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+ #endif
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /* self generated IPI for local APIC timer */
+ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI vectors for APIC spurious and error interrupts */
+ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+ #endif
+
+ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+ /* thermal monitor LVT interrupt */
+ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+ #endif
+
++#ifdef CONFIG_PERFMON
++ set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
++#endif
+ if (!acpi_ioapic)
+ setup_irq(2, &irq2);
+
/* setup after call gates are initialised (usually add in
* the architecture specific gates)
*/
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/bitops.h>
+ #include <linux/acpi.h>
+ #include <linux/io.h>
+ #include <linux/delay.h>
+#include <linux/perfmon_kern.h>
- #include <asm/acpi.h>
#include <asm/atomic.h>
#include <asm/system.h>
- #include <asm/io.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
- #include <asm/delay.h>
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/i8259.h>
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+#ifdef CONFIG_PERFMON
+ alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
+#endif
+ }
+
+ void __init native_init_IRQ(void)
+ {
+ int i;
+
+ init_ISA_irqs();
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != IA32_SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ apic_intr_init();
if (!acpi_ioapic)
setup_irq(2, &irq2);
#include <linux/personality.h>
#include <linux/tick.h>
#include <linux/percpu.h>
+#include <linux/perfmon_kern.h>
#include <linux/prctl.h>
+ #include <linux/dmi.h>
+ #include <linux/ftrace.h>
+ #include <linux/uaccess.h>
+ #include <linux/io.h>
+ #include <linux/kdebug.h>
- #include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
- #include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+
+ pfm_exit_thread();
+ ds_exit_thread(current);
}
void flush_thread(void)
prev = &prev_p->thread;
next = &next_p->thread;
+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
+ pfm_ctxsw_out(prev_p, next_p);
+
- debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
- }
-
- if (next->debugctlmsr != debugctl)
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+ ds_switch_to(prev_p, next_p);
+ else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
+ pfm_ctxsw_in(prev_p, next_p);
+
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
set_debugreg(next->debugreg0, 0);
set_debugreg(next->debugreg1, 1);
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
+#include <linux/perfmon_kern.h>
#include <linux/prctl.h>
+ #include <linux/uaccess.h>
+ #include <linux/io.h>
+ #include <linux/ftrace.h>
- #include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
- #include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
t->io_bitmap_max = 0;
put_cpu();
}
+
+ pfm_exit_thread();
+ ds_exit_thread(current);
}
void flush_thread(void)
prev = &prev_p->thread,
next = &next_p->thread;
+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
+ pfm_ctxsw_out(prev_p, next_p);
+
- debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
- }
-
- if (next->debugctlmsr != debugctl)
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+ ds_switch_to(prev_p, next_p);
+ else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
+ pfm_ctxsw_in(prev_p, next_p);
+
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
loaddebug(next, 0);
loaddebug(next, 1);
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/efi.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
++#include <linux/kexec.h>
+#endif /* CONFIG_KDB */
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
reboot_cpu_id = smp_processor_id();
/* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+ set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
+#if defined(CONFIG_X86_32) && defined(CONFIG_KDB)
+ /*
+ * If this restart is occurring while kdb is running (e.g. the reboot
+ * command), the other CPUs are already stopped. Don't try to
+ * stop them again.
+ */
+ if (!KDB_IS_RUNNING())
+#endif /* defined(CONFIG_X86_32) && defined(CONFIG_KDB) */
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
machine_ops.crash_shutdown(regs);
}
#endif
+
+
+ #if defined(CONFIG_SMP)
+
+ /* This keeps track of which cpu is crashing. */
+ static int crashing_cpu;
+ static nmi_shootdown_cb shootdown_callback;
+
+ static atomic_t waiting_for_crash_ipi;
+
++#ifdef CONFIG_KDB_KDUMP
++void halt_current_cpu(struct pt_regs *regs)
++{
++#ifdef CONFIG_X86_32
++ struct pt_regs fixed_regs;
++#endif
++ local_irq_disable();
++#ifdef CONFIG_X86_32
++ if (!user_mode_vm(regs)) {
++ crash_fixup_ss_esp(&fixed_regs, regs);
++ regs = &fixed_regs;
++ }
++#endif
++ crash_save_cpu(regs, raw_smp_processor_id());
++ disable_local_APIC();
++ atomic_dec(&waiting_for_crash_ipi);
++ /* Assume hlt works */
++ halt();
++ for (;;)
++ cpu_relax();
++}
++#endif /* CONFIG_KDB_KDUMP */
++
+ static int crash_nmi_callback(struct notifier_block *self,
+ unsigned long val, void *data)
+ {
+ int cpu;
++ struct die_args *args = (struct die_args *)data;
+
+ if (val != DIE_NMI_IPI)
+ return NOTIFY_OK;
+
+ cpu = raw_smp_processor_id();
+
+ /* Don't do anything if this handler is invoked on the crashing cpu.
+ * Otherwise, the system will completely hang. The crashing cpu can get
+ * an NMI if the system was initially booted with the nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return NOTIFY_STOP;
++#ifdef CONFIG_KDB_KDUMP
++ halt_current_cpu(args->regs);
++#else
+ local_irq_disable();
+
+ shootdown_callback(cpu, args);
+
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ halt();
+ for (;;)
+ cpu_relax();
++#endif /* !CONFIG_KDB_KDUMP */
+
+ return 1;
+ }
+
+ static void smp_send_nmi_allbutself(void)
+ {
+ send_IPI_allbutself(NMI_VECTOR);
+ }
+
+ static struct notifier_block crash_nmi_nb = {
+ .notifier_call = crash_nmi_callback,
+ };
+
+ /* Halt all other CPUs, calling the specified function on each of them
+ *
+ * This function can be used to halt all other CPUs at crash
+ * or emergency reboot time. The function passed as a parameter
+ * will be called from an NMI handler on all CPUs.
+ */
-void nmi_shootdown_cpus(nmi_shootdown_cb callback)
++static void wait_other_cpus(void)
+ {
+ unsigned long msecs;
++
++ msecs = 1000; /* Wait at most a second for the other cpus to stop */
++ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
++ udelay(1000);
++ msecs--;
++ }
++}
++
++static void nmi_shootdown_cpus_init(void)
++{
++ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
++}
++
++void nmi_shootdown_cpus(nmi_shootdown_cb callback)
++{
++ nmi_shootdown_cpus_init();
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = safe_smp_processor_id();
+
+ shootdown_callback = callback;
+
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
++
+ /* Would it be better to replace the trap vector here? */
+ if (register_die_notifier(&crash_nmi_nb))
+ return; /* return what? */
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
-
++ wait_other_cpus();
+ /* Leave the nmi callback set */
+ }
-#else /* !CONFIG_SMP */
++#else /* !defined(CONFIG_SMP) */
++
+ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+ {
+ /* No other CPUs to shoot down */
+ }
-#endif
++
++#ifdef CONFIG_KDB_KDUMP
++static void nmi_shootdown_cpus_init(void) {}
++static void wait_other_cpus(void) {}
++static void halt_current_cpu(struct pt_regs *regs) {}
++#endif /* CONFIG_KDB_KDUMP */
++
++#endif /* defined(CONFIG_SMP) */
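
To make the shootdown protocol above concrete, here is a minimal usage sketch. It is not part of the patch: kdump_example_callback and example_crash_shutdown are hypothetical names, and the callback signature is inferred from the shootdown_callback(cpu, args) call in crash_nmi_callback().

    /* Hypothetical caller: save each remote cpu's state, then continue
     * single-threaded. The callback runs in NMI context on every cpu
     * except the one that invoked nmi_shootdown_cpus(). */
    static void kdump_example_callback(int cpu, struct die_args *args)
    {
            crash_save_cpu(args->regs, cpu);        /* as in halt_current_cpu() */
            disable_local_APIC();
    }

    static void example_crash_shutdown(void)
    {
            nmi_shootdown_cpus(kdump_example_callback);
            /* wait_other_cpus() has now waited up to ~1s for the halts */
    }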
--- /dev/null
+ /*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
+ * 2000-2002 x86-64 support by Andi Kleen
+ */
+
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+ #include <linux/kernel.h>
+ #include <linux/signal.h>
+ #include <linux/errno.h>
+ #include <linux/wait.h>
+ #include <linux/ptrace.h>
+ #include <linux/tracehook.h>
+ #include <linux/unistd.h>
+ #include <linux/stddef.h>
+ #include <linux/personality.h>
+ #include <linux/uaccess.h>
++#include <linux/perfmon_kern.h>
+
+ #include <asm/processor.h>
+ #include <asm/ucontext.h>
+ #include <asm/i387.h>
+ #include <asm/vdso.h>
+
+ #ifdef CONFIG_X86_64
+ #include <asm/proto.h>
+ #include <asm/ia32_unistd.h>
+ #include <asm/mce.h>
+ #endif /* CONFIG_X86_64 */
+
+ #include <asm/syscall.h>
+ #include <asm/syscalls.h>
+
+ #include <asm/sigframe.h>
+
+ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+ #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
+ X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+ X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+ X86_EFLAGS_CF)
+
+ #ifdef CONFIG_X86_32
+ # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
+ #else
+ # define FIX_EFLAGS __FIX_EFLAGS
+ #endif
+
+ #define COPY(x) { \
+ err |= __get_user(regs->x, &sc->x); \
+ }
+
+ #define COPY_SEG(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp; \
+ }
+
+ #define COPY_SEG_CPL3(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp | 3; \
+ }
+
+ #define GET_SEG(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ loadsegment(seg, tmp); \
+ }
+
+ static int
+ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+ unsigned long *pax)
+ {
+ void __user *buf;
+ unsigned int tmpflags;
+ unsigned int err = 0;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ #ifdef CONFIG_X86_32
+ GET_SEG(gs);
+ COPY_SEG(fs);
+ COPY_SEG(es);
+ COPY_SEG(ds);
+ #endif /* CONFIG_X86_32 */
+
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
+
+ #ifdef CONFIG_X86_64
+ COPY(r8);
+ COPY(r9);
+ COPY(r10);
+ COPY(r11);
+ COPY(r12);
+ COPY(r13);
+ COPY(r14);
+ COPY(r15);
+ #endif /* CONFIG_X86_64 */
+
+ #ifdef CONFIG_X86_32
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
+ #else /* !CONFIG_X86_32 */
+ /* Kernel saves and restores only the CS segment register on signals,
+ * which is the bare minimum needed to allow mixed 32/64-bit code.
+ * App's signal handler can save/restore other segments if needed. */
+ COPY_SEG_CPL3(cs);
+ #endif /* CONFIG_X86_32 */
+
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_ax = -1; /* disable syscall checks */
+
+ err |= __get_user(buf, &sc->fpstate);
+ err |= restore_i387_xstate(buf);
+
+ err |= __get_user(*pax, &sc->ax);
+ return err;
+ }
+
+ static int
+ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+ struct pt_regs *regs, unsigned long mask)
+ {
+ int err = 0;
+
+ #ifdef CONFIG_X86_32
+ {
+ unsigned int tmp;
+
+ savesegment(gs, tmp);
+ err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+ }
+ err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
+ err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+ err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+ #endif /* CONFIG_X86_32 */
+
+ err |= __put_user(regs->di, &sc->di);
+ err |= __put_user(regs->si, &sc->si);
+ err |= __put_user(regs->bp, &sc->bp);
+ err |= __put_user(regs->sp, &sc->sp);
+ err |= __put_user(regs->bx, &sc->bx);
+ err |= __put_user(regs->dx, &sc->dx);
+ err |= __put_user(regs->cx, &sc->cx);
+ err |= __put_user(regs->ax, &sc->ax);
+ #ifdef CONFIG_X86_64
+ err |= __put_user(regs->r8, &sc->r8);
+ err |= __put_user(regs->r9, &sc->r9);
+ err |= __put_user(regs->r10, &sc->r10);
+ err |= __put_user(regs->r11, &sc->r11);
+ err |= __put_user(regs->r12, &sc->r12);
+ err |= __put_user(regs->r13, &sc->r13);
+ err |= __put_user(regs->r14, &sc->r14);
+ err |= __put_user(regs->r15, &sc->r15);
+ #endif /* CONFIG_X86_64 */
+
+ err |= __put_user(current->thread.trap_no, &sc->trapno);
+ err |= __put_user(current->thread.error_code, &sc->err);
+ err |= __put_user(regs->ip, &sc->ip);
+ #ifdef CONFIG_X86_32
+ err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->sp, &sc->sp_at_signal);
+ err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+ #else /* !CONFIG_X86_32 */
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->cs, &sc->cs);
+ err |= __put_user(0, &sc->gs);
+ err |= __put_user(0, &sc->fs);
+ #endif /* CONFIG_X86_32 */
+
+ err |= __put_user(fpstate, &sc->fpstate);
+
+ /* non-iBCS2 extensions.. */
+ err |= __put_user(mask, &sc->oldmask);
+ err |= __put_user(current->thread.cr2, &sc->cr2);
+
+ return err;
+ }
+
+ /*
+ * Set up a signal frame.
+ */
+ #ifdef CONFIG_X86_32
+ static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+ } __attribute__((packed)) retcode = {
+ 0xb858, /* popl %eax; movl $..., %eax */
+ __NR_sigreturn,
+ 0x80cd, /* int $0x80 */
+ };
+
+ static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u8 pad;
+ } __attribute__((packed)) rt_retcode = {
+ 0xb8, /* movl $..., %eax */
+ __NR_rt_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0
+ };
+
+ /*
+ * Determine which stack to use.
+ */
+ static inline void __user *
+ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+ void **fpstate)
+ {
+ unsigned long sp;
+
+ /* Default to using normal stack */
+ sp = regs->sp;
+
+ /*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+ if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size)))
+ return (void __user *) -1L;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ } else {
+ /* This is the legacy signal stack switching. */
+ if ((regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer)
+ sp = (unsigned long) ka->sa.sa_restorer;
+ }
+
+ if (used_math()) {
+ sp = sp - sig_xstate_size;
+ *fpstate = (struct _fpstate *) sp;
+ if (save_i387_xstate(*fpstate) < 0)
+ return (void __user *)-1L;
+ }
+
+ sp -= frame_size;
+ /*
+ * Align the stack pointer according to the i386 ABI,
+ * i.e. so that on function entry ((sp + 4) & 15) == 0.
+ */
+ sp = ((sp + 4) & -16ul) - 4;
+
+ return (void __user *) sp;
+ }
+
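
A quick numeric check of the alignment rule above, as a userspace-style illustration (check_sigframe_alignment is not part of the patch):

    #include <assert.h>

    /* The i386 ABI wants ((sp + 4) & 15) == 0 on function entry, i.e. the
     * stack was 16-byte aligned before the return address was pushed. */
    static void check_sigframe_alignment(void)
    {
            unsigned long sp = 0xbfff1234UL;        /* assumed pre-frame sp */

            sp = ((sp + 4) & -16UL) - 4;            /* formula from get_sigframe() */
            assert(((sp + 4) & 15) == 0);           /* sp is now 0xbfff122c */
    }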
+ static int
+ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+ struct pt_regs *regs)
+ {
+ struct sigframe __user *frame;
+ void __user *restorer;
+ int err = 0;
+ void __user *fpstate = NULL;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ if (__put_user(sig, &frame->sig))
+ return -EFAULT;
+
+ if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+ return -EFAULT;
+
+ if (_NSIG_WORDS > 1) {
+ if (__copy_to_user(&frame->extramask, &set->sig[1],
+ sizeof(frame->extramask)))
+ return -EFAULT;
+ }
+
+ if (current->mm->context.vdso)
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
+ else
+ restorer = &frame->retcode;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+
+ /* Set up to return from userspace. */
+ err |= __put_user(restorer, &frame->pretcode);
+
+ /*
+ * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
+ *
+ * WE DO NOT USE IT ANY MORE! It's only left here for historical
+ * reasons and because gdb uses it as a signature to notice
+ * signal handler stack frames.
+ */
+ err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->sp = (unsigned long)frame;
+ regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ax = (unsigned long)sig;
+ regs->dx = 0;
+ regs->cx = 0;
+
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+ regs->ss = __USER_DS;
+ regs->cs = __USER_CS;
+
+ return 0;
+ }
+
+ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+ {
+ struct rt_sigframe __user *frame;
+ void __user *restorer;
+ int err = 0;
+ void __user *fpstate = NULL;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ err |= __put_user(sig, &frame->sig);
+ err |= __put_user(&frame->info, &frame->pinfo);
+ err |= __put_user(&frame->uc, &frame->puc);
+ err |= copy_siginfo_to_user(&frame->info, info);
+ if (err)
+ return -EFAULT;
+
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ return -EFAULT;
+
+ /* Set up to return from userspace. */
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ err |= __put_user(restorer, &frame->pretcode);
+
+ /*
+ * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+ *
+ * WE DO NOT USE IT ANY MORE! It's only left here for historical
+ * reasons and because gdb uses it as a signature to notice
+ * signal handler stack frames.
+ */
+ err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->sp = (unsigned long)frame;
+ regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ax = (unsigned long)sig;
+ regs->dx = (unsigned long)&frame->info;
+ regs->cx = (unsigned long)&frame->uc;
+
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+ regs->ss = __USER_DS;
+ regs->cs = __USER_CS;
+
+ return 0;
+ }
+ #else /* !CONFIG_X86_32 */
+ /*
+ * Determine which stack to use.
+ */
+ static void __user *
+ get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
+ {
+ /* Default to using normal stack - redzone */
+ sp -= 128;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)round_down(sp - size, 64);
+ }
+
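
The "sp -= 128" above steps over the x86-64 red zone, the 128 bytes below the user stack pointer that leaf functions may use without adjusting %rsp, and round_down() then keeps the frame 64-byte aligned. A small check with assumed values (check_get_stack_math is illustrative, not part of the patch):

    #include <assert.h>

    /* Mirrors the arithmetic in get_stack(). */
    static void check_get_stack_math(void)
    {
            unsigned long sp = 0x7fffffffe123UL;    /* assumed user sp */
            unsigned long size = 0x340;             /* assumed frame size */

            sp -= 128;                              /* skip the red zone */
            sp = (sp - size) & ~63UL;               /* round_down(sp - size, 64) */
            assert((sp & 63) == 0);
    }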
+ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+ {
+ struct rt_sigframe __user *frame;
+ void __user *fp = NULL;
+ int err = 0;
+ struct task_struct *me = current;
+
+ if (used_math()) {
+ fp = get_stack(ka, regs->sp, sig_xstate_size);
+ frame = (void __user *)round_down(
+ (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+
+ if (save_i387_xstate(fp) < 0)
+ return -EFAULT;
+ } else
+ frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user(&frame->info, info))
+ return -EFAULT;
+ }
+
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ /* x86-64 should always use SA_RESTORER. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+ } else {
+ /* could use a vstub here */
+ return -EFAULT;
+ }
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->di = sig;
+ /* In case the signal handler was declared without prototypes */
+ regs->ax = 0;
+
+ /* This also works for non SA_SIGINFO handlers because they expect the
+ next argument after the signal number on the stack. */
+ regs->si = (unsigned long)&frame->info;
+ regs->dx = (unsigned long)&frame->uc;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+
+ regs->sp = (unsigned long)frame;
+
+ /* Set up the CS register to run signal handlers in 64-bit mode,
+ even if the handler happens to be interrupting 32-bit code. */
+ regs->cs = __USER_CS;
+
+ return 0;
+ }
+ #endif /* CONFIG_X86_32 */
+
+ #ifdef CONFIG_X86_32
+ /*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+ asmlinkage int
+ sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+ {
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ current->saved_sigmask = current->blocked;
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ set_restore_sigmask();
+
+ return -ERESTARTNOHAND;
+ }
+
+ asmlinkage int
+ sys_sigaction(int sig, const struct old_sigaction __user *act,
+ struct old_sigaction __user *oact)
+ {
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if (act) {
+ old_sigset_t mask;
+
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+ return -EFAULT;
+
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ __get_user(mask, &act->sa_mask);
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+ return -EFAULT;
+
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ }
+
+ return ret;
+ }
+ #endif /* CONFIG_X86_32 */
+
+ #ifdef CONFIG_X86_32
+ asmlinkage int sys_sigaltstack(unsigned long bx)
+ {
+ /*
+ * This is needed to make gcc realize it doesn't own the
+ * "struct pt_regs"
+ */
+ struct pt_regs *regs = (struct pt_regs *)&bx;
+ const stack_t __user *uss = (const stack_t __user *)bx;
+ stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+ return do_sigaltstack(uss, uoss, regs->sp);
+ }
+ #else /* !CONFIG_X86_32 */
+ asmlinkage long
+ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs)
+ {
+ return do_sigaltstack(uss, uoss, regs->sp);
+ }
+ #endif /* CONFIG_X86_32 */
+
+ /*
+ * Do a signal return; undo the signal stack.
+ */
+ #ifdef CONFIG_X86_32
+ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+ {
+ struct sigframe __user *frame;
+ struct pt_regs *regs;
+ unsigned long ax;
+ sigset_t set;
+
+ regs = (struct pt_regs *) &__unused;
+ frame = (struct sigframe __user *)(regs->sp - 8);
+
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
+ && __copy_from_user(&set.sig[1], &frame->extramask,
+ sizeof(frame->extramask))))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->sc, &ax))
+ goto badframe;
+ return ax;
+
+ badframe:
+ signal_fault(regs, frame, "sigreturn");
+
+ return 0;
+ }
+ #endif /* CONFIG_X86_32 */
+
+ static long do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_sigframe __user *frame;
+ unsigned long ax;
+ sigset_t set;
+
+ frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+ goto badframe;
+
+ if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+ goto badframe;
+
+ return ax;
+
+ badframe:
+ signal_fault(regs, frame, "rt_sigreturn");
+ return 0;
+ }
+
+ #ifdef CONFIG_X86_32
+ /*
+ * Note: do not pass in pt_regs directly; with tail-call optimization,
+ * GCC will incorrectly stomp on the caller's frame and corrupt user-space
+ * register state:
+ */
+ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+ {
+ struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+ return do_rt_sigreturn(regs);
+ }
+ #else /* !CONFIG_X86_32 */
+ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+ {
+ return do_rt_sigreturn(regs);
+ }
+ #endif /* CONFIG_X86_32 */
+
+ /*
+ * OK, we're invoking a handler:
+ */
+ static int signr_convert(int sig)
+ {
+ #ifdef CONFIG_X86_32
+ struct thread_info *info = current_thread_info();
+
+ if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+ return info->exec_domain->signal_invmap[sig];
+ #endif /* CONFIG_X86_32 */
+ return sig;
+ }
+
+ #ifdef CONFIG_X86_32
+
+ #define is_ia32 1
+ #define ia32_setup_frame __setup_frame
+ #define ia32_setup_rt_frame __setup_rt_frame
+
+ #else /* !CONFIG_X86_32 */
+
+ #ifdef CONFIG_IA32_EMULATION
+ #define is_ia32 test_thread_flag(TIF_IA32)
+ #else /* !CONFIG_IA32_EMULATION */
+ #define is_ia32 0
+ #endif /* CONFIG_IA32_EMULATION */
+
+ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs);
+ int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs *regs);
+
+ #endif /* CONFIG_X86_32 */
+
+ static int
+ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+ {
+ int usig = signr_convert(sig);
+ int ret;
+
+ /* Set up the stack frame */
+ if (is_ia32) {
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ else
+ ret = ia32_setup_frame(usig, ka, set, regs);
+ } else
+ ret = __setup_rt_frame(sig, ka, info, set, regs);
+
+ if (ret) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+
+ return ret;
+ }
+
+ static int
+ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+ sigset_t *oldset, struct pt_regs *regs)
+ {
+ int ret;
+
+ /* Are we from a system call? */
+ if (syscall_get_nr(current, regs) >= 0) {
+ /* If so, check system call restarting.. */
+ switch (syscall_get_error(current, regs)) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->ax = -EINTR;
+ break;
+
+ case -ERESTARTSYS:
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ regs->ax = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
+ break;
+ }
+ }
+
+ /*
+ * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
+ * flag so that register information in the sigcontext is correct.
+ */
+ if (unlikely(regs->flags & X86_EFLAGS_TF) &&
+ likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
+ regs->flags &= ~X86_EFLAGS_TF;
+
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+ if (ret)
+ return ret;
+
+ #ifdef CONFIG_X86_64
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
+ #endif
+
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+
++ if (current->instrumentation & PTS_SELF) {
++ clear_thread_flag(TIF_SYSCALL_TRACE);
++ current->instrumentation &= ~PTS_SELF;
++ }
++
+ return 0;
+ }
+
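
The ERESTART* handling at the top of handle_signal() reads as a small decision table; the sketch below restates it in isolation (restart_for_handler is a hypothetical helper, not part of the patch). Backing ip up by 2 works because each x86 syscall entry instruction (int $0x80, sysenter, syscall) is two bytes long.

    /* Hypothetical restatement of the syscall-restart logic above. */
    static void restart_for_handler(struct pt_regs *regs, int sa_restart)
    {
            switch (syscall_get_error(current, regs)) {
            case -ERESTART_RESTARTBLOCK:
            case -ERESTARTNOHAND:
                    regs->ax = -EINTR;              /* handler observes EINTR */
                    break;
            case -ERESTARTSYS:
                    if (!sa_restart) {
                            regs->ax = -EINTR;      /* no SA_RESTART: fail */
                            break;
                    }
                    /* fallthrough */
            case -ERESTARTNOINTR:
                    regs->ax = regs->orig_ax;       /* re-arm the syscall number */
                    regs->ip -= 2;                  /* back up over the 2-byte insn */
                    break;
            }
    }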
+ #ifdef CONFIG_X86_32
+ #define NR_restart_syscall __NR_restart_syscall
+ #else /* !CONFIG_X86_32 */
+ #define NR_restart_syscall \
+ test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+ #endif /* CONFIG_X86_32 */
+
+ /*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL, even by
+ * mistake.
+ */
+ static void do_signal(struct pt_regs *regs)
+ {
+ struct k_sigaction ka;
+ siginfo_t info;
+ int signr;
+ sigset_t *oldset;
+
+ /*
+ * We want the common case to go fast, which is why we may in certain
+ * cases get here from kernel mode. Just return without doing anything
+ * if so.
+ * X86_32: vm86 regs switched out by assembly code before reaching
+ * here, so testing against kernel CS suffices.
+ */
+ if (!user_mode(regs))
+ return;
+
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+ oldset = &current->saved_sigmask;
+ else
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ if (signr > 0) {
+ /*
+ * Re-enable any watchpoints before delivering the
+ * signal to user space. The processor register will
+ * have been cleared if the watchpoint triggered
+ * inside the kernel.
+ */
+ if (current->thread.debugreg7)
+ set_debugreg(current->thread.debugreg7, 7);
+
+ /* Whee! Actually deliver the signal. */
+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+ /*
+ * A signal was successfully delivered; the saved
+ * sigmask will have been stored in the signal frame,
+ * and will be restored by sigreturn, so we can simply
+ * clear the TS_RESTORE_SIGMASK flag.
+ */
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+ }
+ return;
+ }
+
+ /* Did we come from a system call? */
+ if (syscall_get_nr(current, regs) >= 0) {
+ /* Restart the system call - no handlers present */
+ switch (syscall_get_error(current, regs)) {
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
+ break;
+
+ case -ERESTART_RESTARTBLOCK:
+ regs->ax = NR_restart_syscall;
+ regs->ip -= 2;
+ break;
+ }
+ }
+
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask
+ * back.
+ */
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
+ }
+
+ /*
+ * notification of userspace execution resumption
+ * - triggered by the TIF_WORK_MASK flags
+ */
+ void
+ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
+ {
+ #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+ /* notify userspace of pending MCEs */
+ if (thread_info_flags & _TIF_MCE_NOTIFY)
+ mce_notify_user();
+ #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+
++ /* process perfmon asynchronous work (e.g. block thread or reset) */
++ if (thread_info_flags & _TIF_PERFMON_WORK)
++ pfm_handle_work(regs);
++
+ /* deal with pending signal delivery */
+ if (thread_info_flags & _TIF_SIGPENDING)
+ do_signal(regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+
+ #ifdef CONFIG_X86_32
+ clear_thread_flag(TIF_IRET);
+ #endif /* CONFIG_X86_32 */
+ }
+
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+ {
+ struct task_struct *me = current;
+
+ if (show_unhandled_signals && printk_ratelimit()) {
+ printk("%s"
+ "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+ me->comm, me->pid, where, frame,
+ regs->ip, regs->sp, regs->orig_ax);
+ print_vma_addr(" in ", regs->ip);
+ printk(KERN_CONT "\n");
+ }
+
+ force_sig(SIGSEGV, me);
+ }
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
- fixup_irqs(cpu_online_map);
+ fixup_irqs();
+ pfm_cpu_disable();
+ }
+
+ int native_cpu_disable(void)
+ {
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 because some
+ * interrupts can only be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
+ clear_local_APIC();
+
+ cpu_disable_common();
return 0;
}
--- /dev/null
+ /*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+ /*
+ * Handle hardware traps and faults.
+ */
+ #include <linux/interrupt.h>
+ #include <linux/kallsyms.h>
+ #include <linux/spinlock.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
+ #include <linux/utsname.h>
+ #include <linux/kdebug.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/ptrace.h>
+ #include <linux/string.h>
+ #include <linux/delay.h>
+ #include <linux/errno.h>
+ #include <linux/kexec.h>
+ #include <linux/sched.h>
+ #include <linux/timer.h>
+ #include <linux/init.h>
+ #include <linux/bug.h>
+ #include <linux/nmi.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+ #include <linux/io.h>
+
+ #ifdef CONFIG_EISA
+ #include <linux/ioport.h>
+ #include <linux/eisa.h>
+ #endif
+
+ #ifdef CONFIG_MCA
+ #include <linux/mca.h>
+ #endif
+
+ #if defined(CONFIG_EDAC)
+ #include <linux/edac.h>
+ #endif
+
++#ifdef CONFIG_KDB
++#include <linux/kdb.h>
++#endif /* CONFIG_KDB */
++
+ #include <asm/stacktrace.h>
+ #include <asm/processor.h>
+ #include <asm/debugreg.h>
+ #include <asm/atomic.h>
+ #include <asm/system.h>
+ #include <asm/traps.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
+
+ #include <mach_traps.h>
+
+ #ifdef CONFIG_X86_64
+ #include <asm/pgalloc.h>
+ #include <asm/proto.h>
+ #include <asm/pda.h>
+ #else
+ #include <asm/processor-flags.h>
+ #include <asm/arch_hooks.h>
+ #include <asm/traps.h>
+
+ #include "cpu/mcheck/mce.h"
+
+ asmlinkage int system_call(void);
+
+ /* Do we ignore FPU interrupts? */
+ char ignore_fpu_irq;
+
+ /*
+ * The IDT has to be page-aligned to simplify the Pentium
+ * F0 0F bug workaround. We have a special link segment
+ * for this.
+ */
+ gate_desc idt_table[256]
+ __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+ #endif
+
+ DECLARE_BITMAP(used_vectors, NR_VECTORS);
+ EXPORT_SYMBOL_GPL(used_vectors);
+
+ static int ignore_nmis;
+
+ static inline void conditional_sti(struct pt_regs *regs)
+ {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
+
+ static inline void preempt_conditional_sti(struct pt_regs *regs)
+ {
+ inc_preempt_count();
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
+
+ static inline void preempt_conditional_cli(struct pt_regs *regs)
+ {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
+ dec_preempt_count();
+ }
+
+ #ifdef CONFIG_X86_32
+ static inline void
+ die_if_kernel(const char *str, struct pt_regs *regs, long err)
+ {
+ if (!user_mode_vm(regs))
+ die(str, regs, err);
+ }
+
+ /*
+ * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+ * invalid offset set (the LAZY one) and the faulting thread has
+ * a valid I/O bitmap pointer, we copy the I/O bitmap into the TSS,
+ * set the offset field correctly, and return 1.
+ */
+ static int lazy_iobitmap_copy(void)
+ {
+ struct thread_struct *thread;
+ struct tss_struct *tss;
+ int cpu;
+
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
+ thread = &current->thread;
+
+ if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+ thread->io_bitmap_ptr) {
+ memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
+ thread->io_bitmap_max);
+ /*
+ * If the previously set map extended to higher ports
+ * than the current one, pad the extra space with 0xff (no access).
+ */
+ if (thread->io_bitmap_max < tss->io_bitmap_max) {
+ memset((char *) tss->io_bitmap +
+ thread->io_bitmap_max, 0xff,
+ tss->io_bitmap_max - thread->io_bitmap_max);
+ }
+ tss->io_bitmap_max = thread->io_bitmap_max;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->io_bitmap_owner = thread;
+ put_cpu();
+
+ return 1;
+ }
+ put_cpu();
+
+ return 0;
+ }
+ #endif
+
+ static void __kprobes
+ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+ long error_code, siginfo_t *info)
+ {
+ struct task_struct *tsk = current;
+
+ #ifdef CONFIG_X86_32
+ if (regs->flags & X86_VM_MASK) {
+ /*
+ * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
+ * On nmi (interrupt 2), do_trap should not be called.
+ */
+ if (trapnr < 6)
+ goto vm86_trap;
+ goto trap_signal;
+ }
+ #endif
+
+ if (!user_mode(regs))
+ goto kernel_trap;
+
+ #ifdef CONFIG_X86_32
+ trap_signal:
+ #endif
+ /*
+ * We want error_code and trap_no set for userspace faults and
+ * kernelspace faults which result in die(), but not
+ * kernelspace faults which are fixed up. die() gives the
+ * process no chance to handle the signal and notice the
+ * kernel fault information, so that won't result in polluting
+ * the information about previously queued, but not yet
+ * delivered, faults. See also do_general_protection below.
+ */
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
+ #ifdef CONFIG_X86_64
+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+ tsk->comm, tsk->pid, str,
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+ #endif
+
+ if (info)
+ force_sig_info(signr, info, tsk);
+ else
+ force_sig(signr, tsk);
+ return;
+
+ kernel_trap:
+ if (!fixup_exception(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+ die(str, regs, error_code);
+ }
+ return;
+
+ #ifdef CONFIG_X86_32
+ vm86_trap:
+ if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, trapnr))
+ goto trap_signal;
+ return;
+ #endif
+ }
+
+ #define DO_ERROR(trapnr, signr, str, name) \
+ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
+ { \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ conditional_sti(regs); \
+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
+ }
+
+ #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
+ { \
+ siginfo_t info; \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ conditional_sti(regs); \
+ do_trap(trapnr, signr, str, regs, error_code, &info); \
+ }
+
+ DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+ DO_ERROR(4, SIGSEGV, "overflow", overflow)
+ DO_ERROR(5, SIGSEGV, "bounds", bounds)
+ DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
+ DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+ #ifdef CONFIG_X86_32
+ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+ #endif
+ DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+
+ #ifdef CONFIG_X86_64
+ /* Runs on IST stack */
+ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
+ {
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ 12, SIGBUS) == NOTIFY_STOP)
+ return;
+ preempt_conditional_sti(regs);
+ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+ }
+
+ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+ {
+ static const char str[] = "double fault";
+ struct task_struct *tsk = current;
+
+ /* Return value not checked because a double fault cannot be ignored */
+ notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 8;
+
+ /*
+ * This is always a kernel trap and never fixable (and thus must
+ * never return).
+ */
+ for (;;)
+ die(str, regs, error_code);
+ }
+ #endif
+
+ dotraplinkage void __kprobes
+ do_general_protection(struct pt_regs *regs, long error_code)
+ {
+ struct task_struct *tsk;
+
+ conditional_sti(regs);
+
+ #ifdef CONFIG_X86_32
+ if (lazy_iobitmap_copy()) {
+ /* restart the faulting instruction */
+ return;
+ }
+
+ if (regs->flags & X86_VM_MASK)
+ goto gp_in_vm86;
+ #endif
+
+ tsk = current;
+ if (!user_mode(regs))
+ goto gp_in_kernel;
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
+ tsk->comm, task_pid_nr(tsk),
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+
+ force_sig(SIGSEGV, tsk);
+ return;
+
+ #ifdef CONFIG_X86_32
+ gp_in_vm86:
+ local_irq_enable();
+ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+ return;
+ #endif
+
+ gp_in_kernel:
+ if (fixup_exception(regs))
+ return;
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+ if (notify_die(DIE_GPF, "general protection fault", regs,
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
+ return;
+ die("general protection fault", regs, error_code);
+ }
+
+ static notrace __kprobes void
+ mem_parity_error(unsigned char reason, struct pt_regs *regs)
+ {
+ printk(KERN_EMERG
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+
+ printk(KERN_EMERG
+ "You have some hardware problem, likely on the PCI bus.\n");
+
+ #if defined(CONFIG_EDAC)
+ if (edac_handler_set()) {
+ edac_atomic_assert_error();
+ return;
+ }
+ #endif
+
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
+
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+
+ /* Clear and disable the memory parity error line. */
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
+ }
+
+ static notrace __kprobes void
+ io_check_error(unsigned char reason, struct pt_regs *regs)
+ {
+ unsigned long i;
+
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
++ if (panic_on_io_nmi)
++ panic("NMI IOCK error: Not continuing");
++
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+
+ i = 2000;
+ while (--i)
+ udelay(1000);
+
+ reason &= ~8;
+ outb(reason, 0x61);
+ }
+
+ static notrace __kprobes void
+ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+ {
++#ifdef CONFIG_KDB
++ (void)kdb(KDB_REASON_NMI, reason, regs);
++#endif /* CONFIG_KDB */
++
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+ NOTIFY_STOP)
+ return;
+ #ifdef CONFIG_MCA
+ /*
+ * Might actually be able to figure out what the guilty party
+ * is:
+ */
+ if (MCA_bus) {
+ mca_handle_nmi();
+ return;
+ }
+ #endif
+ printk(KERN_EMERG
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+ if (panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
+
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+
+ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ {
+ unsigned char reason = 0;
+ int cpu;
+
+ cpu = smp_processor_id();
+
+ /* Only the BSP gets external NMIs from the system. */
+ if (!cpu)
+ reason = get_nmi_reason();
+
++#if defined(CONFIG_SMP) && defined(CONFIG_KDB)
++ /*
++ * Call the kernel debugger to see if this NMI is due
++ * to a KDB-requested IPI. If so, kdb will handle it.
++ */
++ if (kdb_ipi(regs, NULL)) {
++ return;
++ }
++#endif /* defined(CONFIG_SMP) && defined(CONFIG_KDB) */
++
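+ 	/*
+ 	 * Port 0x61 bit 0x80 flags a memory parity/SERR error and bit
+ 	 * 0x40 an I/O channel check; neither set means the NMI came
+ 	 * from elsewhere (watchdog or unknown source).
+ 	 */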
+ if (!(reason & 0xc0)) {
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP)
+ return;
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Ok, so this is none of the documented NMI sources,
+ * so it must be the NMI watchdog.
+ */
+ if (nmi_watchdog_tick(regs, reason))
+ return;
+ if (!do_nmi_callback(regs, cpu))
+ unknown_nmi_error(reason, regs);
+ #else
+ unknown_nmi_error(reason, regs);
+ #endif
+
+ return;
+ }
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ /* AK: following checks seem to be broken on modern chipsets. FIXME */
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ #ifdef CONFIG_X86_32
+ /*
+ * Reassert NMI in case it became active meanwhile
+ * as it's edge-triggered:
+ */
+ reassert_nmi();
+ #endif
+ }
+
+ dotraplinkage notrace __kprobes void
+ do_nmi(struct pt_regs *regs, long error_code)
+ {
+ nmi_enter();
+
+ inc_irq_stat(__nmi_count);
+
+ if (!ignore_nmis)
+ default_do_nmi(regs);
+
+ nmi_exit();
+ }
+
+ void stop_nmi(void)
+ {
+ acpi_nmi_disable();
+ ignore_nmis++;
+ }
+
+ void restart_nmi(void)
+ {
+ ignore_nmis--;
+ acpi_nmi_enable();
+ }
+
+ /* May run on IST stack. */
+ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+ {
++#ifdef CONFIG_KDB
++ if (kdb(KDB_REASON_BREAK, error_code, regs))
++ return;
++#endif
+ #ifdef CONFIG_KPROBES
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+ #else
+ if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+ #endif
+
+ preempt_conditional_sti(regs);
+ do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+ }
+
+ #ifdef CONFIG_X86_64
+ /*
+ * Helps a handler running on an IST stack switch back to the user
+ * stack for scheduling or signal handling. The actual stack switch
+ * is done in entry.S.
+ */
+ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+ {
+ struct pt_regs *regs = eregs;
+ /* Already synced: the exception frame is already on the right stack */
+ if (eregs == (struct pt_regs *)eregs->sp)
+ ;
+ /* Exception from user space */
+ else if (user_mode(eregs))
+ regs = task_pt_regs(current);
+ /*
+ * Exception from kernel and interrupts are enabled. Move to
+ * kernel process stack.
+ */
+ else if (eregs->flags & X86_EFLAGS_IF)
+ regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
+ if (eregs != regs)
+ *regs = *eregs;
+ return regs;
+ }
+ #endif
+
+ /*
+ * Our handling of the processor debug registers is non-trivial.
+ * We do not clear them on entry and exit from the kernel. Therefore
+ * it is possible to get a watchpoint trap here from inside the kernel.
+ * However, the code in ./ptrace.c has ensured that the user can
+ * only set watchpoints on userspace addresses. Therefore the in-kernel
+ * watchpoint trap can only occur in code which is reading/writing
+ * from user space. Such code must not hold kernel locks (since it
+ * can equally take a page fault), therefore it is safe to call
+ * force_sig_info even though that claims and releases locks.
+ *
+ * Code in ./signal.c ensures that the debug control register
+ * is restored before we deliver any signal, and therefore that
+ * user code runs with the correct debug control register even though
+ * we clear it here.
+ *
+ * Being careful here means that we don't have to be as careful in a
+ * lot of more complicated places (task switching can be a bit lazy
+ * about restoring all the debug state, and ptrace doesn't have to
+ * find every occurrence of the TF bit that could be saved away even
+ * by user code)
+ *
+ * May run on IST stack.
+ */
+ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
+ {
+ struct task_struct *tsk = current;
+ unsigned long condition;
+ int si_code;
+
+ get_debugreg(condition, 6);
+
+ /*
+ * The processor cleared BTF, so don't mark that we need it set.
+ */
+ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
+ tsk->thread.debugctlmsr = 0;
+
++#ifdef CONFIG_KDB
++ if (kdb(KDB_REASON_DEBUG, error_code, regs))
++ return;
++#endif /* CONFIG_KDB */
++
+ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+ SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ /* It's safe to allow IRQs after DR6 has been saved */
+ preempt_conditional_sti(regs);
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+ if (!tsk->thread.debugreg7)
+ goto clear_dr7;
+ }
+
+ #ifdef CONFIG_X86_32
+ if (regs->flags & X86_VM_MASK)
+ goto debug_vm86;
+ #endif
+
+ /* Save debug status register where ptrace can see it */
+ tsk->thread.debugreg6 = condition;
+
+ /*
+ * Single-stepping through TF: make sure we ignore any events in
+ * kernel space (but re-enable TF when returning to user mode).
+ */
+ if (condition & DR_STEP) {
+ if (!user_mode(regs))
+ goto clear_TF_reenable;
+ }
+
+ si_code = get_si_code(condition);
+ /* Ok, finally something we can handle */
+ send_sigtrap(tsk, regs, error_code, si_code);
+
+ /*
+ * Disable additional traps. They'll be re-enabled when
+ * the signal is delivered.
+ */
+ clear_dr7:
+ set_debugreg(0, 7);
+ preempt_conditional_cli(regs);
+ return;
+
+ #ifdef CONFIG_X86_32
+ debug_vm86:
+ handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
+ preempt_conditional_cli(regs);
+ return;
+ #endif
+
+ clear_TF_reenable:
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
+ preempt_conditional_cli(regs);
+ return;
+ }
+
+ #ifdef CONFIG_X86_64
+ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
+ {
+ if (fixup_exception(regs))
+ return 1;
+
+ notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
+ /* Illegal floating point operation in the kernel */
+ current->thread.trap_no = trapnr;
+ die(str, regs, 0);
+ return 0;
+ }
+ #endif
+
+ /*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of asynchronous
+ * IRQ13 delivery.
+ */
+ void math_error(void __user *ip)
+ {
+ struct task_struct *task;
+ siginfo_t info;
+ unsigned short cwd, swd, err;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+ */
+ task = current;
+ save_init_fpu(task);
+ task->thread.trap_no = 16;
+ task->thread.error_code = 0;
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_addr = ip;
+ /*
+ * (~cwd & swd) will mask out exceptions that are not set to unmasked
+ * status. 0x3f covers the exception bits in these regs, 0x200 is the
+ * C1 flag you need in case of a stack fault, 0x040 is the stack
+ * fault bit. We should only be taking one exception at a time,
+ * so if this combination doesn't produce any single exception,
+ * then we have a bad program that isn't synchronizing its FPU usage
+ * and it will suffer the consequences since we won't be able to
+ * fully reproduce the context of the exception
+ */
+ cwd = get_fpu_cwd(task);
+ swd = get_fpu_swd(task);
+
+ err = swd & ~cwd;
+
+ if (err & 0x001) { /* Invalid op */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
+ info.si_code = FPE_FLTINV;
+ } else if (err & 0x004) { /* Divide by Zero */
+ info.si_code = FPE_FLTDIV;
+ } else if (err & 0x008) { /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ } else if (err & 0x012) { /* Denormal, Underflow */
+ info.si_code = FPE_FLTUND;
+ } else if (err & 0x020) { /* Precision */
+ info.si_code = FPE_FLTRES;
+ } else {
+ /*
+ * If we're using IRQ 13, or supposedly even some trap 16
+ * implementations, it's possible we get a spurious trap...
+ */
+ return; /* Spurious trap, no error */
+ }
+ force_sig_info(SIGFPE, &info, task);
+ }
+
+ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+ conditional_sti(regs);
+
+ #ifdef CONFIG_X86_32
+ ignore_fpu_irq = 1;
+ #else
+ if (!user_mode(regs) &&
+ kernel_math_error(regs, "kernel x87 math error", 16))
+ return;
+ #endif
+
+ math_error((void __user *)regs->ip);
+ }
+
+ static void simd_math_error(void __user *ip)
+ {
+ struct task_struct *task;
+ siginfo_t info;
+ unsigned short mxcsr;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+ */
+ task = current;
+ save_init_fpu(task);
+ task->thread.trap_no = 19;
+ task->thread.error_code = 0;
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_code = __SI_FAULT;
+ info.si_addr = ip;
+ /*
+ * The SIMD FPU exceptions are handled a little differently, as there
+ * is only a single status/control register. Thus, to determine which
+ * unmasked exception was caught we must mask the exception mask bits
+ * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ */
+ mxcsr = get_fpu_mxcsr(task);
+ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+ }
+
+ dotraplinkage void
+ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+ conditional_sti(regs);
+
+ #ifdef CONFIG_X86_32
+ if (cpu_has_xmm) {
+ /* Handle SIMD FPU exceptions on PIII+ processors. */
+ ignore_fpu_irq = 1;
+ simd_math_error((void __user *)regs->ip);
+ return;
+ }
+ /*
+ * Handle strange cache flush from user space exception
+ * in all other cases. This is undocumented behaviour.
+ */
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
+ return;
+ }
+ current->thread.trap_no = 19;
+ current->thread.error_code = error_code;
+ die_if_kernel("cache flush denied", regs, error_code);
+ force_sig(SIGSEGV, current);
+ #else
+ if (!user_mode(regs) &&
+ kernel_math_error(regs, "kernel simd math error", 19))
+ return;
+ simd_math_error((void __user *)regs->ip);
+ #endif
+ }
+
+ dotraplinkage void
+ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+ {
+ conditional_sti(regs);
+ #if 0
+ /* No need to warn about this any longer. */
+ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+ #endif
+ }
+
+ #ifdef CONFIG_X86_32
+ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
+ {
+ struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+ unsigned long new_kesp = kesp - base;
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
+
+ /* Set up base for espfix segment */
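+ 	/*
+ 	 * Descriptor layout as a 64-bit value: limit 0-15 in bits 0-15,
+ 	 * base 0-23 in bits 16-39, access byte in bits 40-47, limit 16-19
+ 	 * in bits 48-51, flags in bits 52-55, base 24-31 in bits 56-63.
+ 	 */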
+ desc &= 0x00f0ff0000000000ULL;
+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
+ (lim_pages & 0xffff);
+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
+
+ return new_kesp;
+ }
+ #else
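+ /* Weak stubs; the MCE code overrides these when the corresponding support is built in. */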
+ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+ {
+ }
+
+ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+ {
+ }
+ #endif
+
+ /*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled (in this case,
+ * local interrupts are disabled at the call-site in entry.S).
+ */
+ asmlinkage void math_state_restore(void)
+ {
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
+ if (!tsk_used_math(tsk)) {
+ local_irq_enable();
+ /*
+ * does a slab alloc which can sleep
+ */
+ if (init_fpu(tsk)) {
+ /*
+ * ran out of memory!
+ */
+ do_group_exit(SIGKILL);
+ return;
+ }
+ local_irq_disable();
+ }
+
+ clts(); /* Allow maths ops (or we recurse) */
+ #ifdef CONFIG_X86_32
+ restore_fpu(tsk);
+ #else
+ /*
+ * Paranoid restore. Send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ stts();
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+ #endif
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
+ }
+ EXPORT_SYMBOL_GPL(math_state_restore);
+
+ #ifndef CONFIG_MATH_EMULATION
+ asmlinkage void math_emulate(long arg)
+ {
+ printk(KERN_EMERG
+ "math-emulation not enabled and no coprocessor found.\n");
+ printk(KERN_EMERG "killing %s.\n", current->comm);
+ force_sig(SIGFPE, current);
+ schedule();
+ }
+ #endif /* CONFIG_MATH_EMULATION */
+
+ dotraplinkage void __kprobes
+ do_device_not_available(struct pt_regs *regs, long error)
+ {
+ #ifdef CONFIG_X86_32
+ if (read_cr0() & X86_CR0_EM) {
+ conditional_sti(regs);
+ math_emulate(0);
+ } else {
+ math_state_restore(); /* interrupts still off */
+ conditional_sti(regs);
+ }
+ #else
+ math_state_restore();
+ #endif
+ }
+
+ #ifdef CONFIG_X86_32
+ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+ {
+ siginfo_t info;
+ local_irq_enable();
+
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_code = ILL_BADSTK;
+ info.si_addr = 0;
+ if (notify_die(DIE_TRAP, "iret exception",
+ regs, error_code, 32, SIGILL) == NOTIFY_STOP)
+ return;
+ do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+ }
+ #endif
+
+ void __init trap_init(void)
+ {
+ int i;
+
+ #ifdef CONFIG_EISA
+ void __iomem *p = early_ioremap(0x0FFFD9, 4);
+
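+ 	/* The EISA BIOS signature "EISA" lives at physical address 0x0FFFD9. */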
+ if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+ EISA_bus = 1;
+ early_iounmap(p, 4);
+ #endif
+
+ set_intr_gate(0, &divide_error);
+ set_intr_gate_ist(1, &debug, DEBUG_STACK);
+ set_intr_gate_ist(2, &nmi, NMI_STACK);
+ /* int3 can be called from all */
+ set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
+ /* int4 can be called from all */
+ set_system_intr_gate(4, &overflow);
+ set_intr_gate(5, &bounds);
+ set_intr_gate(6, &invalid_op);
+ set_intr_gate(7, &device_not_available);
+ #ifdef CONFIG_X86_32
+ set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
+ #else
+ set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
+ #endif
+ set_intr_gate(9, &coprocessor_segment_overrun);
+ set_intr_gate(10, &invalid_TSS);
+ set_intr_gate(11, &segment_not_present);
+ set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
+ set_intr_gate(13, &general_protection);
+ set_intr_gate(14, &page_fault);
+ set_intr_gate(15, &spurious_interrupt_bug);
+ set_intr_gate(16, &coprocessor_error);
+ set_intr_gate(17, &alignment_check);
+ #ifdef CONFIG_X86_MCE
+ set_intr_gate_ist(18, &machine_check, MCE_STACK);
+ #endif
+ set_intr_gate(19, &simd_coprocessor_error);
+
+ #ifdef CONFIG_IA32_EMULATION
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+ #endif
+
+ #ifdef CONFIG_X86_32
+ if (cpu_has_fxsr) {
+ printk(KERN_INFO "Enabling fast FPU save and restore... ");
+ set_in_cr4(X86_CR4_OSFXSR);
+ printk("done.\n");
+ }
+ if (cpu_has_xmm) {
+ printk(KERN_INFO
+ "Enabling unmasked SIMD FPU exception support... ");
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+ printk("done.\n");
+ }
+
+ set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+ #endif
+
+ /* Reserve all the builtin and the syscall vector: */
+ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+ set_bit(i, used_vectors);
+
+ #ifdef CONFIG_X86_64
+ set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+ #else
+ set_bit(SYSCALL_VECTOR, used_vectors);
+ #endif
+ /*
+ * Should be a barrier for any external CPU state:
+ */
+ cpu_init();
+
+ #ifdef CONFIG_X86_32
+ trap_init_hook();
+ #endif
+ }
int i;
for (i = 0; apic_probe[i]; ++i) {
if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
- if (!cmdline_apic) {
+ if (!cmdline_apic && genapic == &apic_default) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
int i;
for (i = 0; apic_probe[i]; ++i) {
if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
- if (!cmdline_apic) {
+ if (!cmdline_apic && genapic == &apic_default) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
}
}
+ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+ {
+ unsigned int i, level;
+ unsigned long *addr;
+
+ BUG_ON(irqs_disabled());
+
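+ 	/* Flush the TLB on every CPU; the cache flush below is only needed when 'cache' is set. */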
+ on_each_cpu(__cpa_flush_range, NULL, 1);
+
+ if (!cache)
+ return;
+
+ /* 4M threshold */
+ if (numpages >= 1024) {
+ if (boot_cpu_data.x86_model >= 4)
+ wbinvd();
+ return;
+ }
+ /*
+ * We only need to flush on one CPU,
+ * clflush is a MESI-coherent instruction that
+ * will cause all other CPUs to flush the same
+ * cachelines:
+ */
+ for (i = 0, addr = start; i < numpages; i++, addr++) {
+ pte_t *pte = lookup_address(*addr, &level);
+
+ /*
+ * Only flush present addresses:
+ */
+ if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+ clflush_cache_range((void *) *addr, PAGE_SIZE);
+ }
+ }
+
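+/* When set, static_protections() allows the kernel rodata section to be made writable; see set_memory_rw_force() below. */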
+static int static_protections_allow_rodata __read_mostly;
+
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
int set_memory_rw(unsigned long addr, int numpages)
{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
- EXPORT_SYMBOL(set_memory_rw);
+ EXPORT_SYMBOL_GPL(set_memory_rw);
+/* hack: bypass kernel rodata section static_protections check. */
+int set_memory_rw_force(unsigned long addr, int numpages)
+{
+ static DEFINE_MUTEX(lock);
+ int ret;
+
+ mutex_lock(&lock);
+ static_protections_allow_rodata = 1;
- ret = change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
++ ret = change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
+ static_protections_allow_rodata = 0;
+ mutex_unlock(&lock);
+
+ return ret;
+}
+
int set_memory_np(unsigned long addr, int numpages)
{
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
int set_memory_4k(unsigned long addr, int numpages)
void add_disk(struct gendisk *disk)
{
struct backing_dev_info *bdi;
+ dev_t devt;
int retval;
+ /* minors == 0 indicates that the extended devt from part0 should
+ * be used and must be accompanied by the EXT_DEVT flag. Make sure
+ * all parameters make sense.
+ */
+ WARN_ON(disk->minors && !(disk->major || disk->first_minor));
+ WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
+
disk->flags |= GENHD_FL_UP;
+
+ if (no_partition_scan)
+ disk->flags |= GENHD_FL_NO_PARTITION_SCAN;
- blk_register_region(MKDEV(disk->major, disk->first_minor),
- disk->minors, NULL, exact_match, exact_lock, disk);
++
+ retval = blk_alloc_devt(&disk->part0, &devt);
+ if (retval) {
+ WARN_ON(1);
+ return;
+ }
+ disk_to_dev(disk)->devt = devt;
+
+ /* ->major and ->first_minor aren't supposed to be
+ * dereferenced from here on, but set them just in case.
+ */
+ disk->major = MAJOR(devt);
+ disk->first_minor = MINOR(devt);
+
+ blk_register_region(disk_devt(disk), disk->minors, NULL,
+ exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%d\n", disk->minors);
+ return sprintf(buf, "%d\n",
+ (disk->flags & GENHD_FL_NO_PARTITION_SCAN ? 0 : disk->minors));
+}
+
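+/*
+ * Writing 0 to the "range" sysfs attribute sets GENHD_FL_NO_PARTITION_SCAN;
+ * writing a value no larger than the minor count clears it again.
+ */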
+static ssize_t disk_range_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0) {
+ if (i == 0)
+ disk->flags |= GENHD_FL_NO_PARTITION_SCAN;
+ else if (i <= disk->minors)
+ disk->flags &= ~GENHD_FL_NO_PARTITION_SCAN;
+ else
+ count = -EINVAL;
+ }
+
+ return count;
}
- static ssize_t disk_removable_show(struct device *dev,
+ static ssize_t disk_ext_range_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
return sprintf(buf, "%x\n", disk->flags);
}
- static ssize_t disk_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
- {
- struct gendisk *disk = dev_to_disk(dev);
-
- preempt_disable();
- disk_round_stats(disk);
- preempt_enable();
- return sprintf(buf,
- "%8lu %8lu %8llu %8u "
- "%8lu %8lu %8llu %8u "
- "%8u %8u %8u"
- "\n",
- disk_stat_read(disk, ios[READ]),
- disk_stat_read(disk, merges[READ]),
- (unsigned long long)disk_stat_read(disk, sectors[READ]),
- jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
- disk_stat_read(disk, ios[WRITE]),
- disk_stat_read(disk, merges[WRITE]),
- (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
- jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
- disk->in_flight,
- jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
- jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
- }
-
- #ifdef CONFIG_FAIL_MAKE_REQUEST
- static ssize_t disk_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
- {
- struct gendisk *disk = dev_to_disk(dev);
-
- return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
- }
-
- static ssize_t disk_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
- {
- struct gendisk *disk = dev_to_disk(dev);
- int i;
-
- if (count > 0 && sscanf(buf, "%d", &i) > 0) {
- if (i == 0)
- disk->flags &= ~GENHD_FL_FAIL;
- else
- disk->flags |= GENHD_FL_FAIL;
- }
-
- return count;
- }
-
- #endif
-
-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
+static DEVICE_ATTR(range, S_IRUGO|S_IWUSR, disk_range_show, disk_range_store);
+ static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
- static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
+ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
- static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
+ static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
+ __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+ #endif
+ #ifdef CONFIG_FAIL_IO_TIMEOUT
+ static struct device_attribute dev_attr_fail_timeout =
+ __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
+ part_timeout_store);
#endif
static struct attribute *disk_attrs[] = {
obj-$(CONFIG_NUBUS) += nubus/
obj-$(CONFIG_ATM) += atm/
obj-y += macintosh/
- obj-$(CONFIG_XEN) += xen/
-obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_ATA) += ata/
+obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_FUSION) += message/
obj-$(CONFIG_FIREWIRE) += firewire/
obj-y += ieee1394/
--- /dev/null
+ /******************************************************************************
+ *
+ * Module Name: tbxface - Public interfaces to the ACPI subsystem
+ * ACPI table oriented interfaces
+ *
+ *****************************************************************************/
+
+ /*
+ * Copyright (C) 2000 - 2008, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+ #include <acpi/acpi.h>
+ #include "accommon.h"
+ #include "acnamesp.h"
+ #include "actables.h"
+
+ #define _COMPONENT ACPI_TABLES
+ ACPI_MODULE_NAME("tbxface")
+
+ /* Local prototypes */
+ static acpi_status acpi_tb_load_namespace(void);
+
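+ /* Set by the "acpi_no_auto_ssdt" boot parameter; see the __setup handler at the end of this file. */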
+ static int no_auto_ssdt;
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_allocate_root_table
+ *
+ * PARAMETERS: initial_table_count - Size of initial_table_array, in number of
+ * struct acpi_table_desc structures
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Allocate a root table array. Used by i_aSL compiler and
+ * acpi_initialize_tables.
+ *
+ ******************************************************************************/
+
+ acpi_status acpi_allocate_root_table(u32 initial_table_count)
+ {
+
+ acpi_gbl_root_table_list.size = initial_table_count;
+ acpi_gbl_root_table_list.flags = ACPI_ROOT_ALLOW_RESIZE;
+
+ return (acpi_tb_resize_root_table_list());
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_initialize_tables
+ *
+ * PARAMETERS: initial_table_array - Pointer to an array of pre-allocated
+ * struct acpi_table_desc structures. If NULL, the
+ * array is dynamically allocated.
+ * initial_table_count - Size of initial_table_array, in number of
+ * struct acpi_table_desc structures
+ * allow_resize - Flag to tell Table Manager if resize of
+ * pre-allocated array is allowed. Ignored
+ * if initial_table_array is NULL.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Initialize the table manager, get the RSDP and RSDT/XSDT.
+ *
+ * NOTE: Allows static allocation of the initial table array in order
+ * to avoid the use of dynamic memory in confined environments
+ * such as the kernel boot sequence where it may not be available.
+ *
+ * If the host OS memory managers are initialized, use NULL for
+ * initial_table_array, and the table will be dynamically allocated.
+ *
+ ******************************************************************************/
+
+ acpi_status __init
+ acpi_initialize_tables(struct acpi_table_desc * initial_table_array,
+ u32 initial_table_count, u8 allow_resize)
+ {
+ acpi_physical_address rsdp_address;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(acpi_initialize_tables);
+
+ /*
+ * Set up the Root Table Array
+ * Allocate the table array if requested
+ */
+ if (!initial_table_array) {
+ status = acpi_allocate_root_table(initial_table_count);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+ } else {
+ /* Root Table Array has been statically allocated by the host */
+
+ ACPI_MEMSET(initial_table_array, 0,
+ (acpi_size) initial_table_count *
+ sizeof(struct acpi_table_desc));
+
+ acpi_gbl_root_table_list.tables = initial_table_array;
+ acpi_gbl_root_table_list.size = initial_table_count;
+ acpi_gbl_root_table_list.flags = ACPI_ROOT_ORIGIN_UNKNOWN;
+ if (allow_resize) {
+ acpi_gbl_root_table_list.flags |=
+ ACPI_ROOT_ALLOW_RESIZE;
+ }
+ }
+
+ /* Get the address of the RSDP */
+
+ rsdp_address = acpi_os_get_root_pointer();
+ if (!rsdp_address) {
+ return_ACPI_STATUS(AE_NOT_FOUND);
+ }
+
+ /*
+ * Get the root table (RSDT or XSDT) and extract all entries to the local
+ * Root Table Array. This array contains the information of the RSDT/XSDT
+ * in a common, more usable format.
+ */
+ status =
+ acpi_tb_parse_root_table(rsdp_address, ACPI_TABLE_ORIGIN_MAPPED);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_reallocate_root_table
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Reallocate Root Table List into dynamic memory. Copies the
+ * root list from the previously provided scratch area. Should
+ * be called once dynamic memory allocation is available in the
+ * kernel.
+ *
+ ******************************************************************************/
+ acpi_status acpi_reallocate_root_table(void)
+ {
+ struct acpi_table_desc *tables;
+ acpi_size new_size;
+
+ ACPI_FUNCTION_TRACE(acpi_reallocate_root_table);
+
+ /*
+ * Only reallocate the root table if the host provided a static buffer
+ * for the table array in the call to acpi_initialize_tables.
+ */
+ if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) {
+ return_ACPI_STATUS(AE_SUPPORT);
+ }
+
+ new_size = ((acpi_size) acpi_gbl_root_table_list.count +
+ ACPI_ROOT_TABLE_SIZE_INCREMENT) *
+ sizeof(struct acpi_table_desc);
+
+ /* Create new array and copy the old array */
+
+ tables = ACPI_ALLOCATE_ZEROED(new_size);
+ if (!tables) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, new_size);
+
+ acpi_gbl_root_table_list.size = acpi_gbl_root_table_list.count;
+ acpi_gbl_root_table_list.tables = tables;
+ acpi_gbl_root_table_list.flags =
+ ACPI_ROOT_ORIGIN_ALLOCATED | ACPI_ROOT_ALLOW_RESIZE;
+
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_load_table
+ *
+ * PARAMETERS: table_ptr - pointer to a buffer containing the entire
+ * table to be loaded
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This function is called to load a table from the caller's
+ * buffer. The buffer must contain an entire ACPI Table including
+ * a valid header. The header fields will be verified, and if it
+ * is determined that the table is invalid, the call will fail.
+ *
+ ******************************************************************************/
+ acpi_status acpi_load_table(struct acpi_table_header *table_ptr)
+ {
+ acpi_status status;
+ u32 table_index;
+ struct acpi_table_desc table_desc;
+
+ if (!table_ptr)
+ return AE_BAD_PARAMETER;
+
+ ACPI_MEMSET(&table_desc, 0, sizeof(struct acpi_table_desc));
+ table_desc.pointer = table_ptr;
+ table_desc.length = table_ptr->length;
+ table_desc.flags = ACPI_TABLE_ORIGIN_UNKNOWN;
+
+ /*
+ * Install the new table into the local data structures
+ */
+ status = acpi_tb_add_table(&table_desc, &table_index);
+ if (ACPI_FAILURE(status)) {
+ return status;
+ }
+ status = acpi_ns_load_table(table_index, acpi_gbl_root_node);
+ return status;
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_load_table)
+
+ /******************************************************************************
+ *
+ * FUNCTION: acpi_get_table_header
+ *
+ * PARAMETERS: Signature - ACPI signature of needed table
+ * Instance - Which instance (for SSDTs)
+ * out_table_header - The pointer to the table header to fill
+ *
+ * RETURN: Status and pointer to mapped table header
+ *
+ * DESCRIPTION: Finds an ACPI table header.
+ *
+ * NOTE: Caller is responsible for unmapping the header with
+ * acpi_os_unmap_memory
+ *
+ *****************************************************************************/
+ acpi_status
+ acpi_get_table_header(char *signature,
+ u32 instance, struct acpi_table_header *out_table_header)
+ {
+ u32 i;
+ u32 j;
+ struct acpi_table_header *header;
+
+ /* Parameter validation */
+
+ if (!signature || !out_table_header) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ /*
+ * Walk the root table list
+ */
+ for (i = 0, j = 0; i < acpi_gbl_root_table_list.count; i++) {
+ if (!ACPI_COMPARE_NAME
+ (&(acpi_gbl_root_table_list.tables[i].signature),
+ signature)) {
+ continue;
+ }
+
+ if (++j < instance) {
+ continue;
+ }
+
+ if (!acpi_gbl_root_table_list.tables[i].pointer) {
+ if ((acpi_gbl_root_table_list.tables[i].
+ flags & ACPI_TABLE_ORIGIN_MASK) ==
+ ACPI_TABLE_ORIGIN_MAPPED) {
+ header =
+ acpi_os_map_memory(acpi_gbl_root_table_list.
+ tables[i].address,
+ sizeof(struct
+ acpi_table_header));
+ if (!header) {
+ return AE_NO_MEMORY;
+ }
+ ACPI_MEMCPY(out_table_header, header,
+ sizeof(struct acpi_table_header));
+ acpi_os_unmap_memory(header,
+ sizeof(struct
+ acpi_table_header));
+ } else {
+ return AE_NOT_FOUND;
+ }
+ } else {
+ ACPI_MEMCPY(out_table_header,
+ acpi_gbl_root_table_list.tables[i].pointer,
+ sizeof(struct acpi_table_header));
+ }
+ return (AE_OK);
+ }
+
+ return (AE_NOT_FOUND);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_get_table_header)
+
+ /******************************************************************************
+ *
+ * FUNCTION: acpi_unload_table_id
+ *
+ * PARAMETERS: id - Owner ID of the table to be removed.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This routine is used to force the unload of a table (by id)
+ *
+ ******************************************************************************/
+ acpi_status acpi_unload_table_id(acpi_owner_id id)
+ {
+ int i;
+ acpi_status status = AE_NOT_EXIST;
+
+ ACPI_FUNCTION_TRACE(acpi_unload_table_id);
+
+ /* Find table in the global table list */
+ for (i = 0; i < acpi_gbl_root_table_list.count; ++i) {
+ if (id != acpi_gbl_root_table_list.tables[i].owner_id) {
+ continue;
+ }
+ /*
+ * Delete all namespace objects owned by this table. Note that these
+ * objects can appear anywhere in the namespace by virtue of the AML
+ * "Scope" operator. Thus, we need to track ownership by an ID, not
+ * simply a position within the hierarchy
+ */
+ acpi_tb_delete_namespace_by_owner(i);
+ status = acpi_tb_release_owner_id(i);
+ acpi_tb_set_table_loaded_flag(i, FALSE);
+ break;
+ }
+ return_ACPI_STATUS(status);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_unload_table_id)
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_get_table
+ *
+ * PARAMETERS: Signature - ACPI signature of needed table
+ * Instance - Which instance (for SSDTs)
+ * out_table - Where the pointer to the table is returned
+ *
+ * RETURN: Status and pointer to table
+ *
+ * DESCRIPTION: Finds and verifies an ACPI table.
+ *
+ *****************************************************************************/
+ acpi_status
+ acpi_get_table(char *signature,
+ u32 instance, struct acpi_table_header **out_table)
+ {
+ u32 i;
+ u32 j;
+ acpi_status status;
+
+ /* Parameter validation */
+
+ if (!signature || !out_table) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ /*
+ * Walk the root table list
+ */
+ for (i = 0, j = 0; i < acpi_gbl_root_table_list.count; i++) {
+ if (!ACPI_COMPARE_NAME
+ (&(acpi_gbl_root_table_list.tables[i].signature),
+ signature)) {
+ continue;
+ }
+
+ if (++j < instance) {
+ continue;
+ }
+
+ status =
+ acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]);
+ if (ACPI_SUCCESS(status)) {
+ *out_table = acpi_gbl_root_table_list.tables[i].pointer;
+ }
+
+ if (!acpi_gbl_permanent_mmap) {
+ acpi_gbl_root_table_list.tables[i].pointer = NULL;
+ }
+
+ return (status);
+ }
+
+ return (AE_NOT_FOUND);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_get_table)
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_get_table_by_index
+ *
+ * PARAMETERS: table_index - Table index
+ * Table - Where the pointer to the table is returned
+ *
+ * RETURN: Status and pointer to the table
+ *
+ * DESCRIPTION: Obtain a table by an index into the global table list.
+ *
+ ******************************************************************************/
+ acpi_status
+ acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table)
+ {
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(acpi_get_table_by_index);
+
+ /* Parameter validation */
+
+ if (!table) {
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+ /* Validate index */
+
+ if (table_index >= acpi_gbl_root_table_list.count) {
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ if (!acpi_gbl_root_table_list.tables[table_index].pointer) {
+
+ /* Table is not mapped, map it */
+
+ status =
+ acpi_tb_verify_table(&acpi_gbl_root_table_list.
+ tables[table_index]);
+ if (ACPI_FAILURE(status)) {
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(status);
+ }
+ }
+
+ *table = acpi_gbl_root_table_list.tables[table_index].pointer;
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_get_table_by_index)
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_tb_load_namespace
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Load the namespace from the DSDT and all SSDTs/PSDTs found in
+ * the RSDT/XSDT.
+ *
+ ******************************************************************************/
-static acpi_status acpi_tb_load_namespace(void)
++static acpi_status __init acpi_tb_load_namespace(void)
+ {
+ acpi_status status;
+ struct acpi_table_header *table;
+ u32 i;
+
+ ACPI_FUNCTION_TRACE(tb_load_namespace);
+
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+ /*
+ * Load the namespace. The DSDT is required, but any SSDT and PSDT tables
+ * are optional.
+ */
+ if (!acpi_gbl_root_table_list.count ||
+ !ACPI_COMPARE_NAME(&
+ (acpi_gbl_root_table_list.
+ tables[ACPI_TABLE_INDEX_DSDT].signature),
+ ACPI_SIG_DSDT)
+ ||
+ ACPI_FAILURE(acpi_tb_verify_table
+ (&acpi_gbl_root_table_list.
+ tables[ACPI_TABLE_INDEX_DSDT]))) {
+ status = AE_NO_ACPI_TABLES;
+ goto unlock_and_exit;
+ }
+
+ /*
+ * Allow the host OS to override the DSDT
+ */
+ status =
+ acpi_os_table_override(acpi_gbl_root_table_list.
+ tables[ACPI_TABLE_INDEX_DSDT].pointer,
+ &table);
+ if (ACPI_SUCCESS(status) && table) {
+ /*
+ * A replacement DSDT has been provided by the host
+ */
+ acpi_tb_delete_table(&acpi_gbl_root_table_list.
+ tables[ACPI_TABLE_INDEX_DSDT]);
+ acpi_gbl_root_table_list.tables[ACPI_TABLE_INDEX_DSDT].pointer =
+ table;
+ acpi_gbl_root_table_list.tables[ACPI_TABLE_INDEX_DSDT].length =
+ table->length;
+ acpi_gbl_root_table_list.tables[ACPI_TABLE_INDEX_DSDT].flags =
+ ACPI_TABLE_ORIGIN_UNKNOWN;
+
+ ACPI_INFO((AE_INFO, "Table DSDT replaced by host OS"));
+ acpi_tb_print_table_header(0, table);
+
+ if (no_auto_ssdt == 0) {
+ printk(KERN_WARNING "ACPI: DSDT override uses original SSDTs unless \"acpi_no_auto_ssdt\"\n");
+ }
+ }
+
+ status =
+ acpi_tb_verify_table(&acpi_gbl_root_table_list.
+ tables[ACPI_TABLE_INDEX_DSDT]);
+ if (ACPI_FAILURE(status)) {
+
+ /* A valid DSDT is required */
+
+ status = AE_NO_ACPI_TABLES;
+ goto unlock_and_exit;
+ }
+
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+
+ /*
+ * Load and parse tables.
+ */
+ status = acpi_ns_load_table(ACPI_TABLE_INDEX_DSDT, acpi_gbl_root_node);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /*
+ * Load any SSDT or PSDT tables. Note: Loop leaves tables locked
+ */
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+ for (i = 0; i < acpi_gbl_root_table_list.count; ++i) {
+ if ((!ACPI_COMPARE_NAME
+ (&(acpi_gbl_root_table_list.tables[i].signature),
+ ACPI_SIG_SSDT)
+ &&
+ !ACPI_COMPARE_NAME(&
+ (acpi_gbl_root_table_list.tables[i].
+ signature), ACPI_SIG_PSDT))
+ ||
+ ACPI_FAILURE(acpi_tb_verify_table
+ (&acpi_gbl_root_table_list.tables[i]))) {
+ continue;
+ }
+
+ if (no_auto_ssdt) {
+ printk(KERN_WARNING "ACPI: SSDT ignored due to \"acpi_no_auto_ssdt\"\n");
+ continue;
+ }
+
+ /* Ignore errors while loading tables, get as many as possible */
+
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ (void)acpi_ns_load_table(i, acpi_gbl_root_node);
+ (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+ }
+
+ ACPI_DEBUG_PRINT((ACPI_DB_INIT, "ACPI Tables successfully acquired\n"));
+
+ unlock_and_exit:
+ (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_load_tables
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Load the ACPI tables from the RSDT/XSDT
+ *
+ ******************************************************************************/
+
-acpi_status acpi_load_tables(void)
++acpi_status __init acpi_load_tables(void)
+ {
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(acpi_load_tables);
+
+ /*
+ * Load the namespace from the tables
+ */
+ status = acpi_tb_load_namespace();
+ if (ACPI_FAILURE(status)) {
+ ACPI_EXCEPTION((AE_INFO, status,
+ "While loading namespace from ACPI tables"));
+ }
+
+ return_ACPI_STATUS(status);
+ }
+
-ACPI_EXPORT_SYMBOL(acpi_load_tables)
-
-
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_install_table_handler
+ *
+ * PARAMETERS: Handler - Table event handler
+ * Context - Value passed to the handler on each event
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Install table event handler
+ *
+ ******************************************************************************/
+ acpi_status
+ acpi_install_table_handler(acpi_tbl_handler handler, void *context)
+ {
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(acpi_install_table_handler);
+
+ if (!handler) {
+ return_ACPI_STATUS(AE_BAD_PARAMETER);
+ }
+
+ status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Don't allow more than one handler */
+
+ if (acpi_gbl_table_handler) {
+ status = AE_ALREADY_EXISTS;
+ goto cleanup;
+ }
+
+ /* Install the handler */
+
+ acpi_gbl_table_handler = handler;
+ acpi_gbl_table_handler_context = context;
+
+ cleanup:
+ (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
+ return_ACPI_STATUS(status);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_install_table_handler)
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_remove_table_handler
+ *
+ * PARAMETERS: Handler - Table event handler that was installed
+ * previously.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Remove table event handler
+ *
+ ******************************************************************************/
+ acpi_status acpi_remove_table_handler(acpi_tbl_handler handler)
+ {
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(acpi_remove_table_handler);
+
+ status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Make sure that the installed handler is the same */
+
+ if (!handler || handler != acpi_gbl_table_handler) {
+ status = AE_BAD_PARAMETER;
+ goto cleanup;
+ }
+
+ /* Remove the handler */
+
+ acpi_gbl_table_handler = NULL;
+
+ cleanup:
+ (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
+ return_ACPI_STATUS(status);
+ }
+
+ ACPI_EXPORT_SYMBOL(acpi_remove_table_handler)
+
+
+ static int __init acpi_no_auto_ssdt_setup(char *s)
+ {
+ 	printk(KERN_NOTICE "ACPI: SSDT auto-load disabled\n");
+ 	no_auto_ssdt = 1;
+ 	return 1;
+ }
+
+ __setup("acpi_no_auto_ssdt", acpi_no_auto_ssdt_setup);
--- /dev/null
+ /******************************************************************************
+ *
+ * Module Name: uteval - Object evaluation
+ *
+ *****************************************************************************/
+
+ /*
+ * Copyright (C) 2000 - 2008, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+ #include <acpi/acpi.h>
+ #include "accommon.h"
+ #include "acnamesp.h"
+ #include "acinterp.h"
+
+ #define _COMPONENT ACPI_UTILITIES
+ ACPI_MODULE_NAME("uteval")
+
+ /* Local prototypes */
+ static void
+ acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length);
+
+ static acpi_status
+ acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
+ struct acpi_compatible_id *one_cid);
+
+ /*
+ * Strings supported by the _OSI predefined (internal) method.
+ */
+ static char *acpi_interfaces_supported[] = {
+ /* Operating System Vendor Strings */
+
+ "Windows 2000", /* Windows 2000 */
+ "Windows 2001", /* Windows XP */
+ "Windows 2001 SP1", /* Windows XP SP1 */
+ "Windows 2001 SP2", /* Windows XP SP2 */
+ "Windows 2001.1", /* Windows Server 2003 */
+ "Windows 2001.1 SP1", /* Windows Server 2003 SP1 - Added 03/2006 */
+ "Windows 2006", /* Windows Vista - Added 03/2006 */
++ "SLE11", /* SLE11 identification */
+
+ /* Feature Group Strings */
+
+ "Extended Address Space Descriptor"
+ /*
+ * All "optional" feature group strings (features that are implemented
+ * by the host) should be implemented in the host version of
+ * acpi_os_validate_interface and should not be added here.
+ */
+ };
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_osi_implementation
+ *
+ * PARAMETERS: walk_state - Current walk state
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Implementation of the _OSI predefined control method
+ *
+ ******************************************************************************/
+
+ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
+ {
+ acpi_status status;
+ union acpi_operand_object *string_desc;
+ union acpi_operand_object *return_desc;
+ u32 i;
+
+ ACPI_FUNCTION_TRACE(ut_osi_implementation);
+
+ /* Validate the string input argument */
+
+ string_desc = walk_state->arguments[0].object;
+ if (!string_desc || (string_desc->common.type != ACPI_TYPE_STRING)) {
+ return_ACPI_STATUS(AE_TYPE);
+ }
+
+ /* Create a return object */
+
+ return_desc = acpi_ut_create_internal_object(ACPI_TYPE_INTEGER);
+ if (!return_desc) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ /* Default return value is SUPPORTED */
+
+ return_desc->integer.value = ACPI_UINT32_MAX;
+ walk_state->return_desc = return_desc;
+
+ /* Compare input string to static table of supported interfaces */
+
+ for (i = 0; i < ACPI_ARRAY_LENGTH(acpi_interfaces_supported); i++) {
+ if (!ACPI_STRCMP
+ (string_desc->string.pointer,
+ acpi_interfaces_supported[i])) {
+
+ /* The interface is supported */
+
+ return_ACPI_STATUS(AE_OK);
+ }
+ }
+
+ /*
+ * Did not match the string in the static table, call the host OSL to
+ * check for a match with one of the optional strings (such as
+ * "Module Device", "3.0 Thermal Model", etc.)
+ */
+ status = acpi_os_validate_interface(string_desc->string.pointer);
+ if (ACPI_SUCCESS(status)) {
+
+ /* The interface is supported */
+
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ /* The interface is not supported */
+
+ return_desc->integer.value = 0;
+ return_ACPI_STATUS(AE_OK);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_osi_invalidate
+ *
+ * PARAMETERS: interface - Name of the interface to invalidate
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Invalidate a string in the pre-defined _OSI string list
+ *
+ ******************************************************************************/
+
+ acpi_status acpi_osi_invalidate(char *interface)
+ {
+ int i;
+
+ for (i = 0; i < ACPI_ARRAY_LENGTH(acpi_interfaces_supported); i++) {
+ if (!ACPI_STRCMP(interface, acpi_interfaces_supported[i])) {
+ *acpi_interfaces_supported[i] = '\0';
+ return AE_OK;
+ }
+ }
+ return AE_NOT_FOUND;
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_evaluate_object
+ *
+ * PARAMETERS: prefix_node - Starting node
+ * Path - Path to object from starting node
+ * expected_return_btypes - Bitmap of allowed return types
+ * return_desc - Where a return value is stored
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Evaluates a namespace object and verifies the type of the
+ * return object. Common code that simplifies accessing objects
+ * that have required return objects of fixed types.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
+ char *path,
+ u32 expected_return_btypes,
+ union acpi_operand_object **return_desc)
+ {
+ struct acpi_evaluate_info *info;
+ acpi_status status;
+ u32 return_btype;
+
+ ACPI_FUNCTION_TRACE(ut_evaluate_object);
+
+ /* Allocate the evaluation information block */
+
+ info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
+ if (!info) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ info->prefix_node = prefix_node;
+ info->pathname = path;
+
+ /* Evaluate the object/method */
+
+ status = acpi_ns_evaluate(info);
+ if (ACPI_FAILURE(status)) {
+ if (status == AE_NOT_FOUND) {
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "[%4.4s.%s] was not found\n",
+ acpi_ut_get_node_name(prefix_node),
+ path));
+ } else {
+ ACPI_ERROR_METHOD("Method execution failed",
+ prefix_node, path, status);
+ }
+
+ goto cleanup;
+ }
+
+ /* Did we get a return object? */
+
+ if (!info->return_object) {
+ if (expected_return_btypes) {
+ ACPI_ERROR_METHOD("No object was returned from",
+ prefix_node, path, AE_NOT_EXIST);
+
+ status = AE_NOT_EXIST;
+ }
+
+ goto cleanup;
+ }
+
+ /* Map the return object type to the bitmapped type */
+
+ switch (ACPI_GET_OBJECT_TYPE(info->return_object)) {
+ case ACPI_TYPE_INTEGER:
+ return_btype = ACPI_BTYPE_INTEGER;
+ break;
+
+ case ACPI_TYPE_BUFFER:
+ return_btype = ACPI_BTYPE_BUFFER;
+ break;
+
+ case ACPI_TYPE_STRING:
+ return_btype = ACPI_BTYPE_STRING;
+ break;
+
+ case ACPI_TYPE_PACKAGE:
+ return_btype = ACPI_BTYPE_PACKAGE;
+ break;
+
+ default:
+ return_btype = 0;
+ break;
+ }
+
+ if ((acpi_gbl_enable_interpreter_slack) && (!expected_return_btypes)) {
+ /*
+ * We received a return object, but one was not expected. This can
+ * happen frequently if the "implicit return" feature is enabled.
+ * Just delete the return object and return AE_OK.
+ */
+ acpi_ut_remove_reference(info->return_object);
+ goto cleanup;
+ }
+
+ /* Is the return object one of the expected types? */
+
+ if (!(expected_return_btypes & return_btype)) {
+ ACPI_ERROR_METHOD("Return object type is incorrect",
+ prefix_node, path, AE_TYPE);
+
+ ACPI_ERROR((AE_INFO,
+ "Type returned from %s was incorrect: %s, expected Btypes: %X",
+ path,
+ acpi_ut_get_object_type_name(info->return_object),
+ expected_return_btypes));
+
+ /* On error exit, we must delete the return object */
+
+ acpi_ut_remove_reference(info->return_object);
+ status = AE_TYPE;
+ goto cleanup;
+ }
+
+ /* Object type is OK, return it */
+
+ *return_desc = info->return_object;
+
+ cleanup:
+ ACPI_FREE(info);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_evaluate_numeric_object
+ *
+ * PARAMETERS: object_name - Object name to be evaluated
+ * device_node - Node for the device
+ * Address - Where the value is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Evaluates a numeric namespace object for a selected device
+ * and stores result in *Address.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_evaluate_numeric_object(char *object_name,
+ struct acpi_namespace_node *device_node,
+ acpi_integer * address)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(ut_evaluate_numeric_object);
+
+ status = acpi_ut_evaluate_object(device_node, object_name,
+ ACPI_BTYPE_INTEGER, &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Get the returned Integer */
+
+ *address = obj_desc->integer.value;
+
+ /* On exit, we must delete the return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_copy_id_string
+ *
+ * PARAMETERS: Destination - Where to copy the string
+ * Source - Source string
+ * max_length - Length of the destination buffer
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods.
+ * Performs removal of a leading asterisk if present -- workaround
+ * for a known issue on a bunch of machines.
+ *
+ ******************************************************************************/
+
+ static void
+ acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length)
+ {
+
+ /*
+ * Workaround for ID strings that have a leading asterisk. This construct
+ * is not allowed by the ACPI specification (ID strings must be
+ * alphanumeric), but enough existing machines have this embedded in their
+ * ID strings that the following code is useful.
+ */
+ if (*source == '*') {
+ source++;
+ }
+
+ /* Do the actual copy */
+
+ ACPI_STRNCPY(destination, source, max_length);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_execute_HID
+ *
+ * PARAMETERS: device_node - Node for the device
+ * Hid - Where the HID is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Executes the _HID control method that returns the hardware
+ * ID of the device.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
+ struct acpica_device_id *hid)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(ut_execute_HID);
+
+ status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID,
+ ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
+ &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) {
+
+ /* Convert the Numeric HID to string */
+
+ acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value,
+ hid->value);
+ } else {
+ /* Copy the String HID from the returned object */
+
+ acpi_ut_copy_id_string(hid->value, obj_desc->string.pointer,
+ sizeof(hid->value));
+ }
+
+ /* On exit, we must delete the return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_translate_one_cid
+ *
+ * PARAMETERS: obj_desc - _CID object, must be integer or string
+ * one_cid - Where the CID string is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Return a numeric or string _CID value as a string.
+ * (Compatible ID)
+ *
+ * NOTE: Assumes a maximum _CID string length of
+ * ACPI_MAX_CID_LENGTH.
+ *
+ ******************************************************************************/
+
+ static acpi_status
+ acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
+ struct acpi_compatible_id *one_cid)
+ {
+
+ switch (ACPI_GET_OBJECT_TYPE(obj_desc)) {
+ case ACPI_TYPE_INTEGER:
+
+ /* Convert the Numeric CID to string */
+
+ acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value,
+ one_cid->value);
+ return (AE_OK);
+
+ case ACPI_TYPE_STRING:
+
+ if (obj_desc->string.length > ACPI_MAX_CID_LENGTH) {
+ return (AE_AML_STRING_LIMIT);
+ }
+
+ /* Copy the String CID from the returned object */
+
+ acpi_ut_copy_id_string(one_cid->value, obj_desc->string.pointer,
+ ACPI_MAX_CID_LENGTH);
+ return (AE_OK);
+
+ default:
+
+ return (AE_TYPE);
+ }
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_execute_CID
+ *
+ * PARAMETERS: device_node - Node for the device
+ * return_cid_list - Where the CID list is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Executes the _CID control method that returns one or more
+ * compatible hardware IDs for the device.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_execute_CID(struct acpi_namespace_node * device_node,
+ struct acpi_compatible_id_list ** return_cid_list)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+ u32 count;
+ u32 size;
+ struct acpi_compatible_id_list *cid_list;
+ u32 i;
+
+ ACPI_FUNCTION_TRACE(ut_execute_CID);
+
+ /* Evaluate the _CID method for this device */
+
+ status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID,
+ ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING
+ | ACPI_BTYPE_PACKAGE, &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Get the number of _CIDs returned */
+
+ count = 1;
+ if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_PACKAGE) {
+ count = obj_desc->package.count;
+ }
+
+ /* Allocate a worst-case buffer for the _CIDs */
+
+ size = (((count - 1) * sizeof(struct acpi_compatible_id)) +
+ sizeof(struct acpi_compatible_id_list));
+
+ cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size);
+ if (!cid_list) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ /* Init CID list */
+
+ cid_list->count = count;
+ cid_list->size = size;
+
+ /*
+ * A _CID can return either a single compatible ID or a package of
+ * compatible IDs. Each compatible ID can be one of the following:
+ * 1) Integer (32 bit compressed EISA ID) or
+ * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss")
+ */
+
+ /* The _CID object can be either a single CID or a package (list) of CIDs */
+
+ if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_PACKAGE) {
+
+ /* Translate each package element */
+
+ for (i = 0; i < count; i++) {
+ status =
+ acpi_ut_translate_one_cid(obj_desc->package.
+ elements[i],
+ &cid_list->id[i]);
+ if (ACPI_FAILURE(status)) {
+ break;
+ }
+ }
+ } else {
+ /* Only one CID, translate to a string */
+
+ status = acpi_ut_translate_one_cid(obj_desc, cid_list->id);
+ }
+
+ /* Cleanup on error */
+
+ if (ACPI_FAILURE(status)) {
+ ACPI_FREE(cid_list);
+ } else {
+ *return_cid_list = cid_list;
+ }
+
+ /* On exit, we must delete the _CID return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ return_ACPI_STATUS(status);
+ }
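The integer form of a _HID/_CID is a 32-bit compressed EISA ID, which acpi_ex_eisa_id_to_string() expands into the familiar seven-character form such as "PNP0C0A". The standalone sketch below illustrates that decoding (three 5-bit fields for the vendor letters, four hex nibbles for the rest); it is an illustration of the format, not the kernel routine.

    #include <stdio.h>

    /* Decode a compressed EISA ID as stored in AML (little-endian DWORD) */
    static void eisa_id_to_string(unsigned int id, char out[8])
    {
        static const char hex[] = "0123456789ABCDEF";
        /* the ID is defined big-endian; swap the bytes first */
        unsigned int v = ((id & 0xffu) << 24) | ((id & 0xff00u) << 8) |
                         ((id >> 8) & 0xff00u) | (id >> 24);

        out[0] = (char)('@' + ((v >> 26) & 0x1f));  /* three 5-bit letters */
        out[1] = (char)('@' + ((v >> 21) & 0x1f));
        out[2] = (char)('@' + ((v >> 16) & 0x1f));
        out[3] = hex[(v >> 12) & 0xf];              /* four hex digits */
        out[4] = hex[(v >> 8) & 0xf];
        out[5] = hex[(v >> 4) & 0xf];
        out[6] = hex[v & 0xf];
        out[7] = '\0';
    }

    int main(void)
    {
        char buf[8];

        eisa_id_to_string(0x0a0cd041, buf);  /* control method battery */
        printf("%s\n", buf);                 /* prints PNP0C0A */
        return 0;
    }
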
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_execute_UID
+ *
+ * PARAMETERS: device_node - Node for the device
+ * Uid - Where the UID is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Executes the _UID control method that returns the unique
+ * ID of the device.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
+ struct acpica_device_id *uid)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(ut_execute_UID);
+
+ status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID,
+ ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
+ &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) {
+
+ /* Convert the Numeric UID to string */
+
+ acpi_ex_unsigned_integer_to_string(obj_desc->integer.value,
+ uid->value);
+ } else {
+ /* Copy the String UID from the returned object */
+
+ acpi_ut_copy_id_string(uid->value, obj_desc->string.pointer,
+ sizeof(uid->value));
+ }
+
+ /* On exit, we must delete the return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_execute_STA
+ *
+ * PARAMETERS: device_node - Node for the device
+ * Flags - Where the status flags are returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Executes _STA for selected device and stores results in
+ * *Flags.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 * flags)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(ut_execute_STA);
+
+ status = acpi_ut_evaluate_object(device_node, METHOD_NAME__STA,
+ ACPI_BTYPE_INTEGER, &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ if (AE_NOT_FOUND == status) {
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "_STA on %4.4s was not found, assuming device is present\n",
+ acpi_ut_get_node_name(device_node)));
+
+ *flags = ACPI_UINT32_MAX;
+ status = AE_OK;
+ }
+
+ return_ACPI_STATUS(status);
+ }
+
+ /* Extract the status flags */
+
+ *flags = (u32) obj_desc->integer.value;
+
+ /* On exit, we must delete the return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ return_ACPI_STATUS(status);
+ }
+
+ /*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_execute_Sxds
+ *
+ * PARAMETERS: device_node - Node for the device
+ * Highest - Where the highest D-state values (one per
+ * sleep state S1-S4) are returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Executes the _S1D through _S4D methods for the selected
+ * device and stores the results in *Highest.
+ *
+ * NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+ acpi_status
+ acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest)
+ {
+ union acpi_operand_object *obj_desc;
+ acpi_status status;
+ u32 i;
+
+ ACPI_FUNCTION_TRACE(ut_execute_sxds);
+
+ for (i = 0; i < 4; i++) {
+ highest[i] = 0xFF;
+ status = acpi_ut_evaluate_object(device_node,
+ ACPI_CAST_PTR(char,
+ acpi_gbl_highest_dstate_names
+ [i]),
+ ACPI_BTYPE_INTEGER, &obj_desc);
+ if (ACPI_FAILURE(status)) {
+ if (status != AE_NOT_FOUND) {
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "%s on Device %4.4s, %s\n",
+ ACPI_CAST_PTR(char,
+ acpi_gbl_highest_dstate_names
+ [i]),
+ acpi_ut_get_node_name
+ (device_node),
+ acpi_format_exception
+ (status)));
+
+ return_ACPI_STATUS(status);
+ }
+ } else {
+ /* Extract the Dstate value */
+
+ highest[i] = (u8) obj_desc->integer.value;
+
+ /* Delete the return object */
+
+ acpi_ut_remove_reference(obj_desc);
+ }
+ }
+
+ return_ACPI_STATUS(AE_OK);
+ }
spinlock_t curr_lock;
} *boot_ec, *first_ec;
+int acpi_ec_intr = 1; /* Default is interrupt mode */
+
- /*
- * Some Asus system have exchanged ECDT data/command IO addresses.
- */
- static int print_ecdt_error(const struct dmi_system_id *id)
- {
- printk(KERN_NOTICE PREFIX "%s detected - "
- "ECDT has exchanged control/data I/O address\n",
- id->ident);
- return 0;
- }
-
- static struct dmi_system_id __cpuinitdata ec_dmi_table[] = {
- {
- print_ecdt_error, "Asus L4R", {
- DMI_MATCH(DMI_BIOS_VERSION, "1008.006"),
- DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),
- DMI_MATCH(DMI_BOARD_NAME, "L4R") }, NULL},
- {
- print_ecdt_error, "Asus M6R", {
- DMI_MATCH(DMI_BIOS_VERSION, "0207"),
- DMI_MATCH(DMI_PRODUCT_NAME, "M6R"),
- DMI_MATCH(DMI_BOARD_NAME, "M6R") }, NULL},
- {},
- };
-
/* --------------------------------------------------------------------------
Transaction Management
-------------------------------------------------------------------------- */
&acpi_ec_gpe_handler, ec);
if (ACPI_FAILURE(status))
return -ENODEV;
+ if (!acpi_ec_intr)
+ set_bit(EC_FLAGS_NO_GPE, &ec->flags);
acpi_set_gpe_type(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
- acpi_enable_gpe(NULL, ec->gpe, ACPI_NOT_ISR);
+ acpi_enable_gpe(NULL, ec->gpe);
status = acpi_install_address_space_handler(ec->handle,
ACPI_ADR_SPACE_EC,
&acpi_ec_space_handler,
#define OSI_STRING_LENGTH_MAX 64 /* arbitrary */
static char osi_additional_string[OSI_STRING_LENGTH_MAX];
+#ifdef CONFIG_ACPI_CUSTOM_DSDT_INITRD
+static int acpi_no_initrd_override;
+extern struct acpi_table_header *acpi_find_dsdt_initrd(void);
+#endif
+
/*
- * "Ode to _OSI(Linux)"
+ * The story of _OSI(Linux)
*
- * osi_linux -- Control response to BIOS _OSI(Linux) query.
+ * From pre-history through Linux-2.6.22,
+ * Linux responded TRUE upon a BIOS OSI(Linux) query.
*
- * As Linux evolves, the features that it supports change.
- * So an OSI string such as "Linux" is not specific enough
- * to be useful across multiple versions of Linux. It
- * doesn't identify any particular feature, interface,
- * or even any particular version of Linux...
+ * Unfortunately, reference BIOS writers got wind of this
+ * and put OSI(Linux) in their example code, quickly exposing
+ * this string as ill-conceived and opening the door to
+ * an unbounded number of BIOS incompatibilities.
*
- * Unfortunately, Linux-2.6.22 and earlier responded "yes"
- * to a BIOS _OSI(Linux) query. When
- * a reference mobile BIOS started using it, its use
- * started to spread to many vendor platforms.
- * As it is not supportable, we need to halt that spread.
+ * For example, OSI(Linux) was used on resume to re-POST a
+ * video card on one system, because Linux at that time
+ * could not do a speedy restore in its native driver.
+ * But then upon gaining quick native restore capability,
+ * Linux has no way to tell the BIOS to skip the time-consuming
+ * POST -- putting Linux at a permanent performance disadvantage.
+ * On another system, the BIOS writer used OSI(Linux)
+ * to infer native OS support for IPMI! On other systems,
+ * OSI(Linux) simply got in the way of Linux claiming to
+ * be compatible with other operating systems, exposing
+ * BIOS issues such as skipped device initialization.
*
- * Today, most BIOS references to _OSI(Linux) are noise --
- * they have no functional effect and are just dead code
- * carried over from the reference BIOS.
- *
- * The next most common case is that _OSI(Linux) harms Linux,
- * usually by causing the BIOS to follow paths that are
- * not tested during Windows validation.
- *
- * Finally, there is a short list of platforms
- * where OSI(Linux) benefits Linux.
- *
- * In Linux-2.6.23, OSI(Linux) is first disabled by default.
- * DMI is used to disable the dmesg warning about OSI(Linux)
- * on platforms where it is known to have no effect.
- * But a dmesg warning remains for systems where
- * we do not know if OSI(Linux) is good or bad for the system.
- * DMI is also used to enable OSI(Linux) for the machines
- * that are known to need it.
+ * So "Linux" turned out to be a really poor chose of
+ * OSI string, and from Linux-2.6.23 onward we respond FALSE.
*
* BIOS writers should NOT query _OSI(Linux) on future systems.
- * It will be ignored by default, and to get Linux to
- * not ignore it will require a kernel source update to
- * add a DMI entry, or a boot-time "acpi_osi=Linux" invocation.
+ * Linux will complain on the console when it sees it, and return FALSE.
+ * To get Linux to return TRUE for your system will require
+ * a kernel source update to add a DMI entry,
+ * or boot with "acpi_osi=Linux"
*/
- #define OSI_LINUX_ENABLE 0
static struct osi_linux {
unsigned int enable:1;
int __init acpi_table_init(void)
{
- if (dmi_check_system(acpi_rsdt_dmi_table))
- acpi_gbl_force_rsdt = 1;
- if (acpi_gbl_force_rsdt)
++ if (acpi_rsdt_forced)
+ printk(KERN_INFO "Using RSDT as ACPI root table\n");
+
acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
check_multiple_madt();
return 0;
}
pci_set_master(pdev);
- return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht);
+ return ata_pci_sff_activate_host(host, piix_interrupt, &piix_sht);
}
+ static void piix_remove_one(struct pci_dev *pdev)
+ {
+ struct ata_host *host = dev_get_drvdata(&pdev->dev);
+ struct piix_host_priv *hpriv = host->private_data;
+
+ pci_write_config_dword(pdev, PIIX_IOCFG, hpriv->saved_iocfg);
+
+ ata_pci_remove_one(pdev);
+ }
+
static int __init piix_init(void)
{
int rc;
return 0;
}
+ #ifdef CONFIG_SBUS
+ static int __devinit fore200e_sba_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ const struct fore200e_bus *bus = match->data;
+ struct fore200e *fore200e;
+ static int index = 0;
+ int err;
+
+ fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
+ if (!fore200e)
+ return -ENOMEM;
+
+ fore200e->bus = bus;
+ fore200e->bus_dev = op;
+ fore200e->irq = op->irqs[0];
+ fore200e->phys_base = op->resource[0].start;
+
+ sprintf(fore200e->name, "%s-%d", bus->model_name, index);
+
+ err = fore200e_init(fore200e);
+ if (err < 0) {
+ fore200e_shutdown(fore200e);
+ kfree(fore200e);
+ return err;
+ }
+
+ index++;
+ dev_set_drvdata(&op->dev, fore200e);
+
+ return 0;
+ }
+
+ static int __devexit fore200e_sba_remove(struct of_device *op)
+ {
+ struct fore200e *fore200e = dev_get_drvdata(&op->dev);
+
+ fore200e_shutdown(fore200e);
+ kfree(fore200e);
+
+ return 0;
+ }
+
+ static const struct of_device_id fore200e_sba_match[] = {
+ {
+ .name = SBA200E_PROM_NAME,
+ .data = (void *) &fore200e_bus[1],
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, fore200e_sba_match);
+
+ static struct of_platform_driver fore200e_sba_driver = {
++ .owner = THIS_MODULE,
+ .name = "fore_200e",
+ .match_table = fore200e_sba_match,
+ .probe = fore200e_sba_probe,
+ .remove = __devexit_p(fore200e_sba_remove),
+ };
+ #endif
+
#ifdef CONFIG_PCI
static int __devinit
fore200e_pca_detect(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o
- obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o
+ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_CIPHER_TWOFISH) += loop_fish2.o
pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
if (bio_rw(bio) == WRITE)
- ret = lo_send(lo, bio, lo->lo_blocksize, pos);
+ ret = lo_send(lo, bio, pos);
else
ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
+
+ if ((barrier || sync) && !ret)
+ ret = sync_file(lo->lo_backing_file, 1);
+
return ret;
}
return put_user(tty->ldisc.ops->num, (int __user *)p);
case TIOCSETD:
return tiocsetd(tty, p);
- #ifdef CONFIG_VT
- case TIOCLINUX:
- return tioclinux(tty, arg);
- #endif
/*
+ * Without the real device to which /dev/console is connected,
+ * blogd cannot work. blogd spawns a pty/tty pair, sets
+ * /dev/console to the tty of that pair (ioctl TIOCCONS),
+ * then reads all input from the current /dev/console and
+ * buffers or writes the data read to /var/log/boot.msg
+ * _and_ to the original real device.
+ */
+ case TIOCGDEV:
+ {
+ unsigned int ret = new_encode_dev(tty_devnum(real_tty));
+ return put_user(ret, (unsigned int __user *)p);
+ }
+
+ /*
* Break handling
*/
case TIOCSBRK: /* Turn break on, unconditionally */
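As an illustration of the TIOCGDEV case added above, a logger such as blogd can recover the device number of the real console roughly as follows. This is a minimal userspace sketch; the TIOCGDEV value used here is an assumption and should come from the patched <asm/ioctls.h>.

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/sysmacros.h>

    #ifndef TIOCGDEV
    #define TIOCGDEV 0x5432   /* assumed; take the real value from the headers */
    #endif

    int main(void)
    {
        unsigned int dev;
        int fd = open("/dev/console", O_RDONLY);

        if (fd < 0)
            return 1;
        if (ioctl(fd, TIOCGDEV, &dev) == 0)   /* new_encode_dev() format */
            printf("console is on %u:%u\n", major(dev), minor(dev));
        close(fd);
        return 0;
    }
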
};
static struct of_platform_driver of_fsl_dma_driver = {
+ .owner = THIS_MODULE,
- .name = "of-fsl-dma",
+ .name = "fsl-elo-dma",
.match_table = of_fsl_dma_ids,
.probe = of_fsl_dma_probe,
+ .remove = of_fsl_dma_remove,
};
static __init int of_fsl_dma_init(void)
Choose this option if you have a Via unichrome or compatible video
chipset. If M is selected the module will be called via.
+config DRM_VIA_CHROME9
+ tristate "Via chrome9 video cards"
- depends on DRM
++ depends on DRM && X86
+ help
+ Choose this option if you have a Via chrome9 or compatible video
+ chipset. If M is selected the module will be called via_chrome9.
+
config DRM_SAVAGE
tristate "Savage video cards"
depends on DRM
static struct drm_driver driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_HAVE_IRQ |
- DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL,
+ DRIVER_IRQ_SHARED,
+ .open = via_driver_open,
.load = via_driver_load,
.unload = via_driver_unload,
.context_dtor = via_final_context,
return 0;
}
+int via_get_drm_info(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;
+ struct drm_via_info *info = data;
+
+ if (!dev_priv->initialize)
+ return -EINVAL;
+
+ info->RegSize = dev_priv->mmio->size;
+ info->AgpSize = dev->agp_buffer_map->size;
+ info->RegHandle = dev_priv->mmio->offset;
+ info->AgpHandle = dev->agp_buffer_map->offset;
+
+ return 0;
+}
+
+ int via_driver_unload(struct drm_device *dev)
+ {
+ drm_via_private_t *dev_priv = dev->dev_private;
+
+ drm_sman_takedown(&dev_priv->sman);
+
+ drm_free(dev_priv, sizeof(drm_via_private_t), DRM_MEM_DRIVER);
+
+ return 0;
+ }
--- /dev/null
+ /*
+ * USB HID quirks support for Linux
+ *
+ * Copyright (c) 1999 Andreas Gal
+ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ * Copyright (c) 2006-2007 Jiri Kosina
+ * Copyright (c) 2007 Paul Walmsley
+ * Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com>
+ */
+
+ /*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+ #include <linux/device.h>
+ #include <linux/hid.h>
+ #include <linux/module.h>
+ #include <linux/usb.h>
+
+ #include "hid-ids.h"
+
+ #define APPLE_RDESC_JIS 0x0001
+ #define APPLE_IGNORE_MOUSE 0x0002
+ #define APPLE_HAS_FN 0x0004
+ #define APPLE_HIDDEV 0x0008
+ #define APPLE_ISO_KEYBOARD 0x0010
+ #define APPLE_MIGHTYMOUSE 0x0020
+ #define APPLE_INVERT_HWHEEL 0x0040
+ #define APPLE_IGNORE_HIDINPUT 0x0080
+ #define APPLE_NUMLOCK_EMULATION 0x0100
+
+ #define APPLE_FLAG_FKEY 0x01
+
-static unsigned int fnmode = 1;
++static unsigned int fnmode = 2;
+ module_param(fnmode, uint, 0644);
+ MODULE_PARM_DESC(fnmode, "Mode of fn key on Apple keyboards (0 = disabled, "
+ "1 = fkeyslast, [2] = fkeysfirst)");
+
+ struct apple_sc {
+ unsigned long quirks;
+ unsigned int fn_on;
+ DECLARE_BITMAP(pressed_fn, KEY_CNT);
+ DECLARE_BITMAP(pressed_numlock, KEY_CNT);
+ };
+
+ struct apple_key_translation {
+ u16 from;
+ u16 to;
+ u8 flags;
+ };
+
+ static struct apple_key_translation apple_fn_keys[] = {
+ { KEY_BACKSPACE, KEY_DELETE },
+ { KEY_ENTER, KEY_INSERT },
+ { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY },
+ { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY },
+ { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY },
+ { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY },
+ { KEY_F5, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY },
+ { KEY_F6, KEY_KBDILLUMUP, APPLE_FLAG_FKEY },
+ { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY },
+ { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY },
+ { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY },
+ { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY },
+ { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY },
+ { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY },
+ { KEY_UP, KEY_PAGEUP },
+ { KEY_DOWN, KEY_PAGEDOWN },
+ { KEY_LEFT, KEY_HOME },
+ { KEY_RIGHT, KEY_END },
+ { }
+ };
+
+ static struct apple_key_translation powerbook_fn_keys[] = {
+ { KEY_BACKSPACE, KEY_DELETE },
+ { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY },
+ { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY },
+ { KEY_F3, KEY_MUTE, APPLE_FLAG_FKEY },
+ { KEY_F4, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY },
+ { KEY_F5, KEY_VOLUMEUP, APPLE_FLAG_FKEY },
+ { KEY_F6, KEY_NUMLOCK, APPLE_FLAG_FKEY },
+ { KEY_F7, KEY_SWITCHVIDEOMODE, APPLE_FLAG_FKEY },
+ { KEY_F8, KEY_KBDILLUMTOGGLE, APPLE_FLAG_FKEY },
+ { KEY_F9, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY },
+ { KEY_F10, KEY_KBDILLUMUP, APPLE_FLAG_FKEY },
+ { KEY_UP, KEY_PAGEUP },
+ { KEY_DOWN, KEY_PAGEDOWN },
+ { KEY_LEFT, KEY_HOME },
+ { KEY_RIGHT, KEY_END },
+ { }
+ };
+
+ static struct apple_key_translation powerbook_numlock_keys[] = {
+ { KEY_J, KEY_KP1 },
+ { KEY_K, KEY_KP2 },
+ { KEY_L, KEY_KP3 },
+ { KEY_U, KEY_KP4 },
+ { KEY_I, KEY_KP5 },
+ { KEY_O, KEY_KP6 },
+ { KEY_7, KEY_KP7 },
+ { KEY_8, KEY_KP8 },
+ { KEY_9, KEY_KP9 },
+ { KEY_M, KEY_KP0 },
+ { KEY_DOT, KEY_KPDOT },
+ { KEY_SLASH, KEY_KPPLUS },
+ { KEY_SEMICOLON, KEY_KPMINUS },
+ { KEY_P, KEY_KPASTERISK },
+ { KEY_MINUS, KEY_KPEQUAL },
+ { KEY_0, KEY_KPSLASH },
+ { KEY_F6, KEY_NUMLOCK },
+ { KEY_KPENTER, KEY_KPENTER },
+ { KEY_BACKSPACE, KEY_BACKSPACE },
+ { }
+ };
+
+ static struct apple_key_translation apple_iso_keyboard[] = {
+ { KEY_GRAVE, KEY_102ND },
+ { KEY_102ND, KEY_GRAVE },
+ { }
+ };
+
+ static struct apple_key_translation *apple_find_translation(
+ struct apple_key_translation *table, u16 from)
+ {
+ struct apple_key_translation *trans;
+
+ /* Look for the translation */
+ for (trans = table; trans->from; trans++)
+ if (trans->from == from)
+ return trans;
+
+ return NULL;
+ }
+
+ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
+ struct hid_usage *usage, __s32 value)
+ {
+ struct apple_sc *asc = hid_get_drvdata(hid);
+ struct apple_key_translation *trans;
+
+ if (usage->code == KEY_FN) {
+ asc->fn_on = !!value;
+ input_event(input, usage->type, usage->code, value);
+ return 1;
+ }
+
+ if (fnmode) {
+ int do_translate;
+
+ trans = apple_find_translation((hid->product < 0x220 ||
+ hid->product >= 0x300) ?
+ powerbook_fn_keys : apple_fn_keys,
+ usage->code);
+ if (trans) {
+ if (test_bit(usage->code, asc->pressed_fn))
+ do_translate = 1;
+ else if (trans->flags & APPLE_FLAG_FKEY)
+ do_translate = (fnmode == 2 && asc->fn_on) ||
+ (fnmode == 1 && !asc->fn_on);
+ else
+ do_translate = asc->fn_on;
+
+ if (do_translate) {
+ if (value)
+ set_bit(usage->code, asc->pressed_fn);
+ else
+ clear_bit(usage->code, asc->pressed_fn);
+
+ input_event(input, usage->type, trans->to,
+ value);
+
+ return 1;
+ }
+ }
+
+ if (asc->quirks & APPLE_NUMLOCK_EMULATION &&
+ (test_bit(usage->code, asc->pressed_numlock) ||
+ test_bit(LED_NUML, input->led))) {
+ trans = apple_find_translation(powerbook_numlock_keys,
+ usage->code);
+
+ if (trans) {
+ if (value)
+ set_bit(usage->code,
+ asc->pressed_numlock);
+ else
+ clear_bit(usage->code,
+ asc->pressed_numlock);
+
+ input_event(input, usage->type, trans->to,
+ value);
+ }
+
+ return 1;
+ }
+ }
+
+ if (asc->quirks & APPLE_ISO_KEYBOARD) {
+ trans = apple_find_translation(apple_iso_keyboard, usage->code);
+ if (trans) {
+ input_event(input, usage->type, trans->to, value);
+ return 1;
+ }
+ }
+
+ return 0;
+ }
+
+ static int apple_event(struct hid_device *hdev, struct hid_field *field,
+ struct hid_usage *usage, __s32 value)
+ {
+ struct apple_sc *asc = hid_get_drvdata(hdev);
+
+ if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput ||
+ !usage->type)
+ return 0;
+
+ if ((asc->quirks & APPLE_INVERT_HWHEEL) &&
+ usage->code == REL_HWHEEL) {
+ input_event(field->hidinput->input, usage->type, usage->code,
+ -value);
+ return 1;
+ }
+
+ if ((asc->quirks & APPLE_HAS_FN) &&
+ hidinput_apple_event(hdev, field->hidinput->input,
+ usage, value))
+ return 1;
+
+
+ return 0;
+ }
+
+ /*
+ * The MacBook JIS keyboard reports a wrong logical maximum (0x65
+ * instead of 0xe7) in its report descriptor; patch it up so the
+ * whole key range is usable.
+ */
+ static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int rsize)
+ {
+ struct apple_sc *asc = hid_get_drvdata(hdev);
+
+ if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 &&
+ rdesc[53] == 0x65 && rdesc[59] == 0x65) {
+ dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report "
+ "descriptor\n");
+ rdesc[53] = rdesc[59] = 0xe7;
+ }
+ }
+
+ static void apple_setup_input(struct input_dev *input)
+ {
+ struct apple_key_translation *trans;
+
+ set_bit(KEY_NUMLOCK, input->keybit);
+
+ /* Enable all needed keys */
+ for (trans = apple_fn_keys; trans->from; trans++)
+ set_bit(trans->to, input->keybit);
+
+ for (trans = powerbook_fn_keys; trans->from; trans++)
+ set_bit(trans->to, input->keybit);
+
+ for (trans = powerbook_numlock_keys; trans->from; trans++)
+ set_bit(trans->to, input->keybit);
+
+ for (trans = apple_iso_keyboard; trans->from; trans++)
+ set_bit(trans->to, input->keybit);
+ }
+
+ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ struct hid_field *field, struct hid_usage *usage,
+ unsigned long **bit, int *max)
+ {
+ if (usage->hid == (HID_UP_CUSTOM | 0x0003)) {
+ /* The fn key on Apple USB keyboards */
+ set_bit(EV_REP, hi->input->evbit);
+ hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN);
+ apple_setup_input(hi->input);
+ return 1;
+ }
+
+ /* we want the hid layer to go through standard path (set and ignore) */
+ return 0;
+ }
+
+ static int apple_input_mapped(struct hid_device *hdev, struct hid_input *hi,
+ struct hid_field *field, struct hid_usage *usage,
+ unsigned long **bit, int *max)
+ {
+ struct apple_sc *asc = hid_get_drvdata(hdev);
+
+ if (asc->quirks & APPLE_MIGHTYMOUSE) {
+ if (usage->hid == HID_GD_Z)
+ hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL);
+ else if (usage->code == BTN_1)
+ hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_2);
+ else if (usage->code == BTN_2)
+ hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_1);
+ }
+
+ return 0;
+ }
+
+ static int apple_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+ unsigned long quirks = id->driver_data;
+ struct apple_sc *asc;
+ unsigned int connect_mask = HID_CONNECT_DEFAULT;
+ int ret;
+
+ asc = kzalloc(sizeof(*asc), GFP_KERNEL);
+ if (asc == NULL) {
+ dev_err(&hdev->dev, "can't alloc apple descriptor\n");
+ return -ENOMEM;
+ }
+
+ asc->quirks = quirks;
+
+ hid_set_drvdata(hdev, asc);
+
+ ret = hid_parse(hdev);
+ if (ret) {
+ dev_err(&hdev->dev, "parse failed\n");
+ goto err_free;
+ }
+
+ if (quirks & APPLE_HIDDEV)
+ connect_mask |= HID_CONNECT_HIDDEV_FORCE;
+ if (quirks & APPLE_IGNORE_HIDINPUT)
+ connect_mask &= ~HID_CONNECT_HIDINPUT;
+
+ ret = hid_hw_start(hdev, connect_mask);
+ if (ret) {
+ dev_err(&hdev->dev, "hw start failed\n");
+ goto err_free;
+ }
+
+ return 0;
+ err_free:
+ kfree(asc);
+ return ret;
+ }
+
+ static void apple_remove(struct hid_device *hdev)
+ {
+ hid_hw_stop(hdev);
+ kfree(hid_get_drvdata(hdev));
+ }
+
+ static const struct hid_device_id apple_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ATV_IRCONTROL),
+ .driver_data = APPLE_HIDDEV | APPLE_IGNORE_HIDINPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4),
+ .driver_data = APPLE_HIDDEV | APPLE_IGNORE_HIDINPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE),
+ .driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL },
+
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_RDESC_JIS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_RDESC_JIS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI),
+ .driver_data = APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS),
+ .driver_data = APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_RDESC_JIS },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+ APPLE_ISO_KEYBOARD },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI),
+ .driver_data = APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS),
+ .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI),
+ .driver_data = APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS),
+ .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI),
+ .driver_data = APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS),
+ .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY),
+ .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+
+ /* Apple wireless Mighty Mouse */
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c),
+ .driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL },
+
+ { }
+ };
+ MODULE_DEVICE_TABLE(hid, apple_devices);
+
+ static struct hid_driver apple_driver = {
+ .name = "apple",
+ .id_table = apple_devices,
+ .report_fixup = apple_report_fixup,
+ .probe = apple_probe,
+ .remove = apple_remove,
+ .event = apple_event,
+ .input_mapping = apple_input_mapping,
+ .input_mapped = apple_input_mapped,
+ };
+
+ static int apple_init(void)
+ {
+ int ret;
+
+ ret = hid_register_driver(&apple_driver);
+ if (ret)
+ printk(KERN_ERR "can't register apple driver\n");
+
+ return ret;
+ }
+
+ static void apple_exit(void)
+ {
+ hid_unregister_driver(&apple_driver);
+ }
+
+ module_init(apple_init);
+ module_exit(apple_exit);
+ MODULE_LICENSE("GPL");
+
+ HID_COMPAT_LOAD_DRIVER(apple);
}
EXPORT_SYMBOL_GPL(hid_input_report);
+ static bool hid_match_one_id(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+ return id->bus == hdev->bus &&
+ (id->vendor == HID_ANY_ID || id->vendor == hdev->vendor) &&
+ (id->product == HID_ANY_ID || id->product == hdev->product);
+ }
+
+ static const struct hid_device_id *hid_match_id(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+ for (; id->bus; id++)
+ if (hid_match_one_id(hdev, id))
+ return id;
+
+ return NULL;
+ }
+
+ static const struct hid_device_id hid_hiddev_list[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS1) },
+ { }
+ };
+
+ static bool hid_hiddev(struct hid_device *hdev)
+ {
+ return !!hid_match_id(hdev, hid_hiddev_list);
+ }
+
+ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
+ {
+ static const char *types[] = { "Device", "Pointer", "Mouse", "Device",
+ "Joystick", "Gamepad", "Keyboard", "Keypad",
+ "Multi-Axis Controller"
+ };
+ const char *type, *bus;
+ char buf[64];
+ unsigned int i;
+ int len;
+
+ if (hdev->bus != BUS_USB)
+ connect_mask &= ~HID_CONNECT_HIDDEV;
+ if (hid_hiddev(hdev))
+ connect_mask |= HID_CONNECT_HIDDEV_FORCE;
+
+ if ((connect_mask & HID_CONNECT_HIDINPUT) && !hidinput_connect(hdev,
+ connect_mask & HID_CONNECT_HIDINPUT_FORCE))
+ hdev->claimed |= HID_CLAIMED_INPUT;
+ if ((connect_mask & HID_CONNECT_HIDDEV) && hdev->hiddev_connect &&
+ !hdev->hiddev_connect(hdev,
+ connect_mask & HID_CONNECT_HIDDEV_FORCE))
+ hdev->claimed |= HID_CLAIMED_HIDDEV;
+ if ((connect_mask & HID_CONNECT_HIDRAW) && !hidraw_connect(hdev))
+ hdev->claimed |= HID_CLAIMED_HIDRAW;
+
+ if (!hdev->claimed) {
+ dev_err(&hdev->dev, "claimed by neither input, hiddev nor "
+ "hidraw\n");
+ return -ENODEV;
+ }
+
+ if ((hdev->claimed & HID_CLAIMED_INPUT) &&
+ (connect_mask & HID_CONNECT_FF) && hdev->ff_init)
+ hdev->ff_init(hdev);
+
+ len = 0;
+ if (hdev->claimed & HID_CLAIMED_INPUT)
+ len += sprintf(buf + len, "input");
+ if (hdev->claimed & HID_CLAIMED_HIDDEV)
+ len += sprintf(buf + len, "%shiddev%d", len ? "," : "",
+ hdev->minor);
+ if (hdev->claimed & HID_CLAIMED_HIDRAW)
+ len += sprintf(buf + len, "%shidraw%d", len ? "," : "",
+ ((struct hidraw *)hdev->hidraw)->minor);
+
+ type = "Device";
+ for (i = 0; i < hdev->maxcollection; i++) {
+ struct hid_collection *col = &hdev->collection[i];
+ if (col->type == HID_COLLECTION_APPLICATION &&
+ (col->usage & HID_USAGE_PAGE) == HID_UP_GENDESK &&
+ (col->usage & 0xffff) < ARRAY_SIZE(types)) {
+ type = types[col->usage & 0xffff];
+ break;
+ }
+ }
+
+ switch (hdev->bus) {
+ case BUS_USB:
+ bus = "USB";
+ break;
+ case BUS_BLUETOOTH:
+ bus = "BLUETOOTH";
+ break;
+ default:
+ bus = "<UNKNOWN>";
+ }
+
+ dev_info(&hdev->dev, "%s: %s HID v%x.%02x %s [%s] on %s\n",
+ buf, bus, hdev->version >> 8, hdev->version & 0xff,
+ type, hdev->name, hdev->phys);
+
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(hid_connect);
+
+ static const struct hid_device_id hid_blacklist[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ATV_IRCONTROL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
+
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
+ { }
+ };
+
+ struct hid_dynid {
+ struct list_head list;
+ struct hid_device_id id;
+ };
+
+ /**
+ * store_new_id - add a new HID device ID to this driver and re-probe devices
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Adds a new dynamic hid device ID to this driver,
+ * and causes the driver to probe for all devices again.
+ */
+ static ssize_t store_new_id(struct device_driver *drv, const char *buf,
+ size_t count)
+ {
+ struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+ struct hid_dynid *dynid;
+ __u32 bus, vendor, product;
+ unsigned long driver_data = 0;
+ int ret;
+
+ ret = sscanf(buf, "%x %x %x %lx",
+ &bus, &vendor, &product, &driver_data);
+ if (ret < 3)
+ return -EINVAL;
+
+ dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+ if (!dynid)
+ return -ENOMEM;
+
+ dynid->id.bus = bus;
+ dynid->id.vendor = vendor;
+ dynid->id.product = product;
+ dynid->id.driver_data = driver_data;
+
+ spin_lock(&hdrv->dyn_lock);
+ list_add_tail(&dynid->list, &hdrv->dyn_list);
+ spin_unlock(&hdrv->dyn_lock);
+
+ ret = 0;
+ if (get_driver(&hdrv->driver)) {
+ ret = driver_attach(&hdrv->driver);
+ put_driver(&hdrv->driver);
+ }
+
+ return ret ? : count;
+ }
+ static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
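Per the sscanf() format above, userspace adds an ID by writing "bus vendor product [driver_data]" in hex to the driver's new_id attribute. A minimal sketch (the product ID is hypothetical):

    #include <stdio.h>

    int main(void)
    {
        /* BUS_USB is 0x0003; 0x05ac is Apple; the product ID is made up */
        FILE *f = fopen("/sys/bus/hid/drivers/apple/new_id", "w");

        if (!f)
            return 1;
        fprintf(f, "0003 05ac 1234\n");
        return fclose(f) ? 1 : 0;
    }
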
+
+ static void hid_free_dynids(struct hid_driver *hdrv)
+ {
+ struct hid_dynid *dynid, *n;
+
+ spin_lock(&hdrv->dyn_lock);
+ list_for_each_entry_safe(dynid, n, &hdrv->dyn_list, list) {
+ list_del(&dynid->list);
+ kfree(dynid);
+ }
+ spin_unlock(&hdrv->dyn_lock);
+ }
+
+ static const struct hid_device_id *hid_match_device(struct hid_device *hdev,
+ struct hid_driver *hdrv)
+ {
+ struct hid_dynid *dynid;
+
+ spin_lock(&hdrv->dyn_lock);
+ list_for_each_entry(dynid, &hdrv->dyn_list, list) {
+ if (hid_match_one_id(hdev, &dynid->id)) {
+ spin_unlock(&hdrv->dyn_lock);
+ return &dynid->id;
+ }
+ }
+ spin_unlock(&hdrv->dyn_lock);
+
+ return hid_match_id(hdev, hdrv->id_table);
+ }
+
+ static int hid_bus_match(struct device *dev, struct device_driver *drv)
+ {
+ struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+ struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+
+ if (!hid_match_device(hdev, hdrv))
+ return 0;
+
+ /* generic wants all non-blacklisted */
+ if (!strncmp(hdrv->name, "generic-", 8))
+ return !hid_match_id(hdev, hid_blacklist);
+
+ return 1;
+ }
+
+ static int hid_device_probe(struct device *dev)
+ {
+ struct hid_driver *hdrv = container_of(dev->driver,
+ struct hid_driver, driver);
+ struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+ const struct hid_device_id *id;
+ int ret = 0;
+
+ if (!hdev->driver) {
+ id = hid_match_device(hdev, hdrv);
+ if (id == NULL)
+ return -ENODEV;
+
+ hdev->driver = hdrv;
+ if (hdrv->probe) {
+ ret = hdrv->probe(hdev, id);
+ } else { /* default probe */
+ ret = hid_parse(hdev);
+ if (!ret)
+ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+ }
+ if (ret)
+ hdev->driver = NULL;
+ }
+ return ret;
+ }
+
+ static int hid_device_remove(struct device *dev)
+ {
+ struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+ struct hid_driver *hdrv = hdev->driver;
+
+ if (hdrv) {
+ if (hdrv->remove)
+ hdrv->remove(hdev);
+ else /* default remove */
+ hid_hw_stop(hdev);
+ hdev->driver = NULL;
+ }
+
+ return 0;
+ }
+
+ static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
+ {
+ struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+
+ if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
+ hdev->bus, hdev->vendor, hdev->product))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "HID_NAME=%s", hdev->name))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "HID_PHYS=%s", hdev->phys))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "HID_UNIQ=%s", hdev->uniq))
+ return -ENOMEM;
+
+ if (add_uevent_var(env, "MODALIAS=hid:b%04Xv%08Xp%08X",
+ hdev->bus, hdev->vendor, hdev->product))
+ return -ENOMEM;
+
+ return 0;
+ }
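The MODALIAS variable emitted above is what udev/modprobe match against the aliases generated from MODULE_DEVICE_TABLE(hid, ...). For example, a USB Apple keyboard (values illustrative) produces:

    #include <stdio.h>

    int main(void)
    {
        unsigned int bus = 0x0003, vendor = 0x05ac, product = 0x0220;

        /* same format string as hid_uevent() above */
        printf("hid:b%04Xv%08Xp%08X\n", bus, vendor, product);
        /* -> hid:b0003v000005ACp00000220 */
        return 0;
    }
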
+
+ static struct bus_type hid_bus_type = {
+ .name = "hid",
+ .match = hid_bus_match,
+ .probe = hid_device_probe,
+ .remove = hid_device_remove,
+ .uevent = hid_uevent,
+ };
+
+ static const struct hid_device_id hid_ignore_list[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_FLAIR) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_302) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ADS_TECH, USB_DEVICE_ID_ADS_TECH_RADIO_SI470X) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_01) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_10) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_20) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_21) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_22) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_23) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_24) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AIRCABLE, USB_DEVICE_ID_AIRCABLE1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_USBRS232) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_LCM)},
+ { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_LCM2)},
+ { HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM109) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_HIDCOM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_ULTRAMOUSE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_4000U) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_4500U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0001) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0002) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0003) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0004) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_4_PHIDGETSERVO_30) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_1_PHIDGETSERVO_30) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_0_4_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_16_16_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_8_8_8_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_7_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_8_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_PHIDGET_MOTORCONTROL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_SUPER_Q2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_GOGOPEN) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_PENPOWER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GRETAGMACBETH, USB_DEVICE_ID_GRETAGMACBETH_HUEY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_POWERMATE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_SOUNDKNOB) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_90) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_100) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_101) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_103) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_104) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_105) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_106) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_107) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_108) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_200) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_201) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_202) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_203) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_204) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_205) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_206) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_207) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_300) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_301) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_302) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_303) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_304) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_305) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_306) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_307) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_308) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_309) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_400) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_401) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_402) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_403) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_404) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_405) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_500) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_501) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_502) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_503) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_504) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1000) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1001) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1002) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1003) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1004) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1005) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1006) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1007) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IMATION, USB_DEVICE_ID_DISC_STAKKA) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_KBGEAR, USB_DEVICE_ID_KBGEAR_JAMSTUDIO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_KWORLD, USB_DEVICE_ID_KWORLD_RADIO_FM700) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_GPEN_560) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR, USB_DEVICE_ID_N_S_HARMONY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 20) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 30) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 100) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 108) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 118) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 200) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 300) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 400) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 500) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0001) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD3) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY1) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LABPRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_GOTEMP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_CYCLOPS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LCSPEC) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WACOM, HID_ANY_ID) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_4_PHIDGETSERVO_20) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+ { }
+ };
+
+ /**
+ * hid_mouse_ignore_list - mouse devices which should not be handled by the hid layer
+ *
+ * There are composite devices for which we want to ignore only a certain
+ * interface. This is a list of devices for which only the mouse interface will
+ * be ignored. This allows a dedicated driver to take care of the interface.
+ */
+ static const struct hid_device_id hid_mouse_ignore_list[] = {
+ /* appletouch driver */
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+ { }
+ };
+
+ static bool hid_ignore(struct hid_device *hdev)
+ {
+ switch (hdev->vendor) {
+ case USB_VENDOR_ID_CODEMERCS:
+ /* ignore all Code Mercenaries IOWarrior devices */
+ if (hdev->product >= USB_DEVICE_ID_CODEMERCS_IOW_FIRST &&
+ hdev->product <= USB_DEVICE_ID_CODEMERCS_IOW_LAST)
+ return true;
+ break;
+ case USB_VENDOR_ID_LOGITECH:
+ if (hdev->product >= USB_DEVICE_ID_LOGITECH_HARMONY_FIRST &&
+ hdev->product <= USB_DEVICE_ID_LOGITECH_HARMONY_LAST)
+ return true;
+ break;
+ }
+
+ if (hdev->type == HID_TYPE_USBMOUSE &&
+ hid_match_id(hdev, hid_mouse_ignore_list))
+ return true;
+
+ return !!hid_match_id(hdev, hid_ignore_list);
+ }
+
+ int hid_add_device(struct hid_device *hdev)
+ {
+ static atomic_t id = ATOMIC_INIT(0);
+ int ret;
+
+ if (WARN_ON(hdev->status & HID_STAT_ADDED))
+ return -EBUSY;
+
+ /* we need to kill them here, otherwise they would stay allocated,
+ * waiting for a matching driver to come along */
+ if (hid_ignore(hdev))
+ return -ENODEV;
+
+ /* XXX hack, any other cleaner solution after the driver core
+ * is converted to allow more than 20 bytes as the device name? */
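+ /* The name below reads <bus>:<vendor>:<product>.<serial>; e.g. the
+ * first USB (bus 0003) device 046d:c101 would appear as
+ * "0003:046D:C101.0001" (illustrative values) */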
+ dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus,
+ hdev->vendor, hdev->product, atomic_inc_return(&id));
+
+ ret = device_add(&hdev->dev);
+ if (!ret)
+ hdev->status |= HID_STAT_ADDED;
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(hid_add_device);
+
+ /**
+ * hid_allocate_device - allocate new hid device descriptor
+ *
+ * Allocate and initialize hid device, so that hid_destroy_device might be
+ * used to free it.
+ *
+ * New hid_device pointer is returned on success, otherwise ERR_PTR encoded
+ * error value.
+ */
+ struct hid_device *hid_allocate_device(void)
+ {
+ struct hid_device *hdev;
+ unsigned int i;
+ int ret = -ENOMEM;
+
+ hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+ if (hdev == NULL)
+ return ERR_PTR(ret);
+
+ device_initialize(&hdev->dev);
+ hdev->dev.release = hid_device_release;
+ hdev->dev.bus = &hid_bus_type;
+
+ hdev->collection = kcalloc(HID_DEFAULT_NUM_COLLECTIONS,
+ sizeof(struct hid_collection), GFP_KERNEL);
+ if (hdev->collection == NULL)
+ goto err;
+ hdev->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
+
+ for (i = 0; i < HID_REPORT_TYPES; i++)
+ INIT_LIST_HEAD(&hdev->report_enum[i].report_list);
+
+ return hdev;
+ err:
+ put_device(&hdev->dev);
+ return ERR_PTR(ret);
+ }
+ EXPORT_SYMBOL_GPL(hid_allocate_device);
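+
+ /*
+ * Typical transport-driver usage (a sketch mirroring what usbhid does):
+ *
+ * hdev = hid_allocate_device();
+ * if (IS_ERR(hdev))
+ * return PTR_ERR(hdev);
+ * ...fill in hdev->bus, hdev->vendor, hdev->product, dev.parent...
+ * ret = hid_add_device(hdev);
+ * if (ret)
+ * hid_destroy_device(hdev);
+ */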
+
+ static void hid_remove_device(struct hid_device *hdev)
+ {
+ if (hdev->status & HID_STAT_ADDED) {
+ device_del(&hdev->dev);
+ hdev->status &= ~HID_STAT_ADDED;
+ }
+ }
+
+ /**
+ * hid_destroy_device - free previously allocated device
+ *
+ * @hdev: hid device
+ *
+ * If you allocate a hid_device through hid_allocate_device, it must
+ * always be freed with this function.
+ */
+ void hid_destroy_device(struct hid_device *hdev)
+ {
+ hid_remove_device(hdev);
+ put_device(&hdev->dev);
+ }
+ EXPORT_SYMBOL_GPL(hid_destroy_device);
+
+ int __hid_register_driver(struct hid_driver *hdrv, struct module *owner,
+ const char *mod_name)
+ {
+ int ret;
+
+ hdrv->driver.name = hdrv->name;
+ hdrv->driver.bus = &hid_bus_type;
+ hdrv->driver.owner = owner;
+ hdrv->driver.mod_name = mod_name;
+
+ INIT_LIST_HEAD(&hdrv->dyn_list);
+ spin_lock_init(&hdrv->dyn_lock);
+
+ ret = driver_register(&hdrv->driver);
+ if (ret)
+ return ret;
+
+ ret = driver_create_file(&hdrv->driver, &driver_attr_new_id);
+ if (ret)
+ driver_unregister(&hdrv->driver);
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(__hid_register_driver);
+
+ void hid_unregister_driver(struct hid_driver *hdrv)
+ {
+ driver_remove_file(&hdrv->driver, &driver_attr_new_id);
+ driver_unregister(&hdrv->driver);
+ hid_free_dynids(hdrv);
+ }
+ EXPORT_SYMBOL_GPL(hid_unregister_driver);
+
+ #ifdef CONFIG_HID_COMPAT
+ static void hid_compat_load(struct work_struct *ws)
+ {
+ request_module("hid-dummy");
+ }
+ static DECLARE_WORK(hid_compat_work, hid_compat_load);
+ static struct workqueue_struct *hid_compat_wq;
+ #endif
+
static int __init hid_init(void)
{
- return hidraw_init();
+ int ret;
+
+ ret = bus_register(&hid_bus_type);
+ if (ret) {
+ printk(KERN_ERR "HID: can't register hid bus\n");
+ goto err;
+ }
+
+ ret = hidraw_init();
+ if (ret)
+ goto err_bus;
+
+ #ifdef CONFIG_HID_COMPAT
+ hid_compat_wq = create_singlethread_workqueue("hid_compat");
+ if (!hid_compat_wq) {
+ ret = -ENOMEM;
+ hidraw_exit();
+ goto err_bus;
+ }
+ queue_work(hid_compat_wq, &hid_compat_work);
+ #endif
+
+ return 0;
+ err_bus:
+ bus_unregister(&hid_bus_type);
+ err:
+ return ret;
}
static void __exit hid_exit(void)
--- /dev/null
+ /*
+ * USB HID quirks support for Linux
+ *
+ * Copyright (c) 1999 Andreas Gal
+ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ * Copyright (c) 2006-2007 Jiri Kosina
+ * Copyright (c) 2007 Paul Walmsley
+ */
+
+ /*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+ #ifndef HID_IDS_H_FILE
+ #define HID_IDS_H_FILE
+
+ #define USB_VENDOR_ID_A4TECH 0x09da
+ #define USB_DEVICE_ID_A4TECH_WCP32PU 0x0006
+ #define USB_DEVICE_ID_A4TECH_X5_005D 0x000a
+
+ #define USB_VENDOR_ID_AASHIMA 0x06d6
+ #define USB_DEVICE_ID_AASHIMA_GAMEPAD 0x0025
+ #define USB_DEVICE_ID_AASHIMA_PREDATOR 0x0026
+
+ #define USB_VENDOR_ID_ACECAD 0x0460
+ #define USB_DEVICE_ID_ACECAD_FLAIR 0x0004
+ #define USB_DEVICE_ID_ACECAD_302 0x0008
+
+ #define USB_VENDOR_ID_ADS_TECH 0x06e1
+ #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X 0xa155
+
+ #define USB_VENDOR_ID_AFATECH 0x15a4
+ #define USB_DEVICE_ID_AFATECH_AF9016 0x9016
+
+ #define USB_VENDOR_ID_AIPTEK 0x08ca
+ #define USB_DEVICE_ID_AIPTEK_01 0x0001
+ #define USB_DEVICE_ID_AIPTEK_10 0x0010
+ #define USB_DEVICE_ID_AIPTEK_20 0x0020
+ #define USB_DEVICE_ID_AIPTEK_21 0x0021
+ #define USB_DEVICE_ID_AIPTEK_22 0x0022
+ #define USB_DEVICE_ID_AIPTEK_23 0x0023
+ #define USB_DEVICE_ID_AIPTEK_24 0x0024
+
+ #define USB_VENDOR_ID_AIRCABLE 0x16CA
+ #define USB_DEVICE_ID_AIRCABLE1 0x1502
+
+ #define USB_VENDOR_ID_ALCOR 0x058f
+ #define USB_DEVICE_ID_ALCOR_USBRS232 0x9720
+
+ #define USB_VENDOR_ID_ALPS 0x0433
+ #define USB_DEVICE_ID_IBM_GAMEPAD 0x1101
+
+ #define USB_VENDOR_ID_APPLE 0x05ac
+ #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304
+ #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e
+ #define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO 0x020f
+ #define USB_DEVICE_ID_APPLE_GEYSER_ANSI 0x0214
+ #define USB_DEVICE_ID_APPLE_GEYSER_ISO 0x0215
+ #define USB_DEVICE_ID_APPLE_GEYSER_JIS 0x0216
+ #define USB_DEVICE_ID_APPLE_GEYSER3_ANSI 0x0217
+ #define USB_DEVICE_ID_APPLE_GEYSER3_ISO 0x0218
+ #define USB_DEVICE_ID_APPLE_GEYSER3_JIS 0x0219
+ #define USB_DEVICE_ID_APPLE_GEYSER4_ANSI 0x021a
+ #define USB_DEVICE_ID_APPLE_GEYSER4_ISO 0x021b
+ #define USB_DEVICE_ID_APPLE_GEYSER4_JIS 0x021c
+ #define USB_DEVICE_ID_APPLE_ALU_ANSI 0x0220
+ #define USB_DEVICE_ID_APPLE_ALU_ISO 0x0221
+ #define USB_DEVICE_ID_APPLE_ALU_JIS 0x0222
+ #define USB_DEVICE_ID_APPLE_WELLSPRING_ANSI 0x0223
+ #define USB_DEVICE_ID_APPLE_WELLSPRING_ISO 0x0224
+ #define USB_DEVICE_ID_APPLE_WELLSPRING_JIS 0x0225
+ #define USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI 0x0229
+ #define USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO 0x022a
+ #define USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS 0x022b
+ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI 0x022c
+ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO 0x022d
+ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS 0x022e
+ #define USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI 0x0230
+ #define USB_DEVICE_ID_APPLE_WELLSPRING2_ISO 0x0231
+ #define USB_DEVICE_ID_APPLE_WELLSPRING2_JIS 0x0232
+ #define USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI 0x0236
+ #define USB_DEVICE_ID_APPLE_WELLSPRING3_ISO 0x0237
+ #define USB_DEVICE_ID_APPLE_WELLSPRING3_JIS 0x0238
+ #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY 0x030a
+ #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b
+ #define USB_DEVICE_ID_APPLE_ATV_IRCONTROL 0x8241
+ #define USB_DEVICE_ID_APPLE_IRCONTROL4 0x8242
+
+ #define USB_VENDOR_ID_ASUS 0x0b05
+ #define USB_DEVICE_ID_ASUS_LCM 0x1726
+ #define USB_DEVICE_ID_ASUS_LCM2 0x175b
+
+ #define USB_VENDOR_ID_ATEN 0x0557
+ #define USB_DEVICE_ID_ATEN_UC100KM 0x2004
+ #define USB_DEVICE_ID_ATEN_CS124U 0x2202
+ #define USB_DEVICE_ID_ATEN_2PORTKVM 0x2204
+ #define USB_DEVICE_ID_ATEN_4PORTKVM 0x2205
+ #define USB_DEVICE_ID_ATEN_4PORTKVMC 0x2208
+
+ #define USB_VENDOR_ID_AVERMEDIA 0x07ca
+ #define USB_DEVICE_ID_AVER_FM_MR800 0xb800
+
+ #define USB_VENDOR_ID_BELKIN 0x050d
+ #define USB_DEVICE_ID_FLIP_KVM 0x3201
+
+ #define USB_VENDOR_ID_BERKSHIRE 0x0c98
+ #define USB_DEVICE_ID_BERKSHIRE_PCWD 0x1140
+
+ #define USB_VENDOR_ID_CHERRY 0x046a
+ #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023
+
+ #define USB_VENDOR_ID_CHIC 0x05fe
+ #define USB_DEVICE_ID_CHIC_GAMEPAD 0x0014
+
+ #define USB_VENDOR_ID_CHICONY 0x04f2
+ #define USB_DEVICE_ID_CHICONY_TACTICAL_PAD 0x0418
+
+ #define USB_VENDOR_ID_CIDC 0x1677
+
+ #define USB_VENDOR_ID_CMEDIA 0x0d8c
+ #define USB_DEVICE_ID_CM109 0x000e
+
+ #define USB_VENDOR_ID_CODEMERCS 0x07c0
+ #define USB_DEVICE_ID_CODEMERCS_IOW_FIRST 0x1500
+ #define USB_DEVICE_ID_CODEMERCS_IOW_LAST 0x15ff
+
+ #define USB_VENDOR_ID_CYGNAL 0x10c4
+ #define USB_DEVICE_ID_CYGNAL_RADIO_SI470X 0x818a
+
+ #define USB_VENDOR_ID_CYPRESS 0x04b4
+ #define USB_DEVICE_ID_CYPRESS_MOUSE 0x0001
+ #define USB_DEVICE_ID_CYPRESS_HIDCOM 0x5500
+ #define USB_DEVICE_ID_CYPRESS_ULTRAMOUSE 0x7417
+ #define USB_DEVICE_ID_CYPRESS_BARCODE_1 0xde61
+ #define USB_DEVICE_ID_CYPRESS_BARCODE_2 0xde64
+
+ #define USB_VENDOR_ID_DEALEXTREAME 0x10c5
+ #define USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701 0x819a
+
+ #define USB_VENDOR_ID_DELORME 0x1163
+ #define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100
+ #define USB_DEVICE_ID_DELORME_EM_LT20 0x0200
+
+ #define USB_VENDOR_ID_DMI 0x0c0b
+ #define USB_DEVICE_ID_DMI_ENC 0x5fab
+
+ #define USB_VENDOR_ID_ELO 0x04E7
++#define USB_DEVICE_ID_ELO_4000U 0x0009
+ #define USB_DEVICE_ID_ELO_TS2700 0x0020
++#define USB_DEVICE_ID_ELO_4500U 0x0030
+
+ #define USB_VENDOR_ID_ESSENTIAL_REALITY 0x0d7f
+ #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100
+
+ #define USB_VENDOR_ID_EZKEY 0x0518
+ #define USB_DEVICE_ID_BTC_8193 0x0002
+
+ #define USB_VENDOR_ID_GAMERON 0x0810
+ #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001
+ #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002
+
+ #define USB_VENDOR_ID_GENERAL_TOUCH 0x0dfc
+
+ #define USB_VENDOR_ID_GLAB 0x06c2
+ #define USB_DEVICE_ID_4_PHIDGETSERVO_30 0x0038
+ #define USB_DEVICE_ID_1_PHIDGETSERVO_30 0x0039
+ #define USB_DEVICE_ID_0_0_4_IF_KIT 0x0040
+ #define USB_DEVICE_ID_0_16_16_IF_KIT 0x0044
+ #define USB_DEVICE_ID_8_8_8_IF_KIT 0x0045
+ #define USB_DEVICE_ID_0_8_7_IF_KIT 0x0051
+ #define USB_DEVICE_ID_0_8_8_IF_KIT 0x0053
+ #define USB_DEVICE_ID_PHIDGET_MOTORCONTROL 0x0058
+
+ #define USB_VENDOR_ID_GOTOP 0x08f2
+ #define USB_DEVICE_ID_SUPER_Q2 0x007f
+ #define USB_DEVICE_ID_GOGOPEN 0x00ce
+ #define USB_DEVICE_ID_PENPOWER 0x00f4
+
+ #define USB_VENDOR_ID_GREENASIA 0x0e8f
+
+ #define USB_VENDOR_ID_GRETAGMACBETH 0x0971
+ #define USB_DEVICE_ID_GRETAGMACBETH_HUEY 0x2005
+
+ #define USB_VENDOR_ID_GRIFFIN 0x077d
+ #define USB_DEVICE_ID_POWERMATE 0x0410
+ #define USB_DEVICE_ID_SOUNDKNOB 0x04AA
+
+ #define USB_VENDOR_ID_GTCO 0x078c
+ #define USB_DEVICE_ID_GTCO_90 0x0090
+ #define USB_DEVICE_ID_GTCO_100 0x0100
+ #define USB_DEVICE_ID_GTCO_101 0x0101
+ #define USB_DEVICE_ID_GTCO_103 0x0103
+ #define USB_DEVICE_ID_GTCO_104 0x0104
+ #define USB_DEVICE_ID_GTCO_105 0x0105
+ #define USB_DEVICE_ID_GTCO_106 0x0106
+ #define USB_DEVICE_ID_GTCO_107 0x0107
+ #define USB_DEVICE_ID_GTCO_108 0x0108
+ #define USB_DEVICE_ID_GTCO_200 0x0200
+ #define USB_DEVICE_ID_GTCO_201 0x0201
+ #define USB_DEVICE_ID_GTCO_202 0x0202
+ #define USB_DEVICE_ID_GTCO_203 0x0203
+ #define USB_DEVICE_ID_GTCO_204 0x0204
+ #define USB_DEVICE_ID_GTCO_205 0x0205
+ #define USB_DEVICE_ID_GTCO_206 0x0206
+ #define USB_DEVICE_ID_GTCO_207 0x0207
+ #define USB_DEVICE_ID_GTCO_300 0x0300
+ #define USB_DEVICE_ID_GTCO_301 0x0301
+ #define USB_DEVICE_ID_GTCO_302 0x0302
+ #define USB_DEVICE_ID_GTCO_303 0x0303
+ #define USB_DEVICE_ID_GTCO_304 0x0304
+ #define USB_DEVICE_ID_GTCO_305 0x0305
+ #define USB_DEVICE_ID_GTCO_306 0x0306
+ #define USB_DEVICE_ID_GTCO_307 0x0307
+ #define USB_DEVICE_ID_GTCO_308 0x0308
+ #define USB_DEVICE_ID_GTCO_309 0x0309
+ #define USB_DEVICE_ID_GTCO_400 0x0400
+ #define USB_DEVICE_ID_GTCO_401 0x0401
+ #define USB_DEVICE_ID_GTCO_402 0x0402
+ #define USB_DEVICE_ID_GTCO_403 0x0403
+ #define USB_DEVICE_ID_GTCO_404 0x0404
+ #define USB_DEVICE_ID_GTCO_405 0x0405
+ #define USB_DEVICE_ID_GTCO_500 0x0500
+ #define USB_DEVICE_ID_GTCO_501 0x0501
+ #define USB_DEVICE_ID_GTCO_502 0x0502
+ #define USB_DEVICE_ID_GTCO_503 0x0503
+ #define USB_DEVICE_ID_GTCO_504 0x0504
+ #define USB_DEVICE_ID_GTCO_1000 0x1000
+ #define USB_DEVICE_ID_GTCO_1001 0x1001
+ #define USB_DEVICE_ID_GTCO_1002 0x1002
+ #define USB_DEVICE_ID_GTCO_1003 0x1003
+ #define USB_DEVICE_ID_GTCO_1004 0x1004
+ #define USB_DEVICE_ID_GTCO_1005 0x1005
+ #define USB_DEVICE_ID_GTCO_1006 0x1006
+ #define USB_DEVICE_ID_GTCO_1007 0x1007
+
+ #define USB_VENDOR_ID_GYRATION 0x0c16
+ #define USB_DEVICE_ID_GYRATION_REMOTE 0x0002
+ #define USB_DEVICE_ID_GYRATION_REMOTE_2 0x0003
+
+ #define USB_VENDOR_ID_HAPP 0x078b
+ #define USB_DEVICE_ID_UGCI_DRIVING 0x0010
+ #define USB_DEVICE_ID_UGCI_FLYING 0x0020
+ #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030
+
+ #define USB_VENDOR_ID_IMATION 0x0718
+ #define USB_DEVICE_ID_DISC_STAKKA 0xd000
+
+ #define USB_VENDOR_ID_KBGEAR 0x084e
+ #define USB_DEVICE_ID_KBGEAR_JAMSTUDIO 0x1001
+
+ #define USB_VENDOR_ID_KWORLD 0x1b80
+ #define USB_DEVICE_ID_KWORLD_RADIO_FM700 0xd700
+
+ #define USB_VENDOR_ID_LABTEC 0x1020
+ #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006
+
+ #define USB_VENDOR_ID_LD 0x0f11
+ #define USB_DEVICE_ID_LD_CASSY 0x1000
+ #define USB_DEVICE_ID_LD_POCKETCASSY 0x1010
+ #define USB_DEVICE_ID_LD_MOBILECASSY 0x1020
+ #define USB_DEVICE_ID_LD_JWM 0x1080
+ #define USB_DEVICE_ID_LD_DMMP 0x1081
+ #define USB_DEVICE_ID_LD_UMIP 0x1090
+ #define USB_DEVICE_ID_LD_XRAY1 0x1100
+ #define USB_DEVICE_ID_LD_XRAY2 0x1101
+ #define USB_DEVICE_ID_LD_VIDEOCOM 0x1200
+ #define USB_DEVICE_ID_LD_COM3LAB 0x2000
+ #define USB_DEVICE_ID_LD_TELEPORT 0x2010
+ #define USB_DEVICE_ID_LD_NETWORKANALYSER 0x2020
+ #define USB_DEVICE_ID_LD_POWERCONTROL 0x2030
+ #define USB_DEVICE_ID_LD_MACHINETEST 0x2040
+
+ #define USB_VENDOR_ID_LOGITECH 0x046d
+ #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101
+ #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110
+ #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
+ #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD 0xc211
+ #define USB_DEVICE_ID_LOGITECH_EXTREME_3D 0xc215
+ #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD2 0xc218
+ #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2 0xc219
+ #define USB_DEVICE_ID_LOGITECH_WINGMAN_F3D 0xc283
+ #define USB_DEVICE_ID_LOGITECH_FORCE3D_PRO 0xc286
+ #define USB_DEVICE_ID_LOGITECH_WHEEL 0xc294
+ #define USB_DEVICE_ID_LOGITECH_MOMO_WHEEL 0xc295
+ #define USB_DEVICE_ID_LOGITECH_ELITE_KBD 0xc30a
+ #define USB_DEVICE_ID_S510_RECEIVER 0xc50c
+ #define USB_DEVICE_ID_S510_RECEIVER_2 0xc517
+ #define USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500 0xc512
+ #define USB_DEVICE_ID_MX3000_RECEIVER 0xc513
+ #define USB_DEVICE_ID_DINOVO_DESKTOP 0xc704
+ #define USB_DEVICE_ID_DINOVO_EDGE 0xc714
+ #define USB_DEVICE_ID_DINOVO_MINI 0xc71f
+ #define USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2 0xca03
+
+ #define USB_VENDOR_ID_MCC 0x09db
+ #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076
+ #define USB_DEVICE_ID_MCC_PMD1208LS 0x007a
+
+ #define USB_VENDOR_ID_MGE 0x0463
+ #define USB_DEVICE_ID_MGE_UPS 0xffff
+ #define USB_DEVICE_ID_MGE_UPS1 0x0001
+
+ #define USB_VENDOR_ID_MICROCHIP 0x04d8
+ #define USB_DEVICE_ID_PICKIT1 0x0032
+ #define USB_DEVICE_ID_PICKIT2 0x0033
+
+ #define USB_VENDOR_ID_MICROSOFT 0x045e
+ #define USB_DEVICE_ID_SIDEWINDER_GV 0x003b
+ #define USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0 0x009d
+ #define USB_DEVICE_ID_MS_NE4K 0x00db
+ #define USB_DEVICE_ID_MS_LK6K 0x00f9
+ #define USB_DEVICE_ID_MS_PRESENTER_8K_BT 0x0701
+ #define USB_DEVICE_ID_MS_PRESENTER_8K_USB 0x0713
+
+
+ #define USB_VENDOR_ID_MONTEREY 0x0566
+ #define USB_DEVICE_ID_GENIUS_KB29E 0x3004
+
+ #define USB_VENDOR_ID_NCR 0x0404
+ #define USB_DEVICE_ID_NCR_FIRST 0x0300
+ #define USB_DEVICE_ID_NCR_LAST 0x03ff
+
+ #define USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR 0x0400
+ #define USB_DEVICE_ID_N_S_HARMONY 0xc359
+
+ #define USB_VENDOR_ID_NATSU 0x08b7
+ #define USB_DEVICE_ID_NATSU_GAMEPAD 0x0001
+
+ #define USB_VENDOR_ID_NEC 0x073e
+ #define USB_DEVICE_ID_NEC_USB_GAME_PAD 0x0301
+
+ #define USB_VENDOR_ID_NTRIG 0x1b96
+ #define USB_DEVICE_ID_NTRIG_TOUCH_SCREEN 0x0001
+
+ #define USB_VENDOR_ID_ONTRAK 0x0a07
+ #define USB_DEVICE_ID_ONTRAK_ADU100 0x0064
+
+ #define USB_VENDOR_ID_PANJIT 0x134c
+
+ #define USB_VENDOR_ID_PANTHERLORD 0x0810
+ #define USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK 0x0001
+
+ #define USB_VENDOR_ID_PETALYNX 0x18b1
+ #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE 0x0037
+
+ #define USB_VENDOR_ID_PLAYDOTCOM 0x0b43
+ #define USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII 0x0003
+
+ #define USB_VENDOR_ID_SAITEK 0x06a3
+ #define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17
+
+ #define USB_VENDOR_ID_SAMSUNG 0x0419
+ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001
+
+ #define USB_VENDOR_ID_SONY 0x054c
+ #define USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE 0x024b
+ #define USB_DEVICE_ID_SONY_PS3_CONTROLLER 0x0268
+
+ #define USB_VENDOR_ID_SOUNDGRAPH 0x15c2
+ #define USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD 0x0038
+ #define USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD2 0x0036
+ #define USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD3 0x0034
+
+ #define USB_VENDOR_ID_SUN 0x0430
+ #define USB_DEVICE_ID_RARITAN_KVM_DONGLE 0xcdab
+
+ #define USB_VENDOR_ID_SUNPLUS 0x04fc
+ #define USB_DEVICE_ID_SUNPLUS_WDESKTOP 0x05d8
+
+ #define USB_VENDOR_ID_TENX 0x1130
+ #define USB_DEVICE_ID_TENX_IBUDDY1 0x0001
+ #define USB_DEVICE_ID_TENX_IBUDDY2 0x0002
+
+ #define USB_VENDOR_ID_THRUSTMASTER 0x044f
+
+ #define USB_VENDOR_ID_TOPMAX 0x0663
+ #define USB_DEVICE_ID_TOPMAX_COBRAPAD 0x0103
+
+ #define USB_VENDOR_ID_TOPSEED 0x0766
+ #define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204
+
+ #define USB_VENDOR_ID_TURBOX 0x062a
+ #define USB_DEVICE_ID_TURBOX_KEYBOARD 0x0201
+
+ #define USB_VENDOR_ID_UCLOGIC 0x5543
+ #define USB_DEVICE_ID_UCLOGIC_TABLET_PF1209 0x0042
+
+ #define USB_VENDOR_ID_VERNIER 0x08f7
+ #define USB_DEVICE_ID_VERNIER_LABPRO 0x0001
+ #define USB_DEVICE_ID_VERNIER_GOTEMP 0x0002
+ #define USB_DEVICE_ID_VERNIER_SKIP 0x0003
+ #define USB_DEVICE_ID_VERNIER_CYCLOPS 0x0004
+ #define USB_DEVICE_ID_VERNIER_LCSPEC 0x0006
+
+ #define USB_VENDOR_ID_WACOM 0x056a
+
+ #define USB_VENDOR_ID_WISEGROUP 0x0925
+ #define USB_DEVICE_ID_1_PHIDGETSERVO_20 0x8101
+ #define USB_DEVICE_ID_4_PHIDGETSERVO_20 0x8104
+ #define USB_DEVICE_ID_8_8_4_IF_KIT 0x8201
+ #define USB_DEVICE_ID_QUAD_USB_JOYPAD 0x8800
+ #define USB_DEVICE_ID_DUAL_USB_JOYPAD 0x8866
+
+ #define USB_VENDOR_ID_WISEGROUP_LTD 0x6666
+ #define USB_VENDOR_ID_WISEGROUP_LTD2 0x6677
+ #define USB_DEVICE_ID_SMARTJOY_DUAL_PLUS 0x8802
+
+ #define USB_VENDOR_ID_YEALINK 0x6993
+ #define USB_DEVICE_ID_YEALINK_P1K_P4K_B2K 0xb001
+
+ #define USB_VENDOR_ID_ZEROPLUS 0x0c12
+
+ #define USB_VENDOR_ID_KYE 0x0458
+ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003
+
+ #endif
#include <linux/hiddev.h>
#include <linux/hid-debug.h>
#include <linux/hidraw.h>
++
#include "usbhid.h"
/*
--- /dev/null
+ /* ultra45_env.c: Driver for Ultra45 PIC16F747 environmental monitor.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/slab.h>
+ #include <linux/of_device.h>
+ #include <linux/io.h>
+ #include <linux/hwmon.h>
+ #include <linux/hwmon-sysfs.h>
+
+ #define DRV_MODULE_VERSION "0.1"
+
+ MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+ MODULE_DESCRIPTION("Ultra45 environmental monitor driver");
+ MODULE_LICENSE("GPL");
+ MODULE_VERSION(DRV_MODULE_VERSION);
+
+ /* PIC device registers */
+ #define REG_CMD 0x00UL
+ #define REG_CMD_RESET 0x80
+ #define REG_CMD_ESTAR 0x01
+ #define REG_STAT 0x01UL
+ #define REG_STAT_FWVER 0xf0
+ #define REG_STAT_TGOOD 0x08
+ #define REG_STAT_STALE 0x04
+ #define REG_STAT_BUSY 0x02
+ #define REG_STAT_FAULT 0x01
+ #define REG_DATA 0x40UL
+ #define REG_ADDR 0x41UL
+ #define REG_SIZE 0x42UL
+
+ /* Registers accessed indirectly via REG_DATA/REG_ADDR */
+ #define IREG_FAN0 0x00
+ #define IREG_FAN1 0x01
+ #define IREG_FAN2 0x02
+ #define IREG_FAN3 0x03
+ #define IREG_FAN4 0x04
+ #define IREG_FAN5 0x05
+ #define IREG_LCL_TEMP 0x06
+ #define IREG_RMT1_TEMP 0x07
+ #define IREG_RMT2_TEMP 0x08
+ #define IREG_RMT3_TEMP 0x09
+ #define IREG_LM95221_TEMP 0x0a
+ #define IREG_FIRE_TEMP 0x0b
+ #define IREG_LSI1064_TEMP 0x0c
+ #define IREG_FRONT_TEMP 0x0d
+ #define IREG_FAN_STAT 0x0e
+ #define IREG_VCORE0 0x0f
+ #define IREG_VCORE1 0x10
+ #define IREG_VMEM0 0x11
+ #define IREG_VMEM1 0x12
+ #define IREG_PSU_TEMP 0x13
+
+ struct env {
+ void __iomem *regs;
+ spinlock_t lock;
+
+ struct device *hwmon_dev;
+ };
+
+ static u8 env_read(struct env *p, u8 ireg)
+ {
+ u8 ret;
+
+ spin_lock(&p->lock);
+ writeb(ireg, p->regs + REG_ADDR);
+ ret = readb(p->regs + REG_DATA);
+ spin_unlock(&p->lock);
+
+ return ret;
+ }
+
+ static void env_write(struct env *p, u8 ireg, u8 val)
+ {
+ spin_lock(&p->lock);
+ writeb(ireg, p->regs + REG_ADDR);
+ writeb(val, p->regs + REG_DATA);
+ spin_unlock(&p->lock);
+ }
+
+ /* There seems to be an adt7462 providing these values, thus a lot
+ * of these calculations are borrowed from the adt7470 driver.
+ */
+ #define FAN_PERIOD_TO_RPM(x) ((90000 * 60) / (x))
+ #define FAN_RPM_TO_PERIOD FAN_PERIOD_TO_RPM
+ #define FAN_PERIOD_INVALID (0xff << 8)
+ #define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID)
+
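+ /* For illustration, assuming the 90 kHz tick implied above: a fan
+ * register value of 0x30 gives period = 0x30 << 8 = 12288 ticks,
+ * i.e. (90000 * 60) / 12288 ~= 439 RPM. Feeding 439 RPM back in
+ * yields ~12300 ticks, which is why FAN_RPM_TO_PERIOD can simply
+ * alias FAN_PERIOD_TO_RPM.
+ */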
+ static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ int fan_nr = to_sensor_dev_attr(attr)->index;
+ struct env *p = dev_get_drvdata(dev);
+ int rpm, period;
+ u8 val;
+
+ val = env_read(p, IREG_FAN0 + fan_nr);
+ period = (int) val << 8;
+ if (FAN_DATA_VALID(period))
+ rpm = FAN_PERIOD_TO_RPM(period);
+ else
+ rpm = 0;
+
+ return sprintf(buf, "%d\n", rpm);
+ }
+
+ static ssize_t set_fan_speed(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ int fan_nr = to_sensor_dev_attr(attr)->index;
+ int rpm = simple_strtol(buf, NULL, 10);
+ struct env *p = dev_get_drvdata(dev);
+ int period;
+ u8 val;
+
+ if (!rpm)
+ return -EINVAL;
+
+ period = FAN_RPM_TO_PERIOD(rpm);
+ val = period >> 8;
+ env_write(p, IREG_FAN0 + fan_nr, val);
+
+ return count;
+ }
+
+ static ssize_t show_fan_fault(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ int fan_nr = to_sensor_dev_attr(attr)->index;
+ struct env *p = dev_get_drvdata(dev);
+ u8 val = env_read(p, IREG_FAN_STAT);
+ return sprintf(buf, "%d\n", (val & (1 << fan_nr)) ? 1 : 0);
+ }
+
+ #define fan(index) \
+ static SENSOR_DEVICE_ATTR(fan##index##_speed, S_IRUGO | S_IWUSR, \
+ show_fan_speed, set_fan_speed, index); \
+ static SENSOR_DEVICE_ATTR(fan##index##_fault, S_IRUGO, \
+ show_fan_fault, NULL, index)
+
+ fan(0);
+ fan(1);
+ fan(2);
+ fan(3);
+ fan(4);
+
+ static SENSOR_DEVICE_ATTR(psu_fan_fault, S_IRUGO, show_fan_fault, NULL, 6);
+
+ static ssize_t show_temp(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ int temp_nr = to_sensor_dev_attr(attr)->index;
+ struct env *p = dev_get_drvdata(dev);
+ s8 val;
+
+ val = env_read(p, IREG_LCL_TEMP + temp_nr);
+ return sprintf(buf, "%d\n", ((int) val) - 64);
+ }
+
+ static SENSOR_DEVICE_ATTR(adt7462_local_temp, S_IRUGO, show_temp, NULL, 0);
+ static SENSOR_DEVICE_ATTR(cpu0_temp, S_IRUGO, show_temp, NULL, 1);
+ static SENSOR_DEVICE_ATTR(cpu1_temp, S_IRUGO, show_temp, NULL, 2);
+ static SENSOR_DEVICE_ATTR(motherboard_temp, S_IRUGO, show_temp, NULL, 3);
+ static SENSOR_DEVICE_ATTR(lm95221_local_temp, S_IRUGO, show_temp, NULL, 4);
+ static SENSOR_DEVICE_ATTR(fire_temp, S_IRUGO, show_temp, NULL, 5);
+ static SENSOR_DEVICE_ATTR(lsi1064_local_temp, S_IRUGO, show_temp, NULL, 6);
+ static SENSOR_DEVICE_ATTR(front_panel_temp, S_IRUGO, show_temp, NULL, 7);
+ static SENSOR_DEVICE_ATTR(psu_temp, S_IRUGO, show_temp, NULL, 13);
+
+ static ssize_t show_stat_bit(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ int index = to_sensor_dev_attr(attr)->index;
+ struct env *p = dev_get_drvdata(dev);
+ u8 val;
+
+ val = readb(p->regs + REG_STAT);
+ return sprintf(buf, "%d\n", (val & (1 << index)) ? 1 : 0);
+ }
+
+ static SENSOR_DEVICE_ATTR(fan_failure, S_IRUGO, show_stat_bit, NULL, 0);
+ static SENSOR_DEVICE_ATTR(env_bus_busy, S_IRUGO, show_stat_bit, NULL, 1);
+ static SENSOR_DEVICE_ATTR(env_data_stale, S_IRUGO, show_stat_bit, NULL, 2);
+ static SENSOR_DEVICE_ATTR(tpm_self_test_passed, S_IRUGO, show_stat_bit, NULL, 3);
+
+ static ssize_t show_fwver(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ struct env *p = dev_get_drvdata(dev);
+ u8 val;
+
+ val = readb(p->regs + REG_STAT);
+ return sprintf(buf, "%d\n", val >> 4);
+ }
+
+ static SENSOR_DEVICE_ATTR(firmware_version, S_IRUGO, show_fwver, NULL, 0);
+
+ static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ return sprintf(buf, "ultra45\n");
+ }
+
+ static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0);
+
+ static struct attribute *env_attributes[] = {
+ &sensor_dev_attr_fan0_speed.dev_attr.attr,
+ &sensor_dev_attr_fan0_fault.dev_attr.attr,
+ &sensor_dev_attr_fan1_speed.dev_attr.attr,
+ &sensor_dev_attr_fan1_fault.dev_attr.attr,
+ &sensor_dev_attr_fan2_speed.dev_attr.attr,
+ &sensor_dev_attr_fan2_fault.dev_attr.attr,
+ &sensor_dev_attr_fan3_speed.dev_attr.attr,
+ &sensor_dev_attr_fan3_fault.dev_attr.attr,
+ &sensor_dev_attr_fan4_speed.dev_attr.attr,
+ &sensor_dev_attr_fan4_fault.dev_attr.attr,
+ &sensor_dev_attr_psu_fan_fault.dev_attr.attr,
+ &sensor_dev_attr_adt7462_local_temp.dev_attr.attr,
+ &sensor_dev_attr_cpu0_temp.dev_attr.attr,
+ &sensor_dev_attr_cpu1_temp.dev_attr.attr,
+ &sensor_dev_attr_motherboard_temp.dev_attr.attr,
+ &sensor_dev_attr_lm95221_local_temp.dev_attr.attr,
+ &sensor_dev_attr_fire_temp.dev_attr.attr,
+ &sensor_dev_attr_lsi1064_local_temp.dev_attr.attr,
+ &sensor_dev_attr_front_panel_temp.dev_attr.attr,
+ &sensor_dev_attr_psu_temp.dev_attr.attr,
+ &sensor_dev_attr_fan_failure.dev_attr.attr,
+ &sensor_dev_attr_env_bus_busy.dev_attr.attr,
+ &sensor_dev_attr_env_data_stale.dev_attr.attr,
+ &sensor_dev_attr_tpm_self_test_passed.dev_attr.attr,
+ &sensor_dev_attr_firmware_version.dev_attr.attr,
+ &sensor_dev_attr_name.dev_attr.attr,
+ NULL,
+ };
+
+ static const struct attribute_group env_group = {
+ .attrs = env_attributes,
+ };
+
+ static int __devinit env_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct env *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ int err = -ENOMEM;
+
+ if (!p)
+ goto out;
+
+ spin_lock_init(&p->lock);
+
+ p->regs = of_ioremap(&op->resource[0], 0, REG_SIZE, "pic16f747");
+ if (!p->regs)
+ goto out_free;
+
+ err = sysfs_create_group(&op->dev.kobj, &env_group);
+ if (err)
+ goto out_iounmap;
+
+ p->hwmon_dev = hwmon_device_register(&op->dev);
+ if (IS_ERR(p->hwmon_dev)) {
+ err = PTR_ERR(p->hwmon_dev);
+ goto out_sysfs_remove_group;
+ }
+
+ dev_set_drvdata(&op->dev, p);
+ err = 0;
+
+ out:
+ return err;
+
+ out_sysfs_remove_group:
+ sysfs_remove_group(&op->dev.kobj, &env_group);
+
+ out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, REG_SIZE);
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static int __devexit env_remove(struct of_device *op)
+ {
+ struct env *p = dev_get_drvdata(&op->dev);
+
+ if (p) {
+ sysfs_remove_group(&op->dev.kobj, &env_group);
+ hwmon_device_unregister(p->hwmon_dev);
+ of_iounmap(&op->resource[0], p->regs, REG_SIZE);
+ kfree(p);
+ }
+
+ return 0;
+ }
+
+ static const struct of_device_id env_match[] = {
+ {
+ .name = "env-monitor",
+ .compatible = "SUNW,ebus-pic16f747-env",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, env_match);
+
+ static struct of_platform_driver env_driver = {
++ .owner = THIS_MODULE,
+ .name = "ultra45_env",
+ .match_table = env_match,
+ .probe = env_probe,
+ .remove = __devexit_p(env_remove),
+ };
+
+ static int __init env_init(void)
+ {
+ return of_register_driver(&env_driver, &of_bus_type);
+ }
+
+ static void __exit env_exit(void)
+ {
+ of_unregister_driver(&env_driver);
+ }
+
+ module_init(env_init);
+ module_exit(env_exit);
To compile this driver as a module, choose M here: the
module will be called elo.
+config TOUCHSCREEN_ELOUSB
+ tristate "Elo USB touchscreens"
+ select USB
+ help
+ Say Y here if you have an Elo USB touchscreen connected to
+ your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called elousb.
+
+ config TOUCHSCREEN_WACOM_W8001
+ tristate "Wacom W8001 penabled serial touchscreen"
+ select SERIO
+ help
+ Say Y here if you have a Wacom W8001 penabled serial touchscreen
+ connected to your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called wacom_w8001.
+
+
config TOUCHSCREEN_MTOUCH
tristate "MicroTouch serial touchscreens"
select SERIO
--- /dev/null
+/*
+ * Copyright (c) 1999-2001 Vojtech Pavlik
+ *
+ * Elo USB touchscreen support
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Should you need to contact me, the author, you can do so either by
+ * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
+ * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
+#include <linux/hid.h>
+#include <linux/input.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.1"
+#define DRIVER_AUTHOR "Vojtech Pavlik <vojtech@suse.cz>"
+#define DRIVER_DESC "Elo USB touchscreen driver"
+#define DRIVER_LICENSE "GPL"
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE(DRIVER_LICENSE);
+
+struct elousb {
+ char name[128];
+ char phys[64];
+ struct usb_device *usbdev;
+ struct input_dev *dev;
+ struct urb *irq;
+
+ unsigned char *data;
+ dma_addr_t data_dma;
+};
+
+static void elousb_irq(struct urb *urb)
+{
+ struct elousb *elo = urb->context;
+ unsigned char *data = elo->data;
+ struct input_dev *dev = elo->dev;
+ int status;
+
+ switch (urb->status) {
+ case 0: /* success */
+ break;
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+ case -ESHUTDOWN:
+ return;
+ /* -EPIPE: should clear the halt */
+ default: /* error */
+ goto resubmit;
+ }
+
+ if (data[0] != 'T') /* Mandatory ELO packet marker */
+ goto resubmit; /* don't let one bad packet stall input */
+
+
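+ /*
+ * Packet layout, as inferred from the handling below: data[0] is the
+ * 'T' marker, data[1] holds status flags (0x01/0x02 touch, 0x04
+ * untouch, 0x80 pressure field valid), data[2..3] X, data[4..5] Y
+ * and data[6..7] pressure, all little-endian words.
+ */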
+ input_report_abs(dev, ABS_X, ((u32)data[3] << 8) | data[2]);
+ input_report_abs(dev, ABS_Y, ((u32)data[5] << 8) | data[4]);
+
+ input_report_abs(dev, ABS_PRESSURE,
+ (data[1] & 0x80) ? (((u32)data[7] << 8) | data[6]): 0);
+
+ if (data[1] & 0x03) {
+ input_report_key(dev, BTN_TOUCH, 1);
+ input_sync(dev);
+ }
+
+ if (data[1] & 0x04)
+ input_report_key(dev, BTN_TOUCH, 0);
+
+ input_sync(dev);
+
+resubmit:
+ status = usb_submit_urb (urb, GFP_ATOMIC);
+ if (status)
+ err ("can't resubmit intr, %s-%s/input0, status %d",
+ elo->usbdev->bus->bus_name,
+ elo->usbdev->devpath, status);
+}
+
+static int elousb_open(struct input_dev *dev)
+{
+ struct elousb *elo = input_get_drvdata(dev);
+
+ elo->irq->dev = elo->usbdev;
+ if (usb_submit_urb(elo->irq, GFP_KERNEL))
+ return -EIO;
+
+ return 0;
+}
+
+static void elousb_close(struct input_dev *dev)
+{
+ struct elousb *elo = input_get_drvdata(dev);
+
+ usb_kill_urb(elo->irq);
+}
+
+static int elousb_probe(struct usb_interface *intf, const struct usb_device_id *id)
+{
+ struct usb_device *dev = interface_to_usbdev(intf);
+ struct usb_host_interface *interface;
+ struct usb_endpoint_descriptor *endpoint;
+ struct hid_descriptor *hdesc;
+ struct elousb *elo;
+ struct input_dev *input_dev;
+ int pipe, i;
+ unsigned int rsize = 0;
+ int error = -ENOMEM;
+ char *rdesc;
+
+ interface = intf->cur_altsetting;
+
+ if (interface->desc.bNumEndpoints != 1)
+ return -ENODEV;
+
+ endpoint = &interface->endpoint[0].desc;
+ if (!(endpoint->bEndpointAddress & USB_DIR_IN))
+ return -ENODEV;
+ if ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT)
+ return -ENODEV;
+
+ if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) &&
+ (!interface->desc.bNumEndpoints ||
+ usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) {
+ err("HID class descriptor not present");
+ return -ENODEV;
+ }
+
+ for (i = 0; i < hdesc->bNumDescriptors; i++)
+ if (hdesc->desc[i].bDescriptorType == HID_DT_REPORT)
+ rsize = le16_to_cpu(hdesc->desc[i].wDescriptorLength);
+
+ if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) {
+ err("weird size of report descriptor (%u)", rsize);
+ return -ENODEV;
+ }
+
+
+ pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress);
+
+ elo = kzalloc(sizeof(struct elousb), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!elo || !input_dev)
+ goto fail1;
+
+ elo->data = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &elo->data_dma);
+ if (!elo->data)
+ goto fail1;
+
+ elo->irq = usb_alloc_urb(0, GFP_KERNEL);
+ if (!elo->irq)
+ goto fail2;
+
+ if (!(rdesc = kmalloc(rsize, GFP_KERNEL)))
+ goto fail3;
+
+ elo->usbdev = dev;
+ elo->dev = input_dev;
+
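+ /* Per the HID class spec, an idle duration of 0 means "report only
+ * on change". The report descriptor is then fetched (and discarded
+ * below), presumably because some firmware will not start reporting
+ * until it has been read. */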
+ if ((error = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
+ HID_REQ_SET_IDLE, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0,
+ interface->desc.bInterfaceNumber,
+ NULL, 0, USB_CTRL_SET_TIMEOUT)) < 0) {
+ err("setting HID idle timeout failed, error %d", error);
+ error = -ENODEV;
+ goto fail4;
+ }
+
+ if ((error = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
+ USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_DIR_IN,
+ HID_DT_REPORT << 8, interface->desc.bInterfaceNumber,
+ rdesc, rsize, USB_CTRL_GET_TIMEOUT)) < rsize) {
+ err("reading HID report descriptor failed, error %d", error);
+ error = -ENODEV;
+ goto fail4;
+ }
+
+ if (dev->manufacturer)
+ strlcpy(elo->name, dev->manufacturer, sizeof(elo->name));
+
+ if (dev->product) {
+ if (dev->manufacturer)
+ strlcat(elo->name, " ", sizeof(elo->name));
+ strlcat(elo->name, dev->product, sizeof(elo->name));
+ }
+
+ if (!strlen(elo->name))
+ snprintf(elo->name, sizeof(elo->name),
+ "Elo touchscreen %04x:%04x",
+ le16_to_cpu(dev->descriptor.idVendor),
+ le16_to_cpu(dev->descriptor.idProduct));
+
+ usb_make_path(dev, elo->phys, sizeof(elo->phys));
+ strlcat(elo->phys, "/input0", sizeof(elo->phys));
+
+ input_dev->name = elo->name;
+ input_dev->phys = elo->phys;
+ usb_to_input_id(dev, &input_dev->id);
+ input_dev->dev.parent = &intf->dev;
+
+ input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+ set_bit(BTN_TOUCH, input_dev->keybit);
+ input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
+ set_bit(ABS_PRESSURE, input_dev->absbit);
+
+ input_set_abs_params(input_dev, ABS_X, 0, 4000, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, 3840, 0, 0);
+ input_set_abs_params(input_dev, ABS_PRESSURE, 0, 256, 0, 0);
+
+ input_set_drvdata(input_dev, elo);
+
+ input_dev->open = elousb_open;
+ input_dev->close = elousb_close;
+
+ usb_fill_int_urb(elo->irq, dev, pipe, elo->data, 8,
+ elousb_irq, elo, endpoint->bInterval);
+ elo->irq->transfer_dma = elo->data_dma;
+ elo->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+ error = input_register_device(elo->dev);
+ if (error)
+ goto fail4;
+
+ kfree(rdesc); /* the descriptor is not needed past probe */
+ usb_set_intfdata(intf, elo);
+ return 0;
+
+fail4:
+ kfree(rdesc);
+fail3:
+ usb_free_urb(elo->irq);
+fail2:
+ usb_buffer_free(dev, 8, elo->data, elo->data_dma);
+fail1:
+ input_free_device(input_dev);
+ kfree(elo);
+ return error;
+}
+
+static void elousb_disconnect(struct usb_interface *intf)
+{
+ struct elousb *elo = usb_get_intfdata (intf);
+
+ usb_set_intfdata(intf, NULL);
+ if (elo) {
+ usb_kill_urb(elo->irq);
+ input_unregister_device(elo->dev);
+ usb_free_urb(elo->irq);
+ usb_buffer_free(interface_to_usbdev(intf), 8, elo->data, elo->data_dma);
+ kfree(elo);
+ }
+}
+
+static struct usb_device_id elousb_id_table [] = {
+ { USB_DEVICE(0x04e7, 0x0009) }, /* CarrolTouch 4000U */
+ { USB_DEVICE(0x04e7, 0x0030) }, /* CarrolTouch 4500U */
+ { } /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE (usb, elousb_id_table);
+
+static struct usb_driver elousb_driver = {
+ .name = "elousb",
+ .probe = elousb_probe,
+ .disconnect = elousb_disconnect,
+ .id_table = elousb_id_table,
+};
+
+static int __init elousb_init(void)
+{
+ int retval = usb_register(&elousb_driver);
+ if (retval == 0)
- info(DRIVER_VERSION ":" DRIVER_DESC);
++ printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_VERSION ":" DRIVER_DESC "\n");
+ return retval;
+}
+
+static void __exit elousb_exit(void)
+{
+ usb_deregister(&elousb_driver);
+}
+
+module_init(elousb_init);
+module_exit(elousb_exit);
config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
-- depends on BLK_DEV_MD
select ASYNC_MEMCPY
select ASYNC_XOR
---help---
---help---
Allow volume managers to take writable snapshots of a device.
++config DM_RAID
++ tristate
++ depends on BLK_DEV_DM
++
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
++ select DM_RAID
---help---
Allow volume managers to mirror logical volumes, also
needed for live data migration tools such as 'pvmove'.
If unsure, say N.
+config DM_RAID45
+ tristate "RAID 4/5 target (EXPERIMENTAL)"
++ depends on DM_RAID
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ ---help---
+ A target that supports RAID4 and RAID5 mappings.
+
+ If unsure, say N.
+
config DM_UEVENT
bool "DM uevents (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
-obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
+obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o \
+ dm-least-pending.o dm-queue-length.o \
+ dm-service-time.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
- obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-regions.o dm-log.o
- obj-$(CONFIG_DM_RAID45) += dm-raid45.o dm-log.o dm-memcache.o \
- dm-regions.o dm-message.o
-obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
++obj-$(CONFIG_DM_RAID) += dm-region-hash.o dm-log.o
++obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
++obj-$(CONFIG_DM_RAID45) += dm-raid45.o dm-memcache.o dm-message.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
quiet_cmd_unroll = UNROLL $@
--- /dev/null
+/*
+ * (C) Copyright 2008 Hewlett-Packard Development Company, L.P
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-path-selector.h"
+
+#include <linux/slab.h>
+
+#define DM_MSG_PREFIX "multipath least-pending"
+
+/*-----------------------------------------------------------------
+* Path-handling code, paths are held in lists
+*---------------------------------------------------------------*/
+struct path_info {
+ struct list_head list;
+ struct dm_path *path;
+ unsigned repeat_count;
+ atomic_t io_count;
+};
+
+static void free_paths(struct list_head *paths)
+{
+ struct path_info *pi, *next;
+
+ list_for_each_entry_safe(pi, next, paths, list) {
+ list_del(&pi->list);
+ kfree(pi);
+ }
+}
+
+/*-----------------------------------------------------------------
+ * Least-pending selector
+ *---------------------------------------------------------------*/
+
+#define LPP_MIN_IO 1
+
+struct selector {
+ struct list_head valid_paths;
+ struct list_head invalid_paths;
+};
+
+static struct selector *alloc_selector(void)
+{
+ struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+ if (s) {
+ INIT_LIST_HEAD(&s->valid_paths);
+ INIT_LIST_HEAD(&s->invalid_paths);
+ }
+
+ return s;
+}
+
+static int lpp_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+ struct selector *s;
+
+ s = alloc_selector();
+ if (!s)
+ return -ENOMEM;
+
+ ps->context = s;
+ return 0;
+}
+
+static void lpp_destroy(struct path_selector *ps)
+{
+ struct selector *s = ps->context;
+
+ free_paths(&s->valid_paths);
+ free_paths(&s->invalid_paths);
+ kfree(s);
+ ps->context = NULL;
+}
+
+static int lpp_status(struct path_selector *ps, struct dm_path *path,
+ status_type_t type, char *result, unsigned int maxlen)
+{
+ struct path_info *pi;
+ int sz = 0;
+
+ if (!path)
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("1 ");
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT("0 ");
+ break;
+ }
+ else {
+ pi = path->pscontext;
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%u:%u ", pi->repeat_count,
+ atomic_read(&pi->io_count));
+ break;
+ case STATUSTYPE_TABLE:
+ break;
+ }
+ }
+
+ return sz;
+}
+
+/*
+ * Called during initialisation to register each path with an
+ * optional repeat_count.
+ */
+static int lpp_add_path(struct path_selector *ps, struct dm_path *path,
+ int argc, char **argv, char **error)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi;
+ unsigned repeat_count = LPP_MIN_IO;
+
+ if (argc > 1) {
+ *error = "least-pending ps: incorrect number of arguments";
+ return -EINVAL;
+ }
+
+ /* First path argument is number of I/Os before switching path */
+ if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
+ *error = "least-pending ps: invalid repeat count";
+ return -EINVAL;
+ }
+
+ /* allocate the path */
+ pi = kmalloc(sizeof(*pi), GFP_KERNEL);
+ if (!pi) {
+ *error = "least-pending ps: Error allocating path context";
+ return -ENOMEM;
+ }
+
+ pi->path = path;
+ pi->repeat_count = repeat_count;
+ atomic_set(&pi->io_count, 0);
+
+ path->pscontext = pi;
+
+ list_add(&pi->list, &s->valid_paths);
+
+ return 0;
+}
+
+static void lpp_fail_path(struct path_selector *ps, struct dm_path *p)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi = p->pscontext;
+
+ if (!pi)
+ return;
+
+ atomic_set(&pi->io_count, 0);
+
+ list_move(&pi->list, &s->invalid_paths);
+}
+
+static int lpp_reinstate_path(struct path_selector *ps, struct dm_path *p)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi = p->pscontext;
+
+ if (!pi)
+ return 1;
+
+ list_move(&pi->list, &s->valid_paths);
+
+ return 0;
+}
+
+static struct dm_path *lpp_select_path(struct path_selector *ps,
+ unsigned *repeat_count, size_t nr_bytes)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi, *next, *least_io_path = NULL;
+ struct list_head *paths;
+
+ if (list_empty(&s->valid_paths))
+ return NULL;
+
+ paths = &s->valid_paths;
+
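+ /* Remember the valid path with the fewest I/Os in flight; a path
+ * with none pending cannot be beaten, so stop scanning early. */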
+ list_for_each_entry_safe(pi, next, paths, list) {
+ if (!least_io_path || atomic_read(&pi->io_count) < atomic_read(&least_io_path->io_count))
+ least_io_path = pi;
+ if (!atomic_read(&least_io_path->io_count))
+ break;
+ }
+
+ if (!least_io_path)
+ return NULL;
+
+ atomic_inc(&least_io_path->io_count);
+ *repeat_count = least_io_path->repeat_count;
+
+ return least_io_path->path;
+}
+
- static int lpp_end_io(struct path_selector *ps, struct dm_path *path, size_t nr_bytes)
++static int lpp_end_io(struct path_selector *ps, struct dm_path *path,
++ size_t nr_bytes)
+{
+ struct path_info *pi = NULL;
+
+ pi = path->pscontext;
+ if (!pi)
+ return 1;
+
+ atomic_dec(&pi->io_count);
+
+ return 0;
+}
+
+static struct path_selector_type lpp_ps = {
+ .name = "least-pending",
+ .module = THIS_MODULE,
+ .table_args = 1,
+ .info_args = 0,
+ .create = lpp_create,
+ .destroy = lpp_destroy,
+ .status = lpp_status,
+ .add_path = lpp_add_path,
+ .fail_path = lpp_fail_path,
+ .reinstate_path = lpp_reinstate_path,
+ .select_path = lpp_select_path,
+ .end_io = lpp_end_io,
+};
+
+static int __init dm_lpp_init(void)
+{
+ int r = dm_register_path_selector(&lpp_ps);
+
+ if (r < 0)
+ DMERR("register failed %d", r);
+
+ DMINFO("version 1.0.0 loaded");
+
+ return r;
+}
+
+static void __exit dm_lpp_exit(void)
+{
+ int r = dm_unregister_path_selector(&lpp_ps);
+
+ if (r < 0)
+ DMERR("unregister failed %d", r);
+}
+
+module_init(dm_lpp_init);
+module_exit(dm_lpp_exit);
+
+MODULE_DESCRIPTION(DM_NAME " least-pending multipath path selector");
+MODULE_AUTHOR("Sakshi Chaitanya Veni <vsakshi@hp.com>");
+MODULE_LICENSE("GPL");
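+
+/*
+ * Usage sketch, assuming the standard dm-multipath table layout
+ * (device numbers and sizes are illustrative only):
+ *
+ * echo "0 2097152 multipath 0 0 1 1 least-pending 0 2 1 \
+ * 8:16 1 8:32 1" | dmsetup create mpath0
+ *
+ * i.e. one priority group using this selector, no selector-level
+ * arguments, two paths with one per-path argument each (the repeat
+ * count parsed by lpp_add_path() above).
+ */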
+
* This file is released under the GPL.
*/
- #include "dm.h"
+ #include <linux/device-mapper.h>
+
#include "dm-path-selector.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
#include "dm-uevent.h"
++#include "dm.h"
#include <linux/ctype.h>
#include <linux/init.h>
list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
list_del(&pgpath->list);
- if (m->hw_handler_name)
- scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
+ spin_lock_irqsave(&m->lock, flags);
+ if (m->pgpath_to_activate == pgpath)
+ m->pgpath_to_activate = NULL;
+ spin_unlock_irqrestore(&m->lock, flags);
free_pgpath(pgpath);
}
}
p = alloc_pgpath();
if (!p)
- return NULL;
+ return ERR_PTR(-ENOMEM);
- path = shift(as);
- r = dm_get_device(ti, path, ti->begin, ti->len,
+ r = dm_get_device(ti, shift(as), ti->begin, ti->len,
dm_table_get_mode(ti->table), &p->path.dev);
if (r) {
- unsigned major, minor;
-
- /* Try to add a failed device */
- if (r == -ENXIO && sscanf(path, "%u:%u", &major, &minor) == 2) {
- dev_t dev;
-
- /* Extract the major/minor numbers */
- dev = MKDEV(major, minor);
- if (MAJOR(dev) != major || MINOR(dev) != minor) {
- /* Nice try, didn't work */
- DMWARN("Invalid device path %s", path);
- ti->error = "error converting devnum";
- goto bad;
- }
- DMWARN("adding disabled device %d:%d", major, minor);
- p->path.dev = NULL;
- format_dev_t(p->path.pdev, dev);
- p->is_active = 0;
- } else {
- ti->error = "error getting device";
- goto bad;
- }
- } else {
- memcpy(p->path.pdev, p->path.dev->name, 16);
+ ti->error = "error getting device";
+ goto bad;
}
- if (p->path.dev) {
+ if (m->hw_handler_name) {
- r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev),
- m->hw_handler_name);
+ struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
+
- if (m->hw_handler_name) {
- r = scsi_dh_attach(q, m->hw_handler_name);
- if (r == -EBUSY) {
- /*
- * Already attached to different hw_handler,
- * try to reattach with correct one.
- */
- scsi_dh_detach(q);
- r = scsi_dh_attach(q, m->hw_handler_name);
- }
- if (r < 0) {
- ti->error = "error attaching hardware handler";
- dm_put_device(ti, p->path.dev);
- goto bad;
- }
- } else {
- /* Play safe and detach hardware handler */
++ r = scsi_dh_attach(q, m->hw_handler_name);
++ if (r == -EBUSY) {
++ /*
++ * Already attached to different hw_handler,
++ * try to reattach with correct one.
++ */
+ scsi_dh_detach(q);
++ r = scsi_dh_attach(q, m->hw_handler_name);
++ }
+ if (r < 0) {
++ ti->error = "error attaching hardware handler";
+ dm_put_device(ti, p->path.dev);
+ goto bad;
}
}
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pgpath)
- __choose_pgpath(m);
+ __choose_pgpath(m, 1 << 19); /* Assume 512KB */
- if (m->current_pgpath && m->current_pgpath->path.dev) {
+ if (m->current_pgpath) {
bdev = m->current_pgpath->path.dev->bdev;
- fake_dentry.d_inode = bdev->bd_inode;
- fake_file.f_mode = m->current_pgpath->path.dev->mode;
+ mode = m->current_pgpath->path.dev->mode;
}
if (m->queue_io)
spin_unlock_irqrestore(&m->lock, flags);
- return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file,
- bdev->bd_disk, cmd, arg);
+ return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+		 * We don't call __choose_pgpath() here to avoid triggering
+		 * pg_init just by a busy check.
+		 * So we don't know whether the underlying devices we would be
+		 * using at the next mapping time are busy or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+	 * If at least one non-busy active path exists, the path selector
+	 * will be able to select it, so we consider such a pg not busy.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_busy(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
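+
+/*
+ * Editor's sketch (assumption, not part of the original hunk): the busy
+ * hook above only takes effect once wired into the request-based target
+ * type, roughly as in this hypothetical excerpt:
+ *
+ *	static struct target_type multipath_target = {
+ *		.name = "multipath",
+ *		.module = THIS_MODULE,
+ *		// ... ctr/dtr/map/ioctl et al. ...
+ *		.busy = multipath_busy,	// consulted before dispatching
+ *	};
+ */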
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
--- /dev/null
+/*
+ * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.com>
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * Linux 2.6 Device Mapper RAID4 and RAID5 target.
+ *
+ * Supports:
+ * o RAID4 with dedicated and selectable parity device
+ * o RAID5 with rotating parity (left+right, symmetric+asymmetric)
+ * o run time optimization of xor algorithm used to calculate parity
+ *
+ *
+ * Thanks to MD for:
+ * o the raid address calculation algorithm
+ * o the base of the biovec <-> page list copier.
+ *
+ *
+ * Uses region hash to keep track of how many writes are in flight to
+ * regions in order to use dirty log to keep state of regions to recover:
+ *
+ * o clean regions (those which are synchronized
+ * and don't have write io in flight)
+ * o dirty regions (those with write io in flight)
+ *
+ *
+ * On startup, any dirty regions are migrated to the 'nosync' state
+ * and are subject to recovery by the daemon.
+ *
+ * See raid_ctr() for table definition.
+ *
+ *
+ * FIXME:
+ * o add virtual interface for locking
+ * o remove instrumentation (REMOVEME:)
+ *
+ */
+
+static const char *version = "v0.2431";
+
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "dm-memcache.h"
+#include "dm-message.h"
+#include "dm-raid45.h"
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
- #include <linux/dm-regions.h>
++#include <linux/dm-region-hash.h>
+
+/* # of parallel recovered regions */
+/* FIXME: cope with multiple recovery stripes in raid_set struct. */
+#define MAX_RECOVER 1 /* needs to be 1! */
+
+/*
+ * Configurable parameters
+ */
+#define INLINE
+
+/* Default # of stripes if not set in constructor. */
+#define STRIPES 64
+
+/* Minimum/maximum # of selectable stripes. */
+#define STRIPES_MIN 8
+#define STRIPES_MAX 16384
+
+/* Default chunk size in sectors if not set in constructor. */
+#define CHUNK_SIZE 64
+
+/* Default io size in sectors if not set in constructor. */
+#define IO_SIZE_MIN SECTORS_PER_PAGE
+#define IO_SIZE IO_SIZE_MIN
+
+/* Maximum settable chunk size in sectors. */
+#define CHUNK_SIZE_MAX 16384
+
+/* Recover io size default in sectors. */
+#define RECOVER_IO_SIZE_MIN 64
+#define RECOVER_IO_SIZE 256
+
+/* Default percentage recover io bandwidth. */
+#define BANDWIDTH 10
+#define BANDWIDTH_MIN 1
+#define BANDWIDTH_MAX 100
+/*
+ * END Configurable parameters
+ */
+
+#define TARGET "dm-raid45"
+#define DAEMON "kraid45d"
+#define DM_MSG_PREFIX TARGET
+
+#define SECTORS_PER_PAGE (PAGE_SIZE >> SECTOR_SHIFT)
+
+/* Amount/size for __xor(). */
+#define SECTORS_PER_XOR SECTORS_PER_PAGE
+#define XOR_SIZE PAGE_SIZE
+
+/* Derive raid_set from stripe_cache pointer. */
+#define RS(x) container_of(x, struct raid_set, sc)
+
+/* Check value in range. */
+#define range_ok(i, min, max) ((i) >= (min) && (i) <= (max))
+
+/* Page reference. */
+#define PAGE(stripe, p) ((stripe)->obj[p].pl->page)
+
+/* Bio list reference. */
+#define BL(stripe, p, rw) ((stripe)->ss[p].bl + (rw))
+
+/* Page list reference. */
+#define PL(stripe, p) ((stripe)->obj[p].pl)
+
+/* Check argument is power of 2. */
+#define POWER_OF_2(a) (!((a) & ((a) - 1)))
+
+/* Factor out to dm-bio-list.h */
+static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
+{
+ bio->bi_next = bl->head;
+ bl->head = bio;
+
+ if (!bl->tail)
+ bl->tail = bio;
+}
+
+/* Factor out to dm.h */
+#define TI_ERR_RET(str, ret) \
+	do { ti->error = DM_MSG_PREFIX ": " str; return ret; } while (0)
+#define TI_ERR(str) TI_ERR_RET(str, -EINVAL)
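+/*
+ * Usage sketch: in a constructor method, TI_ERR("wrong # of arguments")
+ * sets ti->error to "dm-raid45: wrong # of arguments" and returns
+ * -EINVAL from the calling function.
+ */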
+
+/*-----------------------------------------------------------------
+ * Stripe cache
+ *
+ * Cache for all reads and writes to raid sets (operational or degraded)
+ *
+ * We need to run all data to and from a RAID set through this cache,
+ * because parity chunks need to get calculated from data chunks
+ * or, in the degraded/resynchronization case, missing chunks need
+ * to be reconstructed using the other chunks of the stripe.
+ *---------------------------------------------------------------*/
+/* kmem cache # counter, used to create unique cache names. */
+static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1);
+
+/* A stripe set (holds bios hanging off). */
+struct stripe_set {
+ struct stripe *stripe; /* Backpointer to stripe for endio(). */
+ struct bio_list bl[3]; /* Reads, writes, and writes merged. */
+#define WRITE_MERGED 2
+};
+
+#if READ != 0 || WRITE != 1
+#error dm-raid45: READ/WRITE != 0/1 used as index!!!
+#endif
+
+/*
+ * Stripe linked list indexes. Keep order, because the stripe
+ * and the stripe cache rely on the first 3!
+ */
+enum list_types {
+ LIST_IO = 0, /* Stripes with io pending. */
+ LIST_ENDIO, /* Stripes to endio. */
+ LIST_LRU, /* Least recently used stripes. */
+ LIST_HASH, /* Hashed stripes. */
+ LIST_RECOVER = LIST_HASH, /* For recovery type stripes only. */
+ NR_LISTS, /* To size array in struct stripe. */
+};
+
+enum lock_types {
+ LOCK_ENDIO = 0, /* Protect endio list. */
+ LOCK_LRU, /* Protect lru list. */
+ NR_LOCKS, /* To size array in struct stripe_cache. */
+};
+
+/* A stripe: the io object to handle all reads and writes to a RAID set. */
+struct stripe {
+ struct stripe_cache *sc; /* Backpointer to stripe cache. */
+
+ sector_t key; /* Hash key. */
- sector_t region; /* Region stripe is mapped to. */
++ region_t region; /* Region stripe is mapped to. */
+
+ /* Reference count. */
+ atomic_t cnt;
+
+ struct {
+ unsigned long flags; /* flags (see below). */
+
+ /*
+ * Pending ios in flight:
+ *
+ * used as a 'lock' to control move of stripe to endio list
+ */
+ atomic_t pending; /* Pending ios in flight. */
+
+ /* Sectors to read and write for multi page stripe sets. */
+ unsigned size;
+ } io;
+
+ /* Lock on stripe (for clustering). */
+ void *lock;
+
+ /*
+ * 4 linked lists:
+ * o io list to flush io
+ * o endio list
+ * o LRU list to put stripes w/o reference count on
+ * o stripe cache hash
+ */
+ struct list_head lists[NR_LISTS];
+
+ struct {
+ unsigned short parity; /* Parity chunk index. */
+ short recover; /* Recovery chunk index. */
+ } idx;
+
+	/* This stripe's memory cache objects (dm-mem-cache). */
+ struct dm_mem_cache_object *obj;
+
+ /* Array of stripe sets (dynamically allocated). */
+ struct stripe_set ss[0];
+};
+
+/* States stripes can be in (flags field). */
+enum stripe_states {
+ STRIPE_ACTIVE, /* Active io on stripe. */
+ STRIPE_ERROR, /* io error on stripe. */
+ STRIPE_MERGED, /* Writes got merged. */
+ STRIPE_READ, /* Read. */
+ STRIPE_RBW, /* Read-before-write. */
+ STRIPE_RECONSTRUCT, /* reconstruct of a missing chunk required. */
+ STRIPE_RECOVER, /* Stripe used for RAID set recovery. */
+};
+
+/* ... and macros to access them. */
+#define BITOPS(name, what, var, flag) \
+static inline int TestClear ## name ## what(struct var *v) \
+{ return test_and_clear_bit(flag, &v->io.flags); } \
+static inline int TestSet ## name ## what(struct var *v) \
+{ return test_and_set_bit(flag, &v->io.flags); } \
+static inline void Clear ## name ## what(struct var *v) \
+{ clear_bit(flag, &v->io.flags); } \
+static inline void Set ## name ## what(struct var *v) \
+{ set_bit(flag, &v->io.flags); } \
+static inline int name ## what(struct var *v) \
+{ return test_bit(flag, &v->io.flags); }
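+
+/*
+ * E.g. BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE) below defines
+ * StripeActive(s), SetStripeActive(s), ClearStripeActive(s),
+ * TestSetStripeActive(s) and TestClearStripeActive(s), all operating
+ * atomically on the STRIPE_ACTIVE bit in s->io.flags.
+ */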
+
+
+BITOPS(Stripe, Active, stripe, STRIPE_ACTIVE)
+BITOPS(Stripe, Merged, stripe, STRIPE_MERGED)
+BITOPS(Stripe, Error, stripe, STRIPE_ERROR)
+BITOPS(Stripe, Read, stripe, STRIPE_READ)
+BITOPS(Stripe, RBW, stripe, STRIPE_RBW)
+BITOPS(Stripe, Reconstruct, stripe, STRIPE_RECONSTRUCT)
+BITOPS(Stripe, Recover, stripe, STRIPE_RECOVER)
+
+/* A stripe hash. */
+struct stripe_hash {
+ struct list_head *hash;
+ unsigned buckets;
+ unsigned mask;
+ unsigned prime;
+ unsigned shift;
+};
+
+/* A stripe cache. */
+struct stripe_cache {
+ /* Stripe hash. */
+ struct stripe_hash hash;
+
+ /* Stripes with io to flush, stripes to endio and LRU lists. */
+ struct list_head lists[3];
+
+ /* Locks to protect endio and lru lists. */
+ spinlock_t locks[NR_LOCKS];
+
+ /* Slab cache to allocate stripes from. */
+ struct {
+ struct kmem_cache *cache; /* Cache itself. */
+ char name[32]; /* Unique name. */
+ } kc;
+
+ struct dm_io_client *dm_io_client; /* dm-io client resource context. */
+
+ /* dm-mem-cache client resource context. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ int stripes_parm; /* # stripes parameter from constructor. */
+ atomic_t stripes; /* actual # of stripes in cache. */
+ atomic_t stripes_to_shrink; /* # of stripes to shrink cache by. */
+ atomic_t stripes_last; /* last # of stripes in cache. */
+ atomic_t active_stripes; /* actual # of active stripes in cache. */
+
+	/* REMOVEME: */
+	atomic_t max_active_stripes; /* max # of active stripes in cache. */
+};
+
+/* Flag specs for raid_dev. */
+enum raid_dev_flags { DEVICE_FAILED, IO_QUEUED };
+
+/* The raid device in a set. */
+struct raid_dev {
+ struct dm_dev *dev;
+ unsigned long flags; /* raid_dev_flags. */
+ sector_t start; /* offset to map to. */
+};
+
+/* Flags spec for raid_set. */
+enum raid_set_flags {
+ RS_CHECK_OVERWRITE, /* Check for chunk overwrites. */
+	RS_DEAD,		/* RAID set inoperative. */
+ RS_DEVEL_STATS, /* REMOVEME: display status information. */
+ RS_IO_ERROR, /* io error on set. */
+ RS_RECOVER, /* Do recovery. */
+ RS_RECOVERY_BANDWIDTH, /* Allow recovery bandwidth (delayed bios). */
+ RS_REGION_GET, /* get a region to recover. */
+ RS_SC_BUSY, /* stripe cache busy -> send an event. */
+	RS_SUSPENDED,		/* RAID set suspended. */
+};
+
+/* REMOVEME: devel stats counters. */
+enum stats_types {
+ S_BIOS_READ,
+ S_BIOS_ADDED_READ,
+ S_BIOS_ENDIO_READ,
+ S_BIOS_WRITE,
+ S_BIOS_ADDED_WRITE,
+ S_BIOS_ENDIO_WRITE,
+ S_CAN_MERGE,
+ S_CANT_MERGE,
+ S_CONGESTED,
+ S_DM_IO_READ,
+ S_DM_IO_WRITE,
+ S_ACTIVE_READS,
+ S_BANDWIDTH,
+ S_BARRIER,
+ S_BIO_COPY_PL_NEXT,
+ S_DEGRADED,
+ S_DELAYED_BIOS,
+ S_EVICT,
+ S_FLUSHS,
+ S_HITS_1ST,
+ S_IOS_POST,
+ S_INSCACHE,
+ S_MAX_LOOKUP,
+ S_MERGE_PAGE_LOCKED,
+ S_NO_BANDWIDTH,
+ S_NOT_CONGESTED,
+ S_NO_RW,
+ S_NOSYNC,
+ S_PROHIBITPAGEIO,
+ S_RECONSTRUCT_EI,
+ S_RECONSTRUCT_DEV,
+ S_REDO,
+ S_REQUEUE,
+ S_STRIPE_ERROR,
+ S_SUM_DELAYED_BIOS,
+ S_XORS,
+ S_NR_STATS, /* # of stats counters. */
+};
+
+/* Status type -> string mappings. */
+struct stats_map {
+ const enum stats_types type;
+ const char *str;
+};
+
+static struct stats_map stats_map[] = {
+ { S_BIOS_READ, "r=" },
+ { S_BIOS_ADDED_READ, "/" },
+ { S_BIOS_ENDIO_READ, "/" },
+ { S_BIOS_WRITE, " w=" },
+ { S_BIOS_ADDED_WRITE, "/" },
+ { S_BIOS_ENDIO_WRITE, "/" },
+ { S_DM_IO_READ, " rc=" },
+ { S_DM_IO_WRITE, " wc=" },
+ { S_ACTIVE_READS, " active_reads=" },
+ { S_BANDWIDTH, " bandwidth=" },
+ { S_NO_BANDWIDTH, " no_bandwidth=" },
+ { S_BARRIER, " barrier=" },
+ { S_BIO_COPY_PL_NEXT, " bio_copy_pl_next=" },
+ { S_CAN_MERGE, " can_merge=" },
+ { S_MERGE_PAGE_LOCKED, "/page_locked=" },
+ { S_CANT_MERGE, "/cant_merge=" },
+ { S_CONGESTED, " congested=" },
+ { S_NOT_CONGESTED, "/not_congested=" },
+ { S_DEGRADED, " degraded=" },
+ { S_DELAYED_BIOS, " delayed_bios=" },
+ { S_SUM_DELAYED_BIOS, "/sum_delayed_bios=" },
+ { S_EVICT, " evict=" },
+ { S_FLUSHS, " flushs=" },
+ { S_HITS_1ST, " hits_1st=" },
+ { S_IOS_POST, " ios_post=" },
+ { S_INSCACHE, " inscache=" },
+ { S_MAX_LOOKUP, " max_lookup=" },
+ { S_NO_RW, " no_rw=" },
+ { S_NOSYNC, " nosync=" },
+ { S_PROHIBITPAGEIO, " ProhibitPageIO=" },
+ { S_RECONSTRUCT_EI, " reconstruct_ei=" },
+ { S_RECONSTRUCT_DEV, " reconstruct_dev=" },
+ { S_REDO, " redo=" },
+ { S_REQUEUE, " requeue=" },
+ { S_STRIPE_ERROR, " stripe_error=" },
+ { S_XORS, " xors=" },
+};
+
+/*
+ * A RAID set.
+ */
+typedef void (*xor_function_t)(unsigned count, unsigned long **data);
+struct raid_set {
+ struct dm_target *ti; /* Target pointer. */
+
+ struct {
+ unsigned long flags; /* State flags. */
+ spinlock_t in_lock; /* Protects central input list below. */
+ struct bio_list in; /* Pending ios (central input list). */
+ struct bio_list work; /* ios work set. */
+ wait_queue_head_t suspendq; /* suspend synchronization. */
+ atomic_t in_process; /* counter of queued bios (suspendq). */
+ atomic_t in_process_max;/* counter of queued bios max. */
+
+ /* io work. */
+ struct workqueue_struct *wq;
+ struct delayed_work dws;
+ } io;
+
+ /* External locking. */
+ struct dm_raid45_locking_type *locking;
+
+ struct stripe_cache sc; /* Stripe cache for this set. */
+
+ /* Xor optimization. */
+ struct {
+ struct xor_func *f;
+ unsigned chunks;
+ unsigned speed;
+ } xor;
+
+ /* Recovery parameters. */
+ struct recover {
+ struct dm_dirty_log *dl; /* Dirty log. */
- struct dm_rh_client *rh; /* Region hash. */
++ struct dm_region_hash *rh; /* Region hash. */
+
+ /* dm-mem-cache client resource context for recovery stripes. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ struct list_head stripes; /* List of recovery stripes. */
+
+ region_t nr_regions;
+ region_t nr_regions_to_recover;
+ region_t nr_regions_recovered;
+ unsigned long start_jiffies;
+ unsigned long end_jiffies;
+
+ unsigned bandwidth; /* Recovery bandwidth [%]. */
+ unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
+ unsigned bandwidth_parm; /* " constructor parm. */
+ unsigned io_size; /* io size <= chunk size. */
+ unsigned io_size_parm; /* io size ctr parameter. */
+
+ /* recovery io throttling. */
+ atomic_t io_count[2]; /* counter recover/regular io. */
+ unsigned long last_jiffies;
+
+ struct dm_region *reg; /* Actual region to recover. */
+ sector_t pos; /* Position within region to recover. */
+ sector_t end; /* End of region to recover. */
+ } recover;
+
+ /* RAID set parameters. */
+ struct {
+ struct raid_type *raid_type; /* RAID type (eg, RAID4). */
+ unsigned raid_parms; /* # variable raid parameters. */
+
+ unsigned chunk_size; /* Sectors per chunk. */
+ unsigned chunk_size_parm;
+ unsigned chunk_mask; /* Mask for amount. */
+ unsigned chunk_shift; /* rsector chunk size shift. */
+
+ unsigned io_size; /* Sectors per io. */
+ unsigned io_size_parm;
+ unsigned io_mask; /* Mask for amount. */
+ unsigned io_shift_mask; /* Mask for raid_address(). */
+ unsigned io_shift; /* rsector io size shift. */
+ unsigned pages_per_io; /* Pages per io. */
+
+ sector_t sectors_per_dev; /* Sectors per device. */
+
+		atomic_t failed_devs; /* Number of failed devices. */
+
+ /* Index of device to initialize. */
+ int dev_to_init;
+ int dev_to_init_parm;
+
+ /* Raid devices dynamically allocated. */
+ unsigned raid_devs; /* # of RAID devices below. */
+ unsigned data_devs; /* # of RAID data devices. */
+
+ int ei; /* index of failed RAID device. */
+
+ /* index of dedicated parity device (i.e. RAID4). */
+ int pi;
+ int pi_parm; /* constructor parm for status output. */
+ } set;
+
+ /* REMOVEME: devel stats counters. */
+ atomic_t stats[S_NR_STATS];
+
+ /* Dynamically allocated temporary pointers for xor(). */
+ unsigned long **data;
+
+ /* Dynamically allocated RAID devices. Alignment? */
+ struct raid_dev dev[0];
+};
+
+
+BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
+BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
+BITOPS(RS, Dead, raid_set, RS_DEAD)
+BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
+BITOPS(RS, IoError, raid_set, RS_IO_ERROR)
+BITOPS(RS, Recover, raid_set, RS_RECOVER)
+BITOPS(RS, RegionGet, raid_set, RS_REGION_GET)
+BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
+BITOPS(RS, Suspended, raid_set, RS_SUSPENDED)
+#undef BITOPS
+
+#define PageIO(page) PageChecked(page)
+#define AllowPageIO(page) SetPageChecked(page)
+#define ProhibitPageIO(page) ClearPageChecked(page)
+
+/*-----------------------------------------------------------------
+ * Raid-4/5 set structures.
+ *---------------------------------------------------------------*/
+/* RAID level definitions. */
+enum raid_level {
+ raid4,
+ raid5,
+};
+
+/* Symmetric/Asymmetric, Left/Right parity rotating algorithms. */
+enum raid_algorithm {
+ none,
+ left_asym,
+ right_asym,
+ left_sym,
+ right_sym,
+};
+
+struct raid_type {
+ const char *name; /* RAID algorithm. */
+ const char *descr; /* Descriptor text for logging. */
+ const unsigned parity_devs; /* # of parity devices. */
+ const unsigned minimal_devs; /* minimal # of devices in set. */
+ const enum raid_level level; /* RAID level. */
+ const enum raid_algorithm algorithm; /* RAID algorithm. */
+};
+
+/* Supported raid types and properties. */
+static struct raid_type raid_types[] = {
+ {"raid4", "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
+ {"raid5_la", "RAID5 (left asymmetric)", 1, 3, raid5, left_asym},
+ {"raid5_ra", "RAID5 (right asymmetric)", 1, 3, raid5, right_asym},
+ {"raid5_ls", "RAID5 (left symmetric)", 1, 3, raid5, left_sym},
+ {"raid5_rs", "RAID5 (right symmetric)", 1, 3, raid5, right_sym},
+};
+
+/* Address as calculated by raid_address(). */
+struct address {
+ sector_t key; /* Hash key (start address of stripe). */
+ unsigned di, pi; /* Data and parity disks index. */
+};
+
+/* REMOVEME: reset statistics counters. */
+static void stats_reset(struct raid_set *rs)
+{
+ unsigned s = S_NR_STATS;
+
+ while (s--)
+ atomic_set(rs->stats + s, 0);
+}
+
+/*----------------------------------------------------------------
+ * RAID set management routines.
+ *--------------------------------------------------------------*/
+/*
+ * Begin small helper functions.
+ */
+/* Queue (optionally delayed) io work. */
+static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
+{
+ struct delayed_work *dws = &rs->io.dws;
+
+ cancel_delayed_work(dws);
+ queue_delayed_work(rs->io.wq, dws, delay);
+}
+
+/* Queue io work immediately (called from region hash too). */
+static INLINE void wake_do_raid(void *context)
+{
+ wake_do_raid_delayed(context, 0);
+}
+
+/* Wait until all io has been processed. */
+static INLINE void wait_ios(struct raid_set *rs)
+{
+ wait_event(rs->io.suspendq, !atomic_read(&rs->io.in_process));
+}
+
+/* Declare io queued to device. */
+static INLINE void io_dev_queued(struct raid_dev *dev)
+{
+ set_bit(IO_QUEUED, &dev->flags);
+}
+
+/* Test and reset io queued on device. */
+static inline int io_dev_clear(struct raid_dev *dev)
+{
+ return test_and_clear_bit(IO_QUEUED, &dev->flags);
+}
+
+/* Get an io reference. */
+static INLINE void io_get(struct raid_set *rs)
+{
+ int p = atomic_inc_return(&rs->io.in_process);
+
+ if (p > atomic_read(&rs->io.in_process_max))
+ atomic_set(&rs->io.in_process_max, p); /* REMOVEME: max. */
+}
+
+/* Put the io reference and conditionally wake io waiters. */
+static INLINE void io_put(struct raid_set *rs)
+{
+ /* Intel: rebuild data corrupter? */
+ if (!atomic_read(&rs->io.in_process)) {
+ DMERR("%s would go negative!!!", __func__);
+ return;
+ }
+
+ if (atomic_dec_and_test(&rs->io.in_process))
+ wake_up(&rs->io.suspendq);
+}
+
+/* Calculate device sector offset. */
+static INLINE sector_t _sector(struct raid_set *rs, struct bio *bio)
+{
+ sector_t sector = bio->bi_sector;
+
+ sector_div(sector, rs->set.data_devs);
+ return sector;
+}
+
+/* Test device operational. */
+static INLINE int dev_operational(struct raid_set *rs, unsigned p)
+{
+ return !test_bit(DEVICE_FAILED, &rs->dev[p].flags);
+}
+
+/* Return # of active stripes in stripe cache. */
+static INLINE int sc_active(struct stripe_cache *sc)
+{
+ return atomic_read(&sc->active_stripes);
+}
+
+/* Test io pending on stripe. */
+static INLINE int stripe_io(struct stripe *stripe)
+{
+ return atomic_read(&stripe->io.pending);
+}
+
+static INLINE void stripe_io_inc(struct stripe *stripe)
+{
+ atomic_inc(&stripe->io.pending);
+}
+
+static INLINE void stripe_io_dec(struct stripe *stripe)
+{
+ atomic_dec(&stripe->io.pending);
+}
+
+/* Wrapper needed by for_each_io_dev(). */
+static void _stripe_io_inc(struct stripe *stripe, unsigned p)
+{
+ stripe_io_inc(stripe);
+}
+
+/* Error a stripe. */
+static INLINE void stripe_error(struct stripe *stripe, struct page *page)
+{
+ SetStripeError(stripe);
+ SetPageError(page);
+ atomic_inc(RS(stripe->sc)->stats + S_STRIPE_ERROR);
+}
+
+/* Page IOed ok. */
+enum dirty_type { CLEAN, DIRTY };
+static INLINE void page_set(struct page *page, enum dirty_type type)
+{
+ switch (type) {
+ case DIRTY:
+ SetPageDirty(page);
+ AllowPageIO(page);
+ break;
+
+ case CLEAN:
+ ClearPageDirty(page);
+ break;
+
+ default:
+ BUG();
+ }
+
+ SetPageUptodate(page);
+ ClearPageError(page);
+}
+
+/* Return region state for a sector. */
+static INLINE int
+region_state(struct raid_set *rs, sector_t sector, unsigned long state)
+{
- struct dm_rh_client *rh = rs->recover.rh;
++ struct dm_region_hash *rh = rs->recover.rh;
+
+ return RSRecover(rs) ?
+ (dm_rh_get_state(rh, dm_rh_sector_to_region(rh, sector), 1) &
+ state) : 0;
+}
+
+/* Return true if the RAID set is degraded (io error on a member device). */
+static inline int raid_set_degraded(struct raid_set *rs)
+{
+ return RSIoError(rs);
+}
+
+/* Check that no more devices failed than the raid set can tolerate. */
+static INLINE int raid_set_operational(struct raid_set *rs)
+{
+ /* Too many failed devices -> BAD. */
+ return atomic_read(&rs->set.failed_devs) <=
+ rs->set.raid_type->parity_devs;
+}
+
+/*
+ * Return true if a page_list should be read/written.
+ *
+ * Conditions to read/write:
+ * o 1st page in list not uptodate
+ * o 1st page in list dirty
+ * o if we optimized io away, we flag it using the pages checked bit.
+ */
+static INLINE unsigned page_io(struct page *page)
+{
+ /* Optimization: page was flagged to need io during first run. */
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ return 1;
+ }
+
+ /* Avoid io if prohibited or a locked page. */
+ if (!PageIO(page) || PageLocked(page))
+ return 0;
+
+ if (!PageUptodate(page) || PageDirty(page)) {
+ /* Flag page needs io for second run optimization. */
+ SetPagePrivate(page);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Call a function on each page list needing io. */
+static INLINE unsigned
+for_each_io_dev(struct raid_set *rs, struct stripe *stripe,
+ void (*f_io)(struct stripe *stripe, unsigned p))
+{
+ unsigned p = rs->set.raid_devs, r = 0;
+
+ while (p--) {
+ if (page_io(PAGE(stripe, p))) {
+ f_io(stripe, p);
+ r++;
+ }
+ }
+
+ return r;
+}
+
+/* Reconstruct a particular device? */
+static INLINE int dev_to_init(struct raid_set *rs)
+{
+ return rs->set.dev_to_init > -1;
+}
+
+/*
+ * Index of device to calculate parity on.
+ * Either the parity device index *or* the selected device to init
+ * after a spare replacement.
+ */
+static INLINE unsigned dev_for_parity(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ return dev_to_init(rs) ? rs->set.dev_to_init : stripe->idx.parity;
+}
+
+/* Return the index of the device to be recovered. */
+static int idx_get(struct raid_set *rs)
+{
+	/* Avoid reading in pages which will be reconstructed anyway. */
+ if (dev_to_init(rs))
+ return rs->set.dev_to_init;
+ else if (rs->set.raid_type->level == raid4)
+ return rs->set.pi;
+
+ return -1;
+}
+
+/* RAID set congested function. */
+static int raid_set_congested(void *congested_data, int bdi_bits)
+{
+ struct raid_set *rs = congested_data;
+ int r = 0; /* Assume uncongested. */
+ unsigned p = rs->set.raid_devs;
+
+	/* Check whether any of our component devices are overloaded. */
+ while (p--) {
+ struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+ r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+ }
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
+ return r;
+}
+
+/* Display RAID set dead message once. */
+static void raid_set_dead(struct raid_set *rs)
+{
+ if (!TestSetRSDead(rs)) {
+ unsigned p;
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("FATAL: too many devices failed -> RAID set dead");
+
+ for (p = 0; p < rs->set.raid_devs; p++) {
+ if (!dev_operational(rs, p))
+ DMERR("device /dev/%s failed",
+ bdevname(rs->dev[p].dev->bdev, buf));
+ }
+ }
+}
+
+/* RAID set degrade check. */
+static INLINE int
+raid_set_check_and_degrade(struct raid_set *rs,
+ struct stripe *stripe, unsigned p)
+{
+ if (test_and_set_bit(DEVICE_FAILED, &rs->dev[p].flags))
+ return -EPERM;
+
+	/* Throw an event in case of member device errors. */
+ dm_table_event(rs->ti->table);
+ atomic_inc(&rs->set.failed_devs);
+
+ /* Only log the first member error. */
+ if (!TestSetRSIoError(rs)) {
+ char buf[BDEVNAME_SIZE];
+
+ /* Store index for recovery. */
+ mb();
+ rs->set.ei = p;
+ mb();
+
+ DMERR("CRITICAL: %sio error on device /dev/%s "
+ "in region=%llu; DEGRADING RAID set",
+ stripe ? "" : "FAKED ",
+ bdevname(rs->dev[p].dev->bdev, buf),
+ (unsigned long long) (stripe ? stripe->key : 0));
+ DMERR("further device error messages suppressed");
+ }
+
+ return 0;
+}
+
+static void
+raid_set_check_degrade(struct raid_set *rs, struct stripe *stripe)
+{
+ unsigned p = rs->set.raid_devs;
+
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ if (PageError(page)) {
+ ClearPageError(page);
+ raid_set_check_and_degrade(rs, stripe, p);
+ }
+ }
+}
+
+/* RAID set upgrade check. */
+static int raid_set_check_and_upgrade(struct raid_set *rs, unsigned p)
+{
+ if (!test_and_clear_bit(DEVICE_FAILED, &rs->dev[p].flags))
+ return -EPERM;
+
+ if (atomic_dec_and_test(&rs->set.failed_devs)) {
+ ClearRSIoError(rs);
+ rs->set.ei = -1;
+ }
+
+ return 0;
+}
+
+/* Lookup a RAID device by name or by major:minor number. */
+union dev_lookup {
+ const char *dev_name;
+ struct raid_dev *dev;
+};
+enum lookup_type { byname, bymajmin, bynumber };
+static int raid_dev_lookup(struct raid_set *rs, enum lookup_type by,
+ union dev_lookup *dl)
+{
+ unsigned p;
+
+ /*
+ * Must be an incremental loop, because the device array
+	 * can still have empty slots on calls from raid_ctr().
+ */
+ for (p = 0; p < rs->set.raid_devs; p++) {
+ char buf[BDEVNAME_SIZE];
+ struct raid_dev *dev = rs->dev + p;
+
+ if (!dev->dev)
+ break;
+
+ /* Format dev string appropriately if necessary. */
+ if (by == byname)
+ bdevname(dev->dev->bdev, buf);
+ else if (by == bymajmin)
+ format_dev_t(buf, dev->dev->bdev->bd_dev);
+
+ /* Do the actual check. */
+ if (by == bynumber) {
+ if (dl->dev->dev->bdev->bd_dev ==
+ dev->dev->bdev->bd_dev)
+ return p;
+ } else if (!strcmp(dl->dev_name, buf))
+ return p;
+ }
+
+ return -ENODEV;
+}
+
+/* End io wrapper. */
+static INLINE void
+_bio_endio(struct raid_set *rs, struct bio *bio, int error)
+{
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (bio_data_dir(bio) == WRITE ?
+ S_BIOS_ENDIO_WRITE : S_BIOS_ENDIO_READ));
+ bio_endio(bio, error);
+ io_put(rs); /* Wake any suspend waiters. */
+}
+
+/*
+ * End small helper functions.
+ */
+
+
+/*
+ * Stripe hash functions
+ */
+/* Initialize/destroy stripe hash. */
+static int hash_init(struct stripe_hash *hash, unsigned stripes)
+{
+ unsigned buckets = 2, max_buckets = stripes / 4;
+ unsigned hash_primes[] = {
+ /* Table of primes for hash_fn/table size optimization. */
+		3, 7, 13, 29, 53, 97, 193, 389, 769,
+ 1543, 3079, 6151, 12289, 24593,
+ };
+
+	/* Calculate number of buckets (2^n <= stripes / 4). */
+ while (buckets < max_buckets)
+ buckets <<= 1;
+
+ /* Allocate stripe hash. */
+ hash->hash = vmalloc(buckets * sizeof(*hash->hash));
+ if (!hash->hash)
+ return -ENOMEM;
+
+ hash->buckets = buckets;
+ hash->mask = buckets - 1;
+ hash->shift = ffs(buckets);
+ if (hash->shift > ARRAY_SIZE(hash_primes) + 1)
+ hash->shift = ARRAY_SIZE(hash_primes) + 1;
+
+ BUG_ON(hash->shift - 2 > ARRAY_SIZE(hash_primes) + 1);
+ hash->prime = hash_primes[hash->shift - 2];
+
+ /* Initialize buckets. */
+ while (buckets--)
+ INIT_LIST_HEAD(hash->hash + buckets);
+
+ return 0;
+}
+
+static INLINE void hash_exit(struct stripe_hash *hash)
+{
+ if (hash->hash) {
+ vfree(hash->hash);
+ hash->hash = NULL;
+ }
+}
+
+/* List add (head/tail/locked/unlocked) inlines. */
+enum list_lock_type { LIST_LOCKED, LIST_UNLOCKED };
+#define LIST_DEL(name, list) \
+static void stripe_ ## name ## _del(struct stripe *stripe, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = stripe->sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+\
+ if (!list_empty(lh)) \
+ list_del_init(lh); \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+LIST_DEL(hash, LIST_HASH)
+LIST_DEL(lru, LIST_LRU)
+#undef LIST_DEL
+
+enum list_pos_type { POS_HEAD, POS_TAIL };
+#define LIST_ADD(name, list) \
+static void stripe_ ## name ## _add(struct stripe *stripe, \
+ enum list_pos_type pos, \
+ enum list_lock_type lock) { \
+ struct list_head *lh = stripe->lists + (list); \
+ struct stripe_cache *sc = stripe->sc; \
+ spinlock_t *l = NULL; \
+\
+ if (lock == LIST_LOCKED) { \
+ l = sc->locks + LOCK_LRU; \
+ spin_lock_irq(l); \
+ } \
+\
+ if (list_empty(lh)) { \
+ if (pos == POS_HEAD) \
+ list_add(lh, sc->lists + (list)); \
+ else \
+ list_add_tail(lh, sc->lists + (list)); \
+ } \
+\
+ if (lock == LIST_LOCKED) \
+ spin_unlock_irq(l); \
+}
+
+LIST_ADD(endio, LIST_ENDIO)
+LIST_ADD(io, LIST_IO)
+LIST_ADD(lru, LIST_LRU)
+#undef LIST_ADD
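+
+/*
+ * E.g. LIST_ADD(lru, LIST_LRU) above defines stripe_lru_add(), so
+ * stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED) appends a stripe to
+ * the cache's LRU list under the LRU lock; LIST_DEL(lru, LIST_LRU)
+ * defines the matching stripe_lru_del().
+ */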
+
+#define POP(list) \
+ do { \
+ if (list_empty(sc->lists + list)) \
+ stripe = NULL; \
+ else { \
+ stripe = list_first_entry(&sc->lists[list], \
+ struct stripe, \
+ lists[list]); \
+ list_del_init(&stripe->lists[list]); \
+ } \
+	} while (0)
+
+/* Pop an available stripe off the lru list. */
+static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
+{
+ struct stripe *stripe;
+ spinlock_t *lock = sc->locks + LOCK_LRU;
+
+ spin_lock_irq(lock);
+ POP(LIST_LRU);
+ spin_unlock_irq(lock);
+
+ if (stripe)
+ /* Remove from hash before reuse. */
+ stripe_hash_del(stripe, LIST_UNLOCKED);
+
+ return stripe;
+}
+
+static inline unsigned hash_fn(struct stripe_hash *hash, sector_t key)
+{
+ return (unsigned) (((key * hash->prime) >> hash->shift) & hash->mask);
+}
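+
+/*
+ * E.g. for 16 buckets, hash_init() sets mask = 15 and
+ * shift = ffs(16) = 5, so hash_fn() computes
+ * ((key * prime) >> 5) & 15.
+ */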
+
+static inline struct list_head *
+hash_bucket(struct stripe_hash *hash, sector_t key)
+{
+ return hash->hash + hash_fn(hash, key);
+}
+
+/* Insert an entry into a hash. */
+static inline void hash_insert(struct stripe_hash *hash, struct stripe *stripe)
+{
+ list_add(stripe->lists + LIST_HASH, hash_bucket(hash, stripe->key));
+}
+
+/* Insert an entry into the stripe hash. */
+static inline void
+sc_insert(struct stripe_cache *sc, struct stripe *stripe)
+{
+ hash_insert(&sc->hash, stripe);
+}
+
+/* Lookup an entry in the stripe hash. */
+static inline struct stripe *
+stripe_lookup(struct stripe_cache *sc, sector_t key)
+{
+ unsigned c = 0;
+ struct stripe *stripe;
+ struct list_head *bucket = hash_bucket(&sc->hash, key);
+
+ list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
+		/* REMOVEME: statistics. */
+ if (++c > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
+ atomic_set(RS(sc)->stats + S_MAX_LOOKUP, c);
+
+ if (stripe->key == key)
+ return stripe;
+ }
+
+ return NULL;
+}
+
+/* Resize the stripe cache hash on size changes. */
+static int hash_resize(struct stripe_cache *sc)
+{
+ /* Resize threshold reached? */
+ if (atomic_read(&sc->stripes) > 2 * atomic_read(&sc->stripes_last)
+ || atomic_read(&sc->stripes) < atomic_read(&sc->stripes_last) / 4) {
+ int r;
+ struct stripe_hash hash, hash_tmp;
+ spinlock_t *lock;
+
+ r = hash_init(&hash, atomic_read(&sc->stripes));
+ if (r)
+ return r;
+
+ lock = sc->locks + LOCK_LRU;
+ spin_lock_irq(lock);
+ if (sc->hash.hash) {
+ unsigned b = sc->hash.buckets;
+ struct list_head *pos, *tmp;
+
+ /* Walk old buckets and insert into new. */
+ while (b--) {
+ list_for_each_safe(pos, tmp, sc->hash.hash + b)
+ hash_insert(&hash,
+ list_entry(pos, struct stripe,
+ lists[LIST_HASH]));
+ }
+
+ }
+
+ memcpy(&hash_tmp, &sc->hash, sizeof(hash_tmp));
+ memcpy(&sc->hash, &hash, sizeof(sc->hash));
+ atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
+ spin_unlock_irq(lock);
+
+ hash_exit(&hash_tmp);
+ }
+
+ return 0;
+}
+
+/*
+ * Stripe cache locking functions
+ */
+/* Dummy lock function for local RAID4+5. */
+static void *no_lock(sector_t key, enum dm_lock_type type)
+{
+ return &no_lock;
+}
+
+/* Dummy unlock function for local RAID4+5. */
+static void no_unlock(void *lock_handle)
+{
+}
+
+/* No locking (for local RAID 4+5). */
+static struct dm_raid45_locking_type locking_none = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/* Clustered RAID 4+5. */
+/* FIXME: code this. */
+static struct dm_raid45_locking_type locking_cluster = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/* Lock a stripe (for clustering). */
+static int
+stripe_lock(struct raid_set *rs, struct stripe *stripe, int rw, sector_t key)
+{
+ stripe->lock = rs->locking->lock(key, rw == READ ? DM_RAID45_SHARED :
+ DM_RAID45_EX);
+ return stripe->lock ? 0 : -EPERM;
+}
+
+/* Unlock a stripe (for clustering). */
+static void stripe_unlock(struct raid_set *rs, struct stripe *stripe)
+{
+ rs->locking->unlock(stripe->lock);
+ stripe->lock = NULL;
+}
+
+/*
+ * Stripe cache functions.
+ */
+/*
+ * Invalidate all pages of all page lists of a stripe.
+ *
+ * State for the whole list is only kept in the first page.
+ */
+static INLINE void
+stripe_pages_invalidate(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ ProhibitPageIO(page);
+ ClearPageChecked(page);
+ ClearPageDirty(page);
+ ClearPageError(page);
- clear_page_locked(page);
++ __clear_page_locked(page);
+ ClearPagePrivate(page);
+ ClearPageUptodate(page);
+ }
+}
+
+/* Prepare stripe for (re)use. */
+static INLINE void stripe_invalidate(struct stripe *stripe)
+{
+ stripe->io.flags = 0;
+ stripe_pages_invalidate(stripe);
+}
+
+/* Allow io on all chunks of a stripe. */
+static INLINE void stripe_allow_io(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ AllowPageIO(PAGE(stripe, p));
+}
+
+/* Initialize a stripe. */
+static void
+stripe_init(struct stripe_cache *sc, struct stripe *stripe)
+{
+ unsigned p = RS(sc)->set.raid_devs;
+ unsigned i;
+
+ /* Work all io chunks. */
+ while (p--) {
+ struct stripe_set *ss = stripe->ss + p;
+
+ stripe->obj[p].private = ss;
+ ss->stripe = stripe;
+
+ i = ARRAY_SIZE(ss->bl);
+ while (i--)
+ bio_list_init(ss->bl + i);
+ }
+
+ stripe->sc = sc;
+
+ i = ARRAY_SIZE(stripe->lists);
+ while (i--)
+ INIT_LIST_HEAD(stripe->lists + i);
+
+ atomic_set(&stripe->cnt, 0);
+ atomic_set(&stripe->io.pending, 0);
+
+ stripe_invalidate(stripe);
+}
+
+/* Number of pages per chunk. */
+static inline unsigned chunk_pages(unsigned io_size)
+{
+ return dm_div_up(io_size, SECTORS_PER_PAGE);
+}
+
+/* Number of pages per stripe. */
+static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
+{
+ return chunk_pages(io_size) * rs->set.raid_devs;
+}
+
+/* Initialize part of page_list (recovery). */
+static INLINE void stripe_zero_pl_part(struct stripe *stripe, unsigned p,
+ unsigned start, unsigned count)
+{
+ unsigned pages = chunk_pages(count);
+ /* Get offset into the page_list. */
+ struct page_list *pl = pl_elem(PL(stripe, p), start / SECTORS_PER_PAGE);
+
+ BUG_ON(!pl);
+ while (pl && pages--) {
+ BUG_ON(!pl->page);
+ memset(page_address(pl->page), 0, PAGE_SIZE);
+ pl = pl->next;
+ }
+}
+
+/* Initialize parity chunk of stripe. */
+static INLINE void stripe_zero_chunk(struct stripe *stripe, unsigned p)
+{
+ stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
+}
+
+/* Return dynamic stripe structure size. */
+static INLINE size_t stripe_size(struct raid_set *rs)
+{
+ return sizeof(struct stripe) +
+ rs->set.raid_devs * sizeof(struct stripe_set);
+}
+
+/* Allocate a stripe and its memory object. */
+/* XXX adjust to cope with stripe cache and recovery stripe caches. */
+enum grow { SC_GROW, SC_KEEP };
+static struct stripe *stripe_alloc(struct stripe_cache *sc,
+ struct dm_mem_cache_client *mc,
+ enum grow grow)
+{
+ int r;
+ struct stripe *stripe;
+
+ stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
+ if (stripe) {
+ /* Grow the dm-mem-cache by one object. */
+ if (grow == SC_GROW) {
+ r = dm_mem_cache_grow(mc, 1);
+ if (r)
+ goto err_free;
+ }
+
+ stripe->obj = dm_mem_cache_alloc(mc);
+ if (!stripe->obj)
+ goto err_shrink;
+
+ stripe_init(sc, stripe);
+ }
+
+ return stripe;
+
+err_shrink:
+ if (grow == SC_GROW)
+ dm_mem_cache_shrink(mc, 1);
+err_free:
+ kmem_cache_free(sc->kc.cache, stripe);
+ return NULL;
+}
+
+/*
+ * Free a stripe's memory object, shrink the
+ * memory cache and free the stripe itself.
+ */
+static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
+{
+ dm_mem_cache_free(mc, stripe->obj);
+ dm_mem_cache_shrink(mc, 1);
+ kmem_cache_free(stripe->sc->kc.cache, stripe);
+}
+
+/* Free the recovery stripes. */
+static void stripe_recover_free(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
+ struct list_head *stripes = &rec->stripes;
+
+ while (!list_empty(stripes)) {
+ struct stripe *stripe = list_first_entry(stripes, struct stripe,
+ lists[LIST_RECOVER]);
+ list_del(stripe->lists + LIST_RECOVER);
+ stripe_free(stripe, rec->mem_cache_client);
+ }
+}
+
+/* Push a stripe safely onto the endio list to be handled by do_endios(). */
+static INLINE void stripe_endio_push(struct stripe *stripe)
+{
+ int wake;
+ unsigned long flags;
+ struct stripe_cache *sc = stripe->sc;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ spin_lock_irqsave(lock, flags);
+ wake = list_empty(sc->lists + LIST_ENDIO);
+ stripe_endio_add(stripe, POS_HEAD, LIST_UNLOCKED);
+ spin_unlock_irqrestore(lock, flags);
+
+ if (wake)
+ wake_do_raid(RS(sc));
+}
+
+/* Protected check for stripe cache endio list empty. */
+static INLINE int stripe_endio_empty(struct stripe_cache *sc)
+{
+ int r;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ spin_lock_irq(lock);
+ r = list_empty(sc->lists + LIST_ENDIO);
+ spin_unlock_irq(lock);
+
+ return r;
+}
+
+/* Pop a stripe safely off the endio list. */
+static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
+{
+ struct stripe *stripe;
+ spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+ /* This runs in parallel with endio(). */
+ spin_lock_irq(lock);
+	POP(LIST_ENDIO);
+ spin_unlock_irq(lock);
+ return stripe;
+}
+
+#undef POP
+
+/* Evict stripe from cache. */
+static void stripe_evict(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ stripe_hash_del(stripe, LIST_UNLOCKED); /* Take off hash. */
+
+ if (list_empty(stripe->lists + LIST_LRU)) {
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ atomic_inc(rs->stats + S_EVICT); /* REMOVEME: statistics. */
+ }
+}
+
+/* Grow stripe cache. */
+static int
+sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
+{
+ int r = 0;
+ struct raid_set *rs = RS(sc);
+
+ /* Try to allocate this many (additional) stripes. */
+ while (stripes--) {
+ struct stripe *stripe =
+ stripe_alloc(sc, sc->mem_cache_client, grow);
+
+ if (likely(stripe)) {
+ stripe->io.size = rs->set.io_size;
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ atomic_inc(&sc->stripes);
+ } else {
+ r = -ENOMEM;
+ break;
+ }
+ }
+
+ ClearRSScBusy(rs);
+ return r ? r : hash_resize(sc);
+}
+
+/* Shrink stripe cache. */
+static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
+{
+ int r = 0;
+
+ /* Try to get unused stripe from LRU list. */
+ while (stripes--) {
+ struct stripe *stripe;
+
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ /* An lru stripe may never have ios pending! */
+ BUG_ON(stripe_io(stripe));
+ stripe_free(stripe, sc->mem_cache_client);
+ atomic_dec(&sc->stripes);
+ } else {
+ r = -ENOENT;
+ break;
+ }
+ }
+
+ /* Check if stats are still sane. */
+ if (atomic_read(&sc->max_active_stripes) >
+ atomic_read(&sc->stripes))
+ atomic_set(&sc->max_active_stripes, 0);
+
+ if (r)
+ return r;
+
+ ClearRSScBusy(RS(sc));
+ return hash_resize(sc);
+}
+
+/* Create stripe cache. */
+static int sc_init(struct raid_set *rs, unsigned stripes)
+{
+ unsigned i, nr;
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+ struct recover *rec = &rs->recover;
+
+ /* Initialize lists and locks. */
+ i = ARRAY_SIZE(sc->lists);
+ while (i--)
+ INIT_LIST_HEAD(sc->lists + i);
+
+ i = NR_LOCKS;
+ while (i--)
+ spin_lock_init(sc->locks + i);
+
+ /* Initialize atomic variables. */
+ atomic_set(&sc->stripes, 0);
+ atomic_set(&sc->stripes_last, 0);
+ atomic_set(&sc->stripes_to_shrink, 0);
+ atomic_set(&sc->active_stripes, 0);
+ atomic_set(&sc->max_active_stripes, 0); /* REMOVEME: statistics. */
+
+ /*
+ * We need a runtime unique # to suffix the kmem cache name
+ * because we'll have one for each active RAID set.
+ */
+ nr = atomic_inc_return(&_stripe_sc_nr);
+ sprintf(sc->kc.name, "%s_%d", TARGET, nr);
+ sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
+ 0, 0, NULL);
+ if (!sc->kc.cache)
+ return -ENOMEM;
+
+ /* Create memory cache client context for RAID stripe cache. */
+ sc->mem_cache_client =
+ dm_mem_cache_client_create(stripes, rs->set.raid_devs,
+ chunk_pages(rs->set.io_size));
+ if (IS_ERR(sc->mem_cache_client))
+ return PTR_ERR(sc->mem_cache_client);
+
+ /* Create memory cache client context for RAID recovery stripe(s). */
+ rec->mem_cache_client =
+ dm_mem_cache_client_create(MAX_RECOVER, rs->set.raid_devs,
+ chunk_pages(rec->io_size));
+ if (IS_ERR(rec->mem_cache_client))
+ return PTR_ERR(rec->mem_cache_client);
+
+	/* Allocate stripe(s) for set recovery. */
+	/* XXX: cope with MAX_RECOVER > 1. */
+ INIT_LIST_HEAD(&rec->stripes);
+ for (i = 0; i < MAX_RECOVER; i++) {
+ stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
+ if (!stripe)
+ return -ENOMEM;
+
+ SetStripeRecover(stripe);
+ stripe->io.size = rec->io_size;
+ list_add(stripe->lists + LIST_RECOVER, &rec->stripes);
+ }
+
+ /*
+	 * Allocate the stripe objects from the
+ * cache and add them to the LRU list.
+ */
+ return sc_grow(sc, stripes, SC_KEEP);
+}
+
+/* Destroy the stripe cache. */
+static void sc_exit(struct stripe_cache *sc)
+{
+ if (sc->kc.cache) {
+ BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes)));
+ kmem_cache_destroy(sc->kc.cache);
+ }
+
+ if (sc->mem_cache_client)
+ dm_mem_cache_client_destroy(sc->mem_cache_client);
+
+ ClearRSRecover(RS(sc));
+ stripe_recover_free(RS(sc));
+ if (RS(sc)->recover.mem_cache_client)
+ dm_mem_cache_client_destroy(RS(sc)->recover.mem_cache_client);
+
+ hash_exit(&sc->hash);
+}
+
+/*
+ * Calculate RAID address
+ *
+ * Delivers tuple with the index of the data disk holding the chunk
+ * in the set, the parity disk's index and the start of the stripe
+ * within the address space of the set (used as the stripe cache hash key).
+ */
+/* thx MD. */
+static struct address *
+raid_address(struct raid_set *rs, sector_t sector, struct address *addr)
+{
+ unsigned data_devs = rs->set.data_devs, di, pi,
+ raid_devs = rs->set.raid_devs;
+ sector_t stripe, tmp;
+
+ /*
+ * chunk_number = sector / chunk_size
+ * stripe = chunk_number / data_devs
+ * di = stripe % data_devs;
+ */
+ stripe = sector >> rs->set.chunk_shift;
+ di = sector_div(stripe, data_devs);
+
+ switch (rs->set.raid_type->level) {
+ case raid5:
+ tmp = stripe;
+ pi = sector_div(tmp, raid_devs);
+
+ switch (rs->set.raid_type->algorithm) {
+ case left_asym: /* Left asymmetric. */
+			pi = data_devs - pi;
+			/* Fall through. */
+		case right_asym:	/* Right asymmetric. */
+ if (di >= pi)
+ di++;
+ break;
+
+ case left_sym: /* Left symmetric. */
+			pi = data_devs - pi;
+			/* Fall through. */
+		case right_sym:		/* Right symmetric. */
+ di = (pi + di + 1) % raid_devs;
+ break;
+
+ default:
+ DMERR("Unknown RAID algorithm %d",
+ rs->set.raid_type->algorithm);
+ goto out;
+ }
+
+ break;
+
+ case raid4:
+ pi = rs->set.pi;
+ if (di >= pi)
+ di++;
+ break;
+
+ default:
+ DMERR("Unknown RAID level %d", rs->set.raid_type->level);
+ goto out;
+ }
+
+ /*
+ * Hash key = start offset on any single device of the RAID set;
+ * adjusted in case io size differs from chunk size.
+ */
+ addr->key = (stripe << rs->set.chunk_shift) +
+ (sector & rs->set.io_shift_mask);
+ addr->di = di;
+ addr->pi = pi;
+
+out:
+ return addr;
+}
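+
+/*
+ * Worked example (hypothetical geometry): raid5_ls, raid_devs = 4
+ * (data_devs = 3), chunk_shift = 6 (64-sector chunks). For sector 300:
+ * stripe = 300 >> 6 = 4; di = 4 % 3 = 1; stripe /= 3 = 1;
+ * pi = 1 % 4 = 1; left_sym: pi = 3 - 1 = 2; di = (2 + 1 + 1) % 4 = 0.
+ * So the chunk lives on disk 0, parity on disk 2 and
+ * key = (1 << 6) + the io offset within the chunk.
+ */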
+
+/*
+ * Copy data across between stripe pages and bio vectors.
+ *
+ * Pay attention to data alignment in stripe and bio pages.
+ */
+static void
+bio_copy_page_list(int rw, struct stripe *stripe,
+ struct page_list *pl, struct bio *bio)
+{
+ unsigned i, page_offset;
+ void *page_addr;
+ struct raid_set *rs = RS(stripe->sc);
+ struct bio_vec *bv;
+
+ /* Get start page in page list for this sector. */
+ i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
+ pl = pl_elem(pl, i);
+
+ page_addr = page_address(pl->page);
+ page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
+
+ /* Walk all segments and copy data across between bio_vecs and pages. */
+ bio_for_each_segment(bv, bio, i) {
+ int len = bv->bv_len, size;
+ unsigned bio_offset = 0;
+ void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
+redo:
+ size = (page_offset + len > PAGE_SIZE) ?
+ PAGE_SIZE - page_offset : len;
+
+ if (rw == READ)
+ memcpy(bio_addr + bio_offset,
+ page_addr + page_offset, size);
+ else
+ memcpy(page_addr + page_offset,
+ bio_addr + bio_offset, size);
+
+ page_offset += size;
+ if (page_offset == PAGE_SIZE) {
+ /*
+ * We reached the end of the chunk page ->
+				 * need to refer to the next one to copy more data.
+ */
+ len -= size;
+ if (len) {
+ /* Get next page. */
+ pl = pl->next;
+ BUG_ON(!pl);
+ page_addr = page_address(pl->page);
+ page_offset = 0;
+ bio_offset += size;
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
+ goto redo;
+ }
+ }
+
+ __bio_kunmap_atomic(bio_addr, KM_USER0);
+ }
+}
+
+/*
+ * Xor optimization macros.
+ */
+/* Xor data pointer declaration and initialization macros. */
+#define DECLARE_2 unsigned long *d0 = data[0], *d1 = data[1]
+#define DECLARE_3 DECLARE_2, *d2 = data[2]
+#define DECLARE_4 DECLARE_3, *d3 = data[3]
+#define DECLARE_5 DECLARE_4, *d4 = data[4]
+#define DECLARE_6 DECLARE_5, *d5 = data[5]
+#define DECLARE_7 DECLARE_6, *d6 = data[6]
+#define DECLARE_8 DECLARE_7, *d7 = data[7]
+
+/* Xor unroll macros. */
+#define D2(n) d0[n] = d0[n] ^ d1[n]
+#define D3(n) D2(n) ^ d2[n]
+#define D4(n) D3(n) ^ d3[n]
+#define D5(n) D4(n) ^ d4[n]
+#define D6(n) D5(n) ^ d5[n]
+#define D7(n) D6(n) ^ d6[n]
+#define D8(n) D7(n) ^ d7[n]
+
+#define X_2(macro, offset) macro(offset); macro(offset + 1);
+#define X_4(macro, offset) X_2(macro, offset); X_2(macro, offset + 2);
+#define X_8(macro, offset) X_4(macro, offset); X_4(macro, offset + 4);
+#define X_16(macro, offset) X_8(macro, offset); X_8(macro, offset + 8);
+#define X_32(macro, offset) X_16(macro, offset); X_16(macro, offset + 16);
+#define X_64(macro, offset) X_32(macro, offset); X_32(macro, offset + 32);
+
+/* Define a _xor_#chunks_#xors_per_run() function. */
+#define _XOR(chunks, xors_per_run) \
+static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
+{ \
+ unsigned end = XOR_SIZE / sizeof(data[0]), i; \
+ DECLARE_ ## chunks; \
+\
+ for (i = 0; i < end; i += xors_per_run) { \
+ X_ ## xors_per_run(D ## chunks, i); \
+ } \
+}
+
+/* Define xor functions for 2 - 8 chunks. */
+#define MAKE_XOR_PER_RUN(xors_per_run) \
+ _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
+ _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
+ _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
+ _XOR(8, xors_per_run);
+
+MAKE_XOR_PER_RUN(8) /* Define _xor_*_8() functions. */
+MAKE_XOR_PER_RUN(16) /* Define _xor_*_16() functions. */
+MAKE_XOR_PER_RUN(32) /* Define _xor_*_32() functions. */
+MAKE_XOR_PER_RUN(64) /* Define _xor_*_64() functions. */
+
+#define MAKE_XOR(xors_per_run) \
+struct { \
+ void (*f)(unsigned long **); \
+} static xor_funcs ## xors_per_run[] = { \
+ { NULL }, \
+ { NULL }, \
+ { _xor2_ ## xors_per_run }, \
+ { _xor3_ ## xors_per_run }, \
+ { _xor4_ ## xors_per_run }, \
+ { _xor5_ ## xors_per_run }, \
+ { _xor6_ ## xors_per_run }, \
+ { _xor7_ ## xors_per_run }, \
+ { _xor8_ ## xors_per_run }, \
+}; \
+\
+static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
+{ \
+	/* Call the respective function for the number of chunks. */ \
+ xor_funcs ## xors_per_run[n].f(data); \
+}
+
+/* Define xor_8() - xor_64 functions. */
+MAKE_XOR(8)
+MAKE_XOR(16)
+MAKE_XOR(32)
+MAKE_XOR(64)
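+
+/*
+ * E.g. MAKE_XOR(8) above defines xor_8(n, data), which dispatches to
+ * _xor<n>_8(); xor_8(3, d) thus runs _xor3_8(d), xor'ing d[1] and
+ * d[2] into d[0] in unrolled runs of 8 longs per loop iteration.
+ */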
+
+/* Maximum number of chunks which can be xor'ed in one go. */
+#define XOR_CHUNKS_MAX (ARRAY_SIZE(xor_funcs8) - 1)
+
+struct xor_func {
+ xor_function_t f;
+ const char *name;
+} static xor_funcs[] = {
+ {xor_8, "xor_8"},
+ {xor_16, "xor_16"},
+ {xor_32, "xor_32"},
+ {xor_64, "xor_64"},
+};
+
+/*
+ * Calculate parity.
+ *
+ * This indexes into the page list of the stripe.
+ *
+ * All chunks will be xor'ed into the parity chunk
+ * in maximum groups of xor.chunks.
+ *
+ * FIXME: try mapping the pages on discontiguous memory.
+ */
+static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned max_chunks = rs->xor.chunks, n, p;
+ unsigned o = sector / SECTORS_PER_PAGE; /* Offset into the page_list. */
+ unsigned long **d = rs->data;
+ xor_function_t xor_f = rs->xor.f->f;
+
+ /* Address of parity page to xor into. */
+ d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
+
+ /* Preset pointers to data pages. */
+ for (n = 1, p = rs->set.raid_devs; p--; ) {
+ if (p != pi && PageIO(PAGE(stripe, p)))
+ d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
+
+		/* If max chunks -> xor. */
+ if (n == max_chunks) {
+ xor_f(n, d);
+ n = 1;
+ }
+ }
+
+ /* If chunks -> xor. */
+ if (n > 1)
+ xor_f(n, d);
+
+ /* Set parity page uptodate and clean. */
+ page_set(PAGE(stripe, pi), CLEAN);
+}
+
+/* Common xor loop through all stripe page lists. */
+static void common_xor(struct stripe *stripe, sector_t count,
+ unsigned off, unsigned p)
+{
+ unsigned sector;
+
+ for (sector = off; sector < count; sector += SECTORS_PER_XOR)
+ xor(stripe, p, sector);
+
+ atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
+}
+
+/*
+ * Calculate parity sectors on intact stripes.
+ *
+ * Need to calculate raid address for recover stripe, because its
+ * chunk size differs and is typically larger than the io chunk size.
+ */
+static void parity_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned chunk_size = rs->set.chunk_size,
+ io_size = stripe->io.size,
+ xor_size = chunk_size > io_size ? io_size : chunk_size;
+ sector_t off;
+
+ /* This can be the recover stripe with a larger io size. */
+ for (off = 0; off < io_size; off += xor_size) {
+ unsigned pi;
+
+ /*
+		 * The recover stripe is likely bigger than regular io
+		 * stripes and has no precalculated parity disk index ->
+		 * need to calculate the RAID address.
+ */
+ if (unlikely(StripeRecover(stripe))) {
+ struct address addr;
+
+ raid_address(rs,
+ (stripe->key + off) * rs->set.data_devs,
+ &addr);
+ pi = addr.pi;
+ stripe_zero_pl_part(stripe, pi, off,
+ rs->set.chunk_size);
+ } else
+ pi = stripe->idx.parity;
+
+ common_xor(stripe, xor_size, off, pi);
+ page_set(PAGE(stripe, pi), DIRTY);
+ }
+}
+
+/* Reconstruct missing chunk. */
+static void reconstruct_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int p = stripe->idx.recover;
+
+ BUG_ON(p < 0);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (raid_set_degraded(rs) ?
+ S_RECONSTRUCT_EI : S_RECONSTRUCT_DEV));
+
+ /* Zero chunk to be reconstructed. */
+ stripe_zero_chunk(stripe, p);
+ common_xor(stripe, stripe->io.size, 0, p);
+}
+
+/*
+ * Try getting a stripe either from the hash or from the LRU list.
+ */
+static inline void _stripe_get(struct stripe *stripe)
+{
+ atomic_inc(&stripe->cnt);
+}
+
+static struct stripe *stripe_get(struct raid_set *rs, struct address *addr)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ stripe = stripe_lookup(sc, addr->key);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Remove from the lru list if on. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+ atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
+ } else {
+ /* Second try to get an LRU stripe. */
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ _stripe_get(stripe);
+ /* Invalidate before reinserting with changed key. */
+ stripe_invalidate(stripe);
+ stripe->key = addr->key;
+ stripe->region = dm_rh_sector_to_region(rs->recover.rh,
+ addr->key);
+ stripe->idx.parity = addr->pi;
+ sc_insert(sc, stripe);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_INSCACHE);
+ }
+ }
+
+ return stripe;
+}
+
+/*
+ * Decrement reference count on a stripe.
+ *
+ * Move it to list of LRU stripes if zero.
+ */
+static void stripe_put(struct stripe *stripe)
+{
+ if (atomic_dec_and_test(&stripe->cnt)) {
+ if (TestClearStripeActive(stripe))
+ atomic_dec(&stripe->sc->active_stripes);
+
+ /* Put stripe onto the LRU list. */
+ stripe_lru_add(stripe, POS_TAIL, LIST_LOCKED);
+ }
+
+ BUG_ON(atomic_read(&stripe->cnt) < 0);
+}
+
+/*
+ * Process end io.
+ *
+ * This has to happen outside of interrupt context, because the
+ * stripe pages can't be mapped to copy data from there.
+ *
+ * Read and write functions are split in order to avoid
+ * conditionals in the main loop for performance reasons.
+ */
+
+/* Helper read bios on a page list. */
+static void _bio_copy_page_list(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ bio_copy_page_list(READ, stripe, pl, bio);
+}
+
+/* Helper write bios on a page list. */
+static void _rh_dec(struct stripe *stripe, struct page_list *pl,
+ struct bio *bio)
+{
+ dm_rh_dec(RS(stripe->sc)->recover.rh, stripe->region);
+}
+
+/* End io all bios on a page list. */
+static inline int
+page_list_endio(int rw, struct stripe *stripe, unsigned p, unsigned *count)
+{
+ int r = 0;
+ struct bio_list *bl = BL(stripe, p, rw);
+
+ if (!bio_list_empty(bl)) {
+ struct page_list *pl = PL(stripe, p);
+ struct page *page = pl->page;
+
+ if (PageLocked(page))
+ r = -EBUSY;
+ /*
+ * FIXME: PageUptodate() not cleared
+ * properly for missing chunks ?
+ */
+ else if (PageUptodate(page)) {
+ struct bio *bio;
+ struct raid_set *rs = RS(stripe->sc);
+ void (*h_f)(struct stripe *, struct page_list *,
+ struct bio *) =
+ (rw == READ) ? _bio_copy_page_list : _rh_dec;
+
+ while ((bio = bio_list_pop(bl))) {
+ h_f(stripe, pl, bio);
+ _bio_endio(rs, bio, 0);
+ stripe_put(stripe);
+ if (count)
+ (*count)++;
+ }
+ } else
+ r = -EAGAIN;
+ }
+
+ return r;
+}
+
+/*
+ * End io all reads/writes on a stripe, copying
+ * read data across from stripe to bios.
+ */
+static int stripe_endio(int rw, struct stripe *stripe, unsigned *count)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ int rr = page_list_endio(rw, stripe, p, count);
+
+ if (rr && r != -EIO)
+ r = rr;
+ }
+
+ return r;
+}
+
+/* Fail all ios on a bio list and return # of bios. */
+static unsigned
+bio_list_fail(struct raid_set *rs, struct stripe *stripe, struct bio_list *bl)
+{
+ unsigned r;
+ struct bio *bio;
+
+ raid_set_dead(rs);
+
+ /* Update region counters. */
+ if (stripe) {
- struct dm_rh_client *rh = rs->recover.rh;
++ struct dm_region_hash *rh = rs->recover.rh;
+
+ bio_list_for_each(bio, bl) {
+ if (bio_data_dir(bio) == WRITE)
+ dm_rh_dec(rh, stripe->region);
+ }
+ }
+
+ /* Error end io all bios. */
+ for (r = 0; (bio = bio_list_pop(bl)); r++)
+ _bio_endio(rs, bio, -EIO);
+
+ return r;
+}
+
+/* Fail all ios of a bio list of a stripe and drop io pending count. */
+static void
+stripe_bio_list_fail(struct raid_set *rs, struct stripe *stripe,
+ struct bio_list *bl)
+{
+ unsigned put = bio_list_fail(rs, stripe, bl);
+
+ while (put--)
+ stripe_put(stripe);
+}
+
+/* Fail all ios hanging off all bio lists of a stripe. */
+static void stripe_fail_io(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned p = rs->set.raid_devs;
+
+ stripe_evict(stripe);
+
+ while (p--) {
+ struct stripe_set *ss = stripe->ss + p;
+ int i = ARRAY_SIZE(ss->bl);
+
+ while (i--)
+ stripe_bio_list_fail(rs, stripe, ss->bl + i);
+ }
+}
+
+/*
+ * Handle all stripes by handing them to the daemon, because we can't
+ * map their pages to copy the data in interrupt context.
+ *
+ * We don't want to handle them here either, while interrupts are disabled.
+ */
+
+/* Read/write endio function for dm-io (interrupt context). */
+static void endio(unsigned long error, void *context)
+{
+ struct dm_mem_cache_object *obj = context;
+ struct stripe_set *ss = obj->private;
+ struct stripe *stripe = ss->stripe;
+ struct page *page = obj->pl->page;
+
+ if (unlikely(error))
+ stripe_error(stripe, page);
+ else
+ page_set(page, CLEAN);
+
- clear_page_locked(page);
++ __clear_page_locked(page);
+ stripe_io_dec(stripe);
+
+ /* Add stripe to endio list and wake daemon. */
+ stripe_endio_push(stripe);
+}
+
+/*
+ * Recovery io throttling
+ */
+/* Conditionally reset io counters. */
+enum count_type { IO_WORK = 0, IO_RECOVER };
+static int recover_io_reset(struct raid_set *rs)
+{
+ unsigned long j = jiffies;
+
+ /* Pay attention to jiffies overflows. */
+ if (j > rs->recover.last_jiffies + HZ
+ || j < rs->recover.last_jiffies) {
+ rs->recover.last_jiffies = j;
+ atomic_set(rs->recover.io_count + IO_WORK, 0);
+ atomic_set(rs->recover.io_count + IO_RECOVER, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Count ios. */
+static INLINE void
+recover_io_count(struct raid_set *rs, struct stripe *stripe)
+{
+ if (RSRecover(rs)) {
+ recover_io_reset(rs);
+ atomic_inc(rs->recover.io_count +
+ (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
+ }
+}
+
+/* Read/Write a page_list asynchronously. */
+static void page_list_rw(struct stripe *stripe, unsigned p)
+{
+ struct stripe_cache *sc = stripe->sc;
+ struct raid_set *rs = RS(sc);
+ struct dm_mem_cache_object *obj = stripe->obj + p;
+ struct page_list *pl = obj->pl;
+ struct page *page = pl->page;
+ struct raid_dev *dev = rs->dev + p;
+ struct dm_io_region io = {
+ .bdev = dev->dev->bdev,
+ .sector = stripe->key,
+ .count = stripe->io.size,
+ };
+ struct dm_io_request control = {
+ .bi_rw = PageDirty(page) ? WRITE : READ,
+ .mem.type = DM_IO_PAGE_LIST,
+ .mem.ptr.pl = pl,
+ .mem.offset = 0,
+ .notify.fn = endio,
+ .notify.context = obj,
+ .client = sc->dm_io_client,
+ };
+
+ BUG_ON(PageLocked(page));
+
+ /*
+ * Don't rw past end of device, which can happen, because
+ * typically sectors_per_dev isn't divisible by io_size.
+ */
+ if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
+ io.count = rs->set.sectors_per_dev - io.sector;
+
+ io.sector += dev->start; /* Add <offset>. */
+ recover_io_count(rs, stripe); /* Recovery io accounting. */
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats +
+ (PageDirty(page) ? S_DM_IO_WRITE : S_DM_IO_READ));
+
+ ClearPageError(page);
- set_page_locked(page);
++ __set_page_locked(page);
+ io_dev_queued(dev);
+ BUG_ON(dm_io(&control, 1, &io, NULL));
+}
+
+/*
+ * Write dirty / read not uptodate page lists of a stripe.
+ */
+static unsigned stripe_page_lists_rw(struct raid_set *rs, struct stripe *stripe)
+{
+ unsigned r;
+
+ /*
+ * Increment the pending count on the stripe
+ * first, so that we don't race in endio().
+ *
+ * An inc (IO) is needed for any page:
+ *
+ * o not uptodate
+ * o dirtied by writes merged
+ * o dirtied by parity calculations
+ */
+ r = for_each_io_dev(rs, stripe, _stripe_io_inc);
+ if (r) {
+ /* io needed: chunks are not uptodate/dirty. */
+ int max; /* REMOVEME: */
+ struct stripe_cache *sc = &rs->sc;
+
+ if (!TestSetStripeActive(stripe))
+ atomic_inc(&sc->active_stripes);
+
+ /* Take off the lru list in case it got added there. */
+ stripe_lru_del(stripe, LIST_LOCKED);
+
+ /* Submit actual io. */
+ for_each_io_dev(rs, stripe, page_list_rw);
+
+ /* REMOVEME: statistics */
+ max = sc_active(sc);
+ if (atomic_read(&sc->max_active_stripes) < max)
+ atomic_set(&sc->max_active_stripes, max);
+
+ atomic_inc(rs->stats + S_FLUSHS);
+ /* END REMOVEME: statistics */
+ }
+
+ return r;
+}
+
+/* Work in all pending writes. */
+static INLINE void _writes_merge(struct stripe *stripe, unsigned p)
+{
+ struct bio_list *write = BL(stripe, p, WRITE);
+
+ if (!bio_list_empty(write)) {
+ struct page_list *pl = stripe->obj[p].pl;
+ struct bio *bio;
+ struct bio_list *write_merged = BL(stripe, p, WRITE_MERGED);
+
+ /*
+ * We can play with the lists without holding a lock,
+ * because it is just us accessing them anyway.
+ */
+ bio_list_for_each(bio, write)
+ bio_copy_page_list(WRITE, stripe, pl, bio);
+
+ bio_list_merge(write_merged, write);
+ bio_list_init(write);
+ page_set(pl->page, DIRTY);
+ }
+}
+
+/* Merge in all writes hence dirtying respective pages. */
+static INLINE void writes_merge(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ _writes_merge(stripe, p);
+}
+
+/* Check whether a chunk gets completely overwritten. */
+static INLINE int stripe_check_overwrite(struct stripe *stripe, unsigned p)
+{
+ unsigned sectors = 0;
+ struct bio *bio;
+ struct bio_list *bl = BL(stripe, p, WRITE);
+
+ bio_list_for_each(bio, bl)
+ sectors += bio_sectors(bio);
+
+ return sectors == RS(stripe->sc)->set.io_size;
+}
+
+/*
+ * Prepare stripe to avoid io on a broken/reconstructed
+ * drive in order to reconstruct data on endio.
+ */
+enum prepare_type { IO_ALLOW, IO_PROHIBIT };
+static void stripe_prepare(struct stripe *stripe, unsigned p,
+ enum prepare_type type)
+{
+ struct page *page = PAGE(stripe, p);
+
+ switch (type) {
+ case IO_PROHIBIT:
+ /*
+ * In case we prohibit io, we have to make sure that io
+ * is allowed on all chunks other than the one which failed
+ * or is being reconstructed, and that the latter doesn't
+ * carry state uptodate.
+ */
+ stripe_allow_io(stripe);
+ ClearPageUptodate(page);
+ ProhibitPageIO(page);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITPAGEIO);
+ stripe->idx.recover = p;
+ SetStripeReconstruct(stripe);
+ break;
+
+ case IO_ALLOW:
+ AllowPageIO(page);
+ stripe->idx.recover = -1;
+ ClearStripeReconstruct(stripe);
+ break;
+
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Degraded/reconstruction mode.
+ *
+ * Check stripe state to figure which chunks don't need IO.
+ */
+static INLINE void stripe_check_reconstruct(struct stripe *stripe,
+ int prohibited)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ /*
+ * Degraded mode (device(s) failed) ->
+ * avoid io on the failed device.
+ */
+ if (unlikely(raid_set_degraded(rs))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_DEGRADED);
+ stripe_prepare(stripe, rs->set.ei, IO_PROHIBIT);
+ return;
+ } else {
+ /*
+ * Reconstruction mode (ie. a particular device or
+ * some (rotating) parity chunk is being resynchronized) ->
+ * o make sure all needed pages are read in
+ * o writes are allowed to go through
+ */
+ int r = region_state(rs, stripe->key, DM_RH_NOSYNC);
+
+ if (r) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NOSYNC);
+ stripe_prepare(stripe, dev_for_parity(stripe),
+ IO_PROHIBIT);
+ return;
+ }
+ }
+
+ /*
+ * All disks good. Avoid reading parity chunk and reconstruct it
+ * unless we have prohibited io to chunk(s).
+ */
+ if (!prohibited) {
+ if (StripeMerged(stripe))
+ stripe_prepare(stripe, stripe->idx.parity, IO_ALLOW);
+ else {
+ stripe_prepare(stripe, stripe->idx.parity, IO_PROHIBIT);
+
+ /*
+ * Overrule stripe_prepare to reconstruct the
+ * parity chunk, because it will be recreated anyway.
+ */
+ ClearStripeReconstruct(stripe);
+ }
+ }
+}
+
+/* Check whether a stripe is ready to merge writes. */
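+/*
+ * Returns 0 if all writes can be merged, 1 if they can't but io got
+ * prohibited on completely overwritten chunks and -EPERM otherwise.
+ */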
+static INLINE int stripe_check_merge(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0;
+ unsigned chunks = 0, p = rs->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ /* Can't merge active chunks. */
+ if (PageLocked(page)) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_MERGE_PAGE_LOCKED);
+ break;
+ }
+
+ /* Can merge uptodate chunks and have to count parity chunk. */
+ if (PageUptodate(page) || p == stripe->idx.parity) {
+ chunks++;
+ continue;
+ }
+
+ /* Read before write ordering. */
+ if (RSCheckOverwrite(rs) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ int r = stripe_check_overwrite(stripe, p);
+
+ if (r) {
+ chunks++;
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats +
+ S_PROHIBITPAGEIO);
+ ProhibitPageIO(page);
+ prohibited = 1;
+ }
+ }
+ }
+
+ if (chunks == rs->set.raid_devs) {
+ /* All pages are uptodate, get written over completely, or a mixture. */
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_CAN_MERGE);
+ return 0;
+ } else
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_CANT_MERGE);
+
+ return prohibited ? 1 : -EPERM;
+}
+
+/* Check which chunk reads can be avoided and prohibit io on them. */
+static INLINE int stripe_check_read(struct stripe *stripe)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct page *page = PAGE(stripe, p);
+
+ if (!PageLocked(page) &&
+ bio_list_empty(BL(stripe, p, READ))) {
+ ProhibitPageIO(page);
+ r = 1;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Read/write a stripe.
+ *
+ * All stripe read/write activity goes through this function.
+ *
+ * States to cover:
+ * o stripe to read and/or write
+ * o stripe with error to reconstruct
+ */
+static int stripe_rw(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int prohibited = 0, r;
+
+ /*
+ * Check the state of the RAID set and if degraded (or
+ * resynchronizing for reads), read in all other chunks but
+ * the one on the dead/resynchronizing device in order to be
+ * able to reconstruct the missing one.
+ *
+ * Merge all writes hanging off uptodate pages of the stripe.
+ */
+
+ /* Initially allow io on all chunks and prohibit below, if necessary. */
+ stripe_allow_io(stripe);
+
+ if (StripeRBW(stripe)) {
+ r = stripe_check_merge(stripe);
+ if (!r) {
+ /*
+ * If I could rely on valid parity (which would only
+ * be sure in case of a full synchronization),
+ * I could xor a fraction of chunks out of
+ * parity and back in.
+ *
+ * For the time being, I got to redo parity...
+ */
+ /* parity_xor(stripe); */ /* Xor chunks out. */
+ stripe_zero_chunk(stripe, stripe->idx.parity);
+ writes_merge(stripe); /* Merge writes in. */
+ parity_xor(stripe); /* Update parity. */
+ ClearStripeRBW(stripe); /* Disable RBW. */
+ SetStripeMerged(stripe); /* Writes merged. */
+ }
+
+ if (r > 0)
+ prohibited = 1;
+ } else if (!raid_set_degraded(rs))
+ /* Only allow for read avoidance if not degraded. */
+ prohibited = stripe_check_read(stripe);
+
+ /*
+ * Check whether io needs to be allowed/prohibited on certain chunks
+ * because of a degraded set or reconstruction on a region.
+ */
+ stripe_check_reconstruct(stripe, prohibited);
+
+ /* Now submit any reads/writes. */
+ r = stripe_page_lists_rw(rs, stripe);
+ if (!r) {
+ /*
+ * No io submitted because of chunk io prohibited or
+ * locked pages -> push to end io list for processing.
+ */
+ atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
+ stripe_endio_push(stripe);
+ wake_do_raid(rs); /* Wake myself. */
+ }
+
+ return 0;
+}
+
+/* Flush stripe either immediately or via the io list. */
+enum flush_type { FLUSH_DELAY, FLUSH_NOW };
+static int stripe_flush(struct stripe *stripe, enum flush_type type)
+{
+ int r = 0;
+
+ stripe_lru_del(stripe, LIST_LOCKED);
+
+ /* Immediately flush. */
+ if (type == FLUSH_NOW) {
+ if (likely(raid_set_operational(RS(stripe->sc))))
+ r = stripe_rw(stripe); /* Read/write stripe. */
+ else
+ /* Optimization: Fail early on failed sets. */
+ stripe_fail_io(stripe);
+ /* Delay flush by putting it on io list for later processing. */
+ } else if (type == FLUSH_DELAY)
+ stripe_io_add(stripe, POS_TAIL, LIST_UNLOCKED);
+ else
+ BUG();
+
+ return r;
+}
+
+/*
+ * Queue reads and writes to a stripe by hanging
+ * their bios off the stripe set's read/write lists.
+ *
+ * Endio reads on uptodate chunks.
+ */
+static INLINE int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
+ struct bio_list *reject)
+{
+ int r = 0;
+ struct address addr;
+ struct stripe *stripe =
+ stripe_get(rs, raid_address(rs, bio->bi_sector, &addr));
+
+ if (stripe) {
+ int rr, rw = bio_data_dir(bio);
+
+ rr = stripe_lock(rs, stripe, rw, addr.key); /* Lock stripe */
+ if (rr) {
+ stripe_put(stripe);
+ goto out;
+ }
+
+ /* Distinguish read and write cases. */
+ bio_list_add(BL(stripe, addr.di, rw), bio);
+
+ /* REMOVEME: statistics */
+ atomic_inc(rs->stats + (rw == WRITE ?
+ S_BIOS_ADDED_WRITE : S_BIOS_ADDED_READ));
+
+ if (rw == READ)
+ SetStripeRead(stripe);
+ else {
+ SetStripeRBW(stripe);
+
+ /* Increment pending write count on region. */
+ dm_rh_inc(rs->recover.rh, stripe->region);
+ r = 1; /* Region hash needs a flush. */
+ }
+
+ /*
+ * Optimize stripe flushing:
+ *
+ * o directly start io for read stripes.
+ *
+ * o put stripe onto stripe caches io_list for RBW,
+ * so that do_flush() can belabour it after we put
+ * more bios to the stripe for overwrite optimization.
+ */
+ stripe_flush(stripe,
+ StripeRead(stripe) ? FLUSH_NOW : FLUSH_DELAY);
+
+ /* Got no stripe from cache -> reject bio. */
+ } else {
+out:
+ bio_list_add(reject, bio);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_IOS_POST);
+ }
+
+ return r;
+}
+
+/*
+ * Recovery functions
+ */
+/* Read a stripe off a raid set for recovery. */
+static int recover_read(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /* Invalidate all pages so that they get read in. */
+ stripe_pages_invalidate(stripe);
+
+ /* Allow io on all recovery chunks. */
+ stripe_allow_io(stripe);
+
+ if (idx > -1)
+ ProhibitPageIO(PAGE(stripe, idx));
+
+ stripe->key = rs->recover.pos;
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Write a stripe to a raid set for recovery. */
+static int recover_write(struct raid_set *rs, struct stripe *stripe, int idx)
+{
+ /*
+ * If this is a reconstruct of a particular device, then
+ * reconstruct the respective page(s), else create parity page(s).
+ */
+ if (idx > -1) {
+ struct page *page = PAGE(stripe, idx);
+
+ AllowPageIO(page);
+ stripe_zero_chunk(stripe, idx);
+ common_xor(stripe, stripe->io.size, 0, idx);
+ page_set(page, DIRTY);
+ } else
+ parity_xor(stripe);
+
+ return stripe_page_lists_rw(rs, stripe);
+}
+
+/* Recovery bandwidth available? */
+static int recover_bandwidth(struct raid_set *rs)
+{
+ int r, work;
+
+ /* On reset -> allow recovery. */
+ r = recover_io_reset(rs);
+ if (r || RSBandwidth(rs))
+ goto out;
+
+ work = atomic_read(rs->recover.io_count + IO_WORK);
+ if (work) {
+ /* Pay attention to larger recover stripe size. */
+ int recover =
+ atomic_read(rs->recover.io_count + IO_RECOVER) *
+ rs->recover.io_size /
+ rs->set.io_size;
+
+ /*
+ * Don't use more than the configured bandwidth
+ * share of the work io for recovery.
+ */
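+ /*
+ * Example (numbers assumed): bandwidth = 20% gives
+ * bandwidth_work = 100 / 20 = 5, so recovery io gets
+ * throttled once the scaled recovery ios exceed 1/5th
+ * of the application work ios in this interval.
+ */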
+ if (recover > work / rs->recover.bandwidth_work) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NO_BANDWIDTH);
+ return 0;
+ }
+ }
+
+out:
+ atomic_inc(rs->stats + S_BANDWIDTH); /* REMOVEME: statistics. */
+ return 1;
+}
+
+/* Try to get a region to recover. */
+static int recover_get_region(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
- struct dm_rh_client *rh = rec->rh;
++ struct dm_region_hash *rh = rec->rh;
+
+ /* Start quiescing some regions. */
+ if (!RSRegionGet(rs)) {
+ int r = recover_bandwidth(rs); /* Enough bandwidth? */
+
+ if (r) {
+ r = dm_rh_recovery_prepare(rh);
+ if (r < 0) {
+ DMINFO("No %sregions to recover",
+ rec->nr_regions_to_recover ?
+ "more " : "");
+ return -ENOENT;
+ }
+ } else
+ return -EAGAIN;
+
+ SetRSRegionGet(rs);
+ }
+
+ if (!rec->reg) {
+ rec->reg = dm_rh_recovery_start(rh);
+ if (rec->reg) {
+ /*
+ * Take an io reference for the region and
+ * keep it until the region is fully synced.
+ */
+ io_get(rs);
+ rec->pos = dm_rh_region_to_sector(rh,
+ dm_rh_get_region_key(rec->reg));
+ rec->end = rec->pos + dm_rh_get_region_size(rh);
+ return 1;
+ } else
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Read/write a recovery stripe. */
+static INLINE int recover_stripe_rw(struct raid_set *rs, struct stripe *stripe)
+{
+ /* Read/write flip-flop. */
+ if (TestClearStripeRBW(stripe)) {
+ SetStripeRead(stripe);
+ return recover_read(rs, stripe, idx_get(rs));
+ } else if (TestClearStripeRead(stripe))
+ return recover_write(rs, stripe, idx_get(rs));
+
+ return 0;
+}
+
+/* Reset recovery variables. */
+static void recovery_region_reset(struct raid_set *rs)
+{
+ rs->recover.reg = NULL;
+ ClearRSRegionGet(rs);
+}
+
+/* Update region hash state. */
+static void recover_rh_update(struct raid_set *rs, int error)
+{
+ struct recover *rec = &rs->recover;
- struct dm_rh_client *rh = rec->rh;
+ struct dm_region *reg = rec->reg;
+
+ if (reg) {
- dm_rh_recovery_end(rh, reg, error);
++ dm_rh_recovery_end(reg, error);
+ if (!error)
+ rec->nr_regions_recovered++;
+
+ recovery_region_reset(rs);
+ }
+
- dm_rh_update_states(rh, 1);
- dm_rh_flush(rh);
++ dm_rh_update_states(rec->rh, 1);
++ dm_rh_flush(rec->rh);
+ io_put(rs); /* Release the io reference for the region. */
+}
+
+/* Called by main io daemon to recover regions. */
+/* FIXME: cope with MAX_RECOVER > 1. */
+static INLINE void _do_recovery(struct raid_set *rs, struct stripe *stripe)
+{
+ int r;
+ struct recover *rec = &rs->recover;
+
+ /* If the recovery stripe is still active -> return. */
+ if (StripeActive(stripe))
+ return;
+
+ /* io error is fatal for recovery -> stop it. */
+ if (unlikely(StripeError(stripe)))
+ goto err;
+
+ /* Get a region to recover. */
+ r = recover_get_region(rs);
+ switch (r) {
+ case 1: /* Got a new region. */
+ /* Flag read before write. */
+ ClearStripeRead(stripe);
+ SetStripeRBW(stripe);
+ break;
+
+ case 0:
+ /* Got a region in the works. */
+ r = recover_bandwidth(rs);
+ if (r) /* Got enough bandwidth. */
+ break;
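+ /* Fall through when short on bandwidth. */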
+
+ case -EAGAIN:
+ /* No bandwidth/quiesced region yet, try later. */
+ wake_do_raid_delayed(rs, HZ / 10);
+ return;
+
+ case -ENOENT: /* No more regions. */
+ dm_table_event(rs->ti->table);
+ goto free;
+ }
+
+ /* Read/write a recover stripe. */
+ r = recover_stripe_rw(rs, stripe);
+ if (r) {
+ /* IO initiated, get another reference for the IO. */
+ io_get(rs);
+ return;
+ }
+
+ /* Update recovery position within region. */
+ rec->pos += stripe->io.size;
+
+ /* If we're at end of region, update region hash. */
+ if (rec->pos >= rec->end ||
+ rec->pos >= rs->set.sectors_per_dev)
+ recover_rh_update(rs, 0);
+ else
+ SetStripeRBW(stripe);
+
+ /* Schedule myself for another round... */
+ wake_do_raid(rs);
+ return;
+
+err:
+ raid_set_check_degrade(rs, stripe);
+
+ {
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("stopping recovery due to "
+ "ERROR on /dev/%s, stripe at offset %llu",
+ bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
+ (unsigned long long) stripe->key);
+
+ }
+
+ /* Make sure that all quiesced regions get released. */
+ do {
+ if (rec->reg)
- dm_rh_recovery_end(rec->rh, rec->reg, -EIO);
++ dm_rh_recovery_end(rec->reg, -EIO);
+
+ rec->reg = dm_rh_recovery_start(rec->rh);
+ } while (rec->reg);
+
+ recover_rh_update(rs, -EIO);
+free:
+ rs->set.dev_to_init = -1;
+
+ /* Check for jiffies overrun. */
+ rs->recover.end_jiffies = jiffies;
+ if (rs->recover.end_jiffies < rs->recover.start_jiffies)
+ rs->recover.end_jiffies = ~0;
+
+ ClearRSRecover(rs);
+}
+
+static INLINE void do_recovery(struct raid_set *rs)
+{
+ struct stripe *stripe;
+
+ list_for_each_entry(stripe, &rs->recover.stripes, lists[LIST_RECOVER])
+ _do_recovery(rs, stripe);
+
+ if (!RSRecover(rs))
+ stripe_recover_free(rs);
+}
+
+/*
+ * END recovery functions
+ */
+
+/* Process end io on all stripes handed in by the endio() callback. */
+static void do_endios(struct raid_set *rs)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ while ((stripe = stripe_endio_pop(sc))) {
+ unsigned count;
+
+ /* Recovery stripe special case. */
+ if (unlikely(StripeRecover(stripe))) {
+ if (stripe_io(stripe))
+ continue;
+
+ io_put(rs); /* Release region io reference. */
+ ClearStripeActive(stripe);
+
+ /* REMOVEME: statistics*/
+ atomic_dec(&sc->active_stripes);
+ continue;
+ }
+
+ /* Early end io all reads on any uptodate chunks. */
+ stripe_endio(READ, stripe, (count = 0, &count));
+ if (stripe_io(stripe)) {
+ if (count) /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_ACTIVE_READS);
+
+ continue;
+ }
+
+ /* Set stripe inactive after all io got processed. */
+ if (TestClearStripeActive(stripe))
+ atomic_dec(&sc->active_stripes);
+
+ /* Unlock stripe (for clustering). */
+ stripe_unlock(rs, stripe);
+
+ /*
+ * If an io error on a stripe occurred and the RAID set
+ * is still operational, requeue the stripe for io.
+ */
+ if (TestClearStripeError(stripe)) {
+ raid_set_check_degrade(rs, stripe);
+ ClearStripeReconstruct(stripe);
+
+ if (!StripeMerged(stripe) &&
+ raid_set_operational(rs)) {
+ stripe_pages_invalidate(stripe);
+ stripe_flush(stripe, FLUSH_DELAY);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_REQUEUE);
+ continue;
+ }
+ }
+
+ /* If the RAID set is inoperational -> error all the stripe's ios. */
+ if (!raid_set_operational(rs)) {
+ ClearStripeReconstruct(stripe);
+ stripe_fail_io(stripe);
+ BUG_ON(atomic_read(&stripe->cnt));
+ continue;
+ }
+
+ /* Got to reconstruct a missing chunk. */
+ if (TestClearStripeReconstruct(stripe))
+ reconstruct_xor(stripe);
+
+ /*
+ * Now that we've got a complete stripe, we can
+ * process the rest of the end ios on reads.
+ */
+ BUG_ON(stripe_endio(READ, stripe, NULL));
+ ClearStripeRead(stripe);
+
+ /*
+ * Read-before-write stripes need to be flushed again in
+ * order to work the write data into the pages *after*
+ * they were read in.
+ */
+ if (TestClearStripeMerged(stripe))
+ /* End io all bios which got merged already. */
+ BUG_ON(stripe_endio(WRITE_MERGED, stripe, NULL));
+
+ /* Got to put on flush list because of new writes. */
+ if (StripeRBW(stripe))
+ stripe_flush(stripe, FLUSH_DELAY);
+ }
+}
+
+/*
+ * Stripe cache shrinking.
+ */
+static INLINE void do_sc_shrink(struct raid_set *rs)
+{
+ unsigned shrink = atomic_read(&rs->sc.stripes_to_shrink);
+
+ if (shrink) {
+ unsigned cur = atomic_read(&rs->sc.stripes);
+
+ sc_shrink(&rs->sc, shrink);
+ shrink -= cur - atomic_read(&rs->sc.stripes);
+ atomic_set(&rs->sc.stripes_to_shrink, shrink);
+
+ /*
+ * Wake myself up in case we failed to shrink the
+ * requested amount in order to try again later.
+ */
+ if (shrink)
+ wake_do_raid(rs);
+ }
+}
+
+
+/*
+ * Process all ios
+ *
+ * We do different things with the io depending on the
+ * state of the region that it's in:
+ *
+ * o reads: hang off stripe cache or postpone if full
+ *
+ * o writes:
+ *
+ * CLEAN/DIRTY/NOSYNC: increment pending and hang io off stripe's stripe set.
+ * In case stripe cache is full or busy, postpone the io.
+ *
+ * RECOVERING: delay the io until recovery of the region completes.
+ *
+ */
+static INLINE void do_ios(struct raid_set *rs, struct bio_list *ios)
+{
+ int r;
+ unsigned flush = 0;
- struct dm_rh_client *rh = rs->recover.rh;
++ struct dm_region_hash *rh = rs->recover.rh;
+ struct bio *bio;
+ struct bio_list delay, reject;
+
+ bio_list_init(&delay);
+ bio_list_init(&reject);
+
+ /*
+ * Classify each io:
+ * o delay to recovering regions
+ * o queue to all other regions
+ */
+ while ((bio = bio_list_pop(ios))) {
+ /*
+ * In case we get a barrier bio, push it back onto
+ * the input queue unless all work queues are empty
+ * and the stripe cache is inactive.
+ */
+ if (unlikely(bio_barrier(bio))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BARRIER);
+ if (!list_empty(rs->sc.lists + LIST_IO) ||
+ !bio_list_empty(&delay) ||
+ !bio_list_empty(&reject) ||
+ sc_active(&rs->sc)) {
+ bio_list_push(ios, bio);
+ break;
+ }
+ }
+
+ r = region_state(rs, _sector(rs, bio), DM_RH_RECOVERING);
+ if (unlikely(r)) {
+ /* Got to wait for recovering regions. */
+ bio_list_add(&delay, bio);
+ SetRSBandwidth(rs);
+ } else {
+ /*
+ * Process ios to non-recovering regions by queueing
+ * them to stripes (does dm_rh_inc() for writes).
+ */
+ flush += stripe_queue_bio(rs, bio, &reject);
+ }
+ }
+
+ if (flush) {
+ r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
+ if (r)
+ DMERR("dirty log flush");
+ }
+
+ /* Delay ios to regions which are recovering. */
+ while ((bio = bio_list_pop(&delay))) {
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_DELAYED_BIOS);
+ atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
- dm_rh_delay_by_region(rh, bio,
- dm_rh_sector_to_region(rh, _sector(rs, bio)));
++ dm_rh_delay(rh, bio);
+
+ }
+
+ /* Merge any rejected bios back to the head of the input list. */
+ bio_list_merge_head(ios, &reject);
+}
+
+/* Flush any stripes on the io list. */
+static INLINE void do_flush(struct raid_set *rs)
+{
+ struct list_head *list = rs->sc.lists + LIST_IO, *pos, *tmp;
+
+ list_for_each_safe(pos, tmp, list) {
+ int r = stripe_flush(list_entry(pos, struct stripe,
+ lists[LIST_IO]), FLUSH_NOW);
+
+ /* Remove from the list only if the stripe got processed. */
+ if (!r)
+ list_del_init(pos);
+ }
+}
+
+/* Send an event in case we're getting too busy. */
+static INLINE void do_busy_event(struct raid_set *rs)
+{
+ if ((sc_active(&rs->sc) > atomic_read(&rs->sc.stripes) * 4 / 5)) {
+ if (!TestSetRSScBusy(rs))
+ dm_table_event(rs->ti->table);
+ } else
+ ClearRSScBusy(rs);
+}
+
+/* Unplug: let the io roll on the set's devices. */
+static INLINE void do_unplug(struct raid_set *rs)
+{
+ struct raid_dev *dev = rs->dev + rs->set.raid_devs;
+
+ while (dev-- > rs->dev) {
+ /* Only call the device unplug function if io got queued. */
+ if (io_dev_clear(dev))
+ blk_unplug(bdev_get_queue(dev->dev->bdev));
+ }
+}
+
+/*-----------------------------------------------------------------
+ * RAID daemon
+ *---------------------------------------------------------------*/
+/*
+ * o belabour all end ios
+ * o optionally shrink the stripe cache
+ * o update the region hash states
+ * o optionally do recovery
+ * o grab the input queue
+ * o work on all requeued or new ios and perform stripe cache flushes
+ * unless the RAID set is inoperational (when we error ios)
+ * o check whether the stripe cache gets too busy and throw an event if so
+ * o unplug any component raid devices with queued bios
+ */
+static void do_raid(struct work_struct *ws)
+{
+ struct raid_set *rs = container_of(ws, struct raid_set, io.dws.work);
+ struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
+ spinlock_t *lock = &rs->io.in_lock;
+
+ /*
+ * We always need to end io, so that ios
+ * can get errored in case the set failed
+ * and the region counters get decremented
+ * before we update the region hash states.
+ */
+redo:
+ do_endios(rs);
+
+ /*
+ * Now that we've end io'd, which may have put stripes on
+ * the LRU list, we shrink the stripe cache if requested.
+ */
+ do_sc_shrink(rs);
+
+ /* Update region hash states before we go any further. */
+ dm_rh_update_states(rs->recover.rh, 1);
+
+ /* Try to recover regions. */
+ if (RSRecover(rs))
+ do_recovery(rs);
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+
+ /* Quickly grab all new ios queued and add them to the work list. */
+ spin_lock_irq(lock);
+ bio_list_merge(ios, ios_in);
+ bio_list_init(ios_in);
+ spin_unlock_irq(lock);
+
+ /* Let's assume we're operational most of the time ;-). */
+ if (likely(raid_set_operational(rs))) {
+ /* If we got ios, work them into the cache. */
+ if (!bio_list_empty(ios)) {
+ do_ios(rs, ios);
+ do_unplug(rs); /* Unplug the set's device queues. */
+ }
+
+ do_flush(rs); /* Flush any stripes on io list. */
+ do_unplug(rs); /* Unplug the set's device queues. */
+ do_busy_event(rs); /* Check if we got too busy. */
+
+ /* More endios -> process. */
+ if (!stripe_endio_empty(&rs->sc)) {
+ atomic_inc(rs->stats + S_REDO);
+ goto redo;
+ }
+ } else
+ /* No way to reconstruct data with too many devices failed. */
+ bio_list_fail(rs, NULL, ios);
+}
+
+/*
+ * Callback for region hash to dispatch
+ * delayed bios queued to recovered regions
+ * (gets called via dm_rh_update_states()).
+ */
- static void dispatch_delayed_bios(void *context, struct bio_list *bl, int dummy)
++static void dispatch_delayed_bios(void *context, struct bio_list *bl)
+{
+ struct raid_set *rs = context;
+ struct bio *bio;
+
+ /* REMOVEME: decrement pending delayed bios counter. */
+ bio_list_for_each(bio, bl)
+ atomic_dec(rs->stats + S_DELAYED_BIOS);
+
+ /* Merge region hash private list to work list. */
+ bio_list_merge_head(&rs->io.work, bl);
+ bio_list_init(bl);
+ ClearRSBandwidth(rs);
+}
+
+/*************************************************************
+ * Constructor helpers
+ *************************************************************/
+/* Calculate MB/sec. */
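+/*
+ * speed is in stripe xors per tick; scale by data devices, the
+ * recovery io size in sectors and HZ to get sectors/sec, then the
+ * to_bytes() and >> 10 shifts convert that to MB/s.
+ */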
+static INLINE unsigned mbpers(struct raid_set *rs, unsigned speed)
+{
+ return to_bytes(speed * rs->set.data_devs *
+ rs->recover.io_size * HZ >> 10) >> 10;
+}
+
+/*
+ * Discover fastest xor algorithm and # of chunks combination.
+ */
+/* Calculate speed for algorithm and # of chunks. */
+static INLINE unsigned xor_speed(struct stripe *stripe)
+{
+ unsigned r = 0;
+ unsigned long j;
+
+ /* Wait for next tick. */
+ for (j = jiffies; j == jiffies;)
+ ;
+
+ /* Do xors for a full tick. */
+ for (j = jiffies; j == jiffies;) {
+ mb();
+ common_xor(stripe, stripe->io.size, 0, 0);
+ mb();
+ r++;
+ mb();
+ }
+
+ return r;
+}
+
+/* Optimize xor algorithm for this RAID set. */
+static unsigned xor_optimize(struct raid_set *rs)
+{
+ unsigned chunks_max = 2, speed_max = 0;
+ struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
+ struct stripe *stripe;
+
+ BUG_ON(list_empty(&rs->recover.stripes));
+ stripe = list_first_entry(&rs->recover.stripes, struct stripe,
+ lists[LIST_RECOVER]);
+
+ /*
+ * Got to allow io on all chunks, so that
+ * xor() will actually work on them.
+ */
+ stripe_allow_io(stripe);
+
+ /* Try all xor functions. */
+ while (f-- > xor_funcs) {
+ unsigned speed;
+
+ /* Set actual xor function for common_xor(). */
+ rs->xor.f = f;
+ rs->xor.chunks = XOR_CHUNKS_MAX + 1;
+
+ while (rs->xor.chunks-- > 2) {
+ speed = xor_speed(stripe);
+ if (speed > speed_max) {
+ speed_max = speed;
+ chunks_max = rs->xor.chunks;
+ f_max = f;
+ }
+ }
+ }
+
+ /* Memorize optimum parameters. */
+ rs->xor.f = f_max;
+ rs->xor.chunks = chunks_max;
+ return speed_max;
+}
+
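++/* Check for unsigned long overflow when sizing the raid set allocation. */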
++static inline int array_too_big(unsigned long fixed, unsigned long obj,
++ unsigned long num)
++{
++ return (num > (ULONG_MAX - fixed) / obj);
++}
++
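++/*
++ * No-op recovery waiter wakeup: required by dm_region_hash_create(),
++ * but this target tracks io completion through its own io references
++ * (see io_get()/io_put()).
++ */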
++static void wakeup_all_recovery_waiters(void *context)
++{
++}
++
+/*
+ * Allocate a RAID context (a RAID set)
+ */
+static int
+context_alloc(struct raid_set **raid_set, struct raid_type *raid_type,
+ unsigned stripes, unsigned chunk_size, unsigned io_size,
+ unsigned recover_io_size, unsigned raid_devs,
+ sector_t sectors_per_dev,
+ struct dm_target *ti, unsigned dl_parms, char **argv)
+{
+ int r;
+ unsigned p;
+ size_t len;
+ sector_t region_size, ti_len;
+ struct raid_set *rs = NULL;
+ struct dm_dirty_log *dl;
+ struct recover *rec;
+
+ /*
+ * Create the dirty log
+ *
+ * We need to change length for the dirty log constructor,
+ * because we want an amount of regions for all stripes derived
+ * from the single device size, so that we can keep region
+ * size = 2^^n independent of the number of devices
+ */
+ ti_len = ti->len;
+ ti->len = sectors_per_dev;
+ dl = dm_dirty_log_create(argv[0], ti, dl_parms, argv + 2);
+ ti->len = ti_len;
+ if (!dl)
+ goto bad_dirty_log;
+
+ /* Chunk size *must* not be larger than region size. */
+ region_size = dl->type->get_region_size(dl);
+ if (chunk_size > region_size)
+ goto bad_chunk_size;
+
+ /* Recovery io size *must* not be larger than region size either. */
+ if (recover_io_size > region_size)
+ goto bad_recover_io_size;
+
+ /* Size and allocate the RAID set structure. */
+ len = sizeof(*rs->data) + sizeof(*rs->dev);
+ if (array_too_big(sizeof(*rs), len, raid_devs))
+ goto bad_array;
+
+ len = sizeof(*rs) + raid_devs * len;
+ rs = kzalloc(len, GFP_KERNEL);
+ if (!rs)
+ goto bad_alloc;
+
+ rec = &rs->recover;
+ atomic_set(&rs->io.in_process, 0);
+ atomic_set(&rs->io.in_process_max, 0);
+ rec->io_size = recover_io_size;
+
+ /* Pointer to data array. */
+ rs->data = (unsigned long **)
+ ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
+ rec->dl = dl;
+ rs->set.raid_devs = p = raid_devs;
+ rs->set.data_devs = raid_devs - raid_type->parity_devs;
+ rs->set.raid_type = raid_type;
+
+ /*
+ * Set chunk and io size and respective shifts
+ * (used to avoid divisions)
+ */
+ rs->set.chunk_size = chunk_size;
+ rs->set.chunk_mask = chunk_size - 1;
+ rs->set.chunk_shift = ffs(chunk_size) - 1;
+
+ rs->set.io_size = io_size;
+ rs->set.io_mask = io_size - 1;
+ rs->set.io_shift = ffs(io_size) - 1;
+ rs->set.io_shift_mask = rs->set.chunk_mask & ~rs->set.io_mask;
+
+ rs->set.pages_per_io = chunk_pages(io_size);
+ rs->set.sectors_per_dev = sectors_per_dev;
+
+ rs->set.ei = -1; /* Indicate no failed device. */
+ atomic_set(&rs->set.failed_devs, 0);
+
+ rs->ti = ti;
+
+ atomic_set(rec->io_count + IO_WORK, 0);
+ atomic_set(rec->io_count + IO_RECOVER, 0);
+
+ /* Initialize io lock and queues. */
+ spin_lock_init(&rs->io.in_lock);
+ bio_list_init(&rs->io.in);
+ bio_list_init(&rs->io.work);
+
+ init_waitqueue_head(&rs->io.suspendq); /* Suspend waiters (dm-io). */
+
+ rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
- rec->rh = dm_rh_client_create(MAX_RECOVER, dispatch_delayed_bios, rs,
- wake_do_raid, rs, dl, region_size,
- rs->recover.nr_regions);
++
++ rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios, wake_do_raid,
++ wakeup_all_recovery_waiters,
++ rs->ti->begin, MAX_RECOVER, dl,
++ region_size, rs->recover.nr_regions);
+ if (IS_ERR(rec->rh))
+ goto bad_rh;
+
+ /* Initialize stripe cache. */
+ r = sc_init(rs, stripes);
+ if (r)
+ goto bad_sc;
+
+ /* Create dm-io client context. */
+ rs->sc.dm_io_client = dm_io_client_create(rs->set.raid_devs *
+ rs->set.pages_per_io);
+ if (IS_ERR(rs->sc.dm_io_client))
+ goto bad_dm_io_client;
+
+ /* REMOVEME: statistics. */
+ stats_reset(rs);
+ ClearRSDevelStats(rs); /* Disable development statistics. */
+
+ *raid_set = rs;
+ return 0;
+
+bad_dirty_log:
+ TI_ERR_RET("Error creating dirty log", -ENOMEM);
+
+bad_chunk_size:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Chunk size larger than region size");
+
+bad_recover_io_size:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Recover stripe io size larger than region size");
+
+bad_array:
+ dm_dirty_log_destroy(dl);
+ TI_ERR("Arry too big");
+
+bad_alloc:
+ dm_dirty_log_destroy(dl);
+ TI_ERR_RET("Cannot allocate raid context", -ENOMEM);
+
+bad_rh:
+ dm_dirty_log_destroy(dl);
+ ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
+ goto free_rs;
+
+bad_sc:
+ ti->error = DM_MSG_PREFIX "Error creating stripe cache";
+ goto free;
+
+bad_dm_io_client:
+ ti->error = DM_MSG_PREFIX "Error allocating dm-io resources";
+free:
- dm_rh_client_destroy(rec->rh);
+ sc_exit(&rs->sc);
- dm_rh_client_destroy(rec->rh); /* Destroys dirty log as well. */
++ dm_region_hash_destroy(rec->rh); /* Destroys dirty log as well. */
+free_rs:
+ kfree(rs);
+ return -ENOMEM;
+}
+
+/* Free a RAID context (a RAID set). */
+static void
+context_free(struct raid_set *rs, struct dm_target *ti, unsigned r)
+{
+ while (r--)
+ dm_put_device(ti, rs->dev[r].dev);
+
+ dm_io_client_destroy(rs->sc.dm_io_client);
+ sc_exit(&rs->sc);
- dm_rh_client_destroy(rs->recover.rh);
++ dm_region_hash_destroy(rs->recover.rh);
+ dm_dirty_log_destroy(rs->recover.dl);
+ kfree(rs);
+}
+
+/* Create work queue and initialize work. */
+static int rs_workqueue_init(struct raid_set *rs)
+{
+ struct dm_target *ti = rs->ti;
+
+ rs->io.wq = create_singlethread_workqueue(DAEMON);
+ if (!rs->io.wq)
+ TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
+
+ INIT_DELAYED_WORK(&rs->io.dws, do_raid);
+ return 0;
+}
+
+/* Return pointer to raid_type structure for raid name. */
+static struct raid_type *get_raid_type(char *name)
+{
+ struct raid_type *r = ARRAY_END(raid_types);
+
+ while (r-- > raid_types) {
+ if (!strnicmp(STR_LEN(r->name, name)))
+ return r;
+ }
+
+ return NULL;
+}
+
+/* FIXME: factor out to dm core. */
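+/* Return true if a is a multiple of b, storing the quotient in *n. */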
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+ sector_t r = a;
+
+ sector_div(r, b);
+ *n = r;
+ return a == r * b;
+}
+
+/* Log RAID set information to kernel log. */
+static void raid_set_log(struct raid_set *rs, unsigned speed)
+{
+ unsigned p;
+ char buf[BDEVNAME_SIZE];
+
+ for (p = 0; p < rs->set.raid_devs; p++)
+ DMINFO("/dev/%s is raid disk %u",
+ bdevname(rs->dev[p].dev->bdev, buf), p);
+
+ DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes",
+ rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
+ atomic_read(&rs->sc.stripes));
+ DMINFO("algorithm \"%s\", %u chunks with %uMB/s", rs->xor.f->name,
+ rs->xor.chunks, mbpers(rs, speed));
+ DMINFO("%s set with net %u/%u devices", rs->set.raid_type->descr,
+ rs->set.data_devs, rs->set.raid_devs);
+}
+
+/* Get all devices and offsets. */
+static int
+dev_parms(struct dm_target *ti, struct raid_set *rs,
+ char **argv, int *p)
+{
+ for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
+ int r;
+ unsigned long long tmp;
+ struct raid_dev *dev = rs->dev + *p;
+ union dev_lookup dl = {.dev = dev };
+
+ /* Get offset and device. */
+ r = sscanf(argv[1], "%llu", &tmp);
+ if (r != 1)
+ TI_ERR("Invalid RAID device offset parameter");
+
+ dev->start = tmp;
+ r = dm_get_device(ti, argv[0], dev->start,
+ rs->set.sectors_per_dev,
+ dm_table_get_mode(ti->table), &dev->dev);
+ if (r)
+ TI_ERR_RET("RAID device lookup failure", r);
+
+ r = raid_dev_lookup(rs, bynumber, &dl);
+ if (r != -ENODEV && r < *p) {
+ (*p)++; /* Ensure dm_put_device() on actual device. */
+ TI_ERR_RET("Duplicate RAID device", -ENXIO);
+ }
+ }
+
+ return 0;
+}
+
+/* Set recovery bandwidth. */
+static INLINE void
+recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
+{
+ rs->recover.bandwidth = bandwidth;
+ rs->recover.bandwidth_work = 100 / bandwidth;
+}
+
+/* Handle variable number of RAID parameters. */
+static int
+raid_variable_parms(struct dm_target *ti, char **argv,
+ unsigned i, int *raid_parms,
+ int *chunk_size, int *chunk_size_parm,
+ int *stripes, int *stripes_parm,
+ int *io_size, int *io_size_parm,
+ int *recover_io_size, int *recover_io_size_parm,
+ int *bandwidth, int *bandwidth_parm)
+{
+ /* Fetch # of variable raid parameters. */
+ if (sscanf(argv[i++], "%d", raid_parms) != 1 ||
+ !range_ok(*raid_parms, 0, 5))
+ TI_ERR("Bad variable raid parameters number");
+
+ if (*raid_parms) {
+ /*
+ * If we've got variable RAID parameters,
+ * chunk size is the first one
+ */
+ if (sscanf(argv[i++], "%d", chunk_size) != 1 ||
+ (*chunk_size != -1 &&
+ (!POWER_OF_2(*chunk_size) ||
+ !range_ok(*chunk_size, IO_SIZE_MIN, CHUNK_SIZE_MAX))))
+ TI_ERR("Invalid chunk size; must be 2^^n and <= 16384");
+
+ *chunk_size_parm = *chunk_size;
+ if (*chunk_size == -1)
+ *chunk_size = CHUNK_SIZE;
+
+ /*
+ * In case we've got 2 or more variable raid
+ * parameters, the number of stripes is the second one
+ */
+ if (*raid_parms > 1) {
+ if (sscanf(argv[i++], "%d", stripes) != 1 ||
+ (*stripes != -1 &&
+ !range_ok(*stripes, STRIPES_MIN,
+ STRIPES_MAX)))
+ TI_ERR("Invalid number of stripes: must "
+ "be >= 8 and <= 8192");
+ }
+
+ *stripes_parm = *stripes;
+ if (*stripes == -1)
+ *stripes = STRIPES;
+
+ /*
+ * In case we've got 3 or more variable raid
+ * parameters, the io size is the third one.
+ */
+ if (*raid_parms > 2) {
+ if (sscanf(argv[i++], "%d", io_size) != 1 ||
+ (*io_size != -1 &&
+ (!POWER_OF_2(*io_size) ||
+ !range_ok(*io_size, IO_SIZE_MIN,
+ min(BIO_MAX_SECTORS / 2,
+ *chunk_size)))))
+ TI_ERR("Invalid io size; must "
+ "be 2^^n and less equal "
+ "min(BIO_MAX_SECTORS/2, chunk size)");
+ } else
+ *io_size = *chunk_size;
+
+ *io_size_parm = *io_size;
+ if (*io_size == -1)
+ *io_size = *chunk_size;
+
+ /*
+ * In case we've got 4 variable raid parameters,
+ * the recovery stripe io_size is the fourth one
+ */
+ if (*raid_parms > 3) {
+ if (sscanf(argv[i++], "%d", recover_io_size) != 1 ||
+ (*recover_io_size != -1 &&
+ (!POWER_OF_2(*recover_io_size) ||
+ !range_ok(*recover_io_size, RECOVER_IO_SIZE_MIN,
+ BIO_MAX_SECTORS / 2))))
+ TI_ERR("Invalid recovery io size; must be "
+ "2^^n and less equal BIO_MAX_SECTORS/2");
+ }
+
+ *recover_io_size_parm = *recover_io_size;
+ if (*recover_io_size == -1)
+ *recover_io_size = RECOVER_IO_SIZE;
+
+ /*
+ * In case we've got 5 variable raid parameters,
+ * the recovery io bandwidth is the fifth one
+ */
+ if (*raid_parms > 4) {
+ if (sscanf(argv[i++], "%d", bandwidth) != 1 ||
+ (*bandwidth != -1 &&
+ !range_ok(*bandwidth, BANDWIDTH_MIN,
+ BANDWIDTH_MAX)))
+ TI_ERR("Invalid recovery bandwidth "
+ "percentage; must be > 0 and <= 100");
+ }
+
+ *bandwidth_parm = *bandwidth;
+ if (*bandwidth == -1)
+ *bandwidth = BANDWIDTH;
+ }
+
+ return 0;
+}
+
+/* Parse optional locking parameters. */
+static int
+raid_locking_parms(struct dm_target *ti, char **argv,
+ unsigned i, int *locking_parms,
+ struct dm_raid45_locking_type **locking_type)
+{
+ *locking_parms = 0;
+ *locking_type = &locking_none;
+
+ if (!strnicmp(argv[i], "none", strlen(argv[i])))
+ *locking_parms = 1;
+ else if (!strnicmp(argv[i + 1], "locking", strlen(argv[i + 1]))) {
+ *locking_type = &locking_none;
+ *locking_parms = 2;
+ } else if (!strnicmp(argv[i + 1], "cluster", strlen(argv[i + 1]))) {
+ *locking_type = &locking_cluster;
+ /* FIXME: namespace. */
+ *locking_parms = 3;
+ }
+
+ return *locking_parms == 1 ? -EINVAL : 0;
+}
+
+/* Set backing device information properties of RAID set. */
+static void rs_set_bdi(struct raid_set *rs, unsigned stripes, unsigned chunks)
+{
+ unsigned p, ra_pages;
+ struct mapped_device *md = dm_table_get_md(rs->ti->table);
+ struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
+
+ /* Set read-ahead for the RAID set and the component devices. */
+ bdi->ra_pages = stripes * stripe_pages(rs, rs->set.io_size);
+ ra_pages = chunks * chunk_pages(rs->set.io_size);
+ for (p = rs->set.raid_devs; p--; ) {
+ struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+ q->backing_dev_info.ra_pages = ra_pages;
+ }
+
+ /* Set congested function and data. */
+ bdi->congested_fn = raid_set_congested;
+ bdi->congested_data = rs;
+
+ dm_put(md);
+}
+
+/* Get backing device information properties of RAID set. */
+static void rs_get_ra(struct raid_set *rs, unsigned *stripes, unsigned *chunks)
+{
+ struct mapped_device *md = dm_table_get_md(rs->ti->table);
+
+ *stripes = dm_disk(md)->queue->backing_dev_info.ra_pages
+ / stripe_pages(rs, rs->set.io_size);
+ *chunks = bdev_get_queue(rs->dev->dev->bdev)->backing_dev_info.ra_pages
+ / chunk_pages(rs->set.io_size);
+
+ dm_put(md);
+}
+
+/*
+ * Construct a RAID4/5 mapping:
+ *
+ * log_type #log_params <log_params> \
+ * raid_type [#parity_dev] #raid_variable_params <raid_params> \
+ * [locking "none"/"cluster"]
+ * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
+ *
+ * log_type = "core"/"disk",
+ * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
+ * log_params = [dirty_log_path] region_size [[no]sync]
+ *
+ * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
+ *
+ * #parity_dev = N if raid_type = "raid4"
+ * o N = -1: pick default = last device
+ * o N >= 0 and < #raid_devs: parity device index
+ *
+ * #raid_variable_params = 0-5; raid_params (-1 = default):
+ * [chunk_size [#stripes [io_size [recover_io_size [%recovery_bandwidth]]]]]
+ * o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
+ * and <= CHUNK_SIZE_MAX)
+ * o #stripes is number of stripes allocated to stripe cache
+ * (must be > 1 and < STRIPES_MAX)
+ * o io_size (io unit size per device in sectors; must be 2^^n and > 8)
+ * o recover_io_size (io unit size per device for recovery in sectors;
+ *   must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
+ * o %recovery_bandwidth is the maximum percentage of io bandwidth spent
+ *   on recovery during application io (1-100%)
+ * If raid_variable_params = 0, defaults will be used.
+ * Any raid_variable_param can be set to -1 to apply a default
+ *
+ * #raid_devs = N (N >= 3)
+ *
+ * #dev_to_initialize = N
+ * -1: initialize parity on all devices
+ * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
+ * of a failed devices content after replacement
+ *
+ * <dev_path> = device_path (eg, /dev/sdd1)
+ * <offset> = begin at offset on <dev_path>
+ *
+ */
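+/*
+ * Example mapping (hypothetical devices and sizes; assumes the default
+ * chunk size fits the 8192 sector regions): a 3 disk raid5_la set with
+ * a core dirty log and all variable raid parameters defaulted:
+ *
+ *   0 1572864 raid45 core 2 8192 nosync raid5_la 0 3 -1 \
+ *     /dev/sda1 0 /dev/sdb1 0 /dev/sdc1 0
+ */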
+#define MIN_PARMS 13
+static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+ int bandwidth = BANDWIDTH, bandwidth_parm = -1,
+ chunk_size = CHUNK_SIZE, chunk_size_parm = -1,
+ dev_to_init, dl_parms, locking_parms, parity_parm, pi = -1,
+ i, io_size = IO_SIZE, io_size_parm = -1,
+ r, raid_devs, raid_parms,
+ recover_io_size = RECOVER_IO_SIZE, recover_io_size_parm = -1,
+ stripes = STRIPES, stripes_parm = -1;
+ unsigned speed;
+ sector_t tmp, sectors_per_dev;
+ struct dm_raid45_locking_type *locking;
+ struct raid_set *rs;
+ struct raid_type *raid_type;
+
+ /* Ensure minimum number of parameters. */
+ if (argc < MIN_PARMS)
+ TI_ERR("Not enough parameters");
+
+ /* Fetch # of dirty log parameters. */
+ if (sscanf(argv[1], "%d", &dl_parms) != 1
+ || !range_ok(dl_parms, 1, 4711))
+ TI_ERR("Bad dirty log parameters number");
+
+ /* Check raid_type. */
+ raid_type = get_raid_type(argv[dl_parms + 2]);
+ if (!raid_type)
+ TI_ERR("Bad raid type");
+
+ /* In case of RAID4, parity drive is selectable. */
+ parity_parm = !!(raid_type->level == raid4);
+
+ /* Handle variable number of RAID parameters. */
+ r = raid_variable_parms(ti, argv, dl_parms + parity_parm + 3,
+ &raid_parms,
+ &chunk_size, &chunk_size_parm,
+ &stripes, &stripes_parm,
+ &io_size, &io_size_parm,
+ &recover_io_size, &recover_io_size_parm,
+ &bandwidth, &bandwidth_parm);
+ if (r)
+ return r;
+
+ r = raid_locking_parms(ti, argv,
+ dl_parms + parity_parm + raid_parms + 4,
+ &locking_parms, &locking);
+ if (r)
+ return r;
+
+ /* # of raid devices. */
+ i = dl_parms + parity_parm + raid_parms + locking_parms + 4;
+ if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
+ raid_devs < raid_type->minimal_devs)
+ TI_ERR("Invalid number of raid devices");
+
+ /* In case of RAID4, check parity drive index is in limits. */
+ if (raid_type->level == raid4) {
+ /* Fetch index of parity device. */
+ if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
+ !range_ok(pi, 0, raid_devs - 1))
+ TI_ERR("Invalid RAID4 parity device index");
+ }
+
+ /*
+ * Index of device to initialize starts at 0
+ *
+ * o -1 -> don't initialize a particular device,
+ * o 0..raid_devs-1 -> initialize respective device
+ * (used for reconstruction of a replaced device)
+ */
+ if (sscanf
+ (argv[dl_parms + parity_parm + raid_parms + locking_parms + 5],
+ "%d", &dev_to_init) != 1
+ || !range_ok(dev_to_init, -1, raid_devs - 1))
+ TI_ERR("Invalid number for raid device to initialize");
+
+ /* Check # of raid device arguments. */
+ if (argc - dl_parms - parity_parm - raid_parms - 6 !=
+ 2 * raid_devs)
+ TI_ERR("Wrong number of raid device/offset arguments");
+
+ /*
+ * Check that the table length is divisible
+ * by the number of data devices
+ */
+ if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
+ &sectors_per_dev))
+ TI_ERR
+ ("Target length not divisible by number of data devices");
+
+ /*
+ * Check that the device size is
+ * divisible by the chunk size
+ */
+ if (!multiple(sectors_per_dev, chunk_size, &tmp))
+ TI_ERR("Device length not divisible by chunk_size");
+
+ /****************************************************************
+ * Now that we checked the constructor arguments ->
+ * let's allocate the RAID set
+ ****************************************************************/
+ r = context_alloc(&rs, raid_type, stripes, chunk_size, io_size,
+ recover_io_size, raid_devs, sectors_per_dev,
+ ti, dl_parms, argv);
+ if (r)
+ return r;
+
+ /*
+ * Set these here in order to avoid passing
+ * too many arguments to context_alloc()
+ */
+ rs->set.dev_to_init_parm = dev_to_init;
+ rs->set.dev_to_init = dev_to_init;
+ rs->set.pi_parm = pi;
+ rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
+ rs->set.raid_parms = raid_parms;
+ rs->set.chunk_size_parm = chunk_size_parm;
+ rs->set.io_size_parm = io_size_parm;
+ rs->sc.stripes_parm = stripes_parm;
+ rs->recover.io_size_parm = recover_io_size_parm;
+ rs->recover.bandwidth_parm = bandwidth_parm;
+ recover_set_bandwidth(rs, bandwidth);
+
+ /* Use locking type to lock stripe access. */
+ rs->locking = locking;
+
+ /* Get the device/offset tuples. */
+ argv += dl_parms + 6 + parity_parm + raid_parms;
+ r = dev_parms(ti, rs, argv, &i);
+ if (r)
+ goto err;
+
+ /* Initialize recovery. */
+ rs->recover.start_jiffies = jiffies;
+ rs->recover.end_jiffies = 0;
+ recovery_region_reset(rs);
+
+ /* Allow for recovery of any nosync regions. */
+ SetRSRecover(rs);
+
+ /* Set backing device information (eg. read ahead). */
+ rs_set_bdi(rs, chunk_size * 2, io_size * 4);
+ SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
+
+ speed = xor_optimize(rs); /* Select best xor algorithm. */
+
+ /* Initialize work queue to handle this RAID set's io. */
+ r = rs_workqueue_init(rs);
+ if (r)
+ goto err;
+
+ raid_set_log(rs, speed); /* Log information about RAID set. */
+
+ /*
+ * Make sure that dm core only hands maximum io size
+ * length down and pays attention to io boundaries.
+ */
+ ti->split_io = rs->set.io_size;
+ ti->private = rs;
+ return 0;
+
+err:
+ context_free(rs, ti, i);
+ return r;
+}
+
+/*
+ * Destruct a raid mapping
+ */
+static void raid_dtr(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+
+ /* Indicate recovery end so that ios in flight drain. */
+ ClearRSRecover(rs);
+
+ wake_do_raid(rs); /* Wake daemon. */
+ wait_ios(rs); /* Wait for any io still being processed. */
+ destroy_workqueue(rs->io.wq);
+ context_free(rs, ti, rs->set.raid_devs);
+}
+
+/* Queues ios to RAID sets. */
+static inline void queue_bio(struct raid_set *rs, struct bio *bio)
+{
+ int wake;
+ struct bio_list *in = &rs->io.in;
+ spinlock_t *in_lock = &rs->io.in_lock;
+
+ spin_lock_irq(in_lock);
+ wake = bio_list_empty(in);
+ bio_list_add(in, bio);
+ spin_unlock_irq(in_lock);
+
+ /* Wake daemon if input list was empty. */
+ if (wake)
+ wake_do_raid(rs);
+}
+
+/* Raid mapping function. */
+static int raid_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ /* I don't want to waste stripe cache capacity. */
+ if (bio_rw(bio) == READA)
+ return -EIO;
+ else {
+ struct raid_set *rs = ti->private;
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats +
+ (bio_data_dir(bio) == WRITE ?
+ S_BIOS_WRITE : S_BIOS_READ));
+
+ /*
+ * Take an io reference which device suspension/
+ * destruction waits on to drop to zero.
+ */
+ io_get(rs);
+ bio->bi_sector -= ti->begin; /* Remap sector. */
+ queue_bio(rs, bio); /* Queue to the daemon. */
+ return DM_MAPIO_SUBMITTED; /* Handle later. */
+ }
+}
+
+/* Device suspend. */
+static void raid_postsuspend(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct dm_dirty_log *dl = rs->recover.dl;
+
+ SetRSSuspended(rs);
+
+ if (RSRecover(rs))
+ dm_rh_stop_recovery(rs->recover.rh); /* Wakes do_raid(). */
+ else
+ wake_do_raid(rs);
+
+ wait_ios(rs); /* Wait for completion of all ios being processed. */
+ if (dl->type->postsuspend && dl->type->postsuspend(dl))
+ /* Suspend dirty log. */
+ /* FIXME: need better error handling. */
+ DMWARN("log suspend failed");
+}
+
+/* Device resume. */
+static void raid_resume(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct recover *rec = &rs->recover;
+ struct dm_dirty_log *dl = rec->dl;
+
+ if (dl->type->resume && dl->type->resume(dl))
+ /* Resume dirty log. */
+ /* FIXME: need better error handling. */
+ DMWARN("log resume failed");
+
+ rec->nr_regions_to_recover =
+ rec->nr_regions - dl->type->get_sync_count(dl);
+
+ ClearRSSuspended(rs);
+
+ /* Reset any unfinished recovery. */
+ if (RSRecover(rs)) {
+ recovery_region_reset(rs);
+ dm_rh_start_recovery(rec->rh);/* Calls wake_do_raid(). */
+ } else
+ wake_do_raid(rs);
+}
+
+static INLINE unsigned sc_size(struct raid_set *rs)
+{
+ return to_sector(atomic_read(&rs->sc.stripes) *
+ (sizeof(struct stripe) +
+ (sizeof(struct stripe_set) +
+ (sizeof(struct page_list) +
+ to_bytes(rs->set.io_size) * rs->set.raid_devs)) +
+ (rs->recover.end_jiffies ? 0 :
+ to_bytes(rs->set.raid_devs * rs->recover.io_size))));
+}
+
+/* REMOVEME: status output for development. */
+static void
+raid_devel_stats(struct dm_target *ti, char *result,
+ unsigned *size, unsigned maxlen)
+{
+ unsigned chunks, stripes, sz = *size;
+ unsigned long j;
+ char buf[BDEVNAME_SIZE], *p;
+ struct stats_map *sm, *sm_end = ARRAY_END(stats_map);
+ struct raid_set *rs = ti->private;
+ struct recover *rec = &rs->recover;
+ struct timespec ts;
+
+ DMEMIT("%s ", version);
+ DMEMIT("io_inprocess=%d ", atomic_read(&rs->io.in_process));
+ DMEMIT("io_inprocess_max=%d ", atomic_read(&rs->io.in_process_max));
+
+ for (sm = stats_map; sm < sm_end; sm++)
+ DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
+
+ DMEMIT(" overwrite=%s ", RSCheckOverwrite(rs) ? "on" : "off");
+ DMEMIT("sc=%u/%u/%u/%u/%u ", rs->set.chunk_size, rs->set.io_size,
+ atomic_read(&rs->sc.stripes), rs->sc.hash.buckets,
+ sc_size(rs));
+
+ j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
+ rec->start_jiffies;
+ jiffies_to_timespec(j, &ts);
+ sprintf(buf, "%ld.%ld", ts.tv_sec, ts.tv_nsec);
+ p = strchr(buf, '.');
+ p[3] = 0;
+
+ DMEMIT("rg=%llu%s/%llu/%llu/%u %s ",
+ (unsigned long long) rec->nr_regions_recovered,
+ RSRegionGet(rs) ? "+" : "",
+ (unsigned long long) rec->nr_regions_to_recover,
+ (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
+
+ rs_get_ra(rs, &stripes, &chunks);
+ DMEMIT("ra=%u/%u ", stripes, chunks);
+
+ *size = sz;
+}
+
+static int
+raid_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
+{
+ unsigned i, sz = 0;
+ char buf[BDEVNAME_SIZE];
+ struct raid_set *rs = ti->private;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ /* REMOVEME: statistics. */
+ if (RSDevelStats(rs))
+ raid_devel_stats(ti, result, &sz, maxlen);
+
+ DMEMIT("%u ", rs->set.raid_devs);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s ",
+ format_dev_t(buf, rs->dev[i].dev->bdev->bd_dev));
+
+ DMEMIT("1 ");
+ for (i = 0; i < rs->set.raid_devs; i++) {
+ DMEMIT("%c", dev_operational(rs, i) ? 'A' : 'D');
+
+ if (rs->set.raid_type->level == raid4 &&
+ i == rs->set.pi)
+ DMEMIT("p");
+
+ if (rs->set.dev_to_init == i)
+ DMEMIT("i");
+ }
+
+ break;
+
+ case STATUSTYPE_TABLE:
+ sz = rs->recover.dl->type->status(rs->recover.dl, type,
+ result, maxlen);
+ DMEMIT("%s %u ", rs->set.raid_type->name,
+ rs->set.raid_parms);
+
+ if (rs->set.raid_type->level == raid4)
+ DMEMIT("%d ", rs->set.pi_parm);
+
+ if (rs->set.raid_parms)
+ DMEMIT("%d ", rs->set.chunk_size_parm);
+
+ if (rs->set.raid_parms > 1)
+ DMEMIT("%d ", rs->sc.stripes_parm);
+
+ if (rs->set.raid_parms > 2)
+ DMEMIT("%d ", rs->set.io_size_parm);
+
+ if (rs->set.raid_parms > 3)
+ DMEMIT("%d ", rs->recover.io_size_parm);
+
+ if (rs->set.raid_parms > 4)
+ DMEMIT("%d ", rs->recover.bandwidth_parm);
+
+ DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
+
+ for (i = 0; i < rs->set.raid_devs; i++)
+ DMEMIT("%s %llu ",
+ format_dev_t(buf,
+ rs->dev[i].dev->bdev->bd_dev),
+ (unsigned long long) rs->dev[i].start);
+ }
+
+ return 0;
+}
+
+/*
+ * Message interface
+ */
+enum raid_msg_actions {
+ act_bw, /* Recovery bandwidth switch. */
+ act_dev, /* Device failure switch. */
+ act_overwrite, /* Stripe overwrite check. */
+ act_read_ahead, /* Set read ahead. */
+ act_stats, /* Development statistics switch. */
+ act_sc, /* Stripe cache switch. */
+
+ act_on, /* Set entity on. */
+ act_off, /* Set entity off. */
+ act_reset, /* Reset entity. */
+
+ act_set = act_on, /* Set # absolute. */
+ act_grow = act_off, /* Grow # by an amount. */
+ act_shrink = act_reset, /* Shrink # by an amount. */
+};
+
+/* Turn a delta into an absolute value. */
+static int _absolute(unsigned long action, int act, int r)
+{
+ /* Make delta absolute. */
+ if (test_bit(act_set, &action))
+ ;
+ else if (test_bit(act_grow, &action))
+ r += act;
+ else if (test_bit(act_shrink, &action))
+ r = act - r;
+ else
+ r = -EINVAL;
+
+ return r;
+}
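+/* E.g. with a current value of 50: 'se 30' -> 30, 'g 10' -> 60, 'sh 10' -> 40. */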
+
+ /* Change recovery io bandwidth. */
+static int bandwidth_change(struct dm_msg *msg, void *context)
+{
+ struct raid_set *rs = context;
+ int act = rs->recover.bandwidth;
+ int bandwidth = DM_MSG_INT_ARG(msg);
+
+ if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
+ /* Make delta bandwidth absolute. */
+ bandwidth = _absolute(msg->action, act, bandwidth);
+
+ /* Check range. */
+ if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
+ recover_set_bandwidth(rs, bandwidth);
+ return 0;
+ }
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Change state of a device (running/offline). */
+/* FIXME: this only works while recovering! */
+static int device_state(struct dm_msg *msg, void *context)
+{
+ int r;
+ const char *str = "is already ";
+ union dev_lookup dl = { .dev_name = DM_MSG_STR_ARG(msg) };
+ struct raid_set *rs = context;
+
+ r = raid_dev_lookup(rs, strchr(dl.dev_name, ':') ?
+ bymajmin : byname, &dl);
+ if (r == -ENODEV) {
+ DMERR("device %s is no member of this set", dl.dev_name);
+ return r;
+ }
+
+ if (test_bit(act_off, &msg->action)) {
+ if (dev_operational(rs, r))
+ str = "";
+ } else if (!dev_operational(rs, r))
+ str = "";
+
+ DMINFO("/dev/%s %s%s", dl.dev_name, str,
+ test_bit(act_off, &msg->action) ? "offline" : "running");
+
+ return test_bit(act_off, &msg->action) ?
+ raid_set_check_and_degrade(rs, NULL, r) :
+ raid_set_check_and_upgrade(rs, r);
+}
+
+/* Set/reset development feature flags. */
+static int devel_flags(struct dm_msg *msg, void *context)
+{
+ struct raid_set *rs = context;
+
+ if (test_bit(act_on, &msg->action))
+ return test_and_set_bit(msg->spec->parm,
+ &rs->io.flags) ? -EPERM : 0;
+ else if (test_bit(act_off, &msg->action))
+ return test_and_clear_bit(msg->spec->parm,
+ &rs->io.flags) ? 0 : -EPERM;
+ else if (test_bit(act_reset, &msg->action)) {
+ if (test_bit(act_stats, &msg->action)) {
+ stats_reset(rs);
+ goto on;
+ } else if (test_bit(act_overwrite, &msg->action)) {
+on:
+ set_bit(msg->spec->parm, &rs->io.flags);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+ /* Set stripe and chunk read ahead pages. */
+static int read_ahead_set(struct dm_msg *msg, void *context)
+{
+ int stripes = DM_MSG_INT_ARGS(msg, 0);
+ int chunks = DM_MSG_INT_ARGS(msg, 1);
+
+ if (range_ok(stripes, 1, 512) &&
+ range_ok(chunks, 1, 512)) {
+ rs_set_bdi(context, stripes, chunks);
+ return 0;
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Resize the stripe cache. */
+static int stripecache_resize(struct dm_msg *msg, void *context)
+{
+ int act, stripes;
+ struct raid_set *rs = context;
+
+ /* Deny permission while the daemon is still shrinking! */
+ if (atomic_read(&rs->sc.stripes_to_shrink))
+ return -EPERM;
+
+ stripes = DM_MSG_INT_ARG(msg);
+ if (stripes > 0) {
+ act = atomic_read(&rs->sc.stripes);
+
+ /* Make delta stripes absolute. */
+ stripes = _absolute(msg->action, act, stripes);
+
+ /*
+ * Check range and that the # of stripes changes.
+ * We can grow from here but need to leave any
+ * shrinking to the worker for synchronization.
+ */
+ if (range_ok(stripes, STRIPES_MIN, STRIPES_MAX)) {
+ if (stripes > act)
+ return sc_grow(&rs->sc, stripes - act, SC_GROW);
+ else if (stripes < act) {
+ atomic_set(&rs->sc.stripes_to_shrink,
+ act - stripes);
+ wake_do_raid(rs);
+ }
+
+ return 0;
+ }
+ }
+
+ set_bit(dm_msg_ret_arg, &msg->ret);
+ set_bit(dm_msg_ret_inval, &msg->ret);
+ return -EINVAL;
+}
+
+/* Parse the RAID message action. */
+/*
+ * 'ba[ndwidth] {se[t],g[row],sh[rink]} #' # e.g. 'ba se 50'
+ * 'de[vice] o[ffline]/r[unning] DevName/maj:min' # e.g. 'device o /dev/sda'
+ * 'o[verwrite] {on,of[f],r[eset]}' # e.g. 'o of'
+ * 'r[ead_ahead] set #stripes #chunks' # e.g. 'r se 3 2'
+ * 'sta[tistics] {on,of[f],r[eset]}' # e.g. 'stat of'
+ * 'str[ipecache] {se[t],g[row],sh[rink]} #' # e.g. 'stripe set 1024'
+ *
+ */
+static int
+raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+ /* Variables to store the parsed parameters in. */
+ static int i[2];
+ static unsigned long *i_arg[] = {
+ (unsigned long *) i + 0,
+ (unsigned long *) i + 1,
+ };
+ static char *p;
+ static unsigned long *p_arg[] = { (unsigned long *) &p };
+
+ /* Declare all message option strings. */
+ static char *str_sgs[] = { "set", "grow", "shrink" };
+ static char *str_dev[] = { "running", "offline" };
+ static char *str_oor[] = { "on", "off", "reset" };
+
+ /* Declare all actions. */
+ static unsigned long act_sgs[] = { act_set, act_grow, act_shrink };
+ static unsigned long act_oor[] = { act_on, act_off, act_reset };
+
+ /* Bandwidth option. */
+ static struct dm_message_option bw_opt = { 3, str_sgs, act_sgs };
+ static struct dm_message_argument bw_args = {
+ 1, i_arg, { dm_msg_int_t }
+ };
+
+ /* Device option. */
+ static struct dm_message_option dev_opt = { 2, str_dev, act_oor };
+ static struct dm_message_argument dev_args = {
+ 1, p_arg, { dm_msg_base_t }
+ };
+
+ /* Read ahead option. */
+ static struct dm_message_option ra_opt = { 1, str_sgs, act_sgs };
+ static struct dm_message_argument ra_args = {
+ 2, i_arg, { dm_msg_int_t, dm_msg_int_t }
+ };
+
+ static struct dm_message_argument null_args = {
+ 0, NULL, { dm_msg_int_t }
+ };
+
+ /* Overwrite and statistics option. */
+ static struct dm_message_option ovr_stats_opt = { 3, str_oor, act_oor };
+
+ /* Stripecache option. */
+ static struct dm_message_option stripe_opt = { 3, str_sgs, act_sgs };
+
+ /* Declare messages. */
+ static struct dm_msg_spec specs[] = {
+ { "bandwidth", act_bw, &bw_opt, &bw_args,
+ 0, bandwidth_change },
+ { "device", act_dev, &dev_opt, &dev_args,
+ 0, device_state },
+ { "overwrite", act_overwrite, &ovr_stats_opt, &null_args,
+ RS_CHECK_OVERWRITE, devel_flags },
+ { "read_ahead", act_read_ahead, &ra_opt, &ra_args,
+ 0, read_ahead_set },
+ { "statistics", act_stats, &ovr_stats_opt, &null_args,
+ RS_DEVEL_STATS, devel_flags },
+ { "stripecache", act_sc, &stripe_opt, &bw_args,
+ 0, stripecache_resize },
+ };
+
+ /* The message for the parser. */
+ struct dm_msg msg = {
+ .num_specs = ARRAY_SIZE(specs),
+ .specs = specs,
+ };
+
+ return dm_message_parse(TARGET, &msg, ti->private, argc, argv);
+}
+/*
+ * END message interface
+ */
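+
To make the grammar above concrete, these are plausible message strings as they would arrive at raid_message() via the device-mapper message interface (e.g. 'dmsetup message <dev> 0 <string>'); the array name and the specific values are illustrative, not taken from the patch:

	static const char *example_raid_msgs[] = {
		"bandwidth set 50",		/* absolute recovery bandwidth */
		"device offline /dev/sda",	/* switch a member device off */
		"overwrite off",		/* disable stripe overwrite checking */
		"read_ahead set 3 2",		/* 3 stripes / 2 chunks of read ahead */
		"statistics reset",		/* clear development statistics */
		"stripecache grow 128",		/* add 128 stripes to the cache */
	};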
+
+static struct target_type raid_target = {
+ .name = "raid45",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = raid_ctr,
+ .dtr = raid_dtr,
+ .map = raid_map,
+ .postsuspend = raid_postsuspend,
+ .resume = raid_resume,
+ .status = raid_status,
+ .message = raid_message,
+};
+
+static void init_exit(const char *bad_msg, const char *good_msg, int r)
+{
+ if (r)
+ DMERR("Failed to %sregister target [%d]", bad_msg, r);
+ else
+ DMINFO("%s %s", good_msg, version);
+}
+
+static int __init dm_raid_init(void)
+{
+ int r;
+
+ r = dm_register_target(&raid_target);
+ init_exit("", "initialized", r);
+ return r;
+}
+
+static void __exit dm_raid_exit(void)
+{
- int r;
-
- r = dm_unregister_target(&raid_target);
- init_exit("un", "exit", r);
++ dm_unregister_target(&raid_target);
++ init_exit("un", "exit", 0);
+}
+
+/* Module hooks. */
+module_init(dm_raid_init);
+module_exit(dm_raid_exit);
+
+MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");
--- /dev/null
+ /*
+ * Copyright (C) 2003 Sistina Software Limited.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+ #include <linux/dm-dirty-log.h>
++#include "dm-bio-list.h"
+ #include <linux/dm-region-hash.h>
+
+ #include <linux/ctype.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/vmalloc.h>
+
+ #include "dm.h"
-#include "dm-bio-list.h"
+
+ #define DM_MSG_PREFIX "region hash"
+
+ /*-----------------------------------------------------------------
+ * Region hash
+ *
+ * The mirror splits itself up into discrete regions. Each
+ * region can be in one of three states: clean, dirty,
+ * nosync. There is no need to put clean regions in the hash.
+ *
+ * In addition to being present in the hash table a region _may_
+ * be present on one of three lists.
+ *
+ * clean_regions: Regions on this list have no io pending to
+ * them, they are in sync, we are no longer interested in them,
+ * they are dull. dm_rh_update_states() will remove them from the
+ * hash table.
+ *
+ * quiesced_regions: These regions have been spun down, ready
+ * for recovery. rh_recovery_start() will remove regions from
+ * this list and hand them to kmirrord, which will schedule the
+ * recovery io with kcopyd.
+ *
+ * recovered_regions: Regions that kcopyd has successfully
+ * recovered. dm_rh_update_states() will now schedule any delayed
+ * io, up the recovery_count, and remove the region from the
+ * hash.
+ *
+ * There are 2 locks:
+ * A rw spin lock 'hash_lock' protects just the hash table,
+ * this is never held in write mode from interrupt context,
+ * which I believe means that we only have to disable irqs when
+ * doing a write lock.
+ *
+ * An ordinary spin lock 'region_lock' that protects the three
+ * lists in the region_hash, with the 'state', 'list' and
+ * 'delayed_bios' fields of the regions. This is used from irq
+ * context, so all other uses will have to suspend local irqs.
+ *---------------------------------------------------------------*/
-struct dm_region_hash {
- uint32_t region_size;
- unsigned region_shift;
-
- /* holds persistent region state */
- struct dm_dirty_log *log;
-
- /* hash table */
- rwlock_t hash_lock;
- mempool_t *region_pool;
- unsigned mask;
- unsigned nr_buckets;
- unsigned prime;
- unsigned shift;
- struct list_head *buckets;
-
- unsigned max_recovery; /* Max # of regions to recover in parallel */
-
- spinlock_t region_lock;
- atomic_t recovery_in_flight;
- struct semaphore recovery_count;
- struct list_head clean_regions;
- struct list_head quiesced_regions;
- struct list_head recovered_regions;
- struct list_head failed_recovered_regions;
-
- void *context;
- sector_t target_begin;
-
- /* Callback function to schedule bios writes */
- void (*dispatch_bios)(void *context, struct bio_list *bios);
-
- /* Callback function to wakeup callers worker thread. */
- void (*wakeup_workers)(void *context);
-
- /* Callback function to wakeup callers recovery waiters. */
- void (*wakeup_all_recovery_waiters)(void *context);
-};
-
-struct dm_region {
- struct dm_region_hash *rh; /* FIXME: can we get rid of this ? */
- region_t key;
- int state;
-
- struct list_head hash_list;
- struct list_head list;
-
- atomic_t pending;
- struct bio_list delayed_bios;
-};
-
-/*
- * Conversion fns
- */
-static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
-{
- return sector >> rh->region_shift;
-}
-
-sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
-{
- return region << rh->region_shift;
-}
-EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
-
-region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
-{
- return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
-}
-EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
-
-void *dm_rh_region_context(struct dm_region *reg)
-{
- return reg->rh->context;
-}
-EXPORT_SYMBOL_GPL(dm_rh_region_context);
-
-region_t dm_rh_get_region_key(struct dm_region *reg)
-{
- return reg->key;
-}
-EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
-
-sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
-{
- return rh->region_size;
-}
-EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
-
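As a worked example of the conversions above (assuming, as the code requires, a power-of-two region size of 1024 sectors):

	/*
	 * region_shift = ffs(1024) - 1 = 10, so:
	 *
	 *   dm_rh_sector_to_region(rh, 5000) = 5000 >> 10 = region 4
	 *   dm_rh_region_to_sector(rh, 4)    = 4 << 10    = sector 4096
	 */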
+ /*
+ * FIXME: shall we pass in a structure instead of all these args to
+ * dm_region_hash_create()?
+ */
+ #define RH_HASH_MULT 2654435387U
+ #define RH_HASH_SHIFT 12
+
+ #define MIN_REGIONS 64
+ struct dm_region_hash *dm_region_hash_create(
+ void *context, void (*dispatch_bios)(void *context,
+ struct bio_list *bios),
+ void (*wakeup_workers)(void *context),
+ void (*wakeup_all_recovery_waiters)(void *context),
+ sector_t target_begin, unsigned max_recovery,
+ struct dm_dirty_log *log, uint32_t region_size,
+ region_t nr_regions)
+ {
+ struct dm_region_hash *rh;
+ unsigned nr_buckets, max_buckets;
+ size_t i;
+
+ /*
+ * Calculate a suitable number of buckets for our hash
+ * table.
+ */
+ max_buckets = nr_regions >> 6;
+ for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
+ ;
+ nr_buckets >>= 1;
+
+ rh = kmalloc(sizeof(*rh), GFP_KERNEL);
+ if (!rh) {
+ DMERR("unable to allocate region hash memory");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ rh->context = context;
+ rh->dispatch_bios = dispatch_bios;
+ rh->wakeup_workers = wakeup_workers;
+ rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
+ rh->target_begin = target_begin;
+ rh->max_recovery = max_recovery;
+ rh->log = log;
+ rh->region_size = region_size;
+ rh->region_shift = ffs(region_size) - 1;
+ rwlock_init(&rh->hash_lock);
+ rh->mask = nr_buckets - 1;
+ rh->nr_buckets = nr_buckets;
+
+ rh->shift = RH_HASH_SHIFT;
+ rh->prime = RH_HASH_MULT;
+
+ rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
+ if (!rh->buckets) {
+ DMERR("unable to allocate region hash bucket memory");
+ kfree(rh);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0; i < nr_buckets; i++)
+ INIT_LIST_HEAD(rh->buckets + i);
+
+ spin_lock_init(&rh->region_lock);
+ sema_init(&rh->recovery_count, 0);
+ atomic_set(&rh->recovery_in_flight, 0);
+ INIT_LIST_HEAD(&rh->clean_regions);
+ INIT_LIST_HEAD(&rh->quiesced_regions);
+ INIT_LIST_HEAD(&rh->recovered_regions);
+ INIT_LIST_HEAD(&rh->failed_recovered_regions);
+
+ rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
+ sizeof(struct dm_region));
+ if (!rh->region_pool) {
+ vfree(rh->buckets);
+ kfree(rh);
+ rh = ERR_PTR(-ENOMEM);
+ }
+
+ return rh;
+ }
+ EXPORT_SYMBOL_GPL(dm_region_hash_create);
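+
A quick sketch of the bucket sizing done in dm_region_hash_create(), with an assumed nr_regions of 100000:

	/*
	 * max_buckets = 100000 >> 6 = 1562; the loop doubles nr_buckets
	 * from 128 until it reaches 2048 (the first power of two >= 1562),
	 * and the final shift settles on 1024 buckets -- roughly 64..128
	 * regions per bucket on average.
	 */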
+
+ void dm_region_hash_destroy(struct dm_region_hash *rh)
+ {
+ unsigned h;
+ struct dm_region *reg, *nreg;
+
+ BUG_ON(!list_empty(&rh->quiesced_regions));
+ for (h = 0; h < rh->nr_buckets; h++) {
+ list_for_each_entry_safe(reg, nreg, rh->buckets + h,
+ hash_list) {
+ BUG_ON(atomic_read(&reg->pending));
+ mempool_free(reg, rh->region_pool);
+ }
+ }
+
+ if (rh->log)
+ dm_dirty_log_destroy(rh->log);
+
+ if (rh->region_pool)
+ mempool_destroy(rh->region_pool);
+
+ vfree(rh->buckets);
+ kfree(rh);
+ }
+ EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
+
+ struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
+ {
+ return rh->log;
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
+
+ static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
+ {
+ return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
+ }
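+
rh_hash() is a multiplicative hash: the large odd constant scrambles the region number and the right shift discards the poorly mixed low bits before masking. With an assumed table of 1024 buckets (mask 0x3ff), the computation reduces to:

	/* bucket = ((region * RH_HASH_MULT) >> RH_HASH_SHIFT) & 0x3ff; */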
+
+ static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
+ {
+ struct dm_region *reg;
+ struct list_head *bucket = rh->buckets + rh_hash(rh, region);
+
+ list_for_each_entry(reg, bucket, hash_list)
+ if (reg->key == region)
+ return reg;
+
+ return NULL;
+ }
+
+ static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
+ {
+ list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
+ }
+
+ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
+ {
+ struct dm_region *reg, *nreg;
+
+ nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+ if (unlikely(!nreg))
+ nreg = kmalloc(sizeof(*nreg), GFP_NOIO);
+
+ nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
+ DM_RH_CLEAN : DM_RH_NOSYNC;
+ nreg->rh = rh;
+ nreg->key = region;
+ INIT_LIST_HEAD(&nreg->list);
+ atomic_set(&nreg->pending, 0);
+ bio_list_init(&nreg->delayed_bios);
+
+ write_lock_irq(&rh->hash_lock);
+ reg = __rh_lookup(rh, region);
+ if (reg)
+ /* We lost the race. */
+ mempool_free(nreg, rh->region_pool);
+ else {
+ __rh_insert(rh, nreg);
+ if (nreg->state == DM_RH_CLEAN) {
+ spin_lock(&rh->region_lock);
+ list_add(&nreg->list, &rh->clean_regions);
+ spin_unlock(&rh->region_lock);
+ }
+
+ reg = nreg;
+ }
+ write_unlock_irq(&rh->hash_lock);
+
+ return reg;
+ }
+
+ static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
+ {
+ struct dm_region *reg;
+
+ reg = __rh_lookup(rh, region);
+ if (!reg) {
+ read_unlock(&rh->hash_lock);
+ reg = __rh_alloc(rh, region);
+ read_lock(&rh->hash_lock);
+ }
+
+ return reg;
+ }
+
+ int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
+ {
+ int r;
+ struct dm_region *reg;
+
+ read_lock(&rh->hash_lock);
+ reg = __rh_lookup(rh, region);
+ read_unlock(&rh->hash_lock);
+
+ if (reg)
+ return reg->state;
+
+ /*
+ * The region wasn't in the hash, so we fall back to the
+ * dirty log.
+ */
+ r = rh->log->type->in_sync(rh->log, region, may_block);
+
+ /*
+ * Any error from the dirty log (eg. -EWOULDBLOCK) gets
+ * taken as a DM_RH_NOSYNC
+ */
+ return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_get_state);
+
+ static void complete_resync_work(struct dm_region *reg, int success)
+ {
+ struct dm_region_hash *rh = reg->rh;
+
+ rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+ /*
+ * Dispatch the bios before we call 'wake_up_all'.
+ * This is important because if we are suspending,
+ * we want to know that recovery is complete and
+ * the work queue is flushed. If we wake_up_all
+ * before we dispatch_bios (queue bios and call wake()),
+ * then we risk suspending before the work queue
+ * has been properly flushed.
+ */
+ rh->dispatch_bios(rh->context, &reg->delayed_bios);
+ if (atomic_dec_and_test(&rh->recovery_in_flight))
+ rh->wakeup_all_recovery_waiters(rh->context);
+ up(&rh->recovery_count);
+ }
+
+ /* dm_rh_mark_nosync
+ * @ms
+ * @bio
+ * @done
+ * @error
+ *
+ * The bio was written on some mirror(s) but failed on other mirror(s).
+ * We can successfully endio the bio but should avoid the region being
+ * marked clean by setting the state DM_RH_NOSYNC.
+ *
+ * This function is _not_ safe in interrupt context!
+ */
+ void dm_rh_mark_nosync(struct dm_region_hash *rh,
+ struct bio *bio, unsigned done, int error)
+ {
+ unsigned long flags;
+ struct dm_dirty_log *log = rh->log;
+ struct dm_region *reg;
+ region_t region = dm_rh_bio_to_region(rh, bio);
+ int recovering = 0;
+
+ /* We must inform the log that the sync count has changed. */
+ log->type->set_region_sync(log, region, 0);
+
+ read_lock(&rh->hash_lock);
+ reg = __rh_find(rh, region);
+ read_unlock(&rh->hash_lock);
+
+ /* region hash entry should exist because write was in-flight */
+ BUG_ON(!reg);
+ BUG_ON(!list_empty(&reg->list));
+
+ spin_lock_irqsave(&rh->region_lock, flags);
+ /*
+ * Possible cases:
+ * 1) DM_RH_DIRTY
+ * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed
+ * 3) DM_RH_RECOVERING: flushing pending writes
+ * In either case, the region should not have been connected to any list.
+ */
+ recovering = (reg->state == DM_RH_RECOVERING);
+ reg->state = DM_RH_NOSYNC;
+ BUG_ON(!list_empty(&reg->list));
+ spin_unlock_irqrestore(&rh->region_lock, flags);
+
+ bio_endio(bio, error);
+ if (recovering)
+ complete_resync_work(reg, 0);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
+
+ void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
+ {
+ struct dm_region *reg, *next;
+
+ LIST_HEAD(clean);
+ LIST_HEAD(recovered);
+ LIST_HEAD(failed_recovered);
+
+ /*
+ * Quickly grab the lists.
+ */
+ write_lock_irq(&rh->hash_lock);
+ spin_lock(&rh->region_lock);
+ if (!list_empty(&rh->clean_regions)) {
+ list_splice_init(&rh->clean_regions, &clean);
+
+ list_for_each_entry(reg, &clean, list)
+ list_del(&reg->hash_list);
+ }
+
+ if (!list_empty(&rh->recovered_regions)) {
+ list_splice_init(&rh->recovered_regions, &recovered);
+
+ list_for_each_entry(reg, &recovered, list)
+ list_del(&reg->hash_list);
+ }
+
+ if (!list_empty(&rh->failed_recovered_regions)) {
+ list_splice_init(&rh->failed_recovered_regions,
+ &failed_recovered);
+
+ list_for_each_entry(reg, &failed_recovered, list)
+ list_del(&reg->hash_list);
+ }
+
+ spin_unlock(&rh->region_lock);
+ write_unlock_irq(&rh->hash_lock);
+
+ /*
+ * All the regions on the recovered and clean lists have
+ * now been pulled out of the system, so no need to do
+ * any more locking.
+ */
+ list_for_each_entry_safe(reg, next, &recovered, list) {
+ rh->log->type->clear_region(rh->log, reg->key);
+ complete_resync_work(reg, 1);
+ mempool_free(reg, rh->region_pool);
+ }
+
+ list_for_each_entry_safe(reg, next, &failed_recovered, list) {
+ complete_resync_work(reg, errors_handled ? 0 : 1);
+ mempool_free(reg, rh->region_pool);
+ }
+
+ list_for_each_entry_safe(reg, next, &clean, list) {
+ rh->log->type->clear_region(rh->log, reg->key);
+ mempool_free(reg, rh->region_pool);
+ }
+
+ rh->log->type->flush(rh->log);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_update_states);
+
-static void rh_inc(struct dm_region_hash *rh, region_t region)
++void dm_rh_inc(struct dm_region_hash *rh, region_t region)
+ {
+ struct dm_region *reg;
+
+ read_lock(&rh->hash_lock);
+ reg = __rh_find(rh, region);
+
+ spin_lock_irq(&rh->region_lock);
+ atomic_inc(&reg->pending);
+
+ if (reg->state == DM_RH_CLEAN) {
+ reg->state = DM_RH_DIRTY;
+ list_del_init(&reg->list); /* take off the clean list */
+ spin_unlock_irq(&rh->region_lock);
+
+ rh->log->type->mark_region(rh->log, reg->key);
+ } else
+ spin_unlock_irq(&rh->region_lock);
+
+
+ read_unlock(&rh->hash_lock);
+ }
++EXPORT_SYMBOL_GPL(dm_rh_inc);
+
+ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
+ {
+ struct bio *bio;
+
+ for (bio = bios->head; bio; bio = bio->bi_next)
- rh_inc(rh, dm_rh_bio_to_region(rh, bio));
++ dm_rh_inc(rh, dm_rh_bio_to_region(rh, bio));
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
+
+ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
+ {
+ unsigned long flags;
+ struct dm_region *reg;
+ int should_wake = 0;
+
+ read_lock(&rh->hash_lock);
+ reg = __rh_lookup(rh, region);
+ read_unlock(&rh->hash_lock);
+
+ spin_lock_irqsave(&rh->region_lock, flags);
+ if (atomic_dec_and_test(&reg->pending)) {
+ /*
+ * There is no pending I/O for this region.
+ * We can move the region to the corresponding list for the next action.
+ * At this point, the region is not yet connected to any list.
+ *
+ * If the state is DM_RH_NOSYNC, the region should be kept off
+ * the clean list.
+ * The hash entry for DM_RH_NOSYNC will remain in memory
+ * until the region is recovered or the map is reloaded.
+ */
+
+ /* do nothing for DM_RH_NOSYNC */
+ if (reg->state == DM_RH_RECOVERING) {
+ list_add_tail(&reg->list, &rh->quiesced_regions);
+ } else if (reg->state == DM_RH_DIRTY) {
+ reg->state = DM_RH_CLEAN;
+ list_add(&reg->list, &rh->clean_regions);
+ }
+ should_wake = 1;
+ }
+ spin_unlock_irqrestore(&rh->region_lock, flags);
+
+ if (should_wake)
+ rh->wakeup_workers(rh->context);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_dec);
+
+ /*
+ * Starts quiescing a region in preparation for recovery.
+ */
+ static int __rh_recovery_prepare(struct dm_region_hash *rh)
+ {
+ int r;
+ region_t region;
+ struct dm_region *reg;
+
+ /*
+ * Ask the dirty log what's next.
+ */
+ r = rh->log->type->get_resync_work(rh->log, &region);
+ if (r <= 0)
+ return r;
+
+ /*
+ * Get this region, and start it quiescing by setting the
+ * recovering flag.
+ */
+ read_lock(&rh->hash_lock);
+ reg = __rh_find(rh, region);
+ read_unlock(&rh->hash_lock);
+
+ spin_lock_irq(&rh->region_lock);
+ reg->state = DM_RH_RECOVERING;
+
+ /* Already quiesced ? */
+ if (atomic_read(&reg->pending))
+ list_del_init(&reg->list);
+ else
+ list_move(&reg->list, &rh->quiesced_regions);
+
+ spin_unlock_irq(&rh->region_lock);
+
+ return 1;
+ }
+
-void dm_rh_recovery_prepare(struct dm_region_hash *rh)
++int dm_rh_recovery_prepare(struct dm_region_hash *rh)
+ {
++ int r = 0;
+ /* Extra reference to avoid race with dm_rh_stop_recovery */
+ atomic_inc(&rh->recovery_in_flight);
+
+ while (!down_trylock(&rh->recovery_count)) {
+ atomic_inc(&rh->recovery_in_flight);
+ if (__rh_recovery_prepare(rh) <= 0) {
+ atomic_dec(&rh->recovery_in_flight);
+ up(&rh->recovery_count);
++ r = -ENOENT;
+ break;
+ }
+ }
+
+ /* Drop the extra reference */
- if (atomic_dec_and_test(&rh->recovery_in_flight))
++ if (atomic_dec_and_test(&rh->recovery_in_flight)) {
+ rh->wakeup_all_recovery_waiters(rh->context);
++ r = -ESRCH;
++ }
++ return r;
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
+
+ /*
+ * Returns any quiesced regions.
+ */
+ struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
+ {
+ struct dm_region *reg = NULL;
+
+ spin_lock_irq(&rh->region_lock);
+ if (!list_empty(&rh->quiesced_regions)) {
+ reg = list_entry(rh->quiesced_regions.next,
+ struct dm_region, list);
+ list_del_init(&reg->list); /* remove from the quiesced list */
+ }
+ spin_unlock_irq(&rh->region_lock);
+
+ return reg;
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
+
+ void dm_rh_recovery_end(struct dm_region *reg, int success)
+ {
+ struct dm_region_hash *rh = reg->rh;
+
+ spin_lock_irq(&rh->region_lock);
+ if (success)
+ list_add(&reg->list, &reg->rh->recovered_regions);
+ else {
+ reg->state = DM_RH_NOSYNC;
+ list_add(&reg->list, &reg->rh->failed_recovered_regions);
+ }
+ spin_unlock_irq(&rh->region_lock);
+
+ rh->wakeup_workers(rh->context);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
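+
Taken together, dm_rh_recovery_prepare/start/end form a small state machine. A minimal hypothetical worker loop, with the copy engine elided, might look like this (a sketch, not part of the patch):

	static void example_do_recovery(struct dm_region_hash *rh)
	{
		struct dm_region *reg;

		/* Mark dirty regions DM_RH_RECOVERING and queue them for quiescing. */
		dm_rh_recovery_prepare(rh);

		/* Hand each quiesced region to the copy engine (kcopyd). */
		while ((reg = dm_rh_recovery_start(rh)))
			/* ...the copy callback would eventually call... */
			dm_rh_recovery_end(reg, 1 /* success */);
	}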
+
+ /* Return recovery in flight count. */
+ int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
+ {
+ return atomic_read(&rh->recovery_in_flight);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
+
+ int dm_rh_flush(struct dm_region_hash *rh)
+ {
+ return rh->log->type->flush(rh->log);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_flush);
+
+ void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
+ {
+ struct dm_region *reg;
+
+ read_lock(&rh->hash_lock);
+ reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
+ bio_list_add(&reg->delayed_bios, bio);
+ read_unlock(&rh->hash_lock);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_delay);
+
+ void dm_rh_stop_recovery(struct dm_region_hash *rh)
+ {
+ int i;
+
+ /* wait for any recovering regions */
+ for (i = 0; i < rh->max_recovery; i++)
+ down(&rh->recovery_count);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
+
+ void dm_rh_start_recovery(struct dm_region_hash *rh)
+ {
+ int i;
+
+ for (i = 0; i < rh->max_recovery; i++)
+ up(&rh->recovery_count);
+
+ rh->wakeup_workers(rh->context);
+ }
+ EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
+
+ MODULE_DESCRIPTION(DM_NAME " region hash");
+ MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
+ MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Copyright (C) 2007-2008 NEC Corporation. All Rights Reserved.
+ *
+ * Module Author: Kiyoshi Ueda
+ *
+ * This file is released under the GPL.
+ *
+ * Throughput-oriented path selector.
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+
+#define DM_MSG_PREFIX "multipath service-time"
+#define ST_MIN_IO 2
+#define ST_VERSION "0.1.0"
+
+struct selector {
+ struct list_head valid_paths;
+ struct list_head failed_paths;
+};
+
+struct path_info {
+ struct list_head list;
+ struct dm_path *path;
+ unsigned int repeat_count;
+
+ atomic_t in_flight; /* Total size of in-flight I/Os */
+ size_t perf; /* Recent performance of the path */
- sector_t last_sectors; /* Total sectors of the last disk_stat_read */
- size_t last_io_ticks; /* io_ticks of the last disk_stat_read */
++ sector_t last_sectors; /* Total sectors of the last part_stat_read */
++ size_t last_io_ticks; /* io_ticks of the last part_stat_read */
+};
+
+static struct selector *alloc_selector(void)
+{
+ struct selector *s = kzalloc(sizeof(*s), GFP_KERNEL);
+
+ if (s) {
+ INIT_LIST_HEAD(&s->valid_paths);
+ INIT_LIST_HEAD(&s->failed_paths);
+ }
+
+ return s;
+}
+
+static int st_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+ struct selector *s = alloc_selector();
+
+ if (!s)
+ return -ENOMEM;
+
+ ps->context = s;
+ return 0;
+}
+
+static void free_paths(struct list_head *paths)
+{
+ struct path_info *pi, *next;
+
+ list_for_each_entry_safe(pi, next, paths, list) {
+ list_del(&pi->list);
+ pi->path->pscontext = NULL;
+ kfree(pi);
+ }
+}
+
+static void st_destroy(struct path_selector *ps)
+{
+ struct selector *s = (struct selector *) ps->context;
+
+ free_paths(&s->valid_paths);
+ free_paths(&s->failed_paths);
+ kfree(s);
+ ps->context = NULL;
+}
+
+static int st_status(struct path_selector *ps, struct dm_path *path,
+ status_type_t type, char *result, unsigned int maxlen)
+{
+ int sz = 0;
+ struct path_info *pi;
+
+ if (!path)
+ DMEMIT("0 ");
+ else {
+ pi = path->pscontext;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("if:%08lu pf:%06lu ",
+ (unsigned long) atomic_read(&pi->in_flight),
+ pi->perf);
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT("%u ", pi->repeat_count);
+ break;
+ }
+ }
+
+ return sz;
+}
+
+static int st_add_path(struct path_selector *ps, struct dm_path *path,
+ int argc, char **argv, char **error)
+{
+ struct selector *s = (struct selector *) ps->context;
+ struct path_info *pi;
+ unsigned int repeat_count = ST_MIN_IO;
+ struct gendisk *disk = path->dev->bdev->bd_disk;
+
+ if (argc > 1) {
+ *error = "service-time ps: incorrect number of arguments";
+ return -EINVAL;
+ }
+
+ /* First path argument is number of I/Os before switching path. */
+ if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
+ *error = "service-time ps: invalid repeat count";
+ return -EINVAL;
+ }
+
+ /* allocate the path */
+ pi = kmalloc(sizeof(*pi), GFP_KERNEL);
+ if (!pi) {
+ *error = "service-time ps: Error allocating path context";
+ return -ENOMEM;
+ }
+
+ pi->path = path;
+ pi->repeat_count = repeat_count;
+
+ pi->perf = 0;
- pi->last_sectors = disk_stat_read(disk, sectors[READ])
- + disk_stat_read(disk, sectors[WRITE]);
- pi->last_io_ticks = disk_stat_read(disk, io_ticks);
++ pi->last_sectors = part_stat_read(&disk->part0, sectors[READ])
++ + part_stat_read(&disk->part0, sectors[WRITE]);
++ pi->last_io_ticks = part_stat_read(&disk->part0, io_ticks);
+ atomic_set(&pi->in_flight, 0);
+
+ path->pscontext = pi;
+
+ list_add_tail(&pi->list, &s->valid_paths);
+
+ return 0;
+}
+
+static void st_fail_path(struct path_selector *ps, struct dm_path *path)
+{
+ struct selector *s = (struct selector *) ps->context;
+ struct path_info *pi = path->pscontext;
+
+ list_move(&pi->list, &s->failed_paths);
+}
+
+static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
+{
+ struct selector *s = (struct selector *) ps->context;
+ struct path_info *pi = path->pscontext;
+
+ list_move_tail(&pi->list, &s->valid_paths);
+
+ return 0;
+}
+
+static void stats_update(struct path_info *pi)
+{
+ sector_t sectors;
+ size_t io_ticks, tmp;
+ struct gendisk *disk = pi->path->dev->bdev->bd_disk;
+
- sectors = disk_stat_read(disk, sectors[READ])
- + disk_stat_read(disk, sectors[WRITE]);
- io_ticks = disk_stat_read(disk, io_ticks);
++ sectors = part_stat_read(&disk->part0, sectors[READ])
++ + part_stat_read(&disk->part0, sectors[WRITE]);
++ io_ticks = part_stat_read(&disk->part0, io_ticks);
+
+ if ((sectors != pi->last_sectors) && (io_ticks != pi->last_io_ticks)) {
+ tmp = (sectors - pi->last_sectors) << 9;
+ do_div(tmp, jiffies_to_msecs((io_ticks - pi->last_io_ticks)));
+ pi->perf = tmp;
+
+ pi->last_sectors = sectors;
+ pi->last_io_ticks = io_ticks;
+ }
+}
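+
A worked example of the perf calculation above (numbers are illustrative): if a path moved 2048 sectors while accumulating 100ms worth of io_ticks, then

	/*
	 *   perf = (2048 << 9) / 100 = 1048576 / 100 = 10485 bytes/ms,
	 *
	 * i.e. roughly 10 MB/s of recent throughput for that path.
	 */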
+
+static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
+ size_t new_io)
+{
+ size_t if1, if2;
+
+ if1 = atomic_read(&pi1->in_flight);
+ if2 = atomic_read(&pi2->in_flight);
+
+ /*
+ * Case 1: No performance data available. Choose the less loaded path.
+ */
+ if (!pi1->perf || !pi2->perf)
+ return if1 - if2;
+
+ /*
+ * Case 2: Calculate service time. Choose faster path.
+ * if ((if1+new_io)/pi1->perf < (if2+new_io)/pi2->perf) pi1.
+ * if ((if1+new_io)/pi1->perf > (if2+new_io)/pi2->perf) pi2.
+ * The in-flight counts are scaled by 1024 before the division
+ * to keep precision with integer arithmetic; the smaller
+ * quotient identifies the faster path.
+ */
+ if1 = (if1 + new_io) << 10;
+ if2 = (if2 + new_io) << 10;
+ do_div(if1, pi1->perf);
+ do_div(if2, pi2->perf);
+
+ if (if1 != if2)
+ return if1 - if2;
+
+ /*
+ * Case 3: Service times are equal. Choose the path with higher throughput.
+ */
+ return pi2->perf - pi1->perf;
+}
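+
A worked comparison (illustrative numbers): path 1 has 4 units in flight but perf 100, path 2 has 2 in flight but perf 50, and new_io is 1:

	/*
	 *   if1 = (4 + 1) << 10 = 5120;  5120 / 100 = 51
	 *   if2 = (2 + 1) << 10 = 3072;  3072 /  50 = 61
	 *
	 * 51 < 61, so st_compare_load() returns a negative value and path 1
	 * wins despite its deeper queue, because it is twice as fast.
	 */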
+
+static struct dm_path *st_select_path(struct path_selector *ps,
+ unsigned *repeat_count, size_t nr_bytes)
+{
+ struct selector *s = (struct selector *) ps->context;
+ struct path_info *pi = NULL, *best = NULL;
+
+ if (list_empty(&s->valid_paths))
+ return NULL;
+
+ /* Rotate the preferred (first in list) path to balance load evenly. */
+ list_move_tail(s->valid_paths.next, &s->valid_paths);
+
+ /* Update performance information before best path selection */
+ list_for_each_entry(pi, &s->valid_paths, list)
+ stats_update(pi);
+
+ list_for_each_entry(pi, &s->valid_paths, list) {
+ if (!best)
+ best = pi;
+ else if (st_compare_load(pi, best, nr_bytes) < 0)
+ best = pi;
+ }
+
+ if (best) {
+ *repeat_count = best->repeat_count;
+ return best->path;
+ }
+
+ return NULL;
+}
+
+static int st_start_io(struct path_selector *ps, struct dm_path *path,
+ size_t nr_bytes)
+{
+ struct path_info *pi = path->pscontext;
+
+ atomic_add(nr_bytes, &pi->in_flight);
+
+ return 0;
+}
+
+static int st_end_io(struct path_selector *ps, struct dm_path *path,
+ size_t nr_bytes)
+{
+ struct path_info *pi = path->pscontext;
+
+ atomic_sub(nr_bytes, &pi->in_flight);
+
+ return 0;
+}
+
+static struct path_selector_type st_ps = {
+ .name = "service-time",
+ .module = THIS_MODULE,
+ .table_args = 1,
+ .info_args = 2,
+ .create = st_create,
+ .destroy = st_destroy,
+ .status = st_status,
+ .add_path = st_add_path,
+ .fail_path = st_fail_path,
+ .reinstate_path = st_reinstate_path,
+ .select_path = st_select_path,
+ .start_io = st_start_io,
+ .end_io = st_end_io,
+};
+
+static int __init dm_st_init(void)
+{
+ int r = dm_register_path_selector(&st_ps);
+
+ if (r < 0)
+ DMERR("register failed %d", r);
+
+ DMINFO("version " ST_VERSION " loaded");
+
+ return r;
+}
+
+static void __exit dm_st_exit(void)
+{
+ int r = dm_unregister_path_selector(&st_ps);
+
+ if (r < 0)
+ DMERR("unregister failed %d", r);
+}
+
+module_init(dm_st_init);
+module_exit(dm_st_exit);
+
+MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
+MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
+MODULE_LICENSE("GPL");
if (!dd)
return -ENOMEM;
- dd->mode = mode;
- dd->bdev = NULL;
+ dd->dm_dev.mode = mode;
+ dd->dm_dev.bdev = NULL;
- if ((r = open_dev(dd, dev, t->md))) {
+ r = open_dev(dd, dev, t->md);
+ if (r == -EROFS) {
- dd->mode &= ~FMODE_WRITE;
++ dd->dm_dev.mode &= ~FMODE_WRITE;
+ r = open_dev(dd, dev, t->md);
+ }
+ if (r) {
kfree(dd);
return r;
}
- if (dd->mode != mode)
- t->mode = dd->mode;
++ if (dd->dm_dev.mode != mode)
++ t->mode = dd->dm_dev.mode;
+
- format_dev_t(dd->name, dev);
+ format_dev_t(dd->dm_dev.name, dev);
atomic_set(&dd->count, 0);
list_add(&dd->list, &t->devices);
else
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
+ if (t->limits.no_request_stacking)
+ queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, q);
+ else
+ queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
+}
+
+void dm_table_set_integrity(struct dm_table *t, struct mapped_device *md)
+{
+ struct list_head *devices = dm_table_get_devices(t);
- struct dm_dev *prev, *cur;
++ struct dm_dev_internal *prev, *cur;
+
+ /*
+ * Run through all devices to ensure they have matching
+ * integrity profile
+ */
+ cur = prev = NULL;
+
+ list_for_each_entry(cur, devices, list) {
+
- if (prev && blk_integrity_compare(prev->bdev->bd_disk,
- cur->bdev->bd_disk) < 0) {
++ if (prev && blk_integrity_compare(prev->dm_dev.bdev->bd_disk,
++ cur->dm_dev.bdev->bd_disk) < 0) {
+ printk(KERN_ERR "%s: %s %s Integrity mismatch!\n",
- __func__, prev->bdev->bd_disk->disk_name,
- cur->bdev->bd_disk->disk_name);
++ __func__, prev->dm_dev.bdev->bd_disk->disk_name,
++ cur->dm_dev.bdev->bd_disk->disk_name);
+ return;
+ }
+ prev = cur;
+ }
+
+ /* Register dm device as being integrity capable */
- if (prev && bdev_get_integrity(prev->bdev)) {
++ if (prev && bdev_get_integrity(prev->dm_dev.bdev)) {
+ struct gendisk *disk = dm_disk(md);
+
+ if (blk_integrity_register(dm_disk(md),
- bdev_get_integrity(prev->bdev)))
++ bdev_get_integrity(prev->dm_dev.bdev)))
+ printk(KERN_ERR "%s: %s Could not register integrity!\n",
+ __func__, disk->disk_name);
+ else
+ printk(KERN_INFO "Enabling data integrity on %s\n",
+ disk->disk_name);
+ }
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
return r;
}
+int dm_table_any_busy_target(struct dm_table *t)
+{
+ int i;
+ struct dm_target *ti;
+
+ for (i = 0; i < t->num_targets; i++) {
+ ti = t->targets + i;
+ if (ti->type->busy && ti->type->busy(ti))
+ return 1;
+ }
+
+ return 0;
+}
+
void dm_table_unplug_all(struct dm_table *t)
{
- struct dm_dev *dd;
+ struct dm_dev_internal *dd;
struct list_head *devices = dm_table_get_devices(t);
list_for_each_entry(dd, devices, list) {
/* forced geometry settings */
struct hd_geometry geometry;
+ /* sysfs handle */
+ struct kobject kobj;
++
+ /* marker of flush suspend for request-based dm */
+ struct request suspend_rq;
+
+ /* For saving the address of __make_request for request based dm */
+ make_request_fn *saved_make_request_fn;
};
#define MIN_IOS 256
/*
* Block device functions
*/
- static int dm_blk_open(struct inode *inode, struct file *file)
+ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mapped_device *md;
+ int retval = 0;
spin_lock(&_minor_lock);
- md = inode->i_bdev->bd_disk->private_data;
+ md = bdev->bd_disk->private_data;
- if (!md)
+ if (!md) {
+ retval = -ENXIO;
goto out;
+ }
if (test_bit(DMF_FREEING, &md->flags) ||
test_bit(DMF_DELETING, &md->flags)) {
md = NULL;
+ retval = -ENXIO;
+ goto out;
+ }
- if (md->disk->policy && (file->f_mode & FMODE_WRITE)) {
++ if (get_disk_ro(md->disk) && (mode & FMODE_WRITE)) {
+ md = NULL;
+ retval = -EROFS;
goto out;
}
out:
spin_unlock(&_minor_lock);
- return md ? 0 : -ENXIO;
+ return retval;
}
- static int dm_blk_close(struct inode *inode, struct file *file)
+ static int dm_blk_close(struct gendisk *disk, fmode_t mode)
{
- struct mapped_device *md;
-
- md = inode->i_bdev->bd_disk->private_data;
+ struct mapped_device *md = disk->private_data;
atomic_dec(&md->open_count);
dm_put(md);
return 0;
goto out;
}
- if (tgt->type->ioctl)
- r = tgt->type->ioctl(tgt, cmd, arg);
+ if (cmd == BLKRRPART) {
+ /* Emulate Re-read partitions table */
- kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
++ kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
+ r = 0;
+ } else {
+ /* We only support devices that have a single target */
+ if (dm_table_get_num_targets(map) != 1)
+ goto out;
+
+ tgt = dm_table_get_target(map, 0);
+
+ if (tgt->type->ioctl)
- r = tgt->type->ioctl(tgt, inode, file, cmd, arg);
++ r = tgt->type->ioctl(tgt, cmd, arg);
+ }
out:
dm_table_put(map);
mempool_free(tio, md->tio_pool);
}
+static inline struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+ return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static inline void free_rq_tio(struct mapped_device *md,
+ struct dm_rq_target_io *tio)
+{
+ mempool_free(tio, md->tio_pool);
+}
+
- static inline struct dm_clone_bio_info *alloc_bio_info(struct mapped_device *md)
++static inline struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+ return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static inline void free_bio_info(struct mapped_device *md,
- struct dm_clone_bio_info *info)
++ struct dm_rq_clone_bio_info *info)
+{
+ mempool_free(info, md->io_pool);
+}
+
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
free_tio(md, tio);
}
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
- struct dm_clone_bio_info *info = clone->bi_private;
++ struct dm_rq_clone_bio_info *info = clone->bi_private;
+ struct dm_rq_target_io *tio = info->rq->end_io_data;
+ struct bio *bio = info->orig;
+ unsigned int nr_bytes = info->orig->bi_size;
+
+ free_bio_info(tio->md, info);
+ clone->bi_private = tio->md->bs;
+ bio_put(clone);
+
+ if (tio->error) {
+ /*
+ * An error has already been detected on the request.
+ * Once error occurred, just let clone->end_io() handle
+ * the remainder.
+ */
+ return;
+ } else if (error) {
+ /*
+ * Don't report the error to the upper layer yet.
+ * The error handling decision is made by the target driver
+ * when the request is completed.
+ */
+ tio->error = error;
+ return;
+ }
+
+ /*
+ * I/O for the bio successfully completed.
+ * Report the data completion to the upper layer.
+ */
+
+ /*
+ * bios are processed from the head of the list.
+ * So the completing bio should always be rq->bio.
+ * If it's not, something is wrong.
+ */
+ if (tio->orig->bio != bio)
+ DMERR("bio completion is going in the middle of the request");
+
+ /*
+ * Update the original request.
+ * Do not use blk_end_request() here, because it may complete
+ * the original request before the clone, and break the ordering.
+ */
+ blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+static void free_bio_clone(struct request *clone)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ struct mapped_device *md = tio->md;
+ struct bio *bio;
+
+ while ((bio = clone->bio) != NULL) {
+ clone->bio = bio->bi_next;
+
+ if (bio->bi_private) {
- struct dm_clone_bio_info *info = bio->bi_private;
++ struct dm_rq_clone_bio_info *info = bio->bi_private;
+ free_bio_info(md, info);
+ }
+
+ bio->bi_private = md->bs;
+ bio_put(bio);
+ }
+}
+
+static void dec_rq_pending(struct dm_rq_target_io *tio)
+{
+ if (!atomic_dec_return(&tio->md->pending))
+ /* nudge anyone waiting on suspend queue */
+ wake_up(&tio->md->wait);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+ struct request *clone = rq->special;
+ struct dm_rq_target_io *tio = clone->end_io_data;
+
+ rq->special = NULL;
+ rq->cmd_flags &= ~REQ_DONTPREP;
+
+ free_bio_clone(clone);
+ dec_rq_pending(tio);
+ free_rq_tio(tio->md, tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_request(struct request *clone)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ struct request *rq = tio->orig;
+ struct request_queue *q = rq->q;
+ unsigned long flags;
+
+ dm_unprep_request(rq);
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ if (elv_queue_empty(q))
+ blk_plug_device(q);
+ blk_requeue_request(q, rq);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_request);
+
+static inline void __stop_queue(struct request_queue *q)
+{
+ blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ __stop_queue(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static inline void __start_queue(struct request_queue *q)
+{
+ if (blk_queue_stopped(q))
+ blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ __start_queue(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ struct request *rq = tio->orig;
+ struct request_queue *q = rq->q;
+ unsigned int nr_bytes = blk_rq_bytes(rq);
+
+ if (blk_pc_request(rq)) {
+ rq->errors = clone->errors;
+ rq->data_len = clone->data_len;
+
+ if (rq->sense)
+ /*
+ * We are using the sense buffer of the original
+ * request.
+ * So setting the length of the sense data is enough.
+ */
+ rq->sense_len = clone->sense_len;
+ }
+
+ free_bio_clone(clone);
+ dec_rq_pending(tio);
+ free_rq_tio(tio->md, tio);
+
+ if (unlikely(blk_end_request(rq, error, nr_bytes)))
+ BUG();
+
+ blk_run_queue(q);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+ struct request *clone = rq->completion_data;
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+ int error = tio->error;
+ int r;
+
+ if (rq->cmd_flags & REQ_FAILED)
+ goto end_request;
+
+ if (rq_end_io) {
+ r = rq_end_io(tio->ti, clone, error, &tio->info);
+ if (r <= 0)
+ /* The target wants to complete the I/O */
+ error = r;
+ else if (r == DM_ENDIO_INCOMPLETE)
+ /* The target will handle the I/O */
+ return;
+ else if (r == DM_ENDIO_REQUEUE) {
+ /*
+ * The target wants to requeue the I/O.
+ * Don't invoke blk_run_queue() so that the requeued
+ * request won't be dispatched again soon.
+ */
+ dm_requeue_request(clone);
+ return;
+ } else {
+ DMWARN("unimplemented target endio return value: %d",
+ r);
+ BUG();
+ }
+ }
+
+end_request:
+ dm_end_request(clone, error);
+}
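+
For reference, a hypothetical target rq_end_io() honoring the return contract handled above might look like this (a sketch under an assumed retry policy, not part of the patch):

	static int example_rq_end_io(struct dm_target *ti, struct request *clone,
				     int error, union map_info *info)
	{
		/* Hypothetical policy: retry transport errors on another path. */
		if (error == -EIO)
			return DM_ENDIO_REQUEUE;

		/* Any value <= 0 lets dm_softirq_done() complete the request. */
		return error;
	}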
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ struct request *rq = tio->orig;
+
+ /*
+ * This only cleans up the bookkeeping of the queue in which
+ * the clone was dispatched.
+ * The clone is *NOT* actually freed here, because it was allocated
+ * from dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+ */
+ __blk_put_request(clone->q, clone);
+
+ /*
+ * Actual request completion is done in a softirq context which doesn't
+ * hold the queue lock. Otherwise, deadlock could occur because:
+ * - another request may be submitted by the upper level driver
+ * of the stacking during the completion
+ * - the submission which requires queue lock may be done
+ * against this queue
+ */
+ tio->error = error;
+ rq->completion_data = clone;
+ blk_complete_request(rq);
+}
+
+/*
+ * Complete the original request of a clone with an error status.
+ * Target's rq_end_io() function isn't called.
+ * This may be used by target's map_rq() function when the mapping fails.
+ */
+void dm_kill_request(struct request *clone, int error)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+ struct request *rq = tio->orig;
+
+ tio->error = error;
+ /* Avoid printing "I/O error" message, since we didn't I/O actually */
+ rq->cmd_flags |= (REQ_FAILED | REQ_QUIET);
+ rq->completion_data = clone;
+ blk_complete_request(rq);
+}
+EXPORT_SYMBOL_GPL(dm_kill_request);
+
static sector_t max_io_len(struct mapped_device *md,
sector_t sector, struct dm_target *ti)
{
return 0;
}
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct mapped_device *md = (struct mapped_device *)q->queuedata;
+
+ if (unlikely(bio_barrier(bio))) {
+ bio_endio(bio, -EOPNOTSUPP);
+ return 0;
+ }
+
- /*
- * Submitting to a stopped queue with no map is okay;
- * might happen during reconfiguration.
- */
- if (unlikely(!md->map) && !blk_queue_stopped(q)) {
++ if (unlikely(!md->map)) {
+ bio_endio(bio, -EIO);
+ return 0;
+ }
+
+ return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static inline int dm_request_based(struct mapped_device *md)
+{
+ return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+ struct mapped_device *md = q->queuedata;
+
+ if (dm_request_based(md))
+ return dm_make_request(q, bio);
+
+ return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+ int r;
+
+ rq->start_time = jiffies;
+ r = blk_insert_cloned_request(rq->q, rq);
+ if (r)
+ dm_kill_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void copy_request_info(struct request *clone, struct request *rq)
+{
+ clone->cmd_flags = (rq_data_dir(rq) | REQ_NOMERGE);
+ clone->cmd_type = rq->cmd_type;
+ clone->sector = rq->sector;
+ clone->hard_sector = rq->hard_sector;
+ clone->nr_sectors = rq->nr_sectors;
+ clone->hard_nr_sectors = rq->hard_nr_sectors;
+ clone->current_nr_sectors = rq->current_nr_sectors;
+ clone->hard_cur_sectors = rq->hard_cur_sectors;
+ clone->nr_phys_segments = rq->nr_phys_segments;
+ clone->ioprio = rq->ioprio;
+ clone->buffer = rq->buffer;
+ clone->cmd_len = rq->cmd_len;
+ if (rq->cmd_len)
+ clone->cmd = rq->cmd;
+ clone->data_len = rq->data_len;
+ clone->extra_len = rq->extra_len;
+ clone->sense_len = rq->sense_len;
+ clone->data = rq->data;
+ clone->sense = rq->sense;
+}
+
+static int clone_request_bios(struct request *clone, struct request *rq,
+ struct mapped_device *md)
+{
+ struct bio *bio, *clone_bio;
- struct dm_clone_bio_info *info;
++ struct dm_rq_clone_bio_info *info;
+
+ for (bio = rq->bio; bio; bio = bio->bi_next) {
+ info = alloc_bio_info(md);
+ if (!info)
+ goto free_and_out;
+
+ clone_bio = bio_alloc_bioset(GFP_ATOMIC, bio->bi_max_vecs,
+ md->bs);
+ if (!clone_bio) {
+ free_bio_info(md, info);
+ goto free_and_out;
+ }
+
+ __bio_clone(clone_bio, bio);
+ if (bio_integrity(bio))
+ if (bio_integrity_clone(clone_bio, bio, GFP_ATOMIC,
+ md->bs) < 0)
+ goto free_and_out;
+
+ clone_bio->bi_destructor = dm_bio_destructor;
+ clone_bio->bi_end_io = end_clone_bio;
+ info->rq = clone;
+ info->orig = bio;
+ clone_bio->bi_private = info;
+
+ if (clone->bio) {
+ clone->biotail->bi_next = clone_bio;
+ clone->biotail = clone_bio;
+ } else
+ clone->bio = clone->biotail = clone_bio;
+ }
+
+ return 0;
+
+free_and_out:
+ free_bio_clone(clone);
+
+ return -ENOMEM;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+ struct dm_rq_target_io *tio)
+{
+ int r;
+
+ blk_rq_init(NULL, clone);
+
+ r = clone_request_bios(clone, rq, tio->md);
+ if (r)
+ return r;
+
+ copy_request_info(clone, rq);
+ clone->start_time = jiffies;
+ clone->end_io = end_clone_request;
+ clone->end_io_data = tio;
+
+ return 0;
+}
+
+static inline int dm_flush_suspending(struct mapped_device *md)
+{
+ return !md->suspend_rq.data;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+ struct mapped_device *md = (struct mapped_device *)q->queuedata;
+ struct dm_rq_target_io *tio;
+ struct request *clone;
+
+ if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
+ if (dm_flush_suspending(md)) {
+ if (q->in_flight)
+ return BLKPREP_DEFER;
+ else {
+ /* This device should be quiet now */
+ __stop_queue(q);
+ smp_mb();
+ BUG_ON(atomic_read(&md->pending));
+ wake_up(&md->wait);
+ return BLKPREP_KILL;
+ }
+ } else
+ /*
+ * The suspend process was interrupted.
+ * So no need to suspend now.
+ */
+ return BLKPREP_KILL;
+ }
+
+ if (unlikely(rq->special)) {
+ DMWARN("Already has something in rq->special.");
+ return BLKPREP_KILL;
+ }
+
+ if (unlikely(!dm_request_based(md))) {
+ DMWARN("Request was queued into bio-based device");
+ return BLKPREP_KILL;
+ }
+
+ tio = alloc_rq_tio(md); /* Only one for each original request */
+ if (!tio)
+ /* -ENOMEM */
+ return BLKPREP_DEFER;
+
+ tio->md = md;
+ tio->ti = NULL;
+ tio->orig = rq;
+ tio->error = 0;
+ memset(&tio->info, 0, sizeof(tio->info));
+
+ clone = &tio->clone;
+ if (setup_clone(clone, rq, tio)) {
+ /* -ENOMEM */
+ free_rq_tio(md, tio);
+ return BLKPREP_DEFER;
+ }
+
+ rq->special = clone;
+ rq->cmd_flags |= REQ_DONTPREP;
+
+ return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+ struct mapped_device *md)
+{
+ int r;
+ struct request *clone = rq->special;
+ struct dm_rq_target_io *tio = clone->end_io_data;
+
+ tio->ti = ti;
+ atomic_inc(&md->pending);
+
- #if 0
- /* This might trigger accidentally */
-
+ /*
+ * Although requests submitted to the md->queue are checked against
+ * the table/queue limitations at submission time, the limitations
+ * may be changed by a table swap while those already-checked
+ * requests are still in the md->queue.
+ * If the limitations have been shrunk in such a situation, we may be
+ * dispatching requests that violate the current limitations here.
+ * Since the block layer and device drivers rely on struct request
+ * being well-formed, dispatching such requests is dangerous
+ * (e.g. it may easily cause a kernel panic).
+ * Avoid dispatching such problematic requests in request-based dm.
+ *
+ * Since dm_kill_request() decrements md->pending, this has to
+ * be done after incrementing md->pending.
+ */
+ r = blk_rq_check_limits(rq->q, rq);
+ if (unlikely(r)) {
+ DMWARN("violating the queue limitation. the limitation may be"
+ " shrunk while there are some requests in the queue.");
+ dm_kill_request(clone, r);
+ return;
+ }
- #endif
+
+ r = ti->type->map_rq(ti, clone, &tio->info);
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ /* The target has taken the I/O to submit by itself later */
+ break;
+ case DM_MAPIO_REMAPPED:
+ /* The target has remapped the I/O so dispatch it */
+ dm_dispatch_request(clone);
+ break;
+ case DM_MAPIO_REQUEUE:
+ /* The target wants to requeue the I/O */
+ dm_requeue_request(clone);
+ break;
+ default:
+ if (r > 0) {
+ DMWARN("unimplemented target map return value: %d", r);
+ BUG();
+ }
+
+ /* The target wants to complete the I/O */
+ dm_kill_request(clone, r);
+ break;
+ }
+}
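+
Correspondingly, a hypothetical map_rq() producing the DM_MAPIO_* values dispatched above could be sketched as follows (the device selection is elided and assumed):

	static int example_map_rq(struct dm_target *ti, struct request *clone,
				  union map_info *map_context)
	{
		struct block_device *bdev = NULL;	/* assumed path selection */

		if (!bdev)
			return DM_MAPIO_REQUEUE;	/* no usable path right now */

		/* Redirect the clone to the chosen underlying queue. */
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
		return DM_MAPIO_REMAPPED;		/* map_request() dispatches it */
	}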
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+ struct mapped_device *md = (struct mapped_device *)q->queuedata;
+ struct dm_table *map = dm_get_table(md);
+ struct dm_target *ti;
+ struct request *rq;
+
+ /*
+ * The check for blk_queue_stopped() is needed here, because:
+ * - device suspend uses blk_stop_queue() and expects that
+ * no I/O will be dispatched any more after the queue stop
+ * - generic_unplug_device() doesn't call q->request_fn()
+ * when the queue is stopped, so no problem
+ * - but underlying device drivers may call q->request_fn()
+ * without the check through blk_run_queue()
+ */
+ while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+ rq = elv_next_request(q);
+ if (!rq)
+ goto plug_and_out;
+
+ ti = dm_table_find_target(map, rq->sector);
+ if (ti->type->busy && ti->type->busy(ti))
+ goto plug_and_out;
+
+ blkdev_dequeue_request(rq);
+ spin_unlock(q->queue_lock);
+ map_request(ti, rq, md);
+ spin_lock_irq(q->queue_lock);
+ }
+
+ goto out;
+
+plug_and_out:
+ if (!elv_queue_empty(q))
+ /* Some requests still remain, retry later */
+ blk_plug_device(q);
+
+out:
+ dm_table_put(map);
+
+ return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+ return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+ int r;
+ struct mapped_device *md = q->queuedata;
+ struct dm_table *map = dm_get_table(md);
+
+ if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
+ r = 1;
+ else
+ r = dm_table_any_busy_target(map);
+
+ dm_table_put(map);
+ return r;
+}
+
static void dm_unplug_all(struct request_queue *q)
{
struct mapped_device *md = q->queuedata;
struct dm_table *map;
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
-- map = dm_get_table(md);
-- if (map) {
- if (dm_request_based(md))
- /*
- * Request-based dm cares about only own queue for
- * the query about congestion status of request_queue
- */
- r = md->queue->backing_dev_info.state & bdi_bits;
- else
- r = dm_table_any_congested(map, bdi_bits);
- dm_table_put(map);
++ if (dm_request_based(md))
++ /*
++ * Request-based dm cares about only own queue for
++ * the query about congestion status of request_queue
++ */
++ r = md->queue->backing_dev_info.state & bdi_bits;
++ else {
++ map = dm_get_table(md);
++ if (map) {
+ r = dm_table_any_congested(map, bdi_bits);
- dm_table_put(map);
++ dm_table_put(map);
++ }
}
}
{
return md->disk;
}
+EXPORT_SYMBOL_GPL(dm_disk);
+ struct kobject *dm_kobject(struct mapped_device *md)
+ {
+ return &md->kobj;
+ }
+
+ /*
+ * struct mapped_device should not be exported outside of dm.c
+ * so use this check to verify that kobj is part of md structure
+ */
+ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
+ {
+ struct mapped_device *md;
+
+ md = container_of(kobj, struct mapped_device, kobj);
+ if (&md->kobj != kobj)
+ return NULL;
+
+ dm_get(md);
+ return md;
+ }
+
int dm_suspended(struct mapped_device *md)
{
return test_bit(DMF_SUSPENDED, &md->flags);
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
+int dm_init_md_mempool(struct mapped_device *md, int type)
+{
+ if (unlikely(type == DM_TYPE_NONE)) {
+ DMWARN("no type is specified, can't initialize mempool");
+ return -EINVAL;
+ }
+
+ if (md->mempool_type == type)
+ return 0;
+
+ if (md->map) {
+ /* The md is already in use; the mempool type can't be changed */
+ DMWARN("can't change mempool type after a table is bound");
+ return -EINVAL;
+ }
+
+ /* The md is not in use yet; we can still change the mempool type */
+ if (md->mempool_type != DM_TYPE_NONE) {
+ mempool_destroy(md->io_pool);
+ md->io_pool = NULL;
+ mempool_destroy(md->tio_pool);
+ md->tio_pool = NULL;
+ bioset_free(md->bs);
+ md->bs = NULL;
+ md->mempool_type = DM_TYPE_NONE;
+ }
+
+ md->io_pool = (type == DM_TYPE_BIO_BASED) ?
+ mempool_create_slab_pool(MIN_IOS, _io_cache) :
- mempool_create_slab_pool(MIN_IOS, _bio_info_cache);
++ mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
+ if (!md->io_pool)
+ return -ENOMEM;
+
+ md->tio_pool = (type == DM_TYPE_BIO_BASED) ?
+ mempool_create_slab_pool(MIN_IOS, _tio_cache) :
+ mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
+ if (!md->tio_pool)
+ goto free_io_pool_and_out;
+
+ md->bs = (type == DM_TYPE_BIO_BASED) ?
+ bioset_create(16, 16) : bioset_create(MIN_IOS, MIN_IOS);
+ if (!md->bs)
+ goto free_tio_pool_and_out;
+
+ md->mempool_type = type;
+
+ return 0;
+
+free_tio_pool_and_out:
+ mempool_destroy(md->tio_pool);
+ md->tio_pool = NULL;
+
+free_io_pool_and_out:
+ mempool_destroy(md->io_pool);
+ md->io_pool = NULL;
+
+ return -ENOMEM;
+}
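+
A hypothetical caller would pick the pool flavour once, before binding a table; dm_table_request_based() is declared elsewhere in this patch:

	/* Hypothetical usage sketch, not part of the patch. */
	r = dm_init_md_mempool(md, dm_table_request_based(t) ?
			       DM_TYPE_REQUEST_BASED : DM_TYPE_BIO_BASED);
	if (r)
		goto bad;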
+
static struct block_device_operations dm_blk_dops = {
.open = dm_blk_open,
.release = dm_blk_close,
#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)
/*
+ * Type of table and mapped_device's mempool
+ */
+#define DM_TYPE_NONE 0
+#define DM_TYPE_BIO_BASED 1
+#define DM_TYPE_REQUEST_BASED 2
+
+/*
* List of devices that a metadevice uses and should open/close.
*/
- struct dm_dev {
+ struct dm_dev_internal {
struct list_head list;
-
atomic_t count;
- int mode;
- struct block_device *bdev;
- char name[16];
+ struct dm_dev dm_dev;
};
struct dm_table;
void dm_table_postsuspend_targets(struct dm_table *t);
int dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+int dm_table_any_busy_target(struct dm_table *t);
+int dm_table_set_type(struct dm_table *t);
+int dm_table_get_type(struct dm_table *t);
+int dm_table_request_based(struct dm_table *t);
- void dm_table_unplug_all(struct dm_table *t);
/*
* To check the return value from dm_table_find_target().
int dm_stripe_init(void);
void dm_stripe_exit(void);
- void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
- union map_info *dm_get_mapinfo(struct bio *bio);
int dm_open_count(struct mapped_device *md);
int dm_lock_for_deletion(struct mapped_device *md);
+union map_info *dm_get_rq_mapinfo(struct request *rq);
void dm_kobject_uevent(struct mapped_device *md);
kobject_del(&rdev->kobj);
goto fail;
}
+ rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
+
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+
+ /* May as well allow recovery to be retried once */
+ mddev->recovery_disabled = 0;
+ md_integrity_check(rdev, mddev);
return 0;
fail:
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev));
err = 0;
+ blk_integrity_unregister(disk);
md_new_event(mddev);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
out:
return err;
}
*/
xpc_clear_remote_msgqueue_flags_sn2(ch);
- smp_wmb(); /* ensure flags have been cleared before bte_copy */
++ wmb(); /* ensure flags have been cleared before bte_copy */
ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
MODULE_DEVICE_TABLE(of, uflash_match);
static struct of_platform_driver uflash_driver = {
+ .owner = THIS_MODULE,
- .name = UFLASH_DEVNAME,
+ .name = DRIVER_NAME,
.match_table = uflash_match,
.probe = uflash_probe,
.remove = __devexit_p(uflash_remove),
return 0;
}
- /* driver device registration */
-
- static struct platform_driver ndfc_chip_driver = {
- .probe = ndfc_chip_probe,
- .remove = ndfc_chip_remove,
- .driver = {
- .name = "ndfc-chip",
- .owner = THIS_MODULE,
- },
+ static const struct of_device_id ndfc_match[] = {
+ { .compatible = "ibm,ndfc", },
+ {}
};
+ MODULE_DEVICE_TABLE(of, ndfc_match);
- static struct platform_driver ndfc_nand_driver = {
- .probe = ndfc_nand_probe,
- .remove = ndfc_nand_remove,
- .driver = {
- .name = "ndfc-nand",
- .owner = THIS_MODULE,
+ static struct of_platform_driver ndfc_driver = {
++ .owner = THIS_MODULE,
+ .driver = {
+ .name = "ndfc",
},
+ .match_table = ndfc_match,
+ .probe = ndfc_probe,
+ .remove = __devexit_p(ndfc_remove),
};
static int __init ndfc_nand_init(void)
return err;
}
+ pci_unmap_page(bp->pdev, mapping_old,
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+
frag_size -= frag_len;
- skb->data_len += frag_len;
- skb->truesize += frag_len;
- skb->len += frag_len;
+ skb_add_rx_frag(skb, i, rx_pg->page, 0, frag_len);
pg_prod = NEXT_RX_BD(pg_prod);
pg_cons = RX_PG_RING_IDX(NEXT_RX_BD(pg_cons));
#include <linux/workqueue.h>
#include <linux/io.h>
#include <linux/netdevice.h>
++#include <asm/cacheflush.h>
#include "hw.h"
u8 __iomem *hw_addr;
u8 __iomem *flash_address;
++ resource_size_t flash_len;
struct e1000_mac_info mac;
struct e1000_fc_info fc;
static inline void __ew16flash(struct e1000_hw *hw, unsigned long reg, u16 val)
{
++#ifdef _ASM_X86_CACHEFLUSH_H
++ set_memory_rw((unsigned long)hw->flash_address,
++ hw->flash_len >> PAGE_SHIFT);
++#endif
writew(val, hw->flash_address + reg);
++#ifdef _ASM_X86_CACHEFLUSH_H
++ set_memory_ro((unsigned long)hw->flash_address,
++ hw->flash_len >> PAGE_SHIFT);
++#endif
}
static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)
{
++#ifdef _ASM_X86_CACHEFLUSH_H
++ set_memory_rw((unsigned long)hw->flash_address,
++ hw->flash_len >> PAGE_SHIFT);
++#endif
writel(val, hw->flash_address + reg);
++#ifdef _ASM_X86_CACHEFLUSH_H
++ set_memory_ro((unsigned long)hw->flash_address,
++ hw->flash_len >> PAGE_SHIFT);
++#endif
}
#define er16flash(reg) __er16flash(hw, (reg))
}
}
+ static const struct net_device_ops e1000e_netdev_ops = {
+ .ndo_open = e1000_open,
+ .ndo_stop = e1000_close,
+ .ndo_start_xmit = e1000_xmit_frame,
+ .ndo_get_stats = e1000_get_stats,
+ .ndo_set_multicast_list = e1000_set_multi,
+ .ndo_set_mac_address = e1000_set_mac,
+ .ndo_change_mtu = e1000_change_mtu,
+ .ndo_do_ioctl = e1000_ioctl,
+ .ndo_tx_timeout = e1000_tx_timeout,
+ .ndo_validate_addr = eth_validate_addr,
+
+ .ndo_vlan_rx_register = e1000_vlan_rx_register,
+ .ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = e1000_netpoll,
+ #endif
+ };
+
+ /**
++ * e1000e_dump_eeprom - write the eeprom to kernel log
++ * @adapter: our adapter struct
++ *
++ * Dump the eeprom for users having checksum issues
++ **/
++static void e1000e_dump_eeprom(struct e1000_adapter *adapter)
++{
++ struct net_device *netdev = adapter->netdev;
++ struct ethtool_eeprom eeprom;
++ const struct ethtool_ops *ops = netdev->ethtool_ops;
++ u8 *data;
++ int i;
++ u16 csum_old, csum_new = 0;
++
++ eeprom.len = ops->get_eeprom_len(netdev);
++ eeprom.offset = 0;
++
++ data = kzalloc(eeprom.len, GFP_KERNEL);
++ if (!data) {
++ printk(KERN_ERR "Unable to allocate memory to dump EEPROM"
++ " data\n");
++ return;
++ }
++
++ ops->get_eeprom(netdev, &eeprom, data);
++
++ csum_old = (data[NVM_CHECKSUM_REG * 2]) +
++ (data[NVM_CHECKSUM_REG * 2 + 1] << 8);
++ for (i = 0; i < NVM_CHECKSUM_REG * 2; i += 2)
++ csum_new += data[i] + (data[i + 1] << 8);
++ csum_new = NVM_SUM - csum_new;
++
++ printk(KERN_ERR "/*********************/\n");
++ printk(KERN_ERR "Current EEPROM Checksum : 0x%04x\n", csum_old);
++ printk(KERN_ERR "Calculated : 0x%04x\n", csum_new);
++
++ printk(KERN_ERR "Offset Values\n");
++ printk(KERN_ERR "======== ======\n");
++ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, data, 128, 0);
++
++ printk(KERN_ERR "/*********************/\n");
++
++ kfree(data);
++}
++
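The checksum arithmetic above relies on the usual Intel NVM invariant: the 16-bit words from offset 0 up to and including NVM_CHECKSUM_REG must sum to NVM_SUM (0xBABA). A worked example, assuming those constants:

	/* If the words before NVM_CHECKSUM_REG sum to 0xA0C4, the
	 * stored checksum must be
	 *	NVM_SUM - 0xA0C4 = 0xBABA - 0xA0C4 = 0x19F6
	 * so that the complete sum wraps to exactly NVM_SUM. */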
+/**
* e1000_probe - Device Initialization Routine
* @pdev: PCI device information struct
* @ent: entry in e1000_pci_tbl
struct e1000_hw *hw;
const struct e1000_info *ei = e1000_info_tbl[ent->driver_data];
resource_size_t mmio_start, mmio_len;
-- resource_size_t flash_start, flash_len;
static int cards_found;
int i, err, pci_using_dac;
if ((adapter->flags & FLAG_HAS_FLASH) &&
(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
-- flash_start = pci_resource_start(pdev, 1);
-- flash_len = pci_resource_len(pdev, 1);
-- adapter->hw.flash_address = ioremap(flash_start, flash_len);
++ adapter->hw.flash_len = pci_resource_len(pdev, 1);
++ adapter->hw.flash_address = ioremap(pci_resource_start(pdev, 1),
++ adapter->hw.flash_len);
if (!adapter->hw.flash_address)
goto err_flashmap;
++#ifdef _ASM_X86_CACHEFLUSH_H
++ set_memory_ro((unsigned long)adapter->hw.flash_address,
++ adapter->hw.flash_len >> PAGE_SHIFT);
++#endif
}
/* construct the net_device struct */
* attempt. Let's give it a few tries
*/
for (i = 0;; i++) {
-- if (e1000_validate_nvm_checksum(&adapter->hw) >= 0)
++ if (e1000_validate_nvm_checksum(hw) >= 0) {
++ /* copy the MAC address out of the NVM */
++ if (e1000e_read_mac_addr(&adapter->hw))
++ e_err("NVM Read Error reading MAC address\n");
break;
++ }
if (i == 2) {
e_err("The NVM Checksum Is Not Valid\n");
-- err = -EIO;
-- goto err_eeprom;
++ e1000e_dump_eeprom(adapter);
++ /*
++ * Set the MAC address to all zeroes to invalidate it and
++ * temporarily disable this device for the user. This
++ * blocks regular traffic while still permitting ethtool
++ * ioctls to reach the hardware, and allows the user to
++ * bring the interface up after manually setting a hw
++ * addr with `ip link set address`.
++ */
++ memset(hw->mac.addr, 0, netdev->addr_len);
++ break;
}
}
e1000_eeprom_checks(adapter);
-- /* copy the MAC address out of the NVM */
-- if (e1000e_read_mac_addr(&adapter->hw))
-- e_err("NVM Read Error while reading MAC address\n");
++ /* debug code ... dump the first bytes of the eeprom for
++ * ICH parts that might suffer eeprom corruption */
++ if (adapter->flags & FLAG_IS_ICH)
++ e1000e_dump_eeprom(adapter);
++ /* don't block initialization here due to a bad MAC address */
memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len);
memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len);
if (!is_valid_ether_addr(netdev->perm_addr)) {
- e_err("Invalid MAC Address: %02x:%02x:%02x:%02x:%02x:%02x\n",
- netdev->perm_addr[0], netdev->perm_addr[1],
- netdev->perm_addr[2], netdev->perm_addr[3],
- netdev->perm_addr[4], netdev->perm_addr[5]);
+ e_err("Invalid MAC Address: %pM\n", netdev->perm_addr);
err = -EIO;
-- goto err_eeprom;
}
init_timer(&adapter->watchdog_timer);
err_register:
if (!(adapter->flags & FLAG_HAS_AMT))
e1000_release_hw_control(adapter);
--err_eeprom:
if (!e1000_check_reset_block(&adapter->hw))
e1000_phy_hw_reset(&adapter->hw);
err_hw_init:
/* work with hotplug and coldplug */
MODULE_ALIAS("platform:fsl-gianfar");
+ static struct of_device_id gfar_match[] =
+ {
+ {
+ .type = "network",
+ .compatible = "gianfar",
+ },
+ {},
+ };
+
/* Structure for a device driver */
- static struct platform_driver gfar_driver = {
+ static struct of_platform_driver gfar_driver = {
++ .owner = THIS_MODULE,
+ .name = "fsl-gianfar",
+ .match_table = gfar_match,
+
.probe = gfar_probe,
.remove = gfar_remove,
.suspend = gfar_suspend,
return 0;
}
- static struct device_driver gianfar_mdio_driver = {
+ static struct of_device_id gfar_mdio_match[] =
+ {
+ {
+ .compatible = "fsl,gianfar-mdio",
+ },
+ {
+ .compatible = "fsl,gianfar-tbi",
+ },
+ {
+ .type = "mdio",
+ .compatible = "gianfar",
+ },
+ {},
+ };
+
+ static struct of_platform_driver gianfar_mdio_driver = {
++ .owner = THIS_MODULE,
.name = "fsl-gianfar_mdio",
- .bus = &platform_bus_type,
+ .match_table = gfar_mdio_match,
+
.probe = gfar_mdio_probe,
.remove = gfar_mdio_remove,
};
--- /dev/null
+ /*
+ * GPIO based MDIO bitbang driver.
+ * Supports OpenFirmware.
+ *
+ * Copyright (c) 2008 CSE Semaphore Belgium.
+ * by Laurent Pinchart <laurentp@cse-semaphore.com>
+ *
+ * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on earlier work by
+ *
+ * Copyright (c) 2003 Intracom S.A.
+ * by Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2005 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/platform_device.h>
+ #include <linux/gpio.h>
+ #include <linux/mdio-gpio.h>
+
+ #ifdef CONFIG_OF_GPIO
+ #include <linux/of_gpio.h>
+ #include <linux/of_platform.h>
+ #endif
+
+ struct mdio_gpio_info {
+ struct mdiobb_ctrl ctrl;
+ int mdc, mdio;
+ };
+
+ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
+ {
+ struct mdio_gpio_info *bitbang =
+ container_of(ctrl, struct mdio_gpio_info, ctrl);
+
+ if (dir)
+ gpio_direction_output(bitbang->mdio, 1);
+ else
+ gpio_direction_input(bitbang->mdio);
+ }
+
+ static int mdio_get(struct mdiobb_ctrl *ctrl)
+ {
+ struct mdio_gpio_info *bitbang =
+ container_of(ctrl, struct mdio_gpio_info, ctrl);
+
+ return gpio_get_value(bitbang->mdio);
+ }
+
+ static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
+ {
+ struct mdio_gpio_info *bitbang =
+ container_of(ctrl, struct mdio_gpio_info, ctrl);
+
+ gpio_set_value(bitbang->mdio, what);
+ }
+
+ static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
+ {
+ struct mdio_gpio_info *bitbang =
+ container_of(ctrl, struct mdio_gpio_info, ctrl);
+
+ gpio_set_value(bitbang->mdc, what);
+ }
+
+ static struct mdiobb_ops mdio_gpio_ops = {
+ .owner = THIS_MODULE,
+ .set_mdc = mdc_set,
+ .set_mdio_dir = mdio_dir,
+ .set_mdio_data = mdio_set,
+ .get_mdio_data = mdio_get,
+ };
+
+ static int __devinit mdio_gpio_bus_init(struct device *dev,
+ struct mdio_gpio_platform_data *pdata,
+ int bus_id)
+ {
+ struct mii_bus *new_bus;
+ struct mdio_gpio_info *bitbang;
+ int ret = -ENOMEM;
+ int i;
+
+ bitbang = kzalloc(sizeof(*bitbang), GFP_KERNEL);
+ if (!bitbang)
+ goto out;
+
+ bitbang->ctrl.ops = &mdio_gpio_ops;
+ bitbang->mdc = pdata->mdc;
+ bitbang->mdio = pdata->mdio;
+
+ new_bus = alloc_mdio_bitbang(&bitbang->ctrl);
+ if (!new_bus)
+ goto out_free_bitbang;
+
+ new_bus->name = "GPIO Bitbanged MDIO";
+
+ ret = -ENODEV;
+
+ new_bus->phy_mask = pdata->phy_mask;
+ new_bus->irq = pdata->irqs;
+ new_bus->parent = dev;
+
+ if (new_bus->phy_mask == ~0)
+ goto out_free_bus;
+
+ for (i = 0; i < PHY_MAX_ADDR; i++)
+ if (!new_bus->irq[i])
+ new_bus->irq[i] = PHY_POLL;
+
+ snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", bus_id);
+
+ if (gpio_request(bitbang->mdc, "mdc"))
+ goto out_free_bus;
+
+ if (gpio_request(bitbang->mdio, "mdio"))
+ goto out_free_mdc;
+
+ dev_set_drvdata(dev, new_bus);
+
+ ret = mdiobus_register(new_bus);
+ if (ret)
+ goto out_free_all;
+
+ return 0;
+
+ out_free_all:
+ dev_set_drvdata(dev, NULL);
+ gpio_free(bitbang->mdio);
+ out_free_mdc:
+ gpio_free(bitbang->mdc);
+ out_free_bus:
+ free_mdio_bitbang(new_bus);
+ out_free_bitbang:
+ kfree(bitbang);
+ out:
+ return ret;
+ }
+
+ static void __devexit mdio_gpio_bus_destroy(struct device *dev)
+ {
+ struct mii_bus *bus = dev_get_drvdata(dev);
+ struct mdio_gpio_info *bitbang = bus->priv;
+
+ mdiobus_unregister(bus);
+ free_mdio_bitbang(bus);
+ dev_set_drvdata(dev, NULL);
+ gpio_free(bitbang->mdc);
+ gpio_free(bitbang->mdio);
+ kfree(bitbang);
+ }
+
+ static int __devinit mdio_gpio_probe(struct platform_device *pdev)
+ {
+ struct mdio_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+ if (!pdata)
+ return -ENODEV;
+
+ return mdio_gpio_bus_init(&pdev->dev, pdata, pdev->id);
+ }
+
+ static int __devexit mdio_gpio_remove(struct platform_device *pdev)
+ {
+ mdio_gpio_bus_destroy(&pdev->dev);
+
+ return 0;
+ }
+
+ #ifdef CONFIG_OF_GPIO
+ static void __devinit add_phy(struct mdio_gpio_platform_data *pdata,
+ struct device_node *np)
+ {
+ const u32 *data;
+ int len, id, irq;
+
+ data = of_get_property(np, "reg", &len);
+ if (!data || len != 4)
+ return;
+
+ id = *data;
+ pdata->phy_mask &= ~(1 << id);
+
+ irq = of_irq_to_resource(np, 0, NULL);
+ if (irq)
+ pdata->irqs[id] = irq;
+ }
+
+ static int __devinit mdio_ofgpio_probe(struct of_device *ofdev,
+ const struct of_device_id *match)
+ {
+ struct device_node *np = NULL;
+ struct mdio_gpio_platform_data *pdata;
+
+ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ pdata->mdc = of_get_gpio(ofdev->node, 0);
+ pdata->mdio = of_get_gpio(ofdev->node, 1);
+
+ if (pdata->mdc < 0 || pdata->mdio < 0)
+ goto out_free;
+
+ while ((np = of_get_next_child(ofdev->node, np)))
+ if (!strcmp(np->type, "ethernet-phy"))
+ add_phy(pdata, np);
+
+ return mdio_gpio_bus_init(&ofdev->dev, pdata, pdata->mdc);
+
+ out_free:
+ kfree(pdata);
+ return -ENODEV;
+ }
+
+ static int __devexit mdio_ofgpio_remove(struct of_device *ofdev)
+ {
+ mdio_gpio_bus_destroy(&ofdev->dev);
+ kfree(ofdev->dev.platform_data);
+
+ return 0;
+ }
+
+ static struct of_device_id mdio_ofgpio_match[] = {
+ {
+ .compatible = "virtual,mdio-gpio",
+ },
+ {},
+ };
+
+ static struct of_platform_driver mdio_ofgpio_driver = {
++ .owner = THIS_MODULE,
+ .name = "mdio-gpio",
+ .match_table = mdio_ofgpio_match,
+ .probe = mdio_ofgpio_probe,
+ .remove = __devexit_p(mdio_ofgpio_remove),
+ };
+
+ static inline int __init mdio_ofgpio_init(void)
+ {
+ return of_register_platform_driver(&mdio_ofgpio_driver);
+ }
+
+ static inline void __exit mdio_ofgpio_exit(void)
+ {
+ of_unregister_platform_driver(&mdio_ofgpio_driver);
+ }
+ #else
+ static inline int __init mdio_ofgpio_init(void) { return 0; }
+ static inline void __exit mdio_ofgpio_exit(void) { }
+ #endif /* CONFIG_OF_GPIO */
+
+ static struct platform_driver mdio_gpio_driver = {
+ .probe = mdio_gpio_probe,
+ .remove = __devexit_p(mdio_gpio_remove),
+ .driver = {
+ .name = "mdio-gpio",
+ .owner = THIS_MODULE,
+ },
+ };
+
+ static int __init mdio_gpio_init(void)
+ {
+ int ret;
+
+ ret = mdio_ofgpio_init();
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&mdio_gpio_driver);
+ if (ret)
+ mdio_ofgpio_exit();
+
+ return ret;
+ }
+ module_init(mdio_gpio_init);
+
+ static void __exit mdio_gpio_exit(void)
+ {
+ platform_driver_unregister(&mdio_gpio_driver);
+ mdio_ofgpio_exit();
+ }
+ module_exit(mdio_gpio_exit);
+
+ MODULE_ALIAS("platform:mdio-gpio");
+ MODULE_AUTHOR("Laurent Pinchart, Paulius Zaleckas");
+ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("Generic driver for MDIO bus emulation using GPIO");
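For the non-OF binding, a board file would register a matching platform device. A minimal sketch, with hypothetical GPIO numbers and bus id:

	/* Illustrative board-support code; the GPIO numbers and the
	 * phy_mask are board-specific and purely illustrative. */
	static struct mdio_gpio_platform_data board_mdio_pdata = {
		.mdc		= 10,		/* GPIO on the MDC clock line */
		.mdio		= 11,		/* GPIO on the MDIO data line */
		.phy_mask	= ~(1 << 1),	/* probe only PHY address 1 */
	};

	static struct platform_device board_mdio_device = {
		.name			= "mdio-gpio",	/* matches the driver */
		.id			= 0,
		.dev.platform_data	= &board_mdio_pdata,
	};

	/* in the board init code: */
	platform_device_register(&board_mdio_device);

Unset entries in pdata->irqs read as zero, so the loop in mdio_gpio_bus_init() falls back to PHY_POLL for those addresses.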
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5723)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)},
+ {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761S)},
+ {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761SE)},
- {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5785)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57760)},
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)},
if (val & VCPU_CFGSHDW_ASPM_DBNC)
tp->tg3_flags |= TG3_FLAG_ASPM_WORKAROUND;
if ((val & VCPU_CFGSHDW_WOL_ENABLE) &&
- (val & VCPU_CFGSHDW_WOL_MAGPKT))
+ (val & VCPU_CFGSHDW_WOL_MAGPKT)) {
tp->tg3_flags |= TG3_FLAG_WOL_ENABLE;
+ device_set_wakeup_enable(&tp->pdev->dev, true);
+ }
- return;
+ goto done;
}
tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
tp->tg3_flags &= ~TG3_FLAG_WOL_CAP;
if ((tp->tg3_flags & TG3_FLAG_WOL_CAP) &&
- (nic_cfg & NIC_SRAM_DATA_CFG_WOL_ENABLE))
+ (nic_cfg & NIC_SRAM_DATA_CFG_WOL_ENABLE)) {
tp->tg3_flags |= TG3_FLAG_WOL_ENABLE;
- device_set_wakeup_enable(&tp->pdev->dev, true);
++ device_set_wakeup_enable(&tp->pdev->dev, true);
+ }
if (cfg2 & (1 << 17))
tp->tg3_flags2 |= TG3_FLG2_CAPACITIVE_COUPLING;
struct delayed_work alive_start;
struct delayed_work activity_timer;
struct delayed_work thermal_periodic;
+ struct delayed_work rfkill_poll;
struct delayed_work gather_stats;
struct delayed_work scan_check;
- struct delayed_work post_associate;
#define IWL_DEFAULT_TX_POWER 0x0F
s8 user_txpower_limit;
hw_rf_kill = 1;
IWL_DEBUG(IWL_DL_RF_KILL, "RF_KILL bit toggled to %s.\n",
- hw_rf_kill ? "disable radio":"enable radio");
+ hw_rf_kill ? "disable radio" : "enable radio");
 /* the driver only loads ucode when the interface is brought up,
- * the driver allows loading the ucode even if the radio
- * is killed. Hence update the killswitch state here. The
- * rfkill handler will care about restarting if needed.
- */
- if (!test_bit(STATUS_ALIVE, &priv->status)) {
- if (hw_rf_kill)
- set_bit(STATUS_RF_KILL_HW, &priv->status);
- else
- clear_bit(STATUS_RF_KILL_HW, &priv->status);
- queue_work(priv->workqueue, &priv->rf_kill);
- * the driver as well won't allow loading if RFKILL is set
- * therefore no need to restart the driver from this handler
++ * but it allows loading the ucode even if the radio is
++ * killed, so update the killswitch state here; the rfkill
++ * handler will take care of restarting if needed.
+ */
- if (!hw_rf_kill && !test_bit(STATUS_ALIVE, &priv->status)) {
- clear_bit(STATUS_RF_KILL_HW, &priv->status);
- if (priv->is_open && !iwl_is_rfkill(priv))
- queue_work(priv->workqueue, &priv->up);
++ if (!test_bit(STATUS_ALIVE, &priv->status)) {
++ if (hw_rf_kill)
++ set_bit(STATUS_RF_KILL_HW, &priv->status);
++ else {
++ clear_bit(STATUS_RF_KILL_HW, &priv->status);
++ if (priv->is_open && !iwl_is_rfkill(priv))
++ queue_work(priv->workqueue, &priv->up);
++ }
++ queue_work(priv->workqueue, &priv->rf_kill);
}
handled |= CSR_INT_BIT_RF_KILL;
priv->is_open = 1;
IWL_DEBUG_MAC80211("leave\n");
return 0;
-
-out_release_irq:
- free_irq(priv->pci_dev->irq, priv);
-out_disable_msi:
- pci_disable_msi(priv->pci_dev);
- pci_disable_device(priv->pci_dev);
- priv->is_open = 0;
- IWL_DEBUG_MAC80211("leave - failed\n");
- return ret;
}
- static void iwl4965_mac_stop(struct ieee80211_hw *hw)
+ static void iwl_mac_stop(struct ieee80211_hw *hw)
{
struct iwl_priv *priv = hw->priv;
mutex_unlock(&priv->mutex);
}
- iwl4965_down(priv);
+ iwl_down(priv);
flush_workqueue(priv->workqueue);
- free_irq(priv->pci_dev->irq, priv);
- pci_disable_msi(priv->pci_dev);
- pci_save_state(priv->pci_dev);
- pci_disable_device(priv->pci_dev);
+
+ /* enable interrupts again in order to receive rfkill changes */
+ iwl_write32(priv, CSR_INT, 0xFFFFFFFF);
- iwl4965_enable_interrupts(priv);
++ iwl_enable_interrupts(priv);
IWL_DEBUG_MAC80211("leave\n");
}
struct ieee80211_hw *hw;
struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data);
unsigned long flags;
- DECLARE_MAC_BUF(mac);
+ u16 pci_cmd;
/************************
* 1. Allocating HW data
* 8. Setup services
********************/
spin_lock_irqsave(&priv->lock, flags);
- iwl4965_disable_interrupts(priv);
+ iwl_disable_interrupts(priv);
spin_unlock_irqrestore(&priv->lock, flags);
+ pci_enable_msi(priv->pci_dev);
+
- err = request_irq(priv->pci_dev->irq, iwl4965_isr, IRQF_SHARED,
++ err = request_irq(priv->pci_dev->irq, iwl_isr, IRQF_SHARED,
+ DRV_NAME, priv);
+ if (err) {
+ IWL_ERROR("Error allocating IRQ %d\n", priv->pci_dev->irq);
+ goto out_disable_msi;
+ }
- err = sysfs_create_group(&pdev->dev.kobj, &iwl4965_attribute_group);
+ err = sysfs_create_group(&pdev->dev.kobj, &iwl_attribute_group);
if (err) {
IWL_ERROR("failed to create sysfs device attributes\n");
goto out_uninit_drv;
iwl_setup_deferred_work(priv);
iwl_setup_rx_handlers(priv);
- /********************
- * 9. Conclude
- ********************/
- pci_save_state(pdev);
- pci_disable_device(pdev);
-
/**********************************
- * 10. Setup and register mac80211
+ * 9. Setup and register mac80211
**********************************/
+ /* enable interrupts if needed: hardware bug workaround */
+ pci_read_config_word(priv->pci_dev, PCI_COMMAND, &pci_cmd);
+ if (pci_cmd & PCI_COMMAND_INTX_DISABLE) {
+ pci_cmd &= ~PCI_COMMAND_INTX_DISABLE;
+ pci_write_config_word(priv->pci_dev, PCI_COMMAND, pci_cmd);
+ }
+
- iwl4965_enable_interrupts(priv);
++ iwl_enable_interrupts(priv);
+
err = iwl_setup_mac(priv);
if (err)
goto out_remove_sysfs;
return 0;
out_remove_sysfs:
- sysfs_remove_group(&pdev->dev.kobj, &iwl4965_attribute_group);
+ sysfs_remove_group(&pdev->dev.kobj, &iwl_attribute_group);
+ out_disable_msi:
+ pci_disable_msi(priv->pci_dev);
+ pci_disable_device(priv->pci_dev);
out_uninit_drv:
iwl_uninit_drv(priv);
out_free_eeprom:
struct iwl_priv *priv = pci_get_drvdata(pdev);
pci_set_power_state(pdev, PCI_D0);
+ pci_enable_device(pdev);
+ pci_restore_state(pdev);
- iwl4965_enable_interrupts(priv);
++ iwl_enable_interrupts(priv);
if (priv->is_open)
- iwl4965_mac_start(priv->hw);
+ iwl_mac_start(priv->hw);
clear_bit(STATUS_IN_SUSPEND, &priv->status);
return 0;
iwl3945_rfkill_set_hw_state(priv);
}
+static void iwl3945_rfkill_poll(struct work_struct *data)
+{
+ struct iwl3945_priv *priv =
+ container_of(data, struct iwl3945_priv, rfkill_poll.work);
+ unsigned long status = priv->status;
+
+ if (iwl3945_read32(priv, CSR_GP_CNTRL) & CSR_GP_CNTRL_REG_FLAG_HW_RF_KILL_SW)
+ clear_bit(STATUS_RF_KILL_HW, &priv->status);
+ else
+ set_bit(STATUS_RF_KILL_HW, &priv->status);
+
+ if (test_bit(STATUS_RF_KILL_HW, &status) != test_bit(STATUS_RF_KILL_HW, &priv->status))
+ queue_work(priv->workqueue, &priv->rf_kill);
+
+ queue_delayed_work(priv->workqueue, &priv->rfkill_poll,
+ round_jiffies_relative(2 * HZ));
+
+}
+
- static void iwl3945_bg_set_monitor(struct work_struct *work)
- {
- struct iwl3945_priv *priv = container_of(work,
- struct iwl3945_priv, set_monitor);
-
- IWL_DEBUG(IWL_DL_STATE, "setting monitor mode\n");
-
- mutex_lock(&priv->mutex);
-
- if (!iwl3945_is_ready(priv))
- IWL_DEBUG(IWL_DL_STATE, "leave - not ready\n");
- else
- if (iwl3945_set_mode(priv, IEEE80211_IF_TYPE_MNTR) != 0)
- IWL_ERROR("iwl3945_set_mode() failed\n");
-
- mutex_unlock(&priv->mutex);
- }
-
#define IWL_SCAN_CHECK_WATCHDOG (7 * HZ)
static void iwl3945_bg_scan_check(struct work_struct *data)
IWL_ERROR("Unable to initialize RFKILL system. "
"Ignoring error: %d\n", err);
+ /* Start monitoring the killswitch */
+ queue_delayed_work(priv->workqueue, &priv->rfkill_poll,
+ 2 * HZ);
+
return 0;
+ out_remove_sysfs:
+ sysfs_remove_group(&pdev->dev.kobj, &iwl3945_attribute_group);
out_free_geos:
iwl3945_free_geos(priv);
out_free_channel_map:
iwl3945_free_channel_map(priv);
- out_remove_sysfs:
- sysfs_remove_group(&pdev->dev.kobj, &iwl3945_attribute_group);
+
out_release_irq:
+ free_irq(priv->pci_dev->irq, priv);
destroy_workqueue(priv->workqueue);
priv->workqueue = NULL;
iwl3945_unset_hw_setting(priv);
--- /dev/null
+ /*
+ * thinkpad_acpi.c - ThinkPad ACPI Extras
+ *
+ *
+ * Copyright (C) 2004-2005 Borislav Deianov <borislav@users.sf.net>
+ * Copyright (C) 2006-2008 Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+ #define TPACPI_VERSION "0.22"
+ #define TPACPI_SYSFS_VERSION 0x020200
+
+ /*
+ * Changelog:
+ * 2007-10-20 changelog trimmed down
+ *
+ * 2007-03-27 0.14 renamed to thinkpad_acpi and moved to
+ * drivers/misc.
+ *
+ * 2006-11-22 0.13 new maintainer
+ * changelog now lives in git commit history, and will
+ * not be updated further in-file.
+ *
+ * 2005-03-17 0.11 support for 600e, 770x
+ * thanks to Jamie Lentin <lentinj@dial.pipex.com>
+ *
+ * 2005-01-16 0.9 use MODULE_VERSION
+ * thanks to Henrik Brix Andersen <brix@gentoo.org>
+ * fix parameter passing on module loading
+ * thanks to Rusty Russell <rusty@rustcorp.com.au>
+ * thanks to Jim Radford <radford@blackbean.org>
+ * 2004-11-08 0.8 fix init error case, don't return from a macro
+ * thanks to Chris Wright <chrisw@osdl.org>
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/types.h>
+ #include <linux/string.h>
+ #include <linux/list.h>
+ #include <linux/mutex.h>
+ #include <linux/kthread.h>
+ #include <linux/freezer.h>
+ #include <linux/delay.h>
+
+ #include <linux/nvram.h>
+ #include <linux/proc_fs.h>
+ #include <linux/sysfs.h>
+ #include <linux/backlight.h>
+ #include <linux/fb.h>
+ #include <linux/platform_device.h>
+ #include <linux/hwmon.h>
+ #include <linux/hwmon-sysfs.h>
+ #include <linux/input.h>
+ #include <linux/leds.h>
+ #include <linux/rfkill.h>
+ #include <asm/uaccess.h>
+
+ #include <linux/dmi.h>
+ #include <linux/jiffies.h>
+ #include <linux/workqueue.h>
+
+ #include <acpi/acpi_drivers.h>
+
+ #include <linux/pci_ids.h>
+
+
+ /* ThinkPad CMOS commands */
+ #define TP_CMOS_VOLUME_DOWN 0
+ #define TP_CMOS_VOLUME_UP 1
+ #define TP_CMOS_VOLUME_MUTE 2
+ #define TP_CMOS_BRIGHTNESS_UP 4
+ #define TP_CMOS_BRIGHTNESS_DOWN 5
+ #define TP_CMOS_THINKLIGHT_ON 12
+ #define TP_CMOS_THINKLIGHT_OFF 13
+
+ /* NVRAM Addresses */
+ enum tp_nvram_addr {
+ TP_NVRAM_ADDR_HK2 = 0x57,
+ TP_NVRAM_ADDR_THINKLIGHT = 0x58,
+ TP_NVRAM_ADDR_VIDEO = 0x59,
+ TP_NVRAM_ADDR_BRIGHTNESS = 0x5e,
+ TP_NVRAM_ADDR_MIXER = 0x60,
+ };
+
+ /* NVRAM bit masks */
+ enum {
+ TP_NVRAM_MASK_HKT_THINKPAD = 0x08,
+ TP_NVRAM_MASK_HKT_ZOOM = 0x20,
+ TP_NVRAM_MASK_HKT_DISPLAY = 0x40,
+ TP_NVRAM_MASK_HKT_HIBERNATE = 0x80,
+ TP_NVRAM_MASK_THINKLIGHT = 0x10,
+ TP_NVRAM_MASK_HKT_DISPEXPND = 0x30,
+ TP_NVRAM_MASK_HKT_BRIGHTNESS = 0x20,
+ TP_NVRAM_MASK_LEVEL_BRIGHTNESS = 0x0f,
+ TP_NVRAM_POS_LEVEL_BRIGHTNESS = 0,
+ TP_NVRAM_MASK_MUTE = 0x40,
+ TP_NVRAM_MASK_HKT_VOLUME = 0x80,
+ TP_NVRAM_MASK_LEVEL_VOLUME = 0x0f,
+ TP_NVRAM_POS_LEVEL_VOLUME = 0,
+ };
+
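A sketch of how the level masks and bit positions above decode a raw NVRAM byte (using the nvram_read_byte() accessor from <linux/nvram.h>; the snippet is illustrative):

	/* Mixer byte: volume level in the low nibble, mute in bit 6. */
	u8 b = nvram_read_byte(TP_NVRAM_ADDR_MIXER);
	u8 volume_level = (b & TP_NVRAM_MASK_LEVEL_VOLUME)
				>> TP_NVRAM_POS_LEVEL_VOLUME;
	bool muted = !!(b & TP_NVRAM_MASK_MUTE);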
+ /* ACPI HIDs */
+ #define TPACPI_ACPI_HKEY_HID "IBM0068"
+
+ /* Input IDs */
+ #define TPACPI_HKEY_INPUT_PRODUCT 0x5054 /* "TP" */
+ #define TPACPI_HKEY_INPUT_VERSION 0x4101
+
+ /* ACPI \WGSV commands */
+ enum {
+ TP_ACPI_WGSV_GET_STATE = 0x01, /* Get state information */
+ TP_ACPI_WGSV_PWR_ON_ON_RESUME = 0x02, /* Resume WWAN powered on */
+ TP_ACPI_WGSV_PWR_OFF_ON_RESUME = 0x03, /* Resume WWAN powered off */
+ TP_ACPI_WGSV_SAVE_STATE = 0x04, /* Save state for S4/S5 */
+ };
+
+ /* TP_ACPI_WGSV_GET_STATE bits */
+ enum {
+ TP_ACPI_WGSV_STATE_WWANEXIST = 0x0001, /* WWAN hw available */
+ TP_ACPI_WGSV_STATE_WWANPWR = 0x0002, /* WWAN radio enabled */
+ TP_ACPI_WGSV_STATE_WWANPWRRES = 0x0004, /* WWAN state at resume */
+ TP_ACPI_WGSV_STATE_WWANBIOSOFF = 0x0008, /* WWAN disabled in BIOS */
+ TP_ACPI_WGSV_STATE_BLTHEXIST = 0x0001, /* BLTH hw available */
+ TP_ACPI_WGSV_STATE_BLTHPWR = 0x0002, /* BLTH radio enabled */
+ TP_ACPI_WGSV_STATE_BLTHPWRRES = 0x0004, /* BLTH state at resume */
+ TP_ACPI_WGSV_STATE_BLTHBIOSOFF = 0x0008, /* BLTH disabled in BIOS */
+ TP_ACPI_WGSV_STATE_UWBEXIST = 0x0010, /* UWB hw available */
+ TP_ACPI_WGSV_STATE_UWBPWR = 0x0020, /* UWB radio enabled */
+ };
+
+ /****************************************************************************
+ * Main driver
+ */
+
+ #define TPACPI_NAME "thinkpad"
+ #define TPACPI_DESC "ThinkPad ACPI Extras"
+ #define TPACPI_FILE TPACPI_NAME "_acpi"
+ #define TPACPI_URL "http://ibm-acpi.sf.net/"
+ #define TPACPI_MAIL "ibm-acpi-devel@lists.sourceforge.net"
+
+ #define TPACPI_PROC_DIR "ibm"
+ #define TPACPI_ACPI_EVENT_PREFIX "ibm"
+ #define TPACPI_DRVR_NAME TPACPI_FILE
+ #define TPACPI_DRVR_SHORTNAME "tpacpi"
+ #define TPACPI_HWMON_DRVR_NAME TPACPI_NAME "_hwmon"
+
+ #define TPACPI_NVRAM_KTHREAD_NAME "ktpacpi_nvramd"
+ #define TPACPI_WORKQUEUE_NAME "ktpacpid"
+
+ #define TPACPI_MAX_ACPI_ARGS 3
+
+ /* rfkill switches */
+ enum {
+ TPACPI_RFK_BLUETOOTH_SW_ID = 0,
+ TPACPI_RFK_WWAN_SW_ID,
+ TPACPI_RFK_UWB_SW_ID,
+ };
+
+ /* Debugging */
+ #define TPACPI_LOG TPACPI_FILE ": "
+ #define TPACPI_ALERT KERN_ALERT TPACPI_LOG
+ #define TPACPI_CRIT KERN_CRIT TPACPI_LOG
+ #define TPACPI_ERR KERN_ERR TPACPI_LOG
+ #define TPACPI_NOTICE KERN_NOTICE TPACPI_LOG
+ #define TPACPI_INFO KERN_INFO TPACPI_LOG
+ #define TPACPI_DEBUG KERN_DEBUG TPACPI_LOG
+
+ #define TPACPI_DBG_ALL 0xffff
+ #define TPACPI_DBG_INIT 0x0001
+ #define TPACPI_DBG_EXIT 0x0002
+ #define dbg_printk(a_dbg_level, format, arg...) \
+ do { if (dbg_level & a_dbg_level) \
+ printk(TPACPI_DEBUG "%s: " format, __func__ , ## arg); \
+ } while (0)
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUG
+ #define vdbg_printk(a_dbg_level, format, arg...) \
+ dbg_printk(a_dbg_level, format, ## arg)
+ static const char *str_supported(int is_supported);
+ #else
+ #define vdbg_printk(a_dbg_level, format, arg...)
+ #endif
+
+ #define onoff(status, bit) ((status) & (1 << (bit)) ? "on" : "off")
+ #define enabled(status, bit) ((status) & (1 << (bit)) ? "enabled" : "disabled")
+ #define strlencmp(a, b) (strncmp((a), (b), strlen(b)))
+
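Note that strlencmp() compares only strlen(b) characters, so it is really a "b is a prefix of a" test:

	strlencmp("enable",  "enable");		/* == 0, exact match   */
	strlencmp("enabled", "enable");		/* == 0, prefix match! */
	strlencmp("enable",  "enabled");	/* != 0                */

Command names that are prefixes of longer inputs still match, so order such tests from most to least specific.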
+
+ /****************************************************************************
+ * Driver-wide structs and misc. variables
+ */
+
+ struct ibm_struct;
+
+ struct tp_acpi_drv_struct {
+ const struct acpi_device_id *hid;
+ struct acpi_driver *driver;
+
+ void (*notify) (struct ibm_struct *, u32);
+ acpi_handle *handle;
+ u32 type;
+ struct acpi_device *device;
+ };
+
+ struct ibm_struct {
+ char *name;
+
+ int (*read) (char *);
+ int (*write) (char *);
+ void (*exit) (void);
+ void (*resume) (void);
+ void (*suspend) (pm_message_t state);
+ void (*shutdown) (void);
+
+ struct list_head all_drivers;
+
+ struct tp_acpi_drv_struct *acpi;
+
+ struct {
+ u8 acpi_driver_registered:1;
+ u8 acpi_notify_installed:1;
+ u8 proc_created:1;
+ u8 init_called:1;
+ u8 experimental:1;
+ } flags;
+ };
+
+ struct ibm_init_struct {
+ char param[32];
+
+ int (*init) (struct ibm_init_struct *);
+ struct ibm_struct *data;
+ };
+
+ static struct {
+ #ifdef CONFIG_THINKPAD_ACPI_BAY
+ u32 bay_status:1;
+ u32 bay_eject:1;
+ u32 bay_status2:1;
+ u32 bay_eject2:1;
+ #endif
+ u32 bluetooth:1;
+ u32 hotkey:1;
+ u32 hotkey_mask:1;
+ u32 hotkey_wlsw:1;
+ u32 hotkey_tablet:1;
+ u32 light:1;
+ u32 light_status:1;
+ u32 bright_16levels:1;
+ u32 bright_acpimode:1;
++ u32 bright_igdmode:1;
+ u32 wan:1;
+ u32 uwb:1;
+ u32 fan_ctrl_status_undef:1;
+ u32 input_device_registered:1;
+ u32 platform_drv_registered:1;
+ u32 platform_drv_attrs_registered:1;
+ u32 sensors_pdrv_registered:1;
+ u32 sensors_pdrv_attrs_registered:1;
+ u32 sensors_pdev_attrs_registered:1;
+ u32 hotkey_poll_active:1;
+ } tp_features;
+
+ static struct {
+ u16 hotkey_mask_ff:1;
+ u16 bright_cmos_ec_unsync:1;
+ } tp_warned;
+
+ struct thinkpad_id_data {
+ unsigned int vendor; /* ThinkPad vendor:
+ * PCI_VENDOR_ID_IBM/PCI_VENDOR_ID_LENOVO */
+
+ char *bios_version_str; /* Something like 1ZET51WW (1.03z) */
+ char *ec_version_str; /* Something like 1ZHT51WW-1.04a */
+
+ u16 bios_model; /* Big Endian, TP-1Y = 0x5931, 0 = unknown */
+ u16 ec_model;
+
+ char *model_str; /* ThinkPad T43 */
+ char *nummodel_str; /* 9384A9C for a 9384-A9C model */
+ };
+ static struct thinkpad_id_data thinkpad_id;
+
+ static enum {
+ TPACPI_LIFE_INIT = 0,
+ TPACPI_LIFE_RUNNING,
+ TPACPI_LIFE_EXITING,
+ } tpacpi_lifecycle;
+
+ static int experimental;
+ static u32 dbg_level;
+
+ static struct workqueue_struct *tpacpi_wq;
+
+ /* Special LED class that can defer work */
+ struct tpacpi_led_classdev {
+ struct led_classdev led_classdev;
+ struct work_struct work;
+ enum led_brightness new_brightness;
+ unsigned int led;
+ };
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ static int dbg_wlswemul;
+ static int tpacpi_wlsw_emulstate;
+ static int dbg_bluetoothemul;
+ static int tpacpi_bluetooth_emulstate;
+ static int dbg_wwanemul;
+ static int tpacpi_wwan_emulstate;
+ static int dbg_uwbemul;
+ static int tpacpi_uwb_emulstate;
+ #endif
+
+
+ /****************************************************************************
+ ****************************************************************************
+ *
+ * ACPI Helpers and device model
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+ /*************************************************************************
+ * ACPI basic handles
+ */
+
+ static acpi_handle root_handle;
+
+ #define TPACPI_HANDLE(object, parent, paths...) \
+ static acpi_handle object##_handle; \
+ static acpi_handle *object##_parent = &parent##_handle; \
+ static char *object##_path; \
+ static char *object##_paths[] = { paths }
+
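For reference, one invocation of the macro hand-expanded (the single-path ecrd handle declared below, for instance):

	/* TPACPI_HANDLE(ecrd, ec, "ECRD") expands, roughly, to: */
	static acpi_handle ecrd_handle;
	static acpi_handle *ecrd_parent = &ec_handle;
	static char *ecrd_path;
	static char *ecrd_paths[] = { "ECRD" };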
+ TPACPI_HANDLE(ec, root, "\\_SB.PCI0.ISA.EC0", /* 240, 240x */
+ "\\_SB.PCI.ISA.EC", /* 570 */
+ "\\_SB.PCI0.ISA0.EC0", /* 600e/x, 770e, 770x */
+ "\\_SB.PCI0.ISA.EC", /* A21e, A2xm/p, T20-22, X20-21 */
+ "\\_SB.PCI0.AD4S.EC0", /* i1400, R30 */
+ "\\_SB.PCI0.ICH3.EC0", /* R31 */
+ "\\_SB.PCI0.LPC.EC", /* all others */
+ );
+
+ TPACPI_HANDLE(ecrd, ec, "ECRD"); /* 570 */
+ TPACPI_HANDLE(ecwr, ec, "ECWR"); /* 570 */
+
+ TPACPI_HANDLE(cmos, root, "\\UCMS", /* R50, R50e, R50p, R51, */
+ /* T4x, X31, X40 */
+ "\\CMOS", /* A3x, G4x, R32, T23, T30, X22-24, X30 */
+ "\\CMS", /* R40, R40e */
+ ); /* all others */
+
+ TPACPI_HANDLE(hkey, ec, "\\_SB.HKEY", /* 600e/x, 770e, 770x */
+ "^HKEY", /* R30, R31 */
+ "HKEY", /* all others */
+ ); /* 570 */
+
+ TPACPI_HANDLE(vid, root, "\\_SB.PCI.AGP.VGA", /* 570 */
+ "\\_SB.PCI0.AGP0.VID0", /* 600e/x, 770x */
+ "\\_SB.PCI0.VID0", /* 770e */
+ "\\_SB.PCI0.VID", /* A21e, G4x, R50e, X30, X40 */
+ "\\_SB.PCI0.AGP.VID", /* all others */
+ ); /* R30, R31 */
+
+
+ /*************************************************************************
+ * ACPI helpers
+ */
+
+ static int acpi_evalf(acpi_handle handle,
+ void *res, char *method, char *fmt, ...)
+ {
+ char *fmt0 = fmt;
+ struct acpi_object_list params;
+ union acpi_object in_objs[TPACPI_MAX_ACPI_ARGS];
+ struct acpi_buffer result, *resultp;
+ union acpi_object out_obj;
+ acpi_status status;
+ va_list ap;
+ char res_type;
+ int success;
+ int quiet;
+
+ if (!*fmt) {
+ printk(TPACPI_ERR "acpi_evalf() called with empty format\n");
+ return 0;
+ }
+
+ if (*fmt == 'q') {
+ quiet = 1;
+ fmt++;
+ } else
+ quiet = 0;
+
+ res_type = *(fmt++);
+
+ params.count = 0;
+ params.pointer = &in_objs[0];
+
+ va_start(ap, fmt);
+ while (*fmt) {
+ char c = *(fmt++);
+ switch (c) {
+ case 'd': /* int */
+ in_objs[params.count].integer.value = va_arg(ap, int);
+ in_objs[params.count++].type = ACPI_TYPE_INTEGER;
+ break;
+ /* add more types as needed */
+ default:
+ printk(TPACPI_ERR "acpi_evalf() called "
+ "with invalid format character '%c'\n", c);
+ return 0;
+ }
+ }
+ va_end(ap);
+
+ if (res_type != 'v') {
+ result.length = sizeof(out_obj);
+ result.pointer = &out_obj;
+ resultp = &result;
+ } else
+ resultp = NULL;
+
+ status = acpi_evaluate_object(handle, method, &params, resultp);
+
+ switch (res_type) {
+ case 'd': /* int */
+ if (res)
+ *(int *)res = out_obj.integer.value;
+ success = status == AE_OK && out_obj.type == ACPI_TYPE_INTEGER;
+ break;
+ case 'v': /* void */
+ success = status == AE_OK;
+ break;
+ /* add more types as needed */
+ default:
+ printk(TPACPI_ERR "acpi_evalf() called "
+ "with invalid format character '%c'\n", res_type);
+ return 0;
+ }
+
+ if (!success && !quiet)
+ printk(TPACPI_ERR "acpi_evalf(%s, %s, ...) failed: %d\n",
+ method, fmt0, status);
+
+ return success;
+ }
+
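The format string is an optional 'q' (quiet) prefix, one result-type character, then one character per argument. Two calls taken from elsewhere in this file illustrate the two result types handled above:

	/* "dd": integer result, one integer argument
	 * (see acpi_ec_read() just below) */
	acpi_evalf(ecrd_handle, &v, NULL, "dd", i);

	/* "qvd": quiet, void result, one integer argument
	 * (see tpacpi_disable_brightness_delay() further down) */
	acpi_evalf(hkey_handle, NULL, "PWMS", "qvd", 0);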
+ static int acpi_ec_read(int i, u8 *p)
+ {
+ int v;
+
+ if (ecrd_handle) {
+ if (!acpi_evalf(ecrd_handle, &v, NULL, "dd", i))
+ return 0;
+ *p = v;
+ } else {
+ if (ec_read(i, p) < 0)
+ return 0;
+ }
+
+ return 1;
+ }
+
+ static int acpi_ec_write(int i, u8 v)
+ {
+ if (ecwr_handle) {
+ if (!acpi_evalf(ecwr_handle, NULL, NULL, "vdd", i, v))
+ return 0;
+ } else {
+ if (ec_write(i, v) < 0)
+ return 0;
+ }
+
+ return 1;
+ }
+
+ #if defined(CONFIG_THINKPAD_ACPI_DOCK) || defined(CONFIG_THINKPAD_ACPI_BAY)
+ static int _sta(acpi_handle handle)
+ {
+ int status;
+
+ if (!handle || !acpi_evalf(handle, &status, "_STA", "d"))
+ status = 0;
+
+ return status;
+ }
+ #endif
+
+ static int issue_thinkpad_cmos_command(int cmos_cmd)
+ {
+ if (!cmos_handle)
+ return -ENXIO;
+
+ if (!acpi_evalf(cmos_handle, NULL, NULL, "vd", cmos_cmd))
+ return -EIO;
+
+ return 0;
+ }
+
+ /*************************************************************************
+ * ACPI device model
+ */
+
+ #define TPACPI_ACPIHANDLE_INIT(object) \
+ drv_acpi_handle_init(#object, &object##_handle, *object##_parent, \
+ object##_paths, ARRAY_SIZE(object##_paths), &object##_path)
+
+ static void drv_acpi_handle_init(char *name,
+ acpi_handle *handle, acpi_handle parent,
+ char **paths, int num_paths, char **path)
+ {
+ int i;
+ acpi_status status;
+
+ vdbg_printk(TPACPI_DBG_INIT, "trying to locate ACPI handle for %s\n",
+ name);
+
+ for (i = 0; i < num_paths; i++) {
+ status = acpi_get_handle(parent, paths[i], handle);
+ if (ACPI_SUCCESS(status)) {
+ *path = paths[i];
+ dbg_printk(TPACPI_DBG_INIT,
+ "Found ACPI handle %s for %s\n",
+ *path, name);
+ return;
+ }
+ }
+
+ vdbg_printk(TPACPI_DBG_INIT, "ACPI handle for %s not found\n",
+ name);
+ *handle = NULL;
+ }
+
+ static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data)
+ {
+ struct ibm_struct *ibm = data;
+
+ if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+ return;
+
+ if (!ibm || !ibm->acpi || !ibm->acpi->notify)
+ return;
+
+ ibm->acpi->notify(ibm, event);
+ }
+
+ static int __init setup_acpi_notify(struct ibm_struct *ibm)
+ {
+ acpi_status status;
+ int rc;
+
+ BUG_ON(!ibm->acpi);
+
+ if (!*ibm->acpi->handle)
+ return 0;
+
+ vdbg_printk(TPACPI_DBG_INIT,
+ "setting up ACPI notify for %s\n", ibm->name);
+
+ rc = acpi_bus_get_device(*ibm->acpi->handle, &ibm->acpi->device);
+ if (rc < 0) {
+ printk(TPACPI_ERR "acpi_bus_get_device(%s) failed: %d\n",
+ ibm->name, rc);
+ return -ENODEV;
+ }
+
+ ibm->acpi->device->driver_data = ibm;
+ sprintf(acpi_device_class(ibm->acpi->device), "%s/%s",
+ TPACPI_ACPI_EVENT_PREFIX,
+ ibm->name);
+
+ status = acpi_install_notify_handler(*ibm->acpi->handle,
+ ibm->acpi->type, dispatch_acpi_notify, ibm);
+ if (ACPI_FAILURE(status)) {
+ if (status == AE_ALREADY_EXISTS) {
+ printk(TPACPI_NOTICE
+ "another device driver is already "
+ "handling %s events\n", ibm->name);
+ } else {
+ printk(TPACPI_ERR
+ "acpi_install_notify_handler(%s) failed: %d\n",
+ ibm->name, status);
+ }
+ return -ENODEV;
+ }
+ ibm->flags.acpi_notify_installed = 1;
+ return 0;
+ }
+
+ static int __init tpacpi_device_add(struct acpi_device *device)
+ {
+ return 0;
+ }
+
+ static int __init register_tpacpi_subdriver(struct ibm_struct *ibm)
+ {
+ int rc;
+
+ dbg_printk(TPACPI_DBG_INIT,
+ "registering %s as an ACPI driver\n", ibm->name);
+
+ BUG_ON(!ibm->acpi);
+
+ ibm->acpi->driver = kzalloc(sizeof(struct acpi_driver), GFP_KERNEL);
+ if (!ibm->acpi->driver) {
+ printk(TPACPI_ERR
+ "failed to allocate memory for ibm->acpi->driver\n");
+ return -ENOMEM;
+ }
+
+ sprintf(ibm->acpi->driver->name, "%s_%s", TPACPI_NAME, ibm->name);
+ ibm->acpi->driver->ids = ibm->acpi->hid;
+
+ ibm->acpi->driver->ops.add = &tpacpi_device_add;
+
+ rc = acpi_bus_register_driver(ibm->acpi->driver);
+ if (rc < 0) {
+ printk(TPACPI_ERR "acpi_bus_register_driver(%s) failed: %d\n",
+ ibm->name, rc);
+ kfree(ibm->acpi->driver);
+ ibm->acpi->driver = NULL;
+ } else if (!rc)
+ ibm->flags.acpi_driver_registered = 1;
+
+ return rc;
+ }
+
+
+ /****************************************************************************
+ ****************************************************************************
+ *
+ * Procfs Helpers
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+ static int dispatch_procfs_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+ struct ibm_struct *ibm = data;
+ int len;
+
+ if (!ibm || !ibm->read)
+ return -EINVAL;
+
+ len = ibm->read(page);
+ if (len < 0)
+ return len;
+
+ if (len <= off + count)
+ *eof = 1;
+ *start = page + off;
+ len -= off;
+ if (len > count)
+ len = count;
+ if (len < 0)
+ len = 0;
+
+ return len;
+ }
+
+ static int dispatch_procfs_write(struct file *file,
+ const char __user *userbuf,
+ unsigned long count, void *data)
+ {
+ struct ibm_struct *ibm = data;
+ char *kernbuf;
+ int ret;
+
+ if (!ibm || !ibm->write)
+ return -EINVAL;
+
+ kernbuf = kmalloc(count + 2, GFP_KERNEL);
+ if (!kernbuf)
+ return -ENOMEM;
+
+ if (copy_from_user(kernbuf, userbuf, count)) {
+ kfree(kernbuf);
+ return -EFAULT;
+ }
+
+ kernbuf[count] = 0;
+ strcat(kernbuf, ",");
+ ret = ibm->write(kernbuf);
+ if (ret == 0)
+ ret = count;
+
+ kfree(kernbuf);
+
+ return ret;
+ }
+
+ static char *next_cmd(char **cmds)
+ {
+ char *start = *cmds;
+ char *end;
+
+ while ((end = strchr(start, ',')) && end == start)
+ start = end + 1;
+
+ if (!end)
+ return NULL;
+
+ *end = 0;
+ *cmds = end + 1;
+ return start;
+ }
+
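A minimal sketch of how a subdriver write handler consumes the comma-terminated buffer prepared by dispatch_procfs_write() (the handler and its command names are illustrative):

	static int example_write(char *buf)
	{
		char *cmd;

		while ((cmd = next_cmd(&buf))) {
			if (strlencmp(cmd, "enable") == 0) {
				/* ... */
			} else if (strlencmp(cmd, "disable") == 0) {
				/* ... */
			} else
				return -EINVAL;
		}
		return 0;
	}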
+
+ /****************************************************************************
+ ****************************************************************************
+ *
+ * Device model: input, hwmon and platform
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+ static struct platform_device *tpacpi_pdev;
+ static struct platform_device *tpacpi_sensors_pdev;
+ static struct device *tpacpi_hwmon;
+ static struct input_dev *tpacpi_inputdev;
+ static struct mutex tpacpi_inputdev_send_mutex;
+ static LIST_HEAD(tpacpi_all_drivers);
+
+ static int tpacpi_suspend_handler(struct platform_device *pdev,
+ pm_message_t state)
+ {
+ struct ibm_struct *ibm, *itmp;
+
+ list_for_each_entry_safe(ibm, itmp,
+ &tpacpi_all_drivers,
+ all_drivers) {
+ if (ibm->suspend)
+ (ibm->suspend)(state);
+ }
+
+ return 0;
+ }
+
+ static int tpacpi_resume_handler(struct platform_device *pdev)
+ {
+ struct ibm_struct *ibm, *itmp;
+
+ list_for_each_entry_safe(ibm, itmp,
+ &tpacpi_all_drivers,
+ all_drivers) {
+ if (ibm->resume)
+ (ibm->resume)();
+ }
+
+ return 0;
+ }
+
+ static void tpacpi_shutdown_handler(struct platform_device *pdev)
+ {
+ struct ibm_struct *ibm, *itmp;
+
+ list_for_each_entry_safe(ibm, itmp,
+ &tpacpi_all_drivers,
+ all_drivers) {
+ if (ibm->shutdown)
+ (ibm->shutdown)();
+ }
+ }
+
+ static struct platform_driver tpacpi_pdriver = {
+ .driver = {
+ .name = TPACPI_DRVR_NAME,
+ .owner = THIS_MODULE,
+ },
+ .suspend = tpacpi_suspend_handler,
+ .resume = tpacpi_resume_handler,
+ .shutdown = tpacpi_shutdown_handler,
+ };
+
+ static struct platform_driver tpacpi_hwmon_pdriver = {
+ .driver = {
+ .name = TPACPI_HWMON_DRVR_NAME,
+ .owner = THIS_MODULE,
+ },
+ };
+
+ /*************************************************************************
+ * sysfs support helpers
+ */
+
+ struct attribute_set {
+ unsigned int members, max_members;
+ struct attribute_group group;
+ };
+
+ struct attribute_set_obj {
+ struct attribute_set s;
+ struct attribute *a;
+ } __attribute__((packed));
+
+ static struct attribute_set *create_attr_set(unsigned int max_members,
+ const char *name)
+ {
+ struct attribute_set_obj *sobj;
+
+ if (max_members == 0)
+ return NULL;
+
+ /* Allocates space for implicit NULL at the end too */
+ sobj = kzalloc(sizeof(struct attribute_set_obj) +
+ max_members * sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!sobj)
+ return NULL;
+ sobj->s.max_members = max_members;
+ sobj->s.group.attrs = &sobj->a;
+ sobj->s.group.name = name;
+
+ return &sobj->s;
+ }
+
+ #define destroy_attr_set(_set) \
+ kfree(_set);
+
+ /* not thread-safe; use it from a single thread per attribute set */
+ static int add_to_attr_set(struct attribute_set *s, struct attribute *attr)
+ {
+ if (!s || !attr)
+ return -EINVAL;
+
+ if (s->members >= s->max_members)
+ return -ENOMEM;
+
+ s->group.attrs[s->members] = attr;
+ s->members++;
+
+ return 0;
+ }
+
+ static int add_many_to_attr_set(struct attribute_set *s,
+ struct attribute **attr,
+ unsigned int count)
+ {
+ int i, res;
+
+ for (i = 0; i < count; i++) {
+ res = add_to_attr_set(s, attr[i]);
+ if (res)
+ return res;
+ }
+
+ return 0;
+ }
+
+ static void delete_attr_set(struct attribute_set *s, struct kobject *kobj)
+ {
+ sysfs_remove_group(kobj, &s->group);
+ destroy_attr_set(s);
+ }
+
+ #define register_attr_set_with_sysfs(_attr_set, _kobj) \
+ sysfs_create_group(_kobj, &_attr_set->group)
+
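A minimal sketch of the attribute_set life cycle (the dev_attr_foo/bar attributes are hypothetical):

	struct attribute_set *s = create_attr_set(2, NULL);
	if (!s)
		return -ENOMEM;
	add_to_attr_set(s, &dev_attr_foo.attr);	/* hypothetical */
	add_to_attr_set(s, &dev_attr_bar.attr);	/* hypothetical */
	res = register_attr_set_with_sysfs(s, &tpacpi_pdev->dev.kobj);
	/* ... and on teardown: */
	delete_attr_set(s, &tpacpi_pdev->dev.kobj);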
+ static int parse_strtoul(const char *buf,
+ unsigned long max, unsigned long *value)
+ {
+ char *endp;
+
+ while (*buf && isspace(*buf))
+ buf++;
+ *value = simple_strtoul(buf, &endp, 0);
+ while (*endp && isspace(*endp))
+ endp++;
+ if (*endp || *value > max)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ static void tpacpi_disable_brightness_delay(void)
+ {
+ if (acpi_evalf(hkey_handle, NULL, "PWMS", "qvd", 0))
+ printk(TPACPI_NOTICE
+ "ACPI backlight control delay disabled\n");
+ }
+
+ static int __init tpacpi_query_bcl_levels(acpi_handle handle)
+ {
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ int rc;
+
+ if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
+ obj = (union acpi_object *)buffer.pointer;
+ if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
+ printk(TPACPI_ERR "Unknown _BCL data, "
+ "please report this to %s\n", TPACPI_MAIL);
+ rc = 0;
+ } else {
+ rc = obj->package.count;
+ }
+ } else {
+ return 0;
+ }
+
+ kfree(buffer.pointer);
+ return rc;
+ }
+
+ static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle,
+ u32 lvl, void *context, void **rv)
+ {
+ char name[ACPI_PATH_SEGMENT_LENGTH];
+ struct acpi_buffer buffer = { sizeof(name), &name };
+
+ if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
+ !strncmp("_BCL", name, sizeof(name) - 1)) {
+ BUG_ON(!rv || !*rv);
+ **(int **)rv = tpacpi_query_bcl_levels(handle);
+ return AE_CTRL_TERMINATE;
+ } else {
+ return AE_OK;
+ }
+ }
+
+ /*
+ * Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map
+ */
+ static int __init tpacpi_check_std_acpi_brightness_support(void)
+ {
+ int status;
+ int bcl_levels = 0;
+ void *bcl_ptr = &bcl_levels;
+
+ if (!vid_handle) {
+ TPACPI_ACPIHANDLE_INIT(vid);
+ }
+ if (!vid_handle)
+ return 0;
+
+ /*
+ * Search for a _BCL method, and execute it. This is safe on all
+ * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista
+ * BIOS in ACPI backlight control mode. We do NOT have to care
+ * about calling the _BCL method in an enabled video device, any
+ * will do for our purposes.
+ */
+
+ status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
+ tpacpi_acpi_walk_find_bcl, NULL,
+ &bcl_ptr);
+
+ if (ACPI_SUCCESS(status) && bcl_levels > 2) {
+ tp_features.bright_acpimode = 1;
+ return (bcl_levels - 2);
+ }
+
+ return 0;
+ }
+
+ static int __init tpacpi_new_rfkill(const unsigned int id,
+ struct rfkill **rfk,
+ const enum rfkill_type rfktype,
+ const char *name,
+ const bool set_default,
+ int (*toggle_radio)(void *, enum rfkill_state),
+ int (*get_state)(void *, enum rfkill_state *))
+ {
+ int res;
+ enum rfkill_state initial_state = RFKILL_STATE_SOFT_BLOCKED;
+
+ res = get_state(NULL, &initial_state);
+ if (res < 0) {
+ printk(TPACPI_ERR
+ "failed to read initial state for %s, error %d; "
+ "will turn radio off\n", name, res);
+ } else if (set_default) {
+ /* try to set the initial state as the default for the rfkill
+ * type, since we ask the firmware to preserve it across S5 in
+ * NVRAM */
+ rfkill_set_default(rfktype,
+ (initial_state == RFKILL_STATE_UNBLOCKED) ?
+ RFKILL_STATE_UNBLOCKED :
+ RFKILL_STATE_SOFT_BLOCKED);
+ }
+
+ *rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype);
+ if (!*rfk) {
+ printk(TPACPI_ERR
+ "failed to allocate memory for rfkill class\n");
+ return -ENOMEM;
+ }
+
+ (*rfk)->name = name;
+ (*rfk)->get_state = get_state;
+ (*rfk)->toggle_radio = toggle_radio;
+ (*rfk)->state = initial_state;
+
+ res = rfkill_register(*rfk);
+ if (res < 0) {
+ printk(TPACPI_ERR
+ "failed to register %s rfkill switch: %d\n",
+ name, res);
+ rfkill_free(*rfk);
+ *rfk = NULL;
+ return res;
+ }
+
+ return 0;
+ }
+
+ /*************************************************************************
+ * thinkpad-acpi driver attributes
+ */
+
+ /* interface_version --------------------------------------------------- */
+ static ssize_t tpacpi_driver_interface_version_show(
+ struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n", TPACPI_SYSFS_VERSION);
+ }
+
+ static DRIVER_ATTR(interface_version, S_IRUGO,
+ tpacpi_driver_interface_version_show, NULL);
+
+ /* debug_level --------------------------------------------------------- */
+ static ssize_t tpacpi_driver_debug_show(struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%04x\n", dbg_level);
+ }
+
+ static ssize_t tpacpi_driver_debug_store(struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 0xffff, &t))
+ return -EINVAL;
+
+ dbg_level = t;
+
+ return count;
+ }
+
+ static DRIVER_ATTR(debug_level, S_IWUSR | S_IRUGO,
+ tpacpi_driver_debug_show, tpacpi_driver_debug_store);
+
+ /* version ------------------------------------------------------------- */
+ static ssize_t tpacpi_driver_version_show(struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%s v%s\n",
+ TPACPI_DESC, TPACPI_VERSION);
+ }
+
+ static DRIVER_ATTR(version, S_IRUGO,
+ tpacpi_driver_version_show, NULL);
+
+ /* --------------------------------------------------------------------- */
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+
+ static void tpacpi_send_radiosw_update(void);
+
+ /* wlsw_emulstate ------------------------------------------------------ */
+ static ssize_t tpacpi_driver_wlsw_emulstate_show(struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_wlsw_emulstate);
+ }
+
+ static ssize_t tpacpi_driver_wlsw_emulstate_store(struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ if (tpacpi_wlsw_emulstate != t) {
+ tpacpi_wlsw_emulstate = !!t;
+ tpacpi_send_radiosw_update();
+ } else
+ tpacpi_wlsw_emulstate = !!t;
+
+ return count;
+ }
+
+ static DRIVER_ATTR(wlsw_emulstate, S_IWUSR | S_IRUGO,
+ tpacpi_driver_wlsw_emulstate_show,
+ tpacpi_driver_wlsw_emulstate_store);
+
+ /* bluetooth_emulstate ------------------------------------------------- */
+ static ssize_t tpacpi_driver_bluetooth_emulstate_show(
+ struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_bluetooth_emulstate);
+ }
+
+ static ssize_t tpacpi_driver_bluetooth_emulstate_store(
+ struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ tpacpi_bluetooth_emulstate = !!t;
+
+ return count;
+ }
+
+ static DRIVER_ATTR(bluetooth_emulstate, S_IWUSR | S_IRUGO,
+ tpacpi_driver_bluetooth_emulstate_show,
+ tpacpi_driver_bluetooth_emulstate_store);
+
+ /* wwan_emulstate ------------------------------------------------- */
+ static ssize_t tpacpi_driver_wwan_emulstate_show(
+ struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_wwan_emulstate);
+ }
+
+ static ssize_t tpacpi_driver_wwan_emulstate_store(
+ struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ tpacpi_wwan_emulstate = !!t;
+
+ return count;
+ }
+
+ static DRIVER_ATTR(wwan_emulstate, S_IWUSR | S_IRUGO,
+ tpacpi_driver_wwan_emulstate_show,
+ tpacpi_driver_wwan_emulstate_store);
+
+ /* uwb_emulstate ------------------------------------------------- */
+ static ssize_t tpacpi_driver_uwb_emulstate_show(
+ struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_uwb_emulstate);
+ }
+
+ static ssize_t tpacpi_driver_uwb_emulstate_store(
+ struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ tpacpi_uwb_emulstate = !!t;
+
+ return count;
+ }
+
+ static DRIVER_ATTR(uwb_emulstate, S_IWUSR | S_IRUGO,
+ tpacpi_driver_uwb_emulstate_show,
+ tpacpi_driver_uwb_emulstate_store);
+ #endif
+
+ /* --------------------------------------------------------------------- */
+
+ static struct driver_attribute *tpacpi_driver_attributes[] = {
+ &driver_attr_debug_level, &driver_attr_version,
+ &driver_attr_interface_version,
+ };
+
+ static int __init tpacpi_create_driver_attributes(struct device_driver *drv)
+ {
+ int i, res;
+
+ i = 0;
+ res = 0;
+ while (!res && i < ARRAY_SIZE(tpacpi_driver_attributes)) {
+ res = driver_create_file(drv, tpacpi_driver_attributes[i]);
+ i++;
+ }
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (!res && dbg_wlswemul)
+ res = driver_create_file(drv, &driver_attr_wlsw_emulstate);
+ if (!res && dbg_bluetoothemul)
+ res = driver_create_file(drv, &driver_attr_bluetooth_emulstate);
+ if (!res && dbg_wwanemul)
+ res = driver_create_file(drv, &driver_attr_wwan_emulstate);
+ if (!res && dbg_uwbemul)
+ res = driver_create_file(drv, &driver_attr_uwb_emulstate);
+ #endif
+
+ return res;
+ }
+
+ static void tpacpi_remove_driver_attributes(struct device_driver *drv)
+ {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tpacpi_driver_attributes); i++)
+ driver_remove_file(drv, tpacpi_driver_attributes[i]);
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ driver_remove_file(drv, &driver_attr_wlsw_emulstate);
+ driver_remove_file(drv, &driver_attr_bluetooth_emulstate);
+ driver_remove_file(drv, &driver_attr_wwan_emulstate);
+ driver_remove_file(drv, &driver_attr_uwb_emulstate);
+ #endif
+ }
+
+ /****************************************************************************
+ ****************************************************************************
+ *
+ * Subdrivers
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+ /*************************************************************************
+ * thinkpad-acpi init subdriver
+ */
+
+ static int __init thinkpad_acpi_driver_init(struct ibm_init_struct *iibm)
+ {
+ printk(TPACPI_INFO "%s v%s\n", TPACPI_DESC, TPACPI_VERSION);
+ printk(TPACPI_INFO "%s\n", TPACPI_URL);
+
+ printk(TPACPI_INFO "ThinkPad BIOS %s, EC %s\n",
+ (thinkpad_id.bios_version_str) ?
+ thinkpad_id.bios_version_str : "unknown",
+ (thinkpad_id.ec_version_str) ?
+ thinkpad_id.ec_version_str : "unknown");
+
+ if (thinkpad_id.vendor && thinkpad_id.model_str)
+ printk(TPACPI_INFO "%s %s, model %s\n",
+ (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ?
+ "IBM" : ((thinkpad_id.vendor ==
+ PCI_VENDOR_ID_LENOVO) ?
+ "Lenovo" : "Unknown vendor"),
+ thinkpad_id.model_str,
+ (thinkpad_id.nummodel_str) ?
+ thinkpad_id.nummodel_str : "unknown");
+
+ return 0;
+ }
+
+ static int thinkpad_acpi_driver_read(char *p)
+ {
+ int len = 0;
+
+ len += sprintf(p + len, "driver:\t\t%s\n", TPACPI_DESC);
+ len += sprintf(p + len, "version:\t%s\n", TPACPI_VERSION);
+
+ return len;
+ }
+
+ static struct ibm_struct thinkpad_acpi_driver_data = {
+ .name = "driver",
+ .read = thinkpad_acpi_driver_read,
+ };
+
+ /*************************************************************************
+ * Hotkey subdriver
+ */
+
+ enum { /* hot key scan codes (derived from ACPI DSDT) */
+ TP_ACPI_HOTKEYSCAN_FNF1 = 0,
+ TP_ACPI_HOTKEYSCAN_FNF2,
+ TP_ACPI_HOTKEYSCAN_FNF3,
+ TP_ACPI_HOTKEYSCAN_FNF4,
+ TP_ACPI_HOTKEYSCAN_FNF5,
+ TP_ACPI_HOTKEYSCAN_FNF6,
+ TP_ACPI_HOTKEYSCAN_FNF7,
+ TP_ACPI_HOTKEYSCAN_FNF8,
+ TP_ACPI_HOTKEYSCAN_FNF9,
+ TP_ACPI_HOTKEYSCAN_FNF10,
+ TP_ACPI_HOTKEYSCAN_FNF11,
+ TP_ACPI_HOTKEYSCAN_FNF12,
+ TP_ACPI_HOTKEYSCAN_FNBACKSPACE,
+ TP_ACPI_HOTKEYSCAN_FNINSERT,
+ TP_ACPI_HOTKEYSCAN_FNDELETE,
+ TP_ACPI_HOTKEYSCAN_FNHOME,
+ TP_ACPI_HOTKEYSCAN_FNEND,
+ TP_ACPI_HOTKEYSCAN_FNPAGEUP,
+ TP_ACPI_HOTKEYSCAN_FNPAGEDOWN,
+ TP_ACPI_HOTKEYSCAN_FNSPACE,
+ TP_ACPI_HOTKEYSCAN_VOLUMEUP,
+ TP_ACPI_HOTKEYSCAN_VOLUMEDOWN,
+ TP_ACPI_HOTKEYSCAN_MUTE,
+ TP_ACPI_HOTKEYSCAN_THINKPAD,
+ };
+
+ enum { /* Keys available through NVRAM polling */
+ TPACPI_HKEY_NVRAM_KNOWN_MASK = 0x00fb88c0U,
+ TPACPI_HKEY_NVRAM_GOOD_MASK = 0x00fb8000U,
+ };
+
+ enum { /* Positions of some of the keys in hotkey masks */
+ TP_ACPI_HKEY_DISPSWTCH_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNF7,
+ TP_ACPI_HKEY_DISPXPAND_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNF8,
+ TP_ACPI_HKEY_HIBERNATE_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNF12,
+ TP_ACPI_HKEY_BRGHTUP_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNHOME,
+ TP_ACPI_HKEY_BRGHTDWN_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNEND,
+ TP_ACPI_HKEY_THNKLGHT_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNPAGEUP,
+ TP_ACPI_HKEY_ZOOM_MASK = 1 << TP_ACPI_HOTKEYSCAN_FNSPACE,
+ TP_ACPI_HKEY_VOLUP_MASK = 1 << TP_ACPI_HOTKEYSCAN_VOLUMEUP,
+ TP_ACPI_HKEY_VOLDWN_MASK = 1 << TP_ACPI_HOTKEYSCAN_VOLUMEDOWN,
+ TP_ACPI_HKEY_MUTE_MASK = 1 << TP_ACPI_HOTKEYSCAN_MUTE,
+ TP_ACPI_HKEY_THINKPAD_MASK = 1 << TP_ACPI_HOTKEYSCAN_THINKPAD,
+ };
+
+ enum { /* NVRAM to ACPI HKEY group map */
+ TP_NVRAM_HKEY_GROUP_HK2 = TP_ACPI_HKEY_THINKPAD_MASK |
+ TP_ACPI_HKEY_ZOOM_MASK |
+ TP_ACPI_HKEY_DISPSWTCH_MASK |
+ TP_ACPI_HKEY_HIBERNATE_MASK,
+ TP_NVRAM_HKEY_GROUP_BRIGHTNESS = TP_ACPI_HKEY_BRGHTUP_MASK |
+ TP_ACPI_HKEY_BRGHTDWN_MASK,
+ TP_NVRAM_HKEY_GROUP_VOLUME = TP_ACPI_HKEY_VOLUP_MASK |
+ TP_ACPI_HKEY_VOLDWN_MASK |
+ TP_ACPI_HKEY_MUTE_MASK,
+ };
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ struct tp_nvram_state {
+ u16 thinkpad_toggle:1;
+ u16 zoom_toggle:1;
+ u16 display_toggle:1;
+ u16 thinklight_toggle:1;
+ u16 hibernate_toggle:1;
+ u16 displayexp_toggle:1;
+ u16 display_state:1;
+ u16 brightness_toggle:1;
+ u16 volume_toggle:1;
+ u16 mute:1;
+
+ u8 brightness_level;
+ u8 volume_level;
+ };
+
+ static struct task_struct *tpacpi_hotkey_task;
+ static u32 hotkey_source_mask; /* bit mask 0=ACPI,1=NVRAM */
+ static int hotkey_poll_freq = 10; /* Hz */
+ static struct mutex hotkey_thread_mutex;
+ static struct mutex hotkey_thread_data_mutex;
+ static unsigned int hotkey_config_change;
+
+ #else /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
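+ /* with polling compiled out, a constant zero mask lets the compiler
+  * optimize away all NVRAM-polling code paths */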
+ #define hotkey_source_mask 0U
+
+ #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
+ static struct mutex hotkey_mutex;
+
+ static enum { /* Reasons for waking up */
+ TP_ACPI_WAKEUP_NONE = 0, /* None or unknown */
+ TP_ACPI_WAKEUP_BAYEJ, /* Bay ejection request */
+ TP_ACPI_WAKEUP_UNDOCK, /* Undock request */
+ } hotkey_wakeup_reason;
+
+ static int hotkey_autosleep_ack;
+
+ static int hotkey_orig_status;
+ static u32 hotkey_orig_mask;
+ static u32 hotkey_all_mask;
+ static u32 hotkey_reserved_mask;
+ static u32 hotkey_mask;
+
+ static unsigned int hotkey_report_mode;
+
+ static u16 *hotkey_keycode_map;
+
+ static struct attribute_set *hotkey_dev_attributes;
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
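+ /* Bumping hotkey_config_change inside the critical section makes the
+  * NVRAM poller kthread notice the change and discard its stale state */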
+ #define HOTKEY_CONFIG_CRITICAL_START \
+ do { \
+ mutex_lock(&hotkey_thread_data_mutex); \
+ hotkey_config_change++; \
+ } while (0);
+ #define HOTKEY_CONFIG_CRITICAL_END \
+ mutex_unlock(&hotkey_thread_data_mutex);
+ #else
+ #define HOTKEY_CONFIG_CRITICAL_START
+ #define HOTKEY_CONFIG_CRITICAL_END
+ #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
+ /* HKEY.MHKG() return bits */
+ #define TP_HOTKEY_TABLET_MASK (1 << 3)
+
+ static int hotkey_get_wlsw(int *status)
+ {
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_wlswemul) {
+ *status = !!tpacpi_wlsw_emulstate;
+ return 0;
+ }
+ #endif
+ if (!acpi_evalf(hkey_handle, status, "WLSW", "d"))
+ return -EIO;
+ return 0;
+ }
+
+ static int hotkey_get_tablet_mode(int *status)
+ {
+ int s;
+
+ if (!acpi_evalf(hkey_handle, &s, "MHKG", "d"))
+ return -EIO;
+
+ *status = ((s & TP_HOTKEY_TABLET_MASK) != 0);
+ return 0;
+ }
+
+ /*
+ * Call with hotkey_mutex held
+ */
+ static int hotkey_mask_get(void)
+ {
+ u32 m = 0;
+
+ if (tp_features.hotkey_mask) {
+ if (!acpi_evalf(hkey_handle, &m, "DHKN", "d"))
+ return -EIO;
+ }
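+ 	/* merge the firmware mask with the NVRAM-polled bits kept in the
+ 	 * cached mask, which the firmware knows nothing about */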
+ hotkey_mask = m | (hotkey_source_mask & hotkey_mask);
+
+ return 0;
+ }
+
+ /*
+ * Call with hotkey_mutex held
+ */
+ static int hotkey_mask_set(u32 mask)
+ {
+ int i;
+ int rc = 0;
+
+ if (tp_features.hotkey_mask) {
+ if (!tp_warned.hotkey_mask_ff &&
+ (mask == 0xffff || mask == 0xffffff ||
+ mask == 0xffffffff)) {
+ tp_warned.hotkey_mask_ff = 1;
+ printk(TPACPI_NOTICE
+ "setting the hotkey mask to 0x%08x is likely "
+ "not the best way to go about it\n", mask);
+ printk(TPACPI_NOTICE
+ "please consider using the driver defaults, "
+ "and refer to up-to-date thinkpad-acpi "
+ "documentation\n");
+ }
+
+ HOTKEY_CONFIG_CRITICAL_START
+ for (i = 0; i < 32; i++) {
+ u32 m = 1 << i;
+ 			/* in the firmware mask, enable only keys that are
+ 			 * not in NVRAM-poll mode; but update the cached
+ 			 * hotkey_mask for every key, or the key will end
+ 			 * up disabled by hotkey_mask_get() */
+ if (!acpi_evalf(hkey_handle,
+ NULL, "MHKM", "vdd", i + 1,
+ !!((mask & ~hotkey_source_mask) & m))) {
+ rc = -EIO;
+ break;
+ } else {
+ hotkey_mask = (hotkey_mask & ~m) | (mask & m);
+ }
+ }
+ HOTKEY_CONFIG_CRITICAL_END
+
+ /* hotkey_mask_get must be called unconditionally below */
+ if (!hotkey_mask_get() && !rc &&
+ (hotkey_mask & ~hotkey_source_mask) !=
+ (mask & ~hotkey_source_mask)) {
+ printk(TPACPI_NOTICE
+ "requested hot key mask 0x%08x, but "
+ "firmware forced it to 0x%08x\n",
+ mask, hotkey_mask);
+ }
+ } else {
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ HOTKEY_CONFIG_CRITICAL_START
+ hotkey_mask = mask & hotkey_source_mask;
+ HOTKEY_CONFIG_CRITICAL_END
+ hotkey_mask_get();
+ if (hotkey_mask != mask) {
+ printk(TPACPI_NOTICE
+ "requested hot key mask 0x%08x, "
+ "forced to 0x%08x (NVRAM poll mask is "
+ "0x%08x): no firmware mask support\n",
+ mask, hotkey_mask, hotkey_source_mask);
+ }
+ #else
+ hotkey_mask_get();
+ rc = -ENXIO;
+ #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+ }
+
+ return rc;
+ }
+
+ static int hotkey_status_get(int *status)
+ {
+ if (!acpi_evalf(hkey_handle, status, "DHKC", "d"))
+ return -EIO;
+
+ return 0;
+ }
+
+ static int hotkey_status_set(int status)
+ {
+ if (!acpi_evalf(hkey_handle, NULL, "MHKC", "vd", status))
+ return -EIO;
+
+ return 0;
+ }
+
+ static void tpacpi_input_send_tabletsw(void)
+ {
+ int state;
+
+ if (tp_features.hotkey_tablet &&
+ !hotkey_get_tablet_mode(&state)) {
+ mutex_lock(&tpacpi_inputdev_send_mutex);
+
+ input_report_switch(tpacpi_inputdev,
+ SW_TABLET_MODE, !!state);
+ input_sync(tpacpi_inputdev);
+
+ mutex_unlock(&tpacpi_inputdev_send_mutex);
+ }
+ }
+
+ static void tpacpi_input_send_key(unsigned int scancode)
+ {
+ unsigned int keycode;
+
+ keycode = hotkey_keycode_map[scancode];
+
+ if (keycode != KEY_RESERVED) {
+ mutex_lock(&tpacpi_inputdev_send_mutex);
+
+ input_report_key(tpacpi_inputdev, keycode, 1);
+ if (keycode == KEY_UNKNOWN)
+ input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
+ scancode);
+ input_sync(tpacpi_inputdev);
+
+ input_report_key(tpacpi_inputdev, keycode, 0);
+ if (keycode == KEY_UNKNOWN)
+ input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
+ scancode);
+ input_sync(tpacpi_inputdev);
+
+ mutex_unlock(&tpacpi_inputdev_send_mutex);
+ }
+ }
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
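+ /* forward declaration: the poller raises legacy ACPI events through
+  * the hotkey ACPI driver, which is defined further down */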
+ static struct tp_acpi_drv_struct ibm_hotkey_acpidriver;
+
+ static void tpacpi_hotkey_send_key(unsigned int scancode)
+ {
+ tpacpi_input_send_key(scancode);
+ if (hotkey_report_mode < 2) {
+ acpi_bus_generate_proc_event(ibm_hotkey_acpidriver.device,
+ 0x80, 0x1001 + scancode);
+ }
+ }
+
+ static void hotkey_read_nvram(struct tp_nvram_state *n, u32 m)
+ {
+ u8 d;
+
+ if (m & TP_NVRAM_HKEY_GROUP_HK2) {
+ d = nvram_read_byte(TP_NVRAM_ADDR_HK2);
+ n->thinkpad_toggle = !!(d & TP_NVRAM_MASK_HKT_THINKPAD);
+ n->zoom_toggle = !!(d & TP_NVRAM_MASK_HKT_ZOOM);
+ n->display_toggle = !!(d & TP_NVRAM_MASK_HKT_DISPLAY);
+ n->hibernate_toggle = !!(d & TP_NVRAM_MASK_HKT_HIBERNATE);
+ }
+ if (m & TP_ACPI_HKEY_THNKLGHT_MASK) {
+ d = nvram_read_byte(TP_NVRAM_ADDR_THINKLIGHT);
+ n->thinklight_toggle = !!(d & TP_NVRAM_MASK_THINKLIGHT);
+ }
+ if (m & TP_ACPI_HKEY_DISPXPAND_MASK) {
+ d = nvram_read_byte(TP_NVRAM_ADDR_VIDEO);
+ n->displayexp_toggle =
+ !!(d & TP_NVRAM_MASK_HKT_DISPEXPND);
+ }
+ if (m & TP_NVRAM_HKEY_GROUP_BRIGHTNESS) {
+ d = nvram_read_byte(TP_NVRAM_ADDR_BRIGHTNESS);
+ n->brightness_level = (d & TP_NVRAM_MASK_LEVEL_BRIGHTNESS)
+ >> TP_NVRAM_POS_LEVEL_BRIGHTNESS;
+ n->brightness_toggle =
+ !!(d & TP_NVRAM_MASK_HKT_BRIGHTNESS);
+ }
+ if (m & TP_NVRAM_HKEY_GROUP_VOLUME) {
+ d = nvram_read_byte(TP_NVRAM_ADDR_MIXER);
+ n->volume_level = (d & TP_NVRAM_MASK_LEVEL_VOLUME)
+ >> TP_NVRAM_POS_LEVEL_VOLUME;
+ n->mute = !!(d & TP_NVRAM_MASK_MUTE);
+ n->volume_toggle = !!(d & TP_NVRAM_MASK_HKT_VOLUME);
+ }
+ }
+
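+ /* Send the hot key event for __scancode if its NVRAM toggle bit
+  * changed between the two snapshots and the key is in the mask */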
+ #define TPACPI_COMPARE_KEY(__scancode, __member) \
+ do { \
+ if ((mask & (1 << __scancode)) && \
+ oldn->__member != newn->__member) \
+ tpacpi_hotkey_send_key(__scancode); \
+ } while (0)
+
+ #define TPACPI_MAY_SEND_KEY(__scancode) \
+ do { if (mask & (1 << __scancode)) \
+ tpacpi_hotkey_send_key(__scancode); } while (0)
+
+ static void hotkey_compare_and_issue_event(struct tp_nvram_state *oldn,
+ struct tp_nvram_state *newn,
+ u32 mask)
+ {
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_THINKPAD, thinkpad_toggle);
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_FNSPACE, zoom_toggle);
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_FNF7, display_toggle);
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_FNF12, hibernate_toggle);
+
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_FNPAGEUP, thinklight_toggle);
+
+ TPACPI_COMPARE_KEY(TP_ACPI_HOTKEYSCAN_FNF8, displayexp_toggle);
+
+ /* handle volume */
+ if (oldn->volume_toggle != newn->volume_toggle) {
+ if (oldn->mute != newn->mute) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_MUTE);
+ }
+ if (oldn->volume_level > newn->volume_level) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_VOLUMEDOWN);
+ } else if (oldn->volume_level < newn->volume_level) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_VOLUMEUP);
+ } else if (oldn->mute == newn->mute) {
+ /* repeated key presses that didn't change state */
+ if (newn->mute) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_MUTE);
+ } else if (newn->volume_level != 0) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_VOLUMEUP);
+ } else {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_VOLUMEDOWN);
+ }
+ }
+ }
+
+ /* handle brightness */
+ if (oldn->brightness_toggle != newn->brightness_toggle) {
+ if (oldn->brightness_level < newn->brightness_level) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_FNHOME);
+ } else if (oldn->brightness_level > newn->brightness_level) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_FNEND);
+ } else {
+ /* repeated key presses that didn't change state */
+ if (newn->brightness_level != 0) {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_FNHOME);
+ } else {
+ TPACPI_MAY_SEND_KEY(TP_ACPI_HOTKEYSCAN_FNEND);
+ }
+ }
+ }
+ }
+
+ #undef TPACPI_COMPARE_KEY
+ #undef TPACPI_MAY_SEND_KEY
+
+ static int hotkey_kthread(void *data)
+ {
+ struct tp_nvram_state s[2];
+ u32 mask;
+ unsigned int si, so;
+ unsigned long t;
+ unsigned int change_detector, must_reset;
+
+ mutex_lock(&hotkey_thread_mutex);
+
+ if (tpacpi_lifecycle == TPACPI_LIFE_EXITING)
+ goto exit;
+
+ set_freezable();
+
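+ 	/* s[] is a two-slot ring buffer: "so" indexes the previous NVRAM
+ 	 * snapshot and "si" the one being read; they swap each iteration */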
+ so = 0;
+ si = 1;
+ t = 0;
+
+ /* Initial state for compares */
+ mutex_lock(&hotkey_thread_data_mutex);
+ change_detector = hotkey_config_change;
+ mask = hotkey_source_mask & hotkey_mask;
+ mutex_unlock(&hotkey_thread_data_mutex);
+ hotkey_read_nvram(&s[so], mask);
+
+ while (!kthread_should_stop() && hotkey_poll_freq) {
+ if (t == 0)
+ 			t = 1000 / hotkey_poll_freq;
+ t = msleep_interruptible(t);
+ if (unlikely(kthread_should_stop()))
+ break;
+ must_reset = try_to_freeze();
+ if (t > 0 && !must_reset)
+ continue;
+
+ mutex_lock(&hotkey_thread_data_mutex);
+ if (must_reset || hotkey_config_change != change_detector) {
+ /* forget old state on thaw or config change */
+ si = so;
+ t = 0;
+ change_detector = hotkey_config_change;
+ }
+ mask = hotkey_source_mask & hotkey_mask;
+ mutex_unlock(&hotkey_thread_data_mutex);
+
+ if (likely(mask)) {
+ hotkey_read_nvram(&s[si], mask);
+ if (likely(si != so)) {
+ hotkey_compare_and_issue_event(&s[so], &s[si],
+ mask);
+ }
+ }
+
+ so = si;
+ si ^= 1;
+ }
+
+ exit:
+ mutex_unlock(&hotkey_thread_mutex);
+ return 0;
+ }
+
+ static void hotkey_poll_stop_sync(void)
+ {
+ if (tpacpi_hotkey_task) {
+ if (frozen(tpacpi_hotkey_task) ||
+ freezing(tpacpi_hotkey_task))
+ thaw_process(tpacpi_hotkey_task);
+
+ kthread_stop(tpacpi_hotkey_task);
+ tpacpi_hotkey_task = NULL;
+ mutex_lock(&hotkey_thread_mutex);
+ /* at this point, the thread did exit */
+ mutex_unlock(&hotkey_thread_mutex);
+ }
+ }
+
+ /* call with hotkey_mutex held */
+ static void hotkey_poll_setup(int may_warn)
+ {
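+ 	/* poll only when some enabled key actually needs NVRAM polling,
+ 	 * the poll frequency is non-zero, and somebody is listening:
+ 	 * either the input device is open or legacy procfs event
+ 	 * reporting (hotkey_report_mode < 2) is active */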
+ if ((hotkey_source_mask & hotkey_mask) != 0 &&
+ hotkey_poll_freq > 0 &&
+ (tpacpi_inputdev->users > 0 || hotkey_report_mode < 2)) {
+ if (!tpacpi_hotkey_task) {
+ tpacpi_hotkey_task = kthread_run(hotkey_kthread,
+ NULL, TPACPI_NVRAM_KTHREAD_NAME);
+ if (IS_ERR(tpacpi_hotkey_task)) {
+ tpacpi_hotkey_task = NULL;
+ printk(TPACPI_ERR
+ "could not create kernel thread "
+ "for hotkey polling\n");
+ }
+ }
+ } else {
+ hotkey_poll_stop_sync();
+ if (may_warn &&
+ hotkey_source_mask != 0 && hotkey_poll_freq == 0) {
+ printk(TPACPI_NOTICE
+ "hot keys 0x%08x require polling, "
+ "which is currently disabled\n",
+ hotkey_source_mask);
+ }
+ }
+ }
+
+ static void hotkey_poll_setup_safe(int may_warn)
+ {
+ mutex_lock(&hotkey_mutex);
+ hotkey_poll_setup(may_warn);
+ mutex_unlock(&hotkey_mutex);
+ }
+
+ #else /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
+ static void hotkey_poll_setup_safe(int __unused)
+ {
+ }
+
+ #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
+ static int hotkey_inputdev_open(struct input_dev *dev)
+ {
+ switch (tpacpi_lifecycle) {
+ case TPACPI_LIFE_INIT:
+ /*
+ * hotkey_init will call hotkey_poll_setup_safe
+ * at the appropriate moment
+ */
+ return 0;
+ case TPACPI_LIFE_EXITING:
+ return -EBUSY;
+ case TPACPI_LIFE_RUNNING:
+ hotkey_poll_setup_safe(0);
+ return 0;
+ }
+
+ /* Should only happen if tpacpi_lifecycle is corrupt */
+ BUG();
+ return -EBUSY;
+ }
+
+ static void hotkey_inputdev_close(struct input_dev *dev)
+ {
+ /* disable hotkey polling when possible */
+ if (tpacpi_lifecycle == TPACPI_LIFE_RUNNING)
+ hotkey_poll_setup_safe(0);
+ }
+
+ /* sysfs hotkey enable ------------------------------------------------- */
+ static ssize_t hotkey_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res, status;
+
+ res = hotkey_status_get(&status);
+ if (res)
+ return res;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", status);
+ }
+
+ static ssize_t hotkey_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+ int res;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ res = hotkey_status_set(t);
+
+ return (res) ? res : count;
+ }
+
+ static struct device_attribute dev_attr_hotkey_enable =
+ __ATTR(hotkey_enable, S_IWUSR | S_IRUGO,
+ hotkey_enable_show, hotkey_enable_store);
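+ /* e.g. "echo 0 > /sys/devices/platform/thinkpad_acpi/hotkey_enable"
+  * disables hot key reporting through hotkey_status_set() */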
+
+ /* sysfs hotkey mask --------------------------------------------------- */
+ static ssize_t hotkey_mask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res;
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+ res = hotkey_mask_get();
+ mutex_unlock(&hotkey_mutex);
+
+ 	return (res) ?
+ 		res : snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_mask);
+ }
+
+ static ssize_t hotkey_mask_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+ int res;
+
+ if (parse_strtoul(buf, 0xffffffffUL, &t))
+ return -EINVAL;
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+
+ res = hotkey_mask_set(t);
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ hotkey_poll_setup(1);
+ #endif
+
+ mutex_unlock(&hotkey_mutex);
+
+ return (res) ? res : count;
+ }
+
+ static struct device_attribute dev_attr_hotkey_mask =
+ __ATTR(hotkey_mask, S_IWUSR | S_IRUGO,
+ hotkey_mask_show, hotkey_mask_store);
+
+ /* sysfs hotkey bios_enabled ------------------------------------------- */
+ static ssize_t hotkey_bios_enabled_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_orig_status);
+ }
+
+ static struct device_attribute dev_attr_hotkey_bios_enabled =
+ __ATTR(hotkey_bios_enabled, S_IRUGO, hotkey_bios_enabled_show, NULL);
+
+ /* sysfs hotkey bios_mask ---------------------------------------------- */
+ static ssize_t hotkey_bios_mask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_orig_mask);
+ }
+
+ static struct device_attribute dev_attr_hotkey_bios_mask =
+ __ATTR(hotkey_bios_mask, S_IRUGO, hotkey_bios_mask_show, NULL);
+
+ /* sysfs hotkey all_mask ----------------------------------------------- */
+ static ssize_t hotkey_all_mask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+ hotkey_all_mask | hotkey_source_mask);
+ }
+
+ static struct device_attribute dev_attr_hotkey_all_mask =
+ __ATTR(hotkey_all_mask, S_IRUGO, hotkey_all_mask_show, NULL);
+
+ /* sysfs hotkey recommended_mask --------------------------------------- */
+ static ssize_t hotkey_recommended_mask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+ (hotkey_all_mask | hotkey_source_mask)
+ & ~hotkey_reserved_mask);
+ }
+
+ static struct device_attribute dev_attr_hotkey_recommended_mask =
+ __ATTR(hotkey_recommended_mask, S_IRUGO,
+ hotkey_recommended_mask_show, NULL);
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+
+ /* sysfs hotkey hotkey_source_mask ------------------------------------- */
+ static ssize_t hotkey_source_mask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_source_mask);
+ }
+
+ static ssize_t hotkey_source_mask_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 0xffffffffUL, &t) ||
+ ((t & ~TPACPI_HKEY_NVRAM_KNOWN_MASK) != 0))
+ return -EINVAL;
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+
+ HOTKEY_CONFIG_CRITICAL_START
+ hotkey_source_mask = t;
+ HOTKEY_CONFIG_CRITICAL_END
+
+ hotkey_poll_setup(1);
+
+ mutex_unlock(&hotkey_mutex);
+
+ return count;
+ }
+
+ static struct device_attribute dev_attr_hotkey_source_mask =
+ __ATTR(hotkey_source_mask, S_IWUSR | S_IRUGO,
+ hotkey_source_mask_show, hotkey_source_mask_store);
+
+ /* sysfs hotkey hotkey_poll_freq --------------------------------------- */
+ static ssize_t hotkey_poll_freq_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_poll_freq);
+ }
+
+ static ssize_t hotkey_poll_freq_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 25, &t))
+ return -EINVAL;
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+
+ hotkey_poll_freq = t;
+
+ hotkey_poll_setup(1);
+ mutex_unlock(&hotkey_mutex);
+
+ return count;
+ }
+
+ static struct device_attribute dev_attr_hotkey_poll_freq =
+ __ATTR(hotkey_poll_freq, S_IWUSR | S_IRUGO,
+ hotkey_poll_freq_show, hotkey_poll_freq_store);
+
+ #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
+
+ /* sysfs hotkey radio_sw (pollable) ------------------------------------ */
+ static ssize_t hotkey_radio_sw_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res, s;
+ res = hotkey_get_wlsw(&s);
+ if (res < 0)
+ return res;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!s);
+ }
+
+ static struct device_attribute dev_attr_hotkey_radio_sw =
+ __ATTR(hotkey_radio_sw, S_IRUGO, hotkey_radio_sw_show, NULL);
+
+ static void hotkey_radio_sw_notify_change(void)
+ {
+ if (tp_features.hotkey_wlsw)
+ sysfs_notify(&tpacpi_pdev->dev.kobj, NULL,
+ "hotkey_radio_sw");
+ }
+
+ /* sysfs hotkey tablet mode (pollable) --------------------------------- */
+ static ssize_t hotkey_tablet_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res, s;
+ res = hotkey_get_tablet_mode(&s);
+ if (res < 0)
+ return res;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", !!s);
+ }
+
+ static struct device_attribute dev_attr_hotkey_tablet_mode =
+ __ATTR(hotkey_tablet_mode, S_IRUGO, hotkey_tablet_mode_show, NULL);
+
+ static void hotkey_tablet_mode_notify_change(void)
+ {
+ if (tp_features.hotkey_tablet)
+ sysfs_notify(&tpacpi_pdev->dev.kobj, NULL,
+ "hotkey_tablet_mode");
+ }
+
+ /* sysfs hotkey report_mode -------------------------------------------- */
+ static ssize_t hotkey_report_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ (hotkey_report_mode != 0) ? hotkey_report_mode : 1);
+ }
+
+ static struct device_attribute dev_attr_hotkey_report_mode =
+ __ATTR(hotkey_report_mode, S_IRUGO, hotkey_report_mode_show, NULL);
+
+ /* sysfs wakeup reason (pollable) -------------------------------------- */
+ static ssize_t hotkey_wakeup_reason_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason);
+ }
+
+ static struct device_attribute dev_attr_hotkey_wakeup_reason =
+ __ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL);
+
+ static void hotkey_wakeup_reason_notify_change(void)
+ {
+ if (tp_features.hotkey_mask)
+ sysfs_notify(&tpacpi_pdev->dev.kobj, NULL,
+ "wakeup_reason");
+ }
+
+ /* sysfs wakeup hotunplug_complete (pollable) -------------------------- */
+ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack);
+ }
+
+ static struct device_attribute dev_attr_hotkey_wakeup_hotunplug_complete =
+ __ATTR(wakeup_hotunplug_complete, S_IRUGO,
+ hotkey_wakeup_hotunplug_complete_show, NULL);
+
+ static void hotkey_wakeup_hotunplug_complete_notify_change(void)
+ {
+ if (tp_features.hotkey_mask)
+ sysfs_notify(&tpacpi_pdev->dev.kobj, NULL,
+ "wakeup_hotunplug_complete");
+ }
+
+ /* --------------------------------------------------------------------- */
+
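+ /* With NVRAM polling the mask attributes are always meaningful, so
+  * they are registered unconditionally; without it they are deferred
+  * and only added by hotkey_init() when the firmware supports masks */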
+ static struct attribute *hotkey_attributes[] __initdata = {
+ &dev_attr_hotkey_enable.attr,
+ &dev_attr_hotkey_bios_enabled.attr,
+ &dev_attr_hotkey_report_mode.attr,
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ &dev_attr_hotkey_mask.attr,
+ &dev_attr_hotkey_all_mask.attr,
+ &dev_attr_hotkey_recommended_mask.attr,
+ &dev_attr_hotkey_source_mask.attr,
+ &dev_attr_hotkey_poll_freq.attr,
+ #endif
+ };
+
+ static struct attribute *hotkey_mask_attributes[] __initdata = {
+ &dev_attr_hotkey_bios_mask.attr,
+ #ifndef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ &dev_attr_hotkey_mask.attr,
+ &dev_attr_hotkey_all_mask.attr,
+ &dev_attr_hotkey_recommended_mask.attr,
+ #endif
+ &dev_attr_hotkey_wakeup_reason.attr,
+ &dev_attr_hotkey_wakeup_hotunplug_complete.attr,
+ };
+
+ static void bluetooth_update_rfk(void);
+ static void wan_update_rfk(void);
+ static void uwb_update_rfk(void);
+ static void tpacpi_send_radiosw_update(void)
+ {
+ int wlsw;
+
+ /* Sync these BEFORE sending any rfkill events */
+ if (tp_features.bluetooth)
+ bluetooth_update_rfk();
+ if (tp_features.wan)
+ wan_update_rfk();
+ if (tp_features.uwb)
+ uwb_update_rfk();
+
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
+ mutex_lock(&tpacpi_inputdev_send_mutex);
+
+ input_report_switch(tpacpi_inputdev,
+ SW_RFKILL_ALL, !!wlsw);
+ input_sync(tpacpi_inputdev);
+
+ mutex_unlock(&tpacpi_inputdev_send_mutex);
+ }
+ hotkey_radio_sw_notify_change();
+ }
+
+ static void hotkey_exit(void)
+ {
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ hotkey_poll_stop_sync();
+ #endif
+
+ if (hotkey_dev_attributes)
+ delete_attr_set(hotkey_dev_attributes, &tpacpi_pdev->dev.kobj);
+
+ kfree(hotkey_keycode_map);
+
+ if (tp_features.hotkey) {
+ dbg_printk(TPACPI_DBG_EXIT,
+ "restoring original hot key mask\n");
+ /* no short-circuit boolean operator below! */
+ if ((hotkey_mask_set(hotkey_orig_mask) |
+ hotkey_status_set(hotkey_orig_status)) != 0)
+ printk(TPACPI_ERR
+ "failed to restore hot key mask "
+ "to BIOS defaults\n");
+ }
+ }
+
+ static int __init hotkey_init(struct ibm_init_struct *iibm)
+ {
+ /* Requirements for changing the default keymaps:
+ *
+ * 1. Many of the keys are mapped to KEY_RESERVED for very
+ * good reasons. Do not change them unless you have deep
+ * knowledge on the IBM and Lenovo ThinkPad firmware for
+ * the various ThinkPad models. The driver behaves
+ * differently for KEY_RESERVED: such keys have their
+ * hot key mask *unset* in mask_recommended, and also
+ * in the initial hot key mask programmed into the
+ * firmware at driver load time, which means the firm-
+ * ware may react very differently if you change them to
+ * something else;
+ *
+ * 2. You must be subscribed to the linux-thinkpad and
+ * ibm-acpi-devel mailing lists, and you should read the
+ * list archives since 2007 if you want to change the
+ * keymaps. This requirement exists so that you will
+ * know the past history of problems with the thinkpad-
+ * acpi driver keymaps, and also that you will be
+ * listening to any bug reports;
+ *
+ 	 * 3. Do not send thinkpad-acpi specific patches directly
+ 	 *    for merging, *ever*. Send them to the linux-acpi
+ 	 *    mailing list for comments. Merging is to be done only
+ * through acpi-test and the ACPI maintainer.
+ *
+ * If the above is too much to ask, don't change the keymap.
+ * Ask the thinkpad-acpi maintainer to do it, instead.
+ */
+ static u16 ibm_keycode_map[] __initdata = {
+ /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */
+ KEY_FN_F1, KEY_FN_F2, KEY_COFFEE, KEY_SLEEP,
+ KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8,
+ KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND,
+
+ /* Scan codes 0x0C to 0x1F: Other ACPI HKEY hot keys */
+ KEY_UNKNOWN, /* 0x0C: FN+BACKSPACE */
+ KEY_UNKNOWN, /* 0x0D: FN+INSERT */
+ KEY_UNKNOWN, /* 0x0E: FN+DELETE */
+
+ /* brightness: firmware always reacts to them, unless
+ * X.org did some tricks in the radeon BIOS scratch
+ * registers of *some* models */
+ KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */
+ KEY_RESERVED, /* 0x10: FN+END (brightness down) */
+
+ 		/* Thinklight: firmware always reacts to it */
+ KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */
+
+ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */
+ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */
+
+ 		/* Volume: firmware always reacts to it and reprograms
+ * the built-in *extra* mixer. Never map it to control
+ * another mixer by default. */
+ KEY_RESERVED, /* 0x14: VOLUME UP */
+ KEY_RESERVED, /* 0x15: VOLUME DOWN */
+ KEY_RESERVED, /* 0x16: MUTE */
+
+ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */
+
+ /* (assignments unknown, please report if found) */
+ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+ };
+ static u16 lenovo_keycode_map[] __initdata = {
+ /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */
+ KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP,
+ KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8,
+ KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND,
+
+ /* Scan codes 0x0C to 0x1F: Other ACPI HKEY hot keys */
+ KEY_UNKNOWN, /* 0x0C: FN+BACKSPACE */
+ KEY_UNKNOWN, /* 0x0D: FN+INSERT */
+ KEY_UNKNOWN, /* 0x0E: FN+DELETE */
+
+ /* These either have to go through ACPI video, or
+ * act like in the IBM ThinkPads, so don't ever
+ * enable them by default */
+ KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */
+ KEY_RESERVED, /* 0x10: FN+END (brightness down) */
+
+ KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */
+
+ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */
+ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */
+
+ /* Volume: z60/z61, T60 (BIOS version?): firmware always
+ 		 * reacts to it and reprograms the built-in *extra* mixer.
+ * Never map it to control another mixer by default.
+ *
+ 		 * T60?, T61, R60?, R61: firmware and EC try to send
+ * these over the regular keyboard, so these are no-ops,
+ * but there are still weird bugs re. MUTE, so do not
+ * change unless you get test reports from all Lenovo
+ * models. May cause the BIOS to interfere with the
+ * HDA mixer.
+ */
+ KEY_RESERVED, /* 0x14: VOLUME UP */
+ KEY_RESERVED, /* 0x15: VOLUME DOWN */
+ KEY_RESERVED, /* 0x16: MUTE */
+
+ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */
+
+ /* (assignments unknown, please report if found) */
+ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+ };
+
+ #define TPACPI_HOTKEY_MAP_LEN ARRAY_SIZE(ibm_keycode_map)
+ #define TPACPI_HOTKEY_MAP_SIZE sizeof(ibm_keycode_map)
+ #define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(ibm_keycode_map[0])
+
+ int res, i;
+ int status;
+ int hkeyv;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing hotkey subdriver\n");
+
+ BUG_ON(!tpacpi_inputdev);
+ BUG_ON(tpacpi_inputdev->open != NULL ||
+ tpacpi_inputdev->close != NULL);
+
+ TPACPI_ACPIHANDLE_INIT(hkey);
+ mutex_init(&hotkey_mutex);
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ mutex_init(&hotkey_thread_mutex);
+ mutex_init(&hotkey_thread_data_mutex);
+ #endif
+
+ /* hotkey not supported on 570 */
+ tp_features.hotkey = hkey_handle != NULL;
+
+ vdbg_printk(TPACPI_DBG_INIT, "hotkeys are %s\n",
+ str_supported(tp_features.hotkey));
+
+ if (!tp_features.hotkey)
+ return 1;
+
+ tpacpi_disable_brightness_delay();
+
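+ 	/* 13 = worst-case attribute count (with hotkey polling): the 8
+ 	 * base attributes plus the 3 mask attributes, the radio switch
+ 	 * and the tablet switch */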
+ hotkey_dev_attributes = create_attr_set(13, NULL);
+ if (!hotkey_dev_attributes)
+ return -ENOMEM;
+ res = add_many_to_attr_set(hotkey_dev_attributes,
+ hotkey_attributes,
+ ARRAY_SIZE(hotkey_attributes));
+ if (res)
+ goto err_exit;
+
+ /* mask not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p,
+ A30, R30, R31, T20-22, X20-21, X22-24. Detected by checking
+ for HKEY interface version 0x100 */
+ if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
+ if ((hkeyv >> 8) != 1) {
+ printk(TPACPI_ERR "unknown version of the "
+ "HKEY interface: 0x%x\n", hkeyv);
+ printk(TPACPI_ERR "please report this to %s\n",
+ TPACPI_MAIL);
+ } else {
+ /*
+ * MHKV 0x100 in A31, R40, R40e,
+ * T4x, X31, and later
+ */
+ tp_features.hotkey_mask = 1;
+ }
+ }
+
+ vdbg_printk(TPACPI_DBG_INIT, "hotkey masks are %s\n",
+ str_supported(tp_features.hotkey_mask));
+
+ if (tp_features.hotkey_mask) {
+ if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
+ "MHKA", "qd")) {
+ printk(TPACPI_ERR
+ "missing MHKA handler, "
+ "please report this to %s\n",
+ TPACPI_MAIL);
+ /* FN+F12, FN+F4, FN+F3 */
+ hotkey_all_mask = 0x080cU;
+ }
+ }
+
+ /* hotkey_source_mask *must* be zero for
+ * the first hotkey_mask_get */
+ res = hotkey_status_get(&hotkey_orig_status);
+ if (res)
+ goto err_exit;
+
+ if (tp_features.hotkey_mask) {
+ res = hotkey_mask_get();
+ if (res)
+ goto err_exit;
+
+ hotkey_orig_mask = hotkey_mask;
+ res = add_many_to_attr_set(
+ hotkey_dev_attributes,
+ hotkey_mask_attributes,
+ ARRAY_SIZE(hotkey_mask_attributes));
+ if (res)
+ goto err_exit;
+ }
+
+ #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+ if (tp_features.hotkey_mask) {
+ hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK
+ & ~hotkey_all_mask;
+ } else {
+ hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK;
+ }
+
+ vdbg_printk(TPACPI_DBG_INIT,
+ "hotkey source mask 0x%08x, polling freq %d\n",
+ hotkey_source_mask, hotkey_poll_freq);
+ #endif
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_wlswemul) {
+ tp_features.hotkey_wlsw = 1;
+ printk(TPACPI_INFO
+ "radio switch emulation enabled\n");
+ } else
+ #endif
+ /* Not all thinkpads have a hardware radio switch */
+ if (acpi_evalf(hkey_handle, &status, "WLSW", "qd")) {
+ tp_features.hotkey_wlsw = 1;
+ printk(TPACPI_INFO
+ "radio switch found; radios are %s\n",
+ enabled(status, 0));
+ }
+ if (tp_features.hotkey_wlsw)
+ res = add_to_attr_set(hotkey_dev_attributes,
+ &dev_attr_hotkey_radio_sw.attr);
+
+ /* For X41t, X60t, X61t Tablets... */
+ if (!res && acpi_evalf(hkey_handle, &status, "MHKG", "qd")) {
+ tp_features.hotkey_tablet = 1;
+ printk(TPACPI_INFO
+ "possible tablet mode switch found; "
+ "ThinkPad in %s mode\n",
+ 			(status & TP_HOTKEY_TABLET_MASK) ?
+ "tablet" : "laptop");
+ res = add_to_attr_set(hotkey_dev_attributes,
+ &dev_attr_hotkey_tablet_mode.attr);
+ }
+
+ if (!res)
+ res = register_attr_set_with_sysfs(
+ hotkey_dev_attributes,
+ &tpacpi_pdev->dev.kobj);
+ if (res)
+ goto err_exit;
+
+ /* Set up key map */
+
+ hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE,
+ GFP_KERNEL);
+ if (!hotkey_keycode_map) {
+ printk(TPACPI_ERR
+ "failed to allocate memory for key map\n");
+ res = -ENOMEM;
+ goto err_exit;
+ }
+
+ if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) {
+ dbg_printk(TPACPI_DBG_INIT,
+ "using Lenovo default hot key map\n");
+ memcpy(hotkey_keycode_map, &lenovo_keycode_map,
+ TPACPI_HOTKEY_MAP_SIZE);
+ } else {
+ dbg_printk(TPACPI_DBG_INIT,
+ "using IBM default hot key map\n");
+ memcpy(hotkey_keycode_map, &ibm_keycode_map,
+ TPACPI_HOTKEY_MAP_SIZE);
+ }
+
+ set_bit(EV_KEY, tpacpi_inputdev->evbit);
+ set_bit(EV_MSC, tpacpi_inputdev->evbit);
+ set_bit(MSC_SCAN, tpacpi_inputdev->mscbit);
+ tpacpi_inputdev->keycodesize = TPACPI_HOTKEY_MAP_TYPESIZE;
+ tpacpi_inputdev->keycodemax = TPACPI_HOTKEY_MAP_LEN;
+ tpacpi_inputdev->keycode = hotkey_keycode_map;
+ for (i = 0; i < TPACPI_HOTKEY_MAP_LEN; i++) {
+ if (hotkey_keycode_map[i] != KEY_RESERVED) {
+ set_bit(hotkey_keycode_map[i],
+ tpacpi_inputdev->keybit);
+ } else {
+ if (i < sizeof(hotkey_reserved_mask)*8)
+ hotkey_reserved_mask |= 1 << i;
+ }
+ }
+
+ if (tp_features.hotkey_wlsw) {
+ set_bit(EV_SW, tpacpi_inputdev->evbit);
+ set_bit(SW_RFKILL_ALL, tpacpi_inputdev->swbit);
+ }
+ if (tp_features.hotkey_tablet) {
+ set_bit(EV_SW, tpacpi_inputdev->evbit);
+ set_bit(SW_TABLET_MODE, tpacpi_inputdev->swbit);
+ }
+
+ /* Do not issue duplicate brightness change events to
+ * userspace */
+ if (!tp_features.bright_acpimode)
+ /* update bright_acpimode... */
+ tpacpi_check_std_acpi_brightness_support();
+
+ if (tp_features.bright_acpimode) {
+ printk(TPACPI_INFO
+ "This ThinkPad has standard ACPI backlight "
+ "brightness control, supported by the ACPI "
+ "video driver\n");
+ printk(TPACPI_NOTICE
+ "Disabling thinkpad-acpi brightness events "
+ "by default...\n");
+
+ /* The hotkey_reserved_mask change below is not
+ * necessary while the keys are at KEY_RESERVED in the
+ * default map, but better safe than sorry, leave it
+ * here as a marker of what we have to do, especially
+ * when we finally become able to set this at runtime
+ 		 * in response to X.org requests */
+ hotkey_reserved_mask |=
+ (1 << TP_ACPI_HOTKEYSCAN_FNHOME)
+ | (1 << TP_ACPI_HOTKEYSCAN_FNEND);
+ }
+
+ dbg_printk(TPACPI_DBG_INIT, "enabling hot key handling\n");
+ res = hotkey_status_set(1);
+ if (res) {
+ hotkey_exit();
+ return res;
+ }
+ res = hotkey_mask_set(((hotkey_all_mask | hotkey_source_mask)
+ & ~hotkey_reserved_mask)
+ | hotkey_orig_mask);
+ if (res < 0 && res != -ENXIO) {
+ hotkey_exit();
+ return res;
+ }
+
+ dbg_printk(TPACPI_DBG_INIT,
+ "legacy hot key reporting over procfs %s\n",
+ (hotkey_report_mode < 2) ?
+ "enabled" : "disabled");
+
+ tpacpi_inputdev->open = &hotkey_inputdev_open;
+ tpacpi_inputdev->close = &hotkey_inputdev_close;
+
+ hotkey_poll_setup_safe(1);
+ tpacpi_send_radiosw_update();
+ tpacpi_input_send_tabletsw();
+
+ return 0;
+
+ err_exit:
+ delete_attr_set(hotkey_dev_attributes, &tpacpi_pdev->dev.kobj);
+ hotkey_dev_attributes = NULL;
+
+ 	return (res < 0) ? res : 1;
+ }
+
++static struct backlight_device *ibm_backlight_device;
++static int brightness_update_status(struct backlight_device *bd);
++
+ static bool hotkey_notify_hotkey(const u32 hkey,
+ bool *send_acpi_ev,
+ bool *ignore_acpi_ev)
+ {
+ /* 0x1000-0x1FFF: key presses */
+ unsigned int scancode = hkey & 0xfff;
+ *send_acpi_ev = true;
+ *ignore_acpi_ev = false;
+
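++	/* hkey 0x1010 is Fn+Home (brightness up) and 0x1011 is Fn+End
++	 * (brightness down); when tp_features.bright_igdmode is set,
++	 * step the cached backlight level and push it to the hardware
++	 * with brightness_update_status() */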
++	if (tp_features.bright_igdmode && ibm_backlight_device) {
++		/* ToDo: Is there an already defined key? */
++		if (hkey == 0x1011) {
++			if (ibm_backlight_device->props.brightness > 0)
++				ibm_backlight_device->props.brightness--;
++		} else if (hkey == 0x1010) {
++			if (ibm_backlight_device->props.brightness <
++			    ibm_backlight_device->props.max_brightness)
++				ibm_backlight_device->props.brightness++;
++		}
++		brightness_update_status(ibm_backlight_device);
++	}
++
+ if (scancode > 0 && scancode < 0x21) {
+ scancode--;
+ if (!(hotkey_source_mask & (1 << scancode))) {
+ tpacpi_input_send_key(scancode);
+ *send_acpi_ev = false;
+ } else {
+ *ignore_acpi_ev = true;
+ }
+ return true;
+ }
+ return false;
+ }
+
+ static bool hotkey_notify_wakeup(const u32 hkey,
+ bool *send_acpi_ev,
+ bool *ignore_acpi_ev)
+ {
+ /* 0x2000-0x2FFF: Wakeup reason */
+ *send_acpi_ev = true;
+ *ignore_acpi_ev = false;
+
+ switch (hkey) {
+ case 0x2304: /* suspend, undock */
+ case 0x2404: /* hibernation, undock */
+ hotkey_wakeup_reason = TP_ACPI_WAKEUP_UNDOCK;
+ *ignore_acpi_ev = true;
+ break;
+
+ case 0x2305: /* suspend, bay eject */
+ case 0x2405: /* hibernation, bay eject */
+ hotkey_wakeup_reason = TP_ACPI_WAKEUP_BAYEJ;
+ *ignore_acpi_ev = true;
+ break;
+
+ case 0x2313: /* Battery on critical low level (S3) */
+ case 0x2413: /* Battery on critical low level (S4) */
+ printk(TPACPI_ALERT
+ "EMERGENCY WAKEUP: battery almost empty\n");
+ /* how to auto-heal: */
+ /* 2313: woke up from S3, go to S4/S5 */
+ /* 2413: woke up from S4, go to S5 */
+ break;
+
+ default:
+ return false;
+ }
+
+ if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) {
+ printk(TPACPI_INFO
+ "woke up due to a hot-unplug "
+ "request...\n");
+ hotkey_wakeup_reason_notify_change();
+ }
+ return true;
+ }
+
+ static bool hotkey_notify_usrevent(const u32 hkey,
+ bool *send_acpi_ev,
+ bool *ignore_acpi_ev)
+ {
+ /* 0x5000-0x5FFF: human interface helpers */
+ *send_acpi_ev = true;
+ *ignore_acpi_ev = false;
+
+ switch (hkey) {
+ case 0x5010: /* Lenovo new BIOS: brightness changed */
+ case 0x500b: /* X61t: tablet pen inserted into bay */
+ case 0x500c: /* X61t: tablet pen removed from bay */
+ return true;
+
+ case 0x5009: /* X41t-X61t: swivel up (tablet mode) */
+ case 0x500a: /* X41t-X61t: swivel down (normal mode) */
+ tpacpi_input_send_tabletsw();
+ hotkey_tablet_mode_notify_change();
+ *send_acpi_ev = false;
+ return true;
+
+ case 0x5001:
+ case 0x5002:
+ /* LID switch events. Do not propagate */
+ *ignore_acpi_ev = true;
+ return true;
+
+ default:
+ return false;
+ }
+ }
+
+ static bool hotkey_notify_thermal(const u32 hkey,
+ bool *send_acpi_ev,
+ bool *ignore_acpi_ev)
+ {
+ /* 0x6000-0x6FFF: thermal alarms */
+ *send_acpi_ev = true;
+ *ignore_acpi_ev = false;
+
+ switch (hkey) {
+ case 0x6011:
+ printk(TPACPI_CRIT
+ "THERMAL ALARM: battery is too hot!\n");
+ /* recommended action: warn user through gui */
+ return true;
+ case 0x6012:
+ printk(TPACPI_ALERT
+ "THERMAL EMERGENCY: battery is extremely hot!\n");
+ /* recommended action: immediate sleep/hibernate */
+ return true;
+ case 0x6021:
+ printk(TPACPI_CRIT
+ "THERMAL ALARM: "
+ "a sensor reports something is too hot!\n");
+ /* recommended action: warn user through gui, that */
+ /* some internal component is too hot */
+ return true;
+ case 0x6022:
+ printk(TPACPI_ALERT
+ "THERMAL EMERGENCY: "
+ "a sensor reports something is extremely hot!\n");
+ /* recommended action: immediate sleep/hibernate */
+ return true;
+ case 0x6030:
+ printk(TPACPI_INFO
+ "EC reports that Thermal Table has changed\n");
+ /* recommended action: do nothing, we don't have
+ * Lenovo ATM information */
+ return true;
+ default:
+ printk(TPACPI_ALERT
+ "THERMAL ALERT: unknown thermal alarm received\n");
+ return false;
+ }
+ }
+
+ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
+ {
+ u32 hkey;
+ bool send_acpi_ev;
+ bool ignore_acpi_ev;
+ bool known_ev;
+
+ if (event != 0x80) {
+ printk(TPACPI_ERR
+ "unknown HKEY notification event %d\n", event);
+ /* forward it to userspace, maybe it knows how to handle it */
+ acpi_bus_generate_netlink_event(
+ ibm->acpi->device->pnp.device_class,
+ dev_name(&ibm->acpi->device->dev),
+ event, 0);
+ return;
+ }
+
+ while (1) {
+ if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
+ printk(TPACPI_ERR "failed to retrieve HKEY event\n");
+ return;
+ }
+
+ if (hkey == 0) {
+ /* queue empty */
+ return;
+ }
+
+ send_acpi_ev = true;
+ ignore_acpi_ev = false;
+
+ switch (hkey >> 12) {
+ case 1:
+ /* 0x1000-0x1FFF: key presses */
+ known_ev = hotkey_notify_hotkey(hkey, &send_acpi_ev,
+ &ignore_acpi_ev);
+ break;
+ case 2:
+ /* 0x2000-0x2FFF: Wakeup reason */
+ known_ev = hotkey_notify_wakeup(hkey, &send_acpi_ev,
+ &ignore_acpi_ev);
+ break;
+ case 3:
+ /* 0x3000-0x3FFF: bay-related wakeups */
+ if (hkey == 0x3003) {
+ hotkey_autosleep_ack = 1;
+ printk(TPACPI_INFO
+ "bay ejected\n");
+ hotkey_wakeup_hotunplug_complete_notify_change();
+ known_ev = true;
+ } else {
+ known_ev = false;
+ }
+ break;
+ case 4:
+ /* 0x4000-0x4FFF: dock-related wakeups */
+ if (hkey == 0x4003) {
+ hotkey_autosleep_ack = 1;
+ printk(TPACPI_INFO
+ "undocked\n");
+ hotkey_wakeup_hotunplug_complete_notify_change();
+ known_ev = true;
+ } else {
+ known_ev = false;
+ }
+ break;
+ case 5:
+ /* 0x5000-0x5FFF: human interface helpers */
+ known_ev = hotkey_notify_usrevent(hkey, &send_acpi_ev,
+ &ignore_acpi_ev);
+ break;
+ case 6:
+ /* 0x6000-0x6FFF: thermal alarms */
+ known_ev = hotkey_notify_thermal(hkey, &send_acpi_ev,
+ &ignore_acpi_ev);
+ break;
+ case 7:
+ /* 0x7000-0x7FFF: misc */
+ if (tp_features.hotkey_wlsw && hkey == 0x7000) {
+ tpacpi_send_radiosw_update();
+ 				send_acpi_ev = false;
+ known_ev = true;
+ break;
+ }
+ /* fallthrough to default */
+ default:
+ known_ev = false;
+ }
+ if (!known_ev) {
+ printk(TPACPI_NOTICE
+ "unhandled HKEY event 0x%04x\n", hkey);
+ printk(TPACPI_NOTICE
+ "please report the conditions when this "
+ "event happened to %s\n", TPACPI_MAIL);
+ }
+
+ /* Legacy events */
+ if (!ignore_acpi_ev &&
+ (send_acpi_ev || hotkey_report_mode < 2)) {
+ acpi_bus_generate_proc_event(ibm->acpi->device,
+ event, hkey);
+ }
+
+ /* netlink events */
+ if (!ignore_acpi_ev && send_acpi_ev) {
+ acpi_bus_generate_netlink_event(
+ ibm->acpi->device->pnp.device_class,
+ dev_name(&ibm->acpi->device->dev),
+ event, hkey);
+ }
+ }
+ }
+
+ static void hotkey_suspend(pm_message_t state)
+ {
+ /* Do these on suspend, we get the events on early resume! */
+ hotkey_wakeup_reason = TP_ACPI_WAKEUP_NONE;
+ hotkey_autosleep_ack = 0;
+ }
+
+ static void hotkey_resume(void)
+ {
+ tpacpi_disable_brightness_delay();
+
+ if (hotkey_mask_get())
+ printk(TPACPI_ERR
+ "error while trying to read hot key mask "
+ "from firmware\n");
+ tpacpi_send_radiosw_update();
+ hotkey_tablet_mode_notify_change();
+ hotkey_wakeup_reason_notify_change();
+ hotkey_wakeup_hotunplug_complete_notify_change();
+ hotkey_poll_setup_safe(0);
+ }
+
+ /* procfs -------------------------------------------------------------- */
+ static int hotkey_read(char *p)
+ {
+ int res, status;
+ int len = 0;
+
+ if (!tp_features.hotkey) {
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ return len;
+ }
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+ res = hotkey_status_get(&status);
+ if (!res)
+ res = hotkey_mask_get();
+ mutex_unlock(&hotkey_mutex);
+ if (res)
+ return res;
+
+ len += sprintf(p + len, "status:\t\t%s\n", enabled(status, 0));
+ if (tp_features.hotkey_mask) {
+ len += sprintf(p + len, "mask:\t\t0x%08x\n", hotkey_mask);
+ len += sprintf(p + len,
+ "commands:\tenable, disable, reset, <mask>\n");
+ } else {
+ len += sprintf(p + len, "mask:\t\tnot supported\n");
+ len += sprintf(p + len, "commands:\tenable, disable, reset\n");
+ }
+
+ return len;
+ }
+
+ static int hotkey_write(char *buf)
+ {
+ int res, status;
+ u32 mask;
+ char *cmd;
+
+ if (!tp_features.hotkey)
+ return -ENODEV;
+
+ if (mutex_lock_killable(&hotkey_mutex))
+ return -ERESTARTSYS;
+
+ status = -1;
+ mask = hotkey_mask;
+
+ res = 0;
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "enable") == 0) {
+ status = 1;
+ } else if (strlencmp(cmd, "disable") == 0) {
+ status = 0;
+ } else if (strlencmp(cmd, "reset") == 0) {
+ status = hotkey_orig_status;
+ mask = hotkey_orig_mask;
+ } else if (sscanf(cmd, "0x%x", &mask) == 1) {
+ /* mask set */
+ } else if (sscanf(cmd, "%x", &mask) == 1) {
+ /* mask set */
+ } else {
+ res = -EINVAL;
+ goto errexit;
+ }
+ }
+ if (status != -1)
+ res = hotkey_status_set(status);
+
+ if (!res && mask != hotkey_mask)
+ res = hotkey_mask_set(mask);
+
+ errexit:
+ mutex_unlock(&hotkey_mutex);
+ return res;
+ }
+
+ static const struct acpi_device_id ibm_htk_device_ids[] = {
+ {TPACPI_ACPI_HKEY_HID, 0},
+ {"", 0},
+ };
+
+ static struct tp_acpi_drv_struct ibm_hotkey_acpidriver = {
+ .hid = ibm_htk_device_ids,
+ .notify = hotkey_notify,
+ .handle = &hkey_handle,
+ .type = ACPI_DEVICE_NOTIFY,
+ };
+
+ static struct ibm_struct hotkey_driver_data = {
+ .name = "hotkey",
+ .read = hotkey_read,
+ .write = hotkey_write,
+ .exit = hotkey_exit,
+ .resume = hotkey_resume,
+ .suspend = hotkey_suspend,
+ .acpi = &ibm_hotkey_acpidriver,
+ };
+
+ /*************************************************************************
+ * Bluetooth subdriver
+ */
+
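+ /* The bluetooth, WWAN and UWB subdrivers share the same pattern: a
+  * get/set pair around an ACPI method (GBDC/SBDC here), an rfkill
+  * interface, and a sysfs enable attribute; WLSW hard-blocks them all */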
+ enum {
+ /* ACPI GBDC/SBDC bits */
+ TP_ACPI_BLUETOOTH_HWPRESENT = 0x01, /* Bluetooth hw available */
+ TP_ACPI_BLUETOOTH_RADIOSSW = 0x02, /* Bluetooth radio enabled */
+ TP_ACPI_BLUETOOTH_RESUMECTRL = 0x04, /* Bluetooth state at resume:
+ off / last state */
+ };
+
+ enum {
+ /* ACPI \BLTH commands */
+ TP_ACPI_BLTH_GET_ULTRAPORT_ID = 0x00, /* Get Ultraport BT ID */
+ TP_ACPI_BLTH_GET_PWR_ON_RESUME = 0x01, /* Get power-on-resume state */
+ TP_ACPI_BLTH_PWR_ON_ON_RESUME = 0x02, /* Resume powered on */
+ TP_ACPI_BLTH_PWR_OFF_ON_RESUME = 0x03, /* Resume powered off */
+ TP_ACPI_BLTH_SAVE_STATE = 0x05, /* Save state for S4/S5 */
+ };
+
+ static struct rfkill *tpacpi_bluetooth_rfkill;
+
+ static void bluetooth_suspend(pm_message_t state)
+ {
+ /* Try to make sure radio will resume powered off */
+ acpi_evalf(NULL, NULL, "\\BLTH", "vd",
+ TP_ACPI_BLTH_PWR_OFF_ON_RESUME);
+ }
+
+ static int bluetooth_get_radiosw(void)
+ {
+ int status;
+
+ if (!tp_features.bluetooth)
+ return -ENODEV;
+
+ /* WLSW overrides bluetooth in firmware/hardware, reflect that */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
+ return RFKILL_STATE_HARD_BLOCKED;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_bluetoothemul)
+ return (tpacpi_bluetooth_emulstate) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ #endif
+
+ if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
+ return -EIO;
+
+ return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ }
+
+ static void bluetooth_update_rfk(void)
+ {
+ int status;
+
+ if (!tpacpi_bluetooth_rfkill)
+ return;
+
+ status = bluetooth_get_radiosw();
+ if (status < 0)
+ return;
+ rfkill_force_state(tpacpi_bluetooth_rfkill, status);
+ }
+
+ static int bluetooth_set_radiosw(int radio_on, int update_rfk)
+ {
+ int status;
+
+ if (!tp_features.bluetooth)
+ return -ENODEV;
+
+ /* WLSW overrides bluetooth in firmware/hardware, but there is no
+ * reason to risk weird behaviour. */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
+ && radio_on)
+ return -EPERM;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_bluetoothemul) {
+ tpacpi_bluetooth_emulstate = !!radio_on;
+ if (update_rfk)
+ bluetooth_update_rfk();
+ return 0;
+ }
+ #endif
+
+ /* We make sure to keep TP_ACPI_BLUETOOTH_RESUMECTRL off */
+ if (radio_on)
+ status = TP_ACPI_BLUETOOTH_RADIOSSW;
+ else
+ status = 0;
+ if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status))
+ return -EIO;
+
+ if (update_rfk)
+ bluetooth_update_rfk();
+
+ return 0;
+ }
+
+ /* sysfs bluetooth enable ---------------------------------------------- */
+ static ssize_t bluetooth_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int status;
+
+ status = bluetooth_get_radiosw();
+ if (status < 0)
+ return status;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
+ }
+
+ static ssize_t bluetooth_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+ int res;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ res = bluetooth_set_radiosw(t, 1);
+
+ return (res) ? res : count;
+ }
+
+ static struct device_attribute dev_attr_bluetooth_enable =
+ __ATTR(bluetooth_enable, S_IWUSR | S_IRUGO,
+ bluetooth_enable_show, bluetooth_enable_store);
+
+ /* --------------------------------------------------------------------- */
+
+ static struct attribute *bluetooth_attributes[] = {
+ &dev_attr_bluetooth_enable.attr,
+ NULL
+ };
+
+ static const struct attribute_group bluetooth_attr_group = {
+ .attrs = bluetooth_attributes,
+ };
+
+ static int tpacpi_bluetooth_rfk_get(void *data, enum rfkill_state *state)
+ {
+ int bts = bluetooth_get_radiosw();
+
+ if (bts < 0)
+ return bts;
+
+ *state = bts;
+ return 0;
+ }
+
+ static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state)
+ {
+ return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
+ }
+
+ static void bluetooth_shutdown(void)
+ {
+ /* Order firmware to save current state to NVRAM */
+ if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd",
+ TP_ACPI_BLTH_SAVE_STATE))
+ printk(TPACPI_NOTICE
+ "failed to save bluetooth state to NVRAM\n");
+ }
+
+ static void bluetooth_exit(void)
+ {
+ bluetooth_shutdown();
+
+ if (tpacpi_bluetooth_rfkill)
+ rfkill_unregister(tpacpi_bluetooth_rfkill);
+
+ sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+ &bluetooth_attr_group);
+ }
+
+ static int __init bluetooth_init(struct ibm_init_struct *iibm)
+ {
+ int res;
+ int status = 0;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing bluetooth subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(hkey);
+
+ /* bluetooth not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p,
+ G4x, R30, R31, R40e, R50e, T20-22, X20-21 */
+ tp_features.bluetooth = hkey_handle &&
+ acpi_evalf(hkey_handle, &status, "GBDC", "qd");
+
+ vdbg_printk(TPACPI_DBG_INIT, "bluetooth is %s, status 0x%02x\n",
+ str_supported(tp_features.bluetooth),
+ status);
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_bluetoothemul) {
+ tp_features.bluetooth = 1;
+ printk(TPACPI_INFO
+ "bluetooth switch emulation enabled\n");
+ } else
+ #endif
+ if (tp_features.bluetooth &&
+ !(status & TP_ACPI_BLUETOOTH_HWPRESENT)) {
+ /* no bluetooth hardware present in system */
+ tp_features.bluetooth = 0;
+ dbg_printk(TPACPI_DBG_INIT,
+ "bluetooth hardware not installed\n");
+ }
+
+ if (!tp_features.bluetooth)
+ return 1;
+
+ res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+ &bluetooth_attr_group);
+ if (res)
+ return res;
+
+ res = tpacpi_new_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID,
+ &tpacpi_bluetooth_rfkill,
+ RFKILL_TYPE_BLUETOOTH,
+ "tpacpi_bluetooth_sw",
+ true,
+ tpacpi_bluetooth_rfk_set,
+ tpacpi_bluetooth_rfk_get);
+ if (res) {
+ bluetooth_exit();
+ return res;
+ }
+
+ return 0;
+ }
+
+ /* procfs -------------------------------------------------------------- */
+ static int bluetooth_read(char *p)
+ {
+ int len = 0;
+ int status = bluetooth_get_radiosw();
+
+ if (!tp_features.bluetooth)
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ else {
+ len += sprintf(p + len, "status:\t\t%s\n",
+ (status == RFKILL_STATE_UNBLOCKED) ?
+ "enabled" : "disabled");
+ len += sprintf(p + len, "commands:\tenable, disable\n");
+ }
+
+ return len;
+ }
+
+ static int bluetooth_write(char *buf)
+ {
+ char *cmd;
+
+ if (!tp_features.bluetooth)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "enable") == 0) {
+ bluetooth_set_radiosw(1, 1);
+ } else if (strlencmp(cmd, "disable") == 0) {
+ bluetooth_set_radiosw(0, 1);
+ } else
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ static struct ibm_struct bluetooth_driver_data = {
+ .name = "bluetooth",
+ .read = bluetooth_read,
+ .write = bluetooth_write,
+ .exit = bluetooth_exit,
+ .suspend = bluetooth_suspend,
+ .shutdown = bluetooth_shutdown,
+ };
+
+ /*************************************************************************
+ * Wan subdriver
+ */
+
+ enum {
+ /* ACPI GWAN/SWAN bits */
+ TP_ACPI_WANCARD_HWPRESENT = 0x01, /* Wan hw available */
+ TP_ACPI_WANCARD_RADIOSSW = 0x02, /* Wan radio enabled */
+ TP_ACPI_WANCARD_RESUMECTRL = 0x04, /* Wan state at resume:
+ off / last state */
+ };
+
+ static struct rfkill *tpacpi_wan_rfkill;
+
+ static void wan_suspend(pm_message_t state)
+ {
+ /* Try to make sure radio will resume powered off */
+ acpi_evalf(NULL, NULL, "\\WGSV", "qvd",
+ TP_ACPI_WGSV_PWR_OFF_ON_RESUME);
+ }
+
+ static int wan_get_radiosw(void)
+ {
+ int status;
+
+ if (!tp_features.wan)
+ return -ENODEV;
+
+ /* WLSW overrides WWAN in firmware/hardware, reflect that */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
+ return RFKILL_STATE_HARD_BLOCKED;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_wwanemul)
+ return (tpacpi_wwan_emulstate) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ #endif
+
+ if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
+ return -EIO;
+
+ return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ }
+
+ static void wan_update_rfk(void)
+ {
+ int status;
+
+ if (!tpacpi_wan_rfkill)
+ return;
+
+ status = wan_get_radiosw();
+ if (status < 0)
+ return;
+ rfkill_force_state(tpacpi_wan_rfkill, status);
+ }
+
+ static int wan_set_radiosw(int radio_on, int update_rfk)
+ {
+ int status;
+
+ if (!tp_features.wan)
+ return -ENODEV;
+
+ 	/* WLSW overrides WWAN in firmware/hardware, but there is no
+ * reason to risk weird behaviour. */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
+ && radio_on)
+ return -EPERM;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_wwanemul) {
+ tpacpi_wwan_emulstate = !!radio_on;
+ if (update_rfk)
+ wan_update_rfk();
+ return 0;
+ }
+ #endif
+
+ /* We make sure to keep TP_ACPI_WANCARD_RESUMECTRL off */
+ if (radio_on)
+ status = TP_ACPI_WANCARD_RADIOSSW;
+ else
+ status = 0;
+ if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status))
+ return -EIO;
+
+ if (update_rfk)
+ wan_update_rfk();
+
+ return 0;
+ }
+
+ /* sysfs wan enable ---------------------------------------------------- */
+ static ssize_t wan_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int status;
+
+ status = wan_get_radiosw();
+ if (status < 0)
+ return status;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
+ }
+
+ static ssize_t wan_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+ int res;
+
+ if (parse_strtoul(buf, 1, &t))
+ return -EINVAL;
+
+ res = wan_set_radiosw(t, 1);
+
+ return (res) ? res : count;
+ }
+
+ static struct device_attribute dev_attr_wan_enable =
+ __ATTR(wwan_enable, S_IWUSR | S_IRUGO,
+ wan_enable_show, wan_enable_store);
+
+ /* --------------------------------------------------------------------- */
+
+ static struct attribute *wan_attributes[] = {
+ &dev_attr_wan_enable.attr,
+ NULL
+ };
+
+ static const struct attribute_group wan_attr_group = {
+ .attrs = wan_attributes,
+ };
+
+ static int tpacpi_wan_rfk_get(void *data, enum rfkill_state *state)
+ {
+ int wans = wan_get_radiosw();
+
+ if (wans < 0)
+ return wans;
+
+ *state = wans;
+ return 0;
+ }
+
+ static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state)
+ {
+ return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
+ }
+
+ static void wan_shutdown(void)
+ {
+ /* Order firmware to save current state to NVRAM */
+ if (!acpi_evalf(NULL, NULL, "\\WGSV", "vd",
+ TP_ACPI_WGSV_SAVE_STATE))
+ printk(TPACPI_NOTICE
+ "failed to save WWAN state to NVRAM\n");
+ }
+
+ static void wan_exit(void)
+ {
+ wan_shutdown();
+
+ if (tpacpi_wan_rfkill)
+ rfkill_unregister(tpacpi_wan_rfkill);
+
+ sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+ &wan_attr_group);
+ }
+
+ static int __init wan_init(struct ibm_init_struct *iibm)
+ {
+ int res;
+ int status = 0;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing wan subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(hkey);
+
+ tp_features.wan = hkey_handle &&
+ acpi_evalf(hkey_handle, &status, "GWAN", "qd");
+
+ vdbg_printk(TPACPI_DBG_INIT, "wan is %s, status 0x%02x\n",
+ str_supported(tp_features.wan),
+ status);
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_wwanemul) {
+ tp_features.wan = 1;
+ printk(TPACPI_INFO
+ "wwan switch emulation enabled\n");
+ } else
+ #endif
+ if (tp_features.wan &&
+ !(status & TP_ACPI_WANCARD_HWPRESENT)) {
+ /* no wan hardware present in system */
+ tp_features.wan = 0;
+ dbg_printk(TPACPI_DBG_INIT,
+ "wan hardware not installed\n");
+ }
+
+ if (!tp_features.wan)
+ return 1;
+
+ res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+ &wan_attr_group);
+ if (res)
+ return res;
+
+ res = tpacpi_new_rfkill(TPACPI_RFK_WWAN_SW_ID,
+ &tpacpi_wan_rfkill,
+ RFKILL_TYPE_WWAN,
+ "tpacpi_wwan_sw",
+ true,
+ tpacpi_wan_rfk_set,
+ tpacpi_wan_rfk_get);
+ if (res) {
+ wan_exit();
+ return res;
+ }
+
+ return 0;
+ }
+
+ /* procfs -------------------------------------------------------------- */
+ static int wan_read(char *p)
+ {
+ int len = 0;
+ int status = wan_get_radiosw();
+
+ if (!tp_features.wan)
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ else {
+ len += sprintf(p + len, "status:\t\t%s\n",
+ (status == RFKILL_STATE_UNBLOCKED) ?
+ "enabled" : "disabled");
+ len += sprintf(p + len, "commands:\tenable, disable\n");
+ }
+
+ return len;
+ }
+
+ static int wan_write(char *buf)
+ {
+ char *cmd;
+
+ if (!tp_features.wan)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "enable") == 0) {
+ wan_set_radiosw(1, 1);
+ } else if (strlencmp(cmd, "disable") == 0) {
+ wan_set_radiosw(0, 1);
+ } else
+ return -EINVAL;
+ }
+
+ return 0;
+ }
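+ /* Example procfs usage (assuming the usual /proc/acpi/ibm tree):
+ * cat /proc/acpi/ibm/wan # shows status and available commands
+ * echo enable > /proc/acpi/ibm/wan # radio on, mirrored to rfkill
+ */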
+
+ static struct ibm_struct wan_driver_data = {
+ .name = "wan",
+ .read = wan_read,
+ .write = wan_write,
+ .exit = wan_exit,
+ .suspend = wan_suspend,
+ .shutdown = wan_shutdown,
+ };
+
+ /*************************************************************************
+ * UWB subdriver
+ */
+
+ enum {
+ /* ACPI GUWB/SUWB bits */
+ TP_ACPI_UWB_HWPRESENT = 0x01, /* UWB hw available */
+ TP_ACPI_UWB_RADIOSSW = 0x02, /* UWB radio enabled */
+ };
+
+ static struct rfkill *tpacpi_uwb_rfkill;
+
+ static int uwb_get_radiosw(void)
+ {
+ int status;
+
+ if (!tp_features.uwb)
+ return -ENODEV;
+
+ /* WLSW overrides UWB in firmware/hardware, reflect that */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
+ return RFKILL_STATE_HARD_BLOCKED;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_uwbemul)
+ return (tpacpi_uwb_emulstate) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ #endif
+
+ if (!acpi_evalf(hkey_handle, &status, "GUWB", "d"))
+ return -EIO;
+
+ return ((status & TP_ACPI_UWB_RADIOSSW) != 0) ?
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+ }
+
+ static void uwb_update_rfk(void)
+ {
+ int status;
+
+ if (!tpacpi_uwb_rfkill)
+ return;
+
+ status = uwb_get_radiosw();
+ if (status < 0)
+ return;
+ rfkill_force_state(tpacpi_uwb_rfkill, status);
+ }
+
+ static int uwb_set_radiosw(int radio_on, int update_rfk)
+ {
+ int status;
+
+ if (!tp_features.uwb)
+ return -ENODEV;
+
+ /* WLSW overrides UWB in firmware/hardware, but there is no
+ * reason to risk weird behaviour. */
+ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
+ && radio_on)
+ return -EPERM;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_uwbemul) {
+ tpacpi_uwb_emulstate = !!radio_on;
+ if (update_rfk)
+ uwb_update_rfk();
+ return 0;
+ }
+ #endif
+
+ status = (radio_on) ? TP_ACPI_UWB_RADIOSSW : 0;
+ if (!acpi_evalf(hkey_handle, NULL, "SUWB", "vd", status))
+ return -EIO;
+
+ if (update_rfk)
+ uwb_update_rfk();
+
+ return 0;
+ }
+
+ /* --------------------------------------------------------------------- */
+
+ static int tpacpi_uwb_rfk_get(void *data, enum rfkill_state *state)
+ {
+ int uwbs = uwb_get_radiosw();
+
+ if (uwbs < 0)
+ return uwbs;
+
+ *state = uwbs;
+ return 0;
+ }
+
+ static int tpacpi_uwb_rfk_set(void *data, enum rfkill_state state)
+ {
+ return uwb_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
+ }
+
+ static void uwb_exit(void)
+ {
+ if (tpacpi_uwb_rfkill)
+ rfkill_unregister(tpacpi_uwb_rfkill);
+ }
+
+ static int __init uwb_init(struct ibm_init_struct *iibm)
+ {
+ int res;
+ int status = 0;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing uwb subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(hkey);
+
+ tp_features.uwb = hkey_handle &&
+ acpi_evalf(hkey_handle, &status, "GUWB", "qd");
+
+ vdbg_printk(TPACPI_DBG_INIT, "uwb is %s, status 0x%02x\n",
+ str_supported(tp_features.uwb),
+ status);
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ if (dbg_uwbemul) {
+ tp_features.uwb = 1;
+ printk(TPACPI_INFO
+ "uwb switch emulation enabled\n");
+ } else
+ #endif
+ if (tp_features.uwb &&
+ !(status & TP_ACPI_UWB_HWPRESENT)) {
+ /* no uwb hardware present in system */
+ tp_features.uwb = 0;
+ dbg_printk(TPACPI_DBG_INIT,
+ "uwb hardware not installed\n");
+ }
+
+ if (!tp_features.uwb)
+ return 1;
+
+ res = tpacpi_new_rfkill(TPACPI_RFK_UWB_SW_ID,
+ &tpacpi_uwb_rfkill,
+ RFKILL_TYPE_UWB,
+ "tpacpi_uwb_sw",
+ false,
+ tpacpi_uwb_rfk_set,
+ tpacpi_uwb_rfk_get);
+
+ return res;
+ }
+
+ static struct ibm_struct uwb_driver_data = {
+ .name = "uwb",
+ .exit = uwb_exit,
+ .flags.experimental = 1,
+ };
+
+ /*************************************************************************
+ * Video subdriver
+ */
+
+ #ifdef CONFIG_THINKPAD_ACPI_VIDEO
+
+ enum video_access_mode {
+ TPACPI_VIDEO_NONE = 0,
+ TPACPI_VIDEO_570, /* 570 */
+ TPACPI_VIDEO_770, /* 600e/x, 770e, 770x */
+ TPACPI_VIDEO_NEW, /* all others */
+ };
+
+ enum { /* video status flags, based on VIDEO_570 */
+ TP_ACPI_VIDEO_S_LCD = 0x01, /* LCD output enabled */
+ TP_ACPI_VIDEO_S_CRT = 0x02, /* CRT output enabled */
+ TP_ACPI_VIDEO_S_DVI = 0x08, /* DVI output enabled */
+ };
+
+ enum { /* TPACPI_VIDEO_570 constants */
+ TP_ACPI_VIDEO_570_PHSCMD = 0x87, /* unknown magic constant :( */
+ TP_ACPI_VIDEO_570_PHSMASK = 0x03, /* PHS bits that map to
+ * video_status_flags */
+ TP_ACPI_VIDEO_570_PHS2CMD = 0x8b, /* unknown magic constant :( */
+ TP_ACPI_VIDEO_570_PHS2SET = 0x80, /* unknown magic constant :( */
+ };
+
+ static enum video_access_mode video_supported;
+ static int video_orig_autosw;
+
+ static int video_autosw_get(void);
+ static int video_autosw_set(int enable);
+
+ TPACPI_HANDLE(vid2, root, "\\_SB.PCI0.AGPB.VID"); /* G41 */
+
+ static int __init video_init(struct ibm_init_struct *iibm)
+ {
+ int ivga;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing video subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(vid);
+ TPACPI_ACPIHANDLE_INIT(vid2);
+
+ if (vid2_handle && acpi_evalf(NULL, &ivga, "\\IVGA", "d") && ivga)
+ /* G41, assume IVGA doesn't change */
+ vid_handle = vid2_handle;
+
+ if (!vid_handle)
+ /* video switching not supported on R30, R31 */
+ video_supported = TPACPI_VIDEO_NONE;
+ else if (acpi_evalf(vid_handle, &video_orig_autosw, "SWIT", "qd"))
+ /* 570 */
+ video_supported = TPACPI_VIDEO_570;
+ else if (acpi_evalf(vid_handle, &video_orig_autosw, "^VADL", "qd"))
+ /* 600e/x, 770e, 770x */
+ video_supported = TPACPI_VIDEO_770;
+ else
+ /* all others */
+ video_supported = TPACPI_VIDEO_NEW;
+
+ vdbg_printk(TPACPI_DBG_INIT, "video is %s, mode %d\n",
+ str_supported(video_supported != TPACPI_VIDEO_NONE),
+ video_supported);
+
+ return (video_supported != TPACPI_VIDEO_NONE)? 0 : 1;
+ }
+
+ static void video_exit(void)
+ {
+ dbg_printk(TPACPI_DBG_EXIT,
+ "restoring original video autoswitch mode\n");
+ if (video_autosw_set(video_orig_autosw))
+ printk(TPACPI_ERR "error while trying to restore original "
+ "video autoswitch mode\n");
+ }
+
+ static int video_outputsw_get(void)
+ {
+ int status = 0;
+ int i;
+
+ switch (video_supported) {
+ case TPACPI_VIDEO_570:
+ if (!acpi_evalf(NULL, &i, "\\_SB.PHS", "dd",
+ TP_ACPI_VIDEO_570_PHSCMD))
+ return -EIO;
+ status = i & TP_ACPI_VIDEO_570_PHSMASK;
+ break;
+ case TPACPI_VIDEO_770:
+ if (!acpi_evalf(NULL, &i, "\\VCDL", "d"))
+ return -EIO;
+ if (i)
+ status |= TP_ACPI_VIDEO_S_LCD;
+ if (!acpi_evalf(NULL, &i, "\\VCDC", "d"))
+ return -EIO;
+ if (i)
+ status |= TP_ACPI_VIDEO_S_CRT;
+ break;
+ case TPACPI_VIDEO_NEW:
+ if (!acpi_evalf(NULL, NULL, "\\VUPS", "vd", 1) ||
+ !acpi_evalf(NULL, &i, "\\VCDC", "d"))
+ return -EIO;
+ if (i)
+ status |= TP_ACPI_VIDEO_S_CRT;
+
+ if (!acpi_evalf(NULL, NULL, "\\VUPS", "vd", 0) ||
+ !acpi_evalf(NULL, &i, "\\VCDL", "d"))
+ return -EIO;
+ if (i)
+ status |= TP_ACPI_VIDEO_S_LCD;
+ if (!acpi_evalf(NULL, &i, "\\VCDD", "d"))
+ return -EIO;
+ if (i)
+ status |= TP_ACPI_VIDEO_S_DVI;
+ break;
+ default:
+ return -ENOSYS;
+ }
+
+ return status;
+ }
+
+ static int video_outputsw_set(int status)
+ {
+ int autosw;
+ int res = 0;
+
+ switch (video_supported) {
+ case TPACPI_VIDEO_570:
+ res = acpi_evalf(NULL, NULL,
+ "\\_SB.PHS2", "vdd",
+ TP_ACPI_VIDEO_570_PHS2CMD,
+ status | TP_ACPI_VIDEO_570_PHS2SET);
+ break;
+ case TPACPI_VIDEO_770:
+ autosw = video_autosw_get();
+ if (autosw < 0)
+ return autosw;
+
+ res = video_autosw_set(1);
+ if (res)
+ return res;
+ res = acpi_evalf(vid_handle, NULL,
+ "ASWT", "vdd", status * 0x100, 0);
+ if (!autosw && video_autosw_set(autosw)) {
+ printk(TPACPI_ERR
+ "video auto-switch left enabled due to error\n");
+ return -EIO;
+ }
+ break;
+ case TPACPI_VIDEO_NEW:
+ res = acpi_evalf(NULL, NULL, "\\VUPS", "vd", 0x80) &&
+ acpi_evalf(NULL, NULL, "\\VSDS", "vdd", status, 1);
+ break;
+ default:
+ return -ENOSYS;
+ }
+
+ return (res)? 0 : -EIO;
+ }
+
+ static int video_autosw_get(void)
+ {
+ int autosw = 0;
+
+ switch (video_supported) {
+ case TPACPI_VIDEO_570:
+ if (!acpi_evalf(vid_handle, &autosw, "SWIT", "d"))
+ return -EIO;
+ break;
+ case TPACPI_VIDEO_770:
+ case TPACPI_VIDEO_NEW:
+ if (!acpi_evalf(vid_handle, &autosw, "^VDEE", "d"))
+ return -EIO;
+ break;
+ default:
+ return -ENOSYS;
+ }
+
+ return autosw & 1;
+ }
+
+ static int video_autosw_set(int enable)
+ {
+ if (!acpi_evalf(vid_handle, NULL, "_DOS", "vd", (enable)? 1 : 0))
+ return -EIO;
+ return 0;
+ }
+
+ static int video_outputsw_cycle(void)
+ {
+ int autosw = video_autosw_get();
+ int res;
+
+ if (autosw < 0)
+ return autosw;
+
+ switch (video_supported) {
+ case TPACPI_VIDEO_570:
+ res = video_autosw_set(1);
+ if (res)
+ return res;
+ res = acpi_evalf(ec_handle, NULL, "_Q16", "v");
+ break;
+ case TPACPI_VIDEO_770:
+ case TPACPI_VIDEO_NEW:
+ res = video_autosw_set(1);
+ if (res)
+ return res;
+ res = acpi_evalf(vid_handle, NULL, "VSWT", "v");
+ break;
+ default:
+ return -ENOSYS;
+ }
+ if (!autosw && video_autosw_set(autosw)) {
+ printk(TPACPI_ERR
+ "video auto-switch left enabled due to error\n");
+ return -EIO;
+ }
+
+ return (res)? 0 : -EIO;
+ }
+
+ static int video_expand_toggle(void)
+ {
+ switch (video_supported) {
+ case TPACPI_VIDEO_570:
+ return acpi_evalf(ec_handle, NULL, "_Q17", "v")?
+ 0 : -EIO;
+ case TPACPI_VIDEO_770:
+ return acpi_evalf(vid_handle, NULL, "VEXP", "v")?
+ 0 : -EIO;
+ case TPACPI_VIDEO_NEW:
+ return acpi_evalf(NULL, NULL, "\\VEXP", "v")?
+ 0 : -EIO;
+ default:
+ return -ENOSYS;
+ }
+ /* not reached */
+ }
+
+ static int video_read(char *p)
+ {
+ int status, autosw;
+ int len = 0;
+
+ if (video_supported == TPACPI_VIDEO_NONE) {
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ return len;
+ }
+
+ status = video_outputsw_get();
+ if (status < 0)
+ return status;
+
+ autosw = video_autosw_get();
+ if (autosw < 0)
+ return autosw;
+
+ len += sprintf(p + len, "status:\t\tsupported\n");
+ len += sprintf(p + len, "lcd:\t\t%s\n", enabled(status, 0));
+ len += sprintf(p + len, "crt:\t\t%s\n", enabled(status, 1));
+ if (video_supported == TPACPI_VIDEO_NEW)
+ len += sprintf(p + len, "dvi:\t\t%s\n", enabled(status, 3));
+ len += sprintf(p + len, "auto:\t\t%s\n", enabled(autosw, 0));
+ len += sprintf(p + len, "commands:\tlcd_enable, lcd_disable\n");
+ len += sprintf(p + len, "commands:\tcrt_enable, crt_disable\n");
+ if (video_supported == TPACPI_VIDEO_NEW)
+ len += sprintf(p + len, "commands:\tdvi_enable, dvi_disable\n");
+ len += sprintf(p + len, "commands:\tauto_enable, auto_disable\n");
+ len += sprintf(p + len, "commands:\tvideo_switch, expand_toggle\n");
+
+ return len;
+ }
+
+ static int video_write(char *buf)
+ {
+ char *cmd;
+ int enable, disable, status;
+ int res;
+
+ if (video_supported == TPACPI_VIDEO_NONE)
+ return -ENODEV;
+
+ enable = 0;
+ disable = 0;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "lcd_enable") == 0) {
+ enable |= TP_ACPI_VIDEO_S_LCD;
+ } else if (strlencmp(cmd, "lcd_disable") == 0) {
+ disable |= TP_ACPI_VIDEO_S_LCD;
+ } else if (strlencmp(cmd, "crt_enable") == 0) {
+ enable |= TP_ACPI_VIDEO_S_CRT;
+ } else if (strlencmp(cmd, "crt_disable") == 0) {
+ disable |= TP_ACPI_VIDEO_S_CRT;
+ } else if (video_supported == TPACPI_VIDEO_NEW &&
+ strlencmp(cmd, "dvi_enable") == 0) {
+ enable |= TP_ACPI_VIDEO_S_DVI;
+ } else if (video_supported == TPACPI_VIDEO_NEW &&
+ strlencmp(cmd, "dvi_disable") == 0) {
+ disable |= TP_ACPI_VIDEO_S_DVI;
+ } else if (strlencmp(cmd, "auto_enable") == 0) {
+ res = video_autosw_set(1);
+ if (res)
+ return res;
+ } else if (strlencmp(cmd, "auto_disable") == 0) {
+ res = video_autosw_set(0);
+ if (res)
+ return res;
+ } else if (strlencmp(cmd, "video_switch") == 0) {
+ res = video_outputsw_cycle();
+ if (res)
+ return res;
+ } else if (strlencmp(cmd, "expand_toggle") == 0) {
+ res = video_expand_toggle();
+ if (res)
+ return res;
+ } else
+ return -EINVAL;
+ }
+
+ if (enable || disable) {
+ status = video_outputsw_get();
+ if (status < 0)
+ return status;
+ res = video_outputsw_set((status & ~disable) | enable);
+ if (res)
+ return res;
+ }
+
+ return 0;
+ }
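+ /* Example procfs usage (assuming the usual /proc/acpi/ibm tree):
+ * echo crt_enable > /proc/acpi/ibm/video
+ * echo video_switch > /proc/acpi/ibm/video
+ */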
+
+ static struct ibm_struct video_driver_data = {
+ .name = "video",
+ .read = video_read,
+ .write = video_write,
+ .exit = video_exit,
+ };
+
+ #endif /* CONFIG_THINKPAD_ACPI_VIDEO */
+
+ /*************************************************************************
+ * Light (thinklight) subdriver
+ */
+
+ TPACPI_HANDLE(lght, root, "\\LGHT"); /* A21e, A2xm/p, T20-22, X20-21 */
+ TPACPI_HANDLE(ledb, ec, "LEDB"); /* G4x */
+
+ static int light_get_status(void)
+ {
+ int status = 0;
+
+ if (tp_features.light_status) {
+ if (!acpi_evalf(ec_handle, &status, "KBLT", "d"))
+ return -EIO;
+ return (!!status);
+ }
+
+ return -ENXIO;
+ }
+
+ static int light_set_status(int status)
+ {
+ int rc;
+
+ if (tp_features.light) {
+ if (cmos_handle) {
+ rc = acpi_evalf(cmos_handle, NULL, NULL, "vd",
+ (status)?
+ TP_CMOS_THINKLIGHT_ON :
+ TP_CMOS_THINKLIGHT_OFF);
+ } else {
+ rc = acpi_evalf(lght_handle, NULL, NULL, "vd",
+ (status)? 1 : 0);
+ }
+ return (rc)? 0 : -EIO;
+ }
+
+ return -ENXIO;
+ }
+
+ static void light_set_status_worker(struct work_struct *work)
+ {
+ struct tpacpi_led_classdev *data =
+ container_of(work, struct tpacpi_led_classdev, work);
+
+ if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
+ light_set_status((data->new_brightness != LED_OFF));
+ }
+
+ static void light_sysfs_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+ {
+ struct tpacpi_led_classdev *data =
+ container_of(led_cdev,
+ struct tpacpi_led_classdev,
+ led_classdev);
+ data->new_brightness = brightness;
+ queue_work(tpacpi_wq, &data->work);
+ }
+
+ static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev)
+ {
+ return (light_get_status() == 1)? LED_FULL : LED_OFF;
+ }
+
+ static struct tpacpi_led_classdev tpacpi_led_thinklight = {
+ .led_classdev = {
+ .name = "tpacpi::thinklight",
+ .brightness_set = &light_sysfs_set,
+ .brightness_get = &light_sysfs_get,
+ }
+ };
+
+ static int __init light_init(struct ibm_init_struct *iibm)
+ {
+ int rc;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing light subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(ledb);
+ TPACPI_ACPIHANDLE_INIT(lght);
+ TPACPI_ACPIHANDLE_INIT(cmos);
+ INIT_WORK(&tpacpi_led_thinklight.work, light_set_status_worker);
+
+ /* light not supported on 570, 600e/x, 770e, 770x, G4x, R30, R31 */
+ tp_features.light = (cmos_handle || lght_handle) && !ledb_handle;
+
+ if (tp_features.light)
+ /* light status not supported on
+ 570, 600e/x, 770e, 770x, G4x, R30, R31, R32, X20 */
+ tp_features.light_status =
+ acpi_evalf(ec_handle, NULL, "KBLT", "qv");
+
+ vdbg_printk(TPACPI_DBG_INIT, "light is %s, light status is %s\n",
+ str_supported(tp_features.light),
+ str_supported(tp_features.light_status));
+
+ if (!tp_features.light)
+ return 1;
+
+ rc = led_classdev_register(&tpacpi_pdev->dev,
+ &tpacpi_led_thinklight.led_classdev);
+
+ if (rc < 0) {
+ tp_features.light = 0;
+ tp_features.light_status = 0;
+ } else {
+ rc = 0;
+ }
+
+ return rc;
+ }
+
+ static void light_exit(void)
+ {
+ led_classdev_unregister(&tpacpi_led_thinklight.led_classdev);
+ if (work_pending(&tpacpi_led_thinklight.work))
+ flush_workqueue(tpacpi_wq);
+ }
+
+ static int light_read(char *p)
+ {
+ int len = 0;
+ int status;
+
+ if (!tp_features.light) {
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ } else if (!tp_features.light_status) {
+ len += sprintf(p + len, "status:\t\tunknown\n");
+ len += sprintf(p + len, "commands:\ton, off\n");
+ } else {
+ status = light_get_status();
+ if (status < 0)
+ return status;
+ len += sprintf(p + len, "status:\t\t%s\n", onoff(status, 0));
+ len += sprintf(p + len, "commands:\ton, off\n");
+ }
+
+ return len;
+ }
+
+ static int light_write(char *buf)
+ {
+ char *cmd;
+ int newstatus = 0;
+
+ if (!tp_features.light)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "on") == 0) {
+ newstatus = 1;
+ } else if (strlencmp(cmd, "off") == 0) {
+ newstatus = 0;
+ } else
+ return -EINVAL;
+ }
+
+ return light_set_status(newstatus);
+ }
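+ /* Example procfs usage (assuming the usual /proc/acpi/ibm tree):
+ * echo on > /proc/acpi/ibm/light
+ * echo off > /proc/acpi/ibm/light
+ */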
+
+ static struct ibm_struct light_driver_data = {
+ .name = "light",
+ .read = light_read,
+ .write = light_write,
+ .exit = light_exit,
+ };
+
+ /*************************************************************************
+ * Dock subdriver
+ */
+
+ #ifdef CONFIG_THINKPAD_ACPI_DOCK
+
+ static void dock_notify(struct ibm_struct *ibm, u32 event);
+ static int dock_read(char *p);
+ static int dock_write(char *buf);
+
+ TPACPI_HANDLE(dock, root, "\\_SB.GDCK", /* X30, X31, X40 */
+ "\\_SB.PCI0.DOCK", /* 600e/x,770e,770x,A2xm/p,T20-22,X20-21 */
+ "\\_SB.PCI0.PCI1.DOCK", /* all others */
+ "\\_SB.PCI.ISA.SLCE", /* 570 */
+ ); /* A21e,G4x,R30,R31,R32,R40,R40e,R50e */
+
+ /* don't list other alternatives as we install a notify handler on the 570 */
+ TPACPI_HANDLE(pci, root, "\\_SB.PCI"); /* 570 */
+
+ static const struct acpi_device_id ibm_pci_device_ids[] = {
+ {PCI_ROOT_HID_STRING, 0},
+ {"", 0},
+ };
+
+ static struct tp_acpi_drv_struct ibm_dock_acpidriver[2] = {
+ {
+ .notify = dock_notify,
+ .handle = &dock_handle,
+ .type = ACPI_SYSTEM_NOTIFY,
+ },
+ {
+ /* THIS ONE MUST NEVER BE USED FOR DRIVER AUTOLOADING.
+ * We just use it to get notifications of dock hotplug
+ * in very old thinkpads */
+ .hid = ibm_pci_device_ids,
+ .notify = dock_notify,
+ .handle = &pci_handle,
+ .type = ACPI_SYSTEM_NOTIFY,
+ },
+ };
+
+ static struct ibm_struct dock_driver_data[2] = {
+ {
+ .name = "dock",
+ .read = dock_read,
+ .write = dock_write,
+ .acpi = &ibm_dock_acpidriver[0],
+ },
+ {
+ .name = "dock",
+ .acpi = &ibm_dock_acpidriver[1],
+ },
+ };
+
+ #define dock_docked() (_sta(dock_handle) & 1)
+
+ static int __init dock_init(struct ibm_init_struct *iibm)
+ {
+ vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(dock);
+
+ vdbg_printk(TPACPI_DBG_INIT, "dock is %s\n",
+ str_supported(dock_handle != NULL));
+
+ return (dock_handle)? 0 : 1;
+ }
+
+ static int __init dock_init2(struct ibm_init_struct *iibm)
+ {
+ int dock2_needed;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver part 2\n");
+
+ if (dock_driver_data[0].flags.acpi_driver_registered &&
+ dock_driver_data[0].flags.acpi_notify_installed) {
+ TPACPI_ACPIHANDLE_INIT(pci);
+ dock2_needed = (pci_handle != NULL);
+ vdbg_printk(TPACPI_DBG_INIT,
+ "dock PCI handler for the TP 570 is %s\n",
+ str_supported(dock2_needed));
+ } else {
+ vdbg_printk(TPACPI_DBG_INIT,
+ "dock subdriver part 2 not required\n");
+ dock2_needed = 0;
+ }
+
+ return (dock2_needed)? 0 : 1;
+ }
+
+ static void dock_notify(struct ibm_struct *ibm, u32 event)
+ {
+ int docked = dock_docked();
+ int pci = ibm->acpi->hid && ibm->acpi->device &&
+ acpi_match_device_ids(ibm->acpi->device, ibm_pci_device_ids);
+ int data;
+
+ if (event == 1 && !pci) /* 570 */
+ data = 1; /* button */
+ else if (event == 1 && pci) /* 570 */
+ data = 3; /* dock */
+ else if (event == 3 && docked)
+ data = 1; /* button */
+ else if (event == 3 && !docked)
+ data = 2; /* undock */
+ else if (event == 0 && docked)
+ data = 3; /* dock */
+ else {
+ printk(TPACPI_ERR "unknown dock event %d, status %d\n",
+ event, _sta(dock_handle));
+ data = 0; /* unknown */
+ }
+ acpi_bus_generate_proc_event(ibm->acpi->device, event, data);
+ acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+ dev_name(&ibm->acpi->device->dev),
+ event, data);
+ }
+
+ static int dock_read(char *p)
+ {
+ int len = 0;
+ int docked = dock_docked();
+
+ if (!dock_handle)
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ else if (!docked)
+ len += sprintf(p + len, "status:\t\tundocked\n");
+ else {
+ len += sprintf(p + len, "status:\t\tdocked\n");
+ len += sprintf(p + len, "commands:\tdock, undock\n");
+ }
+
+ return len;
+ }
+
+ static int dock_write(char *buf)
+ {
+ char *cmd;
+
+ if (!dock_docked())
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "undock") == 0) {
+ if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 0) ||
+ !acpi_evalf(dock_handle, NULL, "_EJ0", "vd", 1))
+ return -EIO;
+ } else if (strlencmp(cmd, "dock") == 0) {
+ if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 1))
+ return -EIO;
+ } else
+ return -EINVAL;
+ }
+
+ return 0;
+ }
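+ /* Example: "echo undock > /proc/acpi/ibm/dock" issues _DCK 0
+ * followed by _EJ0 1 to release the dock. */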
+
+ #endif /* CONFIG_THINKPAD_ACPI_DOCK */
+
+ /*************************************************************************
+ * Bay subdriver
+ */
+
+ #ifdef CONFIG_THINKPAD_ACPI_BAY
+
+ TPACPI_HANDLE(bay, root, "\\_SB.PCI.IDE.SECN.MAST", /* 570 */
+ "\\_SB.PCI0.IDE0.IDES.IDSM", /* 600e/x, 770e, 770x */
+ "\\_SB.PCI0.SATA.SCND.MSTR", /* T60, X60, Z60 */
+ "\\_SB.PCI0.IDE0.SCND.MSTR", /* all others */
+ ); /* A21e, R30, R31 */
+ TPACPI_HANDLE(bay_ej, bay, "_EJ3", /* 600e/x, A2xm/p, A3x */
+ "_EJ0", /* all others */
+ ); /* 570,A21e,G4x,R30,R31,R32,R40e,R50e */
+ TPACPI_HANDLE(bay2, root, "\\_SB.PCI0.IDE0.PRIM.SLAV", /* A3x, R32 */
+ "\\_SB.PCI0.IDE0.IDEP.IDPS", /* 600e/x, 770e, 770x */
+ ); /* all others */
+ TPACPI_HANDLE(bay2_ej, bay2, "_EJ3", /* 600e/x, 770e, A3x */
+ "_EJ0", /* 770x */
+ ); /* all others */
+
+ static int __init bay_init(struct ibm_init_struct *iibm)
+ {
+ vdbg_printk(TPACPI_DBG_INIT, "initializing bay subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(bay);
+ if (bay_handle)
+ TPACPI_ACPIHANDLE_INIT(bay_ej);
+ TPACPI_ACPIHANDLE_INIT(bay2);
+ if (bay2_handle)
+ TPACPI_ACPIHANDLE_INIT(bay2_ej);
+
+ tp_features.bay_status = bay_handle &&
+ acpi_evalf(bay_handle, NULL, "_STA", "qv");
+ tp_features.bay_status2 = bay2_handle &&
+ acpi_evalf(bay2_handle, NULL, "_STA", "qv");
+
+ tp_features.bay_eject = bay_handle && bay_ej_handle &&
+ (strlencmp(bay_ej_path, "_EJ0") == 0 || experimental);
+ tp_features.bay_eject2 = bay2_handle && bay2_ej_handle &&
+ (strlencmp(bay2_ej_path, "_EJ0") == 0 || experimental);
+
+ vdbg_printk(TPACPI_DBG_INIT,
+ "bay 1: status %s, eject %s; bay 2: status %s, eject %s\n",
+ str_supported(tp_features.bay_status),
+ str_supported(tp_features.bay_eject),
+ str_supported(tp_features.bay_status2),
+ str_supported(tp_features.bay_eject2));
+
+ return (tp_features.bay_status || tp_features.bay_eject ||
+ tp_features.bay_status2 || tp_features.bay_eject2)? 0 : 1;
+ }
+
+ static void bay_notify(struct ibm_struct *ibm, u32 event)
+ {
+ acpi_bus_generate_proc_event(ibm->acpi->device, event, 0);
+ acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+ dev_name(&ibm->acpi->device->dev),
+ event, 0);
+ }
+
+ #define bay_occupied(b) (_sta(b##_handle) & 1)
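+ /* _STA bit 0 is the ACPI "device present" bit, so this tests
+ * whether the given bay currently holds a device. */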
+
+ static int bay_read(char *p)
+ {
+ int len = 0;
+ int occupied = bay_occupied(bay);
+ int occupied2 = bay_occupied(bay2);
+ int eject, eject2;
+
+ len += sprintf(p + len, "status:\t\t%s\n",
+ tp_features.bay_status ?
+ (occupied ? "occupied" : "unoccupied") :
+ "not supported");
+ if (tp_features.bay_status2)
+ len += sprintf(p + len, "status2:\t%s\n", occupied2 ?
+ "occupied" : "unoccupied");
+
+ eject = tp_features.bay_eject && occupied;
+ eject2 = tp_features.bay_eject2 && occupied2;
+
+ if (eject && eject2)
+ len += sprintf(p + len, "commands:\teject, eject2\n");
+ else if (eject)
+ len += sprintf(p + len, "commands:\teject\n");
+ else if (eject2)
+ len += sprintf(p + len, "commands:\teject2\n");
+
+ return len;
+ }
+
+ static int bay_write(char *buf)
+ {
+ char *cmd;
+
+ if (!tp_features.bay_eject && !tp_features.bay_eject2)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (tp_features.bay_eject && strlencmp(cmd, "eject") == 0) {
+ if (!acpi_evalf(bay_ej_handle, NULL, NULL, "vd", 1))
+ return -EIO;
+ } else if (tp_features.bay_eject2 &&
+ strlencmp(cmd, "eject2") == 0) {
+ if (!acpi_evalf(bay2_ej_handle, NULL, NULL, "vd", 1))
+ return -EIO;
+ } else
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ static struct tp_acpi_drv_struct ibm_bay_acpidriver = {
+ .notify = bay_notify,
+ .handle = &bay_handle,
+ .type = ACPI_SYSTEM_NOTIFY,
+ };
+
+ static struct ibm_struct bay_driver_data = {
+ .name = "bay",
+ .read = bay_read,
+ .write = bay_write,
+ .acpi = &ibm_bay_acpidriver,
+ };
+
+ #endif /* CONFIG_THINKPAD_ACPI_BAY */
+
+ /*************************************************************************
+ * CMOS subdriver
+ */
+
+ /* sysfs cmos_command -------------------------------------------------- */
+ static ssize_t cmos_command_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long cmos_cmd;
+ int res;
+
+ if (parse_strtoul(buf, 21, &cmos_cmd))
+ return -EINVAL;
+
+ res = issue_thinkpad_cmos_command(cmos_cmd);
+ return (res)? res : count;
+ }
+
+ static struct device_attribute dev_attr_cmos_command =
+ __ATTR(cmos_command, S_IWUSR, NULL, cmos_command_store);
+
+ /* --------------------------------------------------------------------- */
+
+ static int __init cmos_init(struct ibm_init_struct *iibm)
+ {
+ int res;
+
+ vdbg_printk(TPACPI_DBG_INIT,
+ "initializing cmos commands subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(cmos);
+
+ vdbg_printk(TPACPI_DBG_INIT, "cmos commands are %s\n",
+ str_supported(cmos_handle != NULL));
+
+ res = device_create_file(&tpacpi_pdev->dev, &dev_attr_cmos_command);
+ if (res)
+ return res;
+
+ return (cmos_handle)? 0 : 1;
+ }
+
+ static void cmos_exit(void)
+ {
+ device_remove_file(&tpacpi_pdev->dev, &dev_attr_cmos_command);
+ }
+
+ static int cmos_read(char *p)
+ {
+ int len = 0;
+
+ /* cmos not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p,
+ R30, R31, T20-22, X20-21 */
+ if (!cmos_handle)
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ else {
+ len += sprintf(p + len, "status:\t\tsupported\n");
+ len += sprintf(p + len, "commands:\t<cmd> (<cmd> is 0-21)\n");
+ }
+
+ return len;
+ }
+
+ static int cmos_write(char *buf)
+ {
+ char *cmd;
+ int cmos_cmd, res;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (sscanf(cmd, "%u", &cmos_cmd) == 1 &&
+ cmos_cmd >= 0 && cmos_cmd <= 21) {
+ /* cmos_cmd set */
+ } else
+ return -EINVAL;
+
+ res = issue_thinkpad_cmos_command(cmos_cmd);
+ if (res)
+ return res;
+ }
+
+ return 0;
+ }
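+ /* Example: "echo 12 > /proc/acpi/ibm/cmos" issues CMOS command 12;
+ * the meaning of each command number (0-21) is model-specific. */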
+
+ static struct ibm_struct cmos_driver_data = {
+ .name = "cmos",
+ .read = cmos_read,
+ .write = cmos_write,
+ .exit = cmos_exit,
+ };
+
+ /*************************************************************************
+ * LED subdriver
+ */
+
+ enum led_access_mode {
+ TPACPI_LED_NONE = 0,
+ TPACPI_LED_570, /* 570 */
+ TPACPI_LED_OLD, /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20-21 */
+ TPACPI_LED_NEW, /* all others */
+ };
+
+ enum { /* For TPACPI_LED_OLD */
+ TPACPI_LED_EC_HLCL = 0x0c, /* EC reg to get led to power on */
+ TPACPI_LED_EC_HLBL = 0x0d, /* EC reg to blink a lit led */
+ TPACPI_LED_EC_HLMS = 0x0e, /* EC reg to select led to command */
+ };
+
+ enum led_status_t {
+ TPACPI_LED_OFF = 0,
+ TPACPI_LED_ON,
+ TPACPI_LED_BLINK,
+ };
+
+ static enum led_access_mode led_supported;
+
+ TPACPI_HANDLE(led, ec, "SLED", /* 570 */
+ "SYSL", /* 600e/x, 770e, 770x, A21e, A2xm/p, */
+ /* T20-22, X20-21 */
+ "LED", /* all others */
+ ); /* R30, R31 */
+
+ #define TPACPI_LED_NUMLEDS 8
+ static struct tpacpi_led_classdev *tpacpi_leds;
+ static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS];
+ static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
+ /* there's a limit of 19 chars + NULL before 2.6.26 */
+ "tpacpi::power",
+ "tpacpi:orange:batt",
+ "tpacpi:green:batt",
+ "tpacpi::dock_active",
+ "tpacpi::bay_active",
+ "tpacpi::dock_batt",
+ "tpacpi::unknown_led",
+ "tpacpi::standby",
+ };
+
+ static int led_get_status(const unsigned int led)
+ {
+ int status;
+ enum led_status_t led_s;
+
+ switch (led_supported) {
+ case TPACPI_LED_570:
+ if (!acpi_evalf(ec_handle,
+ &status, "GLED", "dd", 1 << led))
+ return -EIO;
+ led_s = (status == 0)?
+ TPACPI_LED_OFF :
+ ((status == 1)?
+ TPACPI_LED_ON :
+ TPACPI_LED_BLINK);
+ tpacpi_led_state_cache[led] = led_s;
+ return led_s;
+ default:
+ return -ENXIO;
+ }
+
+ /* not reached */
+ }
+
+ static int led_set_status(const unsigned int led,
+ const enum led_status_t ledstatus)
+ {
+ /* off, on, blink. Index is led_status_t */
+ static const unsigned int led_sled_arg1[] = { 0, 1, 3 };
+ static const unsigned int led_led_arg1[] = { 0, 0x80, 0xc0 };
+
+ int rc = 0;
+
+ switch (led_supported) {
+ case TPACPI_LED_570:
+ /* 570 */
+ if (led > 7)
+ return -EINVAL;
+ if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
+ (1 << led), led_sled_arg1[ledstatus]))
+ rc = -EIO;
+ break;
+ case TPACPI_LED_OLD:
+ /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20 */
+ if (led > 7)
+ return -EINVAL;
+ rc = ec_write(TPACPI_LED_EC_HLMS, (1 << led));
+ if (rc >= 0)
+ rc = ec_write(TPACPI_LED_EC_HLBL,
+ (ledstatus == TPACPI_LED_BLINK) << led);
+ if (rc >= 0)
+ rc = ec_write(TPACPI_LED_EC_HLCL,
+ (ledstatus != TPACPI_LED_OFF) << led);
+ break;
+ case TPACPI_LED_NEW:
+ /* all others */
+ if (!acpi_evalf(led_handle, NULL, NULL, "vdd",
+ led, led_led_arg1[ledstatus]))
+ rc = -EIO;
+ break;
+ default:
+ rc = -ENXIO;
+ }
+
+ if (!rc)
+ tpacpi_led_state_cache[led] = ledstatus;
+
+ return rc;
+ }
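+ /* Worked example for TPACPI_LED_OLD: blinking led 3 writes
+ * (1 << 3) = 0x08 to HLMS (select the led), to HLBL (blink it)
+ * and to HLCL (power it on). */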
+
+ static void led_sysfs_set_status(unsigned int led,
+ enum led_brightness brightness)
+ {
+ led_set_status(led,
+ (brightness == LED_OFF) ?
+ TPACPI_LED_OFF :
+ (tpacpi_led_state_cache[led] == TPACPI_LED_BLINK) ?
+ TPACPI_LED_BLINK : TPACPI_LED_ON);
+ }
+
+ static void led_set_status_worker(struct work_struct *work)
+ {
+ struct tpacpi_led_classdev *data =
+ container_of(work, struct tpacpi_led_classdev, work);
+
+ if (likely(tpacpi_lifecycle == TPACPI_LIFE_RUNNING))
+ led_sysfs_set_status(data->led, data->new_brightness);
+ }
+
+ static void led_sysfs_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+ {
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ data->new_brightness = brightness;
+ queue_work(tpacpi_wq, &data->work);
+ }
+
+ static int led_sysfs_blink_set(struct led_classdev *led_cdev,
+ unsigned long *delay_on, unsigned long *delay_off)
+ {
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ /* Can we choose the flash rate? */
+ if (*delay_on == 0 && *delay_off == 0) {
+ /* yes. set them to the hardware blink rate (1 Hz) */
+ *delay_on = 500; /* ms */
+ *delay_off = 500; /* ms */
+ } else if ((*delay_on != 500) || (*delay_off != 500))
+ return -EINVAL;
+
+ data->new_brightness = TPACPI_LED_BLINK;
+ queue_work(tpacpi_wq, &data->work);
+
+ return 0;
+ }
+
+ static enum led_brightness led_sysfs_get(struct led_classdev *led_cdev)
+ {
+ int rc;
+
+ struct tpacpi_led_classdev *data = container_of(led_cdev,
+ struct tpacpi_led_classdev, led_classdev);
+
+ rc = led_get_status(data->led);
+
+ if (rc == TPACPI_LED_OFF || rc < 0)
+ rc = LED_OFF; /* no error handling in led class :( */
+ else
+ rc = LED_FULL;
+
+ return rc;
+ }
+
+ static void led_exit(void)
+ {
+ unsigned int i;
+
+ for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
+ if (tpacpi_leds[i].led_classdev.name)
+ led_classdev_unregister(&tpacpi_leds[i].led_classdev);
+ }
+
+ kfree(tpacpi_leds);
+ }
+
+ static int __init led_init(struct ibm_init_struct *iibm)
+ {
+ unsigned int i;
+ int rc;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(led);
+
+ if (!led_handle)
+ /* led not supported on R30, R31 */
+ led_supported = TPACPI_LED_NONE;
+ else if (strlencmp(led_path, "SLED") == 0)
+ /* 570 */
+ led_supported = TPACPI_LED_570;
+ else if (strlencmp(led_path, "SYSL") == 0)
+ /* 600e/x, 770e, 770x, A21e, A2xm/p, T20-22, X20-21 */
+ led_supported = TPACPI_LED_OLD;
+ else
+ /* all others */
+ led_supported = TPACPI_LED_NEW;
+
+ vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
+ str_supported(led_supported), led_supported);
+
+ tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
+ GFP_KERNEL);
+ if (!tpacpi_leds) {
+ printk(TPACPI_ERR "Out of memory for LED data\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
+ tpacpi_leds[i].led = i;
+
+ tpacpi_leds[i].led_classdev.brightness_set = &led_sysfs_set;
+ tpacpi_leds[i].led_classdev.blink_set = &led_sysfs_blink_set;
+ if (led_supported == TPACPI_LED_570)
+ tpacpi_leds[i].led_classdev.brightness_get =
+ &led_sysfs_get;
+
+ tpacpi_leds[i].led_classdev.name = tpacpi_led_names[i];
+
+ INIT_WORK(&tpacpi_leds[i].work, led_set_status_worker);
+
+ rc = led_classdev_register(&tpacpi_pdev->dev,
+ &tpacpi_leds[i].led_classdev);
+ if (rc < 0) {
+ tpacpi_leds[i].led_classdev.name = NULL;
+ led_exit();
+ return rc;
+ }
+ }
+
+ return (led_supported != TPACPI_LED_NONE)? 0 : 1;
+ }
+
+ #define str_led_status(s) \
+ ((s) == TPACPI_LED_OFF ? "off" : \
+ ((s) == TPACPI_LED_ON ? "on" : "blinking"))
+
+ static int led_read(char *p)
+ {
+ int len = 0;
+
+ if (!led_supported) {
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ return len;
+ }
+ len += sprintf(p + len, "status:\t\tsupported\n");
+
+ if (led_supported == TPACPI_LED_570) {
+ /* 570 */
+ int i, status;
+ for (i = 0; i < 8; i++) {
+ status = led_get_status(i);
+ if (status < 0)
+ return -EIO;
+ len += sprintf(p + len, "%d:\t\t%s\n",
+ i, str_led_status(status));
+ }
+ }
+
+ len += sprintf(p + len, "commands:\t"
+ "<led> on, <led> off, <led> blink (<led> is 0-7)\n");
+
+ return len;
+ }
+
+ static int led_write(char *buf)
+ {
+ char *cmd;
+ int led, rc;
+ enum led_status_t s;
+
+ if (!led_supported)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 7)
+ return -EINVAL;
+
+ if (strstr(cmd, "off")) {
+ s = TPACPI_LED_OFF;
+ } else if (strstr(cmd, "on")) {
+ s = TPACPI_LED_ON;
+ } else if (strstr(cmd, "blink")) {
+ s = TPACPI_LED_BLINK;
+ } else {
+ return -EINVAL;
+ }
+
+ rc = led_set_status(led, s);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+ }
+
+ static struct ibm_struct led_driver_data = {
+ .name = "led",
+ .read = led_read,
+ .write = led_write,
+ .exit = led_exit,
+ };
+
+ /*************************************************************************
+ * Beep subdriver
+ */
+
+ TPACPI_HANDLE(beep, ec, "BEEP"); /* all except R30, R31 */
+
+ static int __init beep_init(struct ibm_init_struct *iibm)
+ {
+ vdbg_printk(TPACPI_DBG_INIT, "initializing beep subdriver\n");
+
+ TPACPI_ACPIHANDLE_INIT(beep);
+
+ vdbg_printk(TPACPI_DBG_INIT, "beep is %s\n",
+ str_supported(beep_handle != NULL));
+
+ return (beep_handle)? 0 : 1;
+ }
+
+ static int beep_read(char *p)
+ {
+ int len = 0;
+
+ if (!beep_handle)
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ else {
+ len += sprintf(p + len, "status:\t\tsupported\n");
+ len += sprintf(p + len, "commands:\t<cmd> (<cmd> is 0-17)\n");
+ }
+
+ return len;
+ }
+
+ static int beep_write(char *buf)
+ {
+ char *cmd;
+ int beep_cmd;
+
+ if (!beep_handle)
+ return -ENODEV;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (sscanf(cmd, "%u", &beep_cmd) == 1 &&
+ beep_cmd >= 0 && beep_cmd <= 17) {
+ /* beep_cmd set */
+ } else
+ return -EINVAL;
+ if (!acpi_evalf(beep_handle, NULL, NULL, "vdd", beep_cmd, 0))
+ return -EIO;
+ }
+
+ return 0;
+ }
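+ /* Example: "echo 3 > /proc/acpi/ibm/beep" plays firmware beep 3;
+ * the mapping of command numbers (0-17) to sounds is
+ * model-specific. */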
+
+ static struct ibm_struct beep_driver_data = {
+ .name = "beep",
+ .read = beep_read,
+ .write = beep_write,
+ };
+
+ /*************************************************************************
+ * Thermal subdriver
+ */
+
+ enum thermal_access_mode {
+ TPACPI_THERMAL_NONE = 0, /* No thermal support */
+ TPACPI_THERMAL_ACPI_TMP07, /* Use ACPI TMP0-7 */
+ TPACPI_THERMAL_ACPI_UPDT, /* Use ACPI TMP0-7 with UPDT */
+ TPACPI_THERMAL_TPEC_8, /* Use ACPI EC regs, 8 sensors */
+ TPACPI_THERMAL_TPEC_16, /* Use ACPI EC regs, 16 sensors */
+ };
+
+ enum { /* TPACPI_THERMAL_TPEC_* */
+ TP_EC_THERMAL_TMP0 = 0x78, /* ACPI EC regs TMP 0..7 */
+ TP_EC_THERMAL_TMP8 = 0xC0, /* ACPI EC regs TMP 8..15 */
+ TP_EC_THERMAL_TMP_NA = -128, /* ACPI EC sensor not available */
+ };
+
+ #define TPACPI_MAX_THERMAL_SENSORS 16 /* Max thermal sensors supported */
+ struct ibm_thermal_sensors_struct {
+ s32 temp[TPACPI_MAX_THERMAL_SENSORS];
+ };
+
+ static enum thermal_access_mode thermal_read_mode;
+
+ /* idx is zero-based */
+ static int thermal_get_sensor(int idx, s32 *value)
+ {
+ int t;
+ s8 tmp;
+ char tmpi[5];
+
+ t = TP_EC_THERMAL_TMP0;
+
+ switch (thermal_read_mode) {
+ #if TPACPI_MAX_THERMAL_SENSORS >= 16
+ case TPACPI_THERMAL_TPEC_16:
+ if (idx >= 8 && idx <= 15) {
+ t = TP_EC_THERMAL_TMP8;
+ idx -= 8;
+ }
+ /* fallthrough */
+ #endif
+ case TPACPI_THERMAL_TPEC_8:
+ if (idx <= 7) {
+ if (!acpi_ec_read(t + idx, &tmp))
+ return -EIO;
+ *value = tmp * 1000;
+ return 0;
+ }
+ break;
+
+ case TPACPI_THERMAL_ACPI_UPDT:
+ if (idx <= 7) {
+ snprintf(tmpi, sizeof(tmpi), "TMP%c", '0' + idx);
+ if (!acpi_evalf(ec_handle, NULL, "UPDT", "v"))
+ return -EIO;
+ if (!acpi_evalf(ec_handle, &t, tmpi, "d"))
+ return -EIO;
+ *value = (t - 2732) * 100;
+ return 0;
+ }
+ break;
+
+ case TPACPI_THERMAL_ACPI_TMP07:
+ if (idx <= 7) {
+ snprintf(tmpi, sizeof(tmpi), "TMP%c", '0' + idx);
+ if (!acpi_evalf(ec_handle, &t, tmpi, "d"))
+ return -EIO;
+ if (t > 127 || t < -127)
+ t = TP_EC_THERMAL_TMP_NA;
+ *value = t * 1000;
+ return 0;
+ }
+ break;
+
+ case TPACPI_THERMAL_NONE:
+ default:
+ return -ENOSYS;
+ }
+
+ return -EINVAL;
+ }
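+ /* Values follow the hwmon convention of millidegrees Celsius:
+ * a raw EC reading of 47 is reported as 47000, and the UPDT
+ * deciKelvin readings are converted via (t - 2732) * 100. */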
+
+ static int thermal_get_sensors(struct ibm_thermal_sensors_struct *s)
+ {
+ int res, i;
+ int n;
+
+ n = 8;
+ i = 0;
+
+ if (!s)
+ return -EINVAL;
+
+ if (thermal_read_mode == TPACPI_THERMAL_TPEC_16)
+ n = 16;
+
+ for (i = 0 ; i < n; i++) {
+ res = thermal_get_sensor(i, &s->temp[i]);
+ if (res)
+ return res;
+ }
+
+ return n;
+ }
+
+ /* sysfs temp##_input -------------------------------------------------- */
+
+ static ssize_t thermal_temp_input_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ struct sensor_device_attribute *sensor_attr =
+ to_sensor_dev_attr(attr);
+ int idx = sensor_attr->index;
+ s32 value;
+ int res;
+
+ res = thermal_get_sensor(idx, &value);
+ if (res)
+ return res;
+ if (value == TP_EC_THERMAL_TMP_NA * 1000)
+ return -ENXIO;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+ }
+
+ #define THERMAL_SENSOR_ATTR_TEMP(_idxA, _idxB) \
+ SENSOR_ATTR(temp##_idxA##_input, S_IRUGO, \
+ thermal_temp_input_show, NULL, _idxB)
+
+ static struct sensor_device_attribute sensor_dev_attr_thermal_temp_input[] = {
+ THERMAL_SENSOR_ATTR_TEMP(1, 0),
+ THERMAL_SENSOR_ATTR_TEMP(2, 1),
+ THERMAL_SENSOR_ATTR_TEMP(3, 2),
+ THERMAL_SENSOR_ATTR_TEMP(4, 3),
+ THERMAL_SENSOR_ATTR_TEMP(5, 4),
+ THERMAL_SENSOR_ATTR_TEMP(6, 5),
+ THERMAL_SENSOR_ATTR_TEMP(7, 6),
+ THERMAL_SENSOR_ATTR_TEMP(8, 7),
+ THERMAL_SENSOR_ATTR_TEMP(9, 8),
+ THERMAL_SENSOR_ATTR_TEMP(10, 9),
+ THERMAL_SENSOR_ATTR_TEMP(11, 10),
+ THERMAL_SENSOR_ATTR_TEMP(12, 11),
+ THERMAL_SENSOR_ATTR_TEMP(13, 12),
+ THERMAL_SENSOR_ATTR_TEMP(14, 13),
+ THERMAL_SENSOR_ATTR_TEMP(15, 14),
+ THERMAL_SENSOR_ATTR_TEMP(16, 15),
+ };
+
+ #define THERMAL_ATTRS(X) \
+ &sensor_dev_attr_thermal_temp_input[X].dev_attr.attr
+
+ static struct attribute *thermal_temp_input_attr[] = {
+ THERMAL_ATTRS(8),
+ THERMAL_ATTRS(9),
+ THERMAL_ATTRS(10),
+ THERMAL_ATTRS(11),
+ THERMAL_ATTRS(12),
+ THERMAL_ATTRS(13),
+ THERMAL_ATTRS(14),
+ THERMAL_ATTRS(15),
+ THERMAL_ATTRS(0),
+ THERMAL_ATTRS(1),
+ THERMAL_ATTRS(2),
+ THERMAL_ATTRS(3),
+ THERMAL_ATTRS(4),
+ THERMAL_ATTRS(5),
+ THERMAL_ATTRS(6),
+ THERMAL_ATTRS(7),
+ NULL
+ };
+
+ static const struct attribute_group thermal_temp_input16_group = {
+ .attrs = thermal_temp_input_attr
+ };
+
+ static const struct attribute_group thermal_temp_input8_group = {
+ .attrs = &thermal_temp_input_attr[8]
+ };
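+ /* The attribute array above deliberately lists sensors 8..15
+ * first: &thermal_temp_input_attr[8] is then a NULL-terminated
+ * list of sensors 0..7 only, letting the 8-sensor group share
+ * the array with the 16-sensor group. */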
+
+ #undef THERMAL_SENSOR_ATTR_TEMP
+ #undef THERMAL_ATTRS
+
+ /* --------------------------------------------------------------------- */
+
+ static int __init thermal_init(struct ibm_init_struct *iibm)
+ {
+ u8 t, ta1, ta2;
+ int i;
+ int acpi_tmp7;
+ int res;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing thermal subdriver\n");
+
+ acpi_tmp7 = acpi_evalf(ec_handle, NULL, "TMP7", "qv");
+
+ if (thinkpad_id.ec_model) {
+ /*
+ * Direct EC access mode: sensors at registers
+ * 0x78-0x7F, 0xC0-0xC7. Registers return 0x00 for
+ * non-implemented, thermal sensors return 0x80 when
+ * not available
+ */
+
+ ta1 = ta2 = 0;
+ for (i = 0; i < 8; i++) {
+ if (acpi_ec_read(TP_EC_THERMAL_TMP0 + i, &t)) {
+ ta1 |= t;
+ } else {
+ ta1 = 0;
+ break;
+ }
+ if (acpi_ec_read(TP_EC_THERMAL_TMP8 + i, &t)) {
+ ta2 |= t;
+ } else {
+ ta1 = 0;
+ break;
+ }
+ }
+ if (ta1 == 0) {
+ /* This is sheer paranoia, but we handle it anyway */
+ if (acpi_tmp7) {
+ printk(TPACPI_ERR
+ "ThinkPad ACPI EC access misbehaving, "
+ "falling back to ACPI TMPx access "
+ "mode\n");
+ thermal_read_mode = TPACPI_THERMAL_ACPI_TMP07;
+ } else {
+ printk(TPACPI_ERR
+ "ThinkPad ACPI EC access misbehaving, "
+ "disabling thermal sensors access\n");
+ thermal_read_mode = TPACPI_THERMAL_NONE;
+ }
+ } else {
+ thermal_read_mode =
+ (ta2 != 0) ?
+ TPACPI_THERMAL_TPEC_16 : TPACPI_THERMAL_TPEC_8;
+ }
+ } else if (acpi_tmp7) {
+ if (acpi_evalf(ec_handle, NULL, "UPDT", "qv")) {
+ /* 600e/x, 770e, 770x */
+ thermal_read_mode = TPACPI_THERMAL_ACPI_UPDT;
+ } else {
+ /* Standard ACPI TMPx access, max 8 sensors */
+ thermal_read_mode = TPACPI_THERMAL_ACPI_TMP07;
+ }
+ } else {
+ /* temperatures not supported on 570, G4x, R30, R31, R32 */
+ thermal_read_mode = TPACPI_THERMAL_NONE;
+ }
+
+ vdbg_printk(TPACPI_DBG_INIT, "thermal is %s, mode %d\n",
+ str_supported(thermal_read_mode != TPACPI_THERMAL_NONE),
+ thermal_read_mode);
+
+ switch (thermal_read_mode) {
+ case TPACPI_THERMAL_TPEC_16:
+ res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+ &thermal_temp_input16_group);
+ if (res)
+ return res;
+ break;
+ case TPACPI_THERMAL_TPEC_8:
+ case TPACPI_THERMAL_ACPI_TMP07:
+ case TPACPI_THERMAL_ACPI_UPDT:
+ res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+ &thermal_temp_input8_group);
+ if (res)
+ return res;
+ break;
+ case TPACPI_THERMAL_NONE:
+ default:
+ return 1;
+ }
+
+ return 0;
+ }
+
+ static void thermal_exit(void)
+ {
+ switch (thermal_read_mode) {
+ case TPACPI_THERMAL_TPEC_16:
+ sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+ &thermal_temp_input16_group);
+ break;
+ case TPACPI_THERMAL_TPEC_8:
+ case TPACPI_THERMAL_ACPI_TMP07:
+ case TPACPI_THERMAL_ACPI_UPDT:
+ sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+ &thermal_temp_input8_group);
+ break;
+ case TPACPI_THERMAL_NONE:
+ default:
+ break;
+ }
+ }
+
+ static int thermal_read(char *p)
+ {
+ int len = 0;
+ int n, i;
+ struct ibm_thermal_sensors_struct t;
+
+ n = thermal_get_sensors(&t);
+ if (unlikely(n < 0))
+ return n;
+
+ len += sprintf(p + len, "temperatures:\t");
+
+ if (n > 0) {
+ for (i = 0; i < (n - 1); i++)
+ len += sprintf(p + len, "%d ", t.temp[i] / 1000);
+ len += sprintf(p + len, "%d\n", t.temp[i] / 1000);
+ } else
+ len += sprintf(p + len, "not supported\n");
+
+ return len;
+ }
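+ /* Sample procfs output (values illustrative), in degrees Celsius:
+ * temperatures: 47 42 36 52 0 -128 29 33
+ */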
+
+ static struct ibm_struct thermal_driver_data = {
+ .name = "thermal",
+ .read = thermal_read,
+ .exit = thermal_exit,
+ };
+
+ /*************************************************************************
+ * EC Dump subdriver
+ */
+
+ static u8 ecdump_regs[256];
+
+ static int ecdump_read(char *p)
+ {
+ int len = 0;
+ int i, j;
+ u8 v;
+
+ len += sprintf(p + len, "EC "
+ " +00 +01 +02 +03 +04 +05 +06 +07"
+ " +08 +09 +0a +0b +0c +0d +0e +0f\n");
+ for (i = 0; i < 256; i += 16) {
+ len += sprintf(p + len, "EC 0x%02x:", i);
+ for (j = 0; j < 16; j++) {
+ if (!acpi_ec_read(i + j, &v))
+ break;
+ if (v != ecdump_regs[i + j])
+ len += sprintf(p + len, " *%02x", v);
+ else
+ len += sprintf(p + len, " %02x", v);
+ ecdump_regs[i + j] = v;
+ }
+ len += sprintf(p + len, "\n");
+ if (j != 16)
+ break;
+ }
+
+ /* These are way too dangerous to advertise openly... */
+ #if 0
+ len += sprintf(p + len, "commands:\t0x<offset> 0x<value>"
+ " (<offset> is 00-ff, <value> is 00-ff)\n");
+ len += sprintf(p + len, "commands:\t0x<offset> <value> "
+ " (<offset> is 00-ff, <value> is 0-255)\n");
+ #endif
+ return len;
+ }
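+ /* In the dump above, a register whose value changed since the
+ * previous read is flagged with a '*' prefix. */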
+
+ static int ecdump_write(char *buf)
+ {
+ char *cmd;
+ int i, v;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (sscanf(cmd, "0x%x 0x%x", &i, &v) == 2) {
+ /* i and v set */
+ } else if (sscanf(cmd, "0x%x %u", &i, &v) == 2) {
+ /* i and v set */
+ } else
+ return -EINVAL;
+ if (i >= 0 && i < 256 && v >= 0 && v < 256) {
+ if (!acpi_ec_write(i, v))
+ return -EIO;
+ } else
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ static struct ibm_struct ecdump_driver_data = {
+ .name = "ecdump",
+ .read = ecdump_read,
+ .write = ecdump_write,
+ .flags.experimental = 1,
+ };
+
+ /*************************************************************************
+ * Backlight/brightness subdriver
+ */
+
+ #define TPACPI_BACKLIGHT_DEV_NAME "thinkpad_screen"
+
+ enum {
+ TP_EC_BACKLIGHT = 0x31,
+
+ /* TP_EC_BACKLIGHT bitmasks */
+ TP_EC_BACKLIGHT_LVLMSK = 0x1F,
+ TP_EC_BACKLIGHT_CMDMSK = 0xE0,
+ TP_EC_BACKLIGHT_MAPSW = 0x20,
+ };
+
-static struct backlight_device *ibm_backlight_device;
+ static int brightness_mode;
+ static unsigned int brightness_enable = 2; /* 2 = auto, 0 = no, 1 = yes */
+
+ static struct mutex brightness_mutex;
+
+ /*
+ * ThinkPads can read brightness from two places: EC 0x31, or
+ * CMOS NVRAM byte 0x5E, bits 0-3.
+ *
+ * EC 0x31 has the following layout
+ * Bit 7: unknown function
+ * Bit 6: unknown function
+ * Bit 5: Z: honour scale changes, NZ: ignore scale changes
+ * Bit 4: must be set to zero to avoid problems
+ * Bit 3-0: backlight brightness level
+ *
+ * brightness_get_raw returns status data in the EC 0x31 layout
+ */
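+ /* Example: a raw EC value of 0x24 has bit 5 set (ignore scale
+ * changes) and encodes brightness level 4. */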
+ static int brightness_get_raw(int *status)
+ {
+ u8 lec = 0, lcmos = 0, level = 0;
+
+ if (brightness_mode & 1) {
+ if (!acpi_ec_read(TP_EC_BACKLIGHT, &lec))
+ return -EIO;
+ level = lec & TP_EC_BACKLIGHT_LVLMSK;
+ }
+ if (brightness_mode & 2) {
+ lcmos = (nvram_read_byte(TP_NVRAM_ADDR_BRIGHTNESS)
+ & TP_NVRAM_MASK_LEVEL_BRIGHTNESS)
+ >> TP_NVRAM_POS_LEVEL_BRIGHTNESS;
+ lcmos &= (tp_features.bright_16levels)? 0x0f : 0x07;
+ level = lcmos;
+ }
+
+ if (brightness_mode == 3) {
+ *status = lec; /* Prefer EC, CMOS is just a backing store */
+ lec &= TP_EC_BACKLIGHT_LVLMSK;
+ if (lec == lcmos)
+ tp_warned.bright_cmos_ec_unsync = 0;
+ else {
+ if (!tp_warned.bright_cmos_ec_unsync) {
+ printk(TPACPI_ERR
+ "CMOS NVRAM (%u) and EC (%u) do not "
+ "agree on display brightness level\n",
+ (unsigned int) lcmos,
+ (unsigned int) lec);
+ tp_warned.bright_cmos_ec_unsync = 1;
+ }
+ return -EIO;
+ }
+ } else {
+ *status = level;
+ }
+
+ return 0;
+ }
+
+ /* May return EINTR which can always be mapped to ERESTARTSYS */
+ static int brightness_set(int value)
+ {
+ int cmos_cmd, inc, i, res;
+ int current_value;
+ int command_bits;
+
+ if (value > ((tp_features.bright_16levels)? 15 : 7) ||
+ value < 0)
+ return -EINVAL;
+
+ res = mutex_lock_killable(&brightness_mutex);
+ if (res < 0)
+ return res;
+
+ res = brightness_get_raw(&current_value);
+ if (res < 0)
+ goto errout;
+
+ command_bits = current_value & TP_EC_BACKLIGHT_CMDMSK;
+ current_value &= TP_EC_BACKLIGHT_LVLMSK;
+
+ cmos_cmd = value > current_value ?
+ TP_CMOS_BRIGHTNESS_UP :
+ TP_CMOS_BRIGHTNESS_DOWN;
+ inc = (value > current_value)? 1 : -1;
+
+ res = 0;
+ for (i = current_value; i != value; i += inc) {
+ if ((brightness_mode & 2) &&
+ issue_thinkpad_cmos_command(cmos_cmd)) {
+ res = -EIO;
+ goto errout;
+ }
+ if ((brightness_mode & 1) &&
+ !acpi_ec_write(TP_EC_BACKLIGHT,
+ (i + inc) | command_bits)) {
+ res = -EIO;
+ goto errout;
+ }
+ }
+
+ errout:
+ mutex_unlock(&brightness_mutex);
+ return res;
+ }
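+ /* Note the one-level-at-a-time loop above: going from level 2 to
+ * 5 issues three brightness-up steps, keeping the CMOS NVRAM and
+ * EC views in sync at every step. */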
+
+ /* sysfs backlight class ----------------------------------------------- */
+
+ static int brightness_update_status(struct backlight_device *bd)
+ {
+ /* it is the backlight class's job (caller) to handle
+ * EINTR and other errors properly */
+ return brightness_set(
+ (bd->props.fb_blank == FB_BLANK_UNBLANK &&
+ bd->props.power == FB_BLANK_UNBLANK) ?
+ bd->props.brightness : 0);
+ }
+
+ static int brightness_get(struct backlight_device *bd)
+ {
+ int status, res;
+
+ res = brightness_get_raw(&status);
+ if (res < 0)
+ return 0; /* FIXME: teach backlight about error handling */
+
+ return status & TP_EC_BACKLIGHT_LVLMSK;
+ }
+
+ static struct backlight_ops ibm_backlight_data = {
+ .get_brightness = brightness_get,
+ .update_status = brightness_update_status,
+ };
+
+ /* --------------------------------------------------------------------- */
+
+ static int __init brightness_init(struct ibm_init_struct *iibm)
+ {
+ int b;
++ long acpi_video_support;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n");
+
+ mutex_init(&brightness_mutex);
+
+ /*
+ * We always attempt to detect acpi support, so as to switch
+ * Lenovo Vista BIOS to ACPI brightness mode even if we are not
+ * going to publish a backlight interface
+ */
+ b = tpacpi_check_std_acpi_brightness_support();
+ if (b > 0) {
-
- if (acpi_video_backlight_support()) {
++ acpi_video_support = acpi_video_backlight_support();
++ if (acpi_video_support &&
++ !(acpi_video_support & ACPI_VIDEO_IGD)) {
+ if (brightness_enable > 1) {
+ printk(TPACPI_NOTICE
+ "Standard ACPI backlight interface "
+ "available, not loading native one.\n");
+ return 1;
+ } else if (brightness_enable == 1) {
+ printk(TPACPI_NOTICE
+ "Backlight control force enabled, even if standard "
+ "ACPI backlight interface is available\n");
+ }
+ } else {
+ if (brightness_enable > 1) {
+ printk(TPACPI_NOTICE
+ "Standard ACPI backlight interface not "
+ "available, thinkpad_acpi native "
+ "brightness control enabled\n");
+ }
++ if (acpi_video_support & ACPI_VIDEO_IGD) {
++ printk(TPACPI_NOTICE, "IGD device"
++ " detected - take over backlight"
++ " switching\n");
++ tp_features.bright_igdmode = 1;
++ }
+ }
+ }
+
+ if (!brightness_enable) {
+ dbg_printk(TPACPI_DBG_INIT,
+ "brightness support disabled by "
+ "module parameter\n");
+ return 1;
+ }
+
+ if (b > 16) {
+ printk(TPACPI_ERR
+ "Unsupported brightness interface, "
+ "please contact %s\n", TPACPI_MAIL);
+ return 1;
+ }
+ if (b == 16)
+ tp_features.bright_16levels = 1;
+
+ if (!brightness_mode) {
+ if (thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO)
+ brightness_mode = 2;
+ else
+ brightness_mode = 3;
+
+ dbg_printk(TPACPI_DBG_INIT, "selected brightness_mode=%d\n",
+ brightness_mode);
+ }
+
+ if (brightness_mode > 3)
+ return -EINVAL;
+
+ if (brightness_get_raw(&b) < 0)
+ return 1;
+
+ if (tp_features.bright_16levels)
+ printk(TPACPI_INFO
+ "detected a 16-level brightness capable ThinkPad\n");
+
+ ibm_backlight_device = backlight_device_register(
+ TPACPI_BACKLIGHT_DEV_NAME, NULL, NULL,
+ &ibm_backlight_data);
+ if (IS_ERR(ibm_backlight_device)) {
+ printk(TPACPI_ERR "Could not register backlight device\n");
+ return PTR_ERR(ibm_backlight_device);
+ }
+ vdbg_printk(TPACPI_DBG_INIT, "brightness is supported\n");
+
+ ibm_backlight_device->props.max_brightness =
+ (tp_features.bright_16levels)? 15 : 7;
+ ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK;
+ backlight_update_status(ibm_backlight_device);
+
+ return 0;
+ }
+
+ static void brightness_exit(void)
+ {
+ if (ibm_backlight_device) {
+ vdbg_printk(TPACPI_DBG_EXIT,
+ "calling backlight_device_unregister()\n");
+ backlight_device_unregister(ibm_backlight_device);
+ }
+ }
+
+ static int brightness_read(char *p)
+ {
+ int len = 0;
+ int level;
+
+ level = brightness_get(NULL);
+ if (level < 0) {
+ len += sprintf(p + len, "level:\t\tunreadable\n");
+ } else {
+ len += sprintf(p + len, "level:\t\t%d\n", level);
+ len += sprintf(p + len, "commands:\tup, down\n");
+ len += sprintf(p + len, "commands:\tlevel <level>"
+ " (<level> is 0-%d)\n",
+ (tp_features.bright_16levels) ? 15 : 7);
+ }
+
+ return len;
+ }
+
+ static int brightness_write(char *buf)
+ {
+ int level;
+ int rc;
+ char *cmd;
+ int max_level = (tp_features.bright_16levels) ? 15 : 7;
+
+ level = brightness_get(NULL);
+ if (level < 0)
+ return level;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (strlencmp(cmd, "up") == 0) {
+ if (level < max_level)
+ level++;
+ } else if (strlencmp(cmd, "down") == 0) {
+ if (level > 0)
+ level--;
+ } else if (sscanf(cmd, "level %d", &level) == 1 &&
+ level >= 0 && level <= max_level) {
+ /* new level set */
+ } else
+ return -EINVAL;
+ }
+
+ /*
+ * Now we know what the final level should be, so we try to set it.
+ * Doing it this way makes the syscall restartable in case of EINTR
+ */
+ rc = brightness_set(level);
+ return (rc == -EINTR)? -ERESTARTSYS : rc;
+ }
+
+ static struct ibm_struct brightness_driver_data = {
+ .name = "brightness",
+ .read = brightness_read,
+ .write = brightness_write,
+ .exit = brightness_exit,
+ };
+
+ /*************************************************************************
+ * Volume subdriver
+ */
+
+ static int volume_offset = 0x30;
+
+ static int volume_read(char *p)
+ {
+ int len = 0;
+ u8 level;
+
+ if (!acpi_ec_read(volume_offset, &level)) {
+ len += sprintf(p + len, "level:\t\tunreadable\n");
+ } else {
+ len += sprintf(p + len, "level:\t\t%d\n", level & 0xf);
+ len += sprintf(p + len, "mute:\t\t%s\n", onoff(level, 6));
+ len += sprintf(p + len, "commands:\tup, down, mute\n");
+ len += sprintf(p + len, "commands:\tlevel <level>"
+ " (<level> is 0-15)\n");
+ }
+
+ return len;
+ }
+
+ static int volume_write(char *buf)
+ {
+ int cmos_cmd, inc, i;
+ u8 level, mute;
+ int new_level, new_mute;
+ char *cmd;
+
+ while ((cmd = next_cmd(&buf))) {
+ if (!acpi_ec_read(volume_offset, &level))
+ return -EIO;
+ new_mute = mute = level & 0x40;
+ new_level = level = level & 0xf;
+
+ if (strlencmp(cmd, "up") == 0) {
+ if (mute)
+ new_mute = 0;
+ else
+ new_level = level == 15 ? 15 : level + 1;
+ } else if (strlencmp(cmd, "down") == 0) {
+ if (mute)
+ new_mute = 0;
+ else
+ new_level = level == 0 ? 0 : level - 1;
+ } else if (sscanf(cmd, "level %d", &new_level) == 1 &&
+ new_level >= 0 && new_level <= 15) {
+ /* new_level set */
+ } else if (strlencmp(cmd, "mute") == 0) {
+ new_mute = 0x40;
+ } else
+ return -EINVAL;
+
+ if (new_level != level) {
+ /* mute doesn't change */
+
+ cmos_cmd = (new_level > level) ?
+ TP_CMOS_VOLUME_UP : TP_CMOS_VOLUME_DOWN;
+ inc = new_level > level ? 1 : -1;
+
+ if (mute && (issue_thinkpad_cmos_command(cmos_cmd) ||
+ !acpi_ec_write(volume_offset, level)))
+ return -EIO;
+
+ for (i = level; i != new_level; i += inc)
+ if (issue_thinkpad_cmos_command(cmos_cmd) ||
+ !acpi_ec_write(volume_offset, i + inc))
+ return -EIO;
+
+ if (mute &&
+ (issue_thinkpad_cmos_command(TP_CMOS_VOLUME_MUTE) ||
+ !acpi_ec_write(volume_offset, new_level + mute))) {
+ return -EIO;
+ }
+ }
+
+ if (new_mute != mute) {
+ /* level doesn't change */
+
+ cmos_cmd = (new_mute) ?
+ TP_CMOS_VOLUME_MUTE : TP_CMOS_VOLUME_UP;
+
+ if (issue_thinkpad_cmos_command(cmos_cmd) ||
+ !acpi_ec_write(volume_offset, level + new_mute))
+ return -EIO;
+ }
+ }
+
+ return 0;
+ }
+
+ static struct ibm_struct volume_driver_data = {
+ .name = "volume",
+ .read = volume_read,
+ .write = volume_write,
+ };
+
+ /*************************************************************************
+ * Fan subdriver
+ */
+
+ /*
+ * FAN ACCESS MODES
+ *
+ * TPACPI_FAN_RD_ACPI_GFAN:
+ * ACPI GFAN method: returns fan level
+ *
+ * see TPACPI_FAN_WR_ACPI_SFAN
+ * EC 0x2f (HFSP) not available if GFAN exists
+ *
+ * TPACPI_FAN_WR_ACPI_SFAN:
+ * ACPI SFAN method: sets fan level, 0 (stop) to 7 (max)
+ *
+ * EC 0x2f (HFSP) might be available *for reading*, but do not use
+ * it for writing.
+ *
+ * TPACPI_FAN_WR_TPEC:
+ * ThinkPad EC register 0x2f (HFSP): fan control loop mode
+ * Supported on almost all ThinkPads
+ *
+ * Fan speed changes of any sort (including those caused by the
+ * disengaged mode) are usually done slowly by the firmware as the
+ * maximum amount of fan duty cycle change per second seems to be
+ * limited.
+ *
+ * Reading is not available if GFAN exists.
+ * Writing is not available if SFAN exists.
+ *
+ * Bits
+ * 7 automatic mode engaged;
+ * (default operation mode of the ThinkPad)
+ * fan level is ignored in this mode.
+ * 6 full speed mode (takes precedence over bit 7);
+ * not available on all thinkpads. May disable
+ * the tachometer while the fan controller ramps up
+ * the speed (which can take up to a few *minutes*).
+ * Speeds up fan to 100% duty-cycle, which is far above
+ * the standard RPM levels. It is not impossible that
+ * it could cause hardware damage.
+ * 5-3 unused in some models. Extra bits for fan level
+ * in others, but still useless as all values above
+ * 7 map to the same speed as level 7 in these models.
+ * 2-0 fan level (0..7 usually)
+ * 0x00 = stop
+ * 0x07 = max (set when temperatures critical)
+ * Some ThinkPads may have other levels, see
+ * TPACPI_FAN_WR_ACPI_FANS (X31/X40/X41)
+ *
+ * FIRMWARE BUG: on some models, EC 0x2f might not be initialized at
+ * boot. Apparently the EC does not initialize it, so unless ACPI DSDT
+ * does so, its initial value is meaningless (0x07).
+ *
+ * For firmware bugs, refer to:
+ * http://thinkwiki.org/wiki/Embedded_Controller_Firmware#Firmware_Issues
+ *
+ * ----
+ *
+ * ThinkPad EC register 0x84 (LSB), 0x85 (MSB):
+ * Main fan tachometer reading (in RPM)
+ *
+ * This register is present on all ThinkPads with a new-style EC, and
+ * it is known not to be present on the A21m/e and T22, as there is
+ * something else at offset 0x84 according to the ACPI DSDT. Other
+ * ThinkPads from this same time period (and earlier) probably lack the
+ * tachometer as well.
+ *
+ * Unfortunately, many ThinkPads with new-style ECs whose firmware
+ * was never fixed by IBM to report the EC firmware version string
+ * probably support the tachometer (like the early X models), so
+ * detecting it is quite hard. We need more data to know for sure.
+ *
+ * FIRMWARE BUG: always read 0x84 first, otherwise incorrect readings
+ * might result.
+ *
+ * FIRMWARE BUG: may go stale while the EC is switching to full speed
+ * mode.
+ *
+ * For firmware bugs, refer to:
+ * http://thinkwiki.org/wiki/Embedded_Controller_Firmware#Firmware_Issues
+ *
+ * TPACPI_FAN_WR_ACPI_FANS:
+ * ThinkPad X31, X40, X41. Not available in the X60.
+ *
+ * FANS ACPI handle: takes three arguments: low speed, medium speed,
+ * high speed. ACPI DSDT seems to map these three speeds to levels
+ * as follows: STOP LOW LOW MED MED HIGH HIGH HIGH HIGH
+ * (this map is stored on FAN0..FAN8 as "0,1,1,2,2,3,3,3,3")
+ *
+ * The speeds are stored on handles
+ * (FANA:FAN9), (FANC:FANB), (FANE:FAND).
+ *
+ * There are three default speed sets, accessible as handles:
+ * FS1L,FS1M,FS1H; FS2L,FS2M,FS2H; FS3L,FS3M,FS3H
+ *
+ * ACPI DSDT switches which set is in use depending on various
+ * factors.
+ *
+ * TPACPI_FAN_WR_TPEC is also available and should be used to
+ * command the fan. The X31/X40/X41 seem to have 8 fan levels,
+ * but the ACPI tables just mention level 7.
+ */
+
+ enum { /* Fan control constants */
+ fan_status_offset = 0x2f, /* EC register 0x2f */
+ fan_rpm_offset = 0x84, /* EC register 0x84: LSB, 0x85 MSB (RPM)
+ * 0x84 must be read before 0x85 */
+
+ TP_EC_FAN_FULLSPEED = 0x40, /* EC fan mode: full speed */
+ TP_EC_FAN_AUTO = 0x80, /* EC fan mode: auto fan control */
+
+ TPACPI_FAN_LAST_LEVEL = 0x100, /* Use cached last-seen fan level */
+ };
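+
+ /*
+  * Illustrative sketch (editor's note; the helper name is hypothetical
+  * and assumes only the constants above): how an HFSP status byte
+  * decodes under the precedence rules from the FAN ACCESS MODES
+  * comment -- full speed (bit 6) takes precedence over auto (bit 7),
+  * and bits 2-0 carry the manual level otherwise.
+  */
+ static inline const char *hfsp_mode_name(u8 hfsp)
+ {
+ 	if (hfsp & TP_EC_FAN_FULLSPEED)	/* bit 6: full-speed mode */
+ 		return "full-speed";
+ 	if (hfsp & TP_EC_FAN_AUTO)	/* bit 7: firmware-controlled */
+ 		return "auto";
+ 	return "manual";		/* level is hfsp & 0x07 */
+ }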
+
+ enum fan_status_access_mode {
+ TPACPI_FAN_NONE = 0, /* No fan status or control */
+ TPACPI_FAN_RD_ACPI_GFAN, /* Use ACPI GFAN */
+ TPACPI_FAN_RD_TPEC, /* Use ACPI EC regs 0x2f, 0x84-0x85 */
+ };
+
+ enum fan_control_access_mode {
+ TPACPI_FAN_WR_NONE = 0, /* No fan control */
+ TPACPI_FAN_WR_ACPI_SFAN, /* Use ACPI SFAN */
+ TPACPI_FAN_WR_TPEC, /* Use ACPI EC reg 0x2f */
+ TPACPI_FAN_WR_ACPI_FANS, /* Use ACPI FANS and EC reg 0x2f */
+ };
+
+ enum fan_control_commands {
+ TPACPI_FAN_CMD_SPEED = 0x0001, /* speed command */
+ TPACPI_FAN_CMD_LEVEL = 0x0002, /* level command */
+ TPACPI_FAN_CMD_ENABLE = 0x0004, /* enable/disable cmd,
+ * and also watchdog cmd */
+ };
+
+ static int fan_control_allowed;
+
+ static enum fan_status_access_mode fan_status_access_mode;
+ static enum fan_control_access_mode fan_control_access_mode;
+ static enum fan_control_commands fan_control_commands;
+
+ static u8 fan_control_initial_status;
+ static u8 fan_control_desired_level;
+ static u8 fan_control_resume_level;
+ static int fan_watchdog_maxinterval;
+
+ static struct mutex fan_mutex;
+
+ static void fan_watchdog_fire(struct work_struct *ignored);
+ static DECLARE_DELAYED_WORK(fan_watchdog_task, fan_watchdog_fire);
+
+ TPACPI_HANDLE(fans, ec, "FANS"); /* X31, X40, X41 */
+ TPACPI_HANDLE(gfan, ec, "GFAN", /* 570 */
+ "\\FSPD", /* 600e/x, 770e, 770x */
+ ); /* all others */
+ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */
+ "JFNS", /* 770x-JL */
+ ); /* all others */
+
+ /*
+ * Uninitialized HFSP quirk: ACPI DSDT and EC fail to initialize the
+ * HFSP register at boot, so it contains 0x07 but the Thinkpad could
+ * be in auto mode (0x80).
+ *
+ * This is corrected by any write to HFSP either by the driver, or
+ * by the firmware.
+ *
+ * We assume 0x07 really means auto mode while this quirk is active,
+ * as this is far more likely than the ThinkPad being in level 7,
+ * which is only used by the firmware during thermal emergencies.
+ */
+
+ static void fan_quirk1_detect(void)
+ {
+ /* In some ThinkPads, neither the EC nor the ACPI
+ * DSDT initialize the HFSP register, and it ends up
+ * being initially set to 0x07 when it *could* be
+ * either 0x07 or 0x80.
+ *
+ * Enable for TP-1Y (T43), TP-78 (R51e),
+ * TP-76 (R52), TP-70 (T43, R52), which are known
+ * to be buggy. */
+ if (fan_control_initial_status == 0x07) {
+ switch (thinkpad_id.ec_model) {
+ case 0x5931: /* TP-1Y */
+ case 0x3837: /* TP-78 */
+ case 0x3637: /* TP-76 */
+ case 0x3037: /* TP-70 */
+ printk(TPACPI_NOTICE
+ "fan_init: initial fan status is unknown, "
+ "assuming it is in auto mode\n");
+ tp_features.fan_ctrl_status_undef = 1;
+ break;
+ }
+ }
+ }
+
+ static void fan_quirk1_handle(u8 *fan_status)
+ {
+ if (unlikely(tp_features.fan_ctrl_status_undef)) {
+ if (*fan_status != fan_control_initial_status) {
+ /* something changed the HFSP register since
+ * driver init time, so it is not undefined
+ * anymore */
+ tp_features.fan_ctrl_status_undef = 0;
+ } else {
+ /* Return most likely status. In fact, it
+ * might be the only possible status */
+ *fan_status = TP_EC_FAN_AUTO;
+ }
+ }
+ }
+
+ /*
+ * Call with fan_mutex held
+ */
+ static void fan_update_desired_level(u8 status)
+ {
+ if ((status &
+ (TP_EC_FAN_AUTO | TP_EC_FAN_FULLSPEED)) == 0) {
+ if (status > 7)
+ fan_control_desired_level = 7;
+ else
+ fan_control_desired_level = status;
+ }
+ }
+
+ static int fan_get_status(u8 *status)
+ {
+ u8 s;
+
+ /* TODO:
+ * Add TPACPI_FAN_RD_ACPI_FANS ? */
+
+ switch (fan_status_access_mode) {
+ case TPACPI_FAN_RD_ACPI_GFAN:
+ /* 570, 600e/x, 770e, 770x */
+
+ if (unlikely(!acpi_evalf(gfan_handle, &s, NULL, "d")))
+ return -EIO;
+
+ if (likely(status))
+ *status = s & 0x07;
+
+ break;
+
+ case TPACPI_FAN_RD_TPEC:
+ /* all except 570, 600e/x, 770e, 770x */
+ if (unlikely(!acpi_ec_read(fan_status_offset, &s)))
+ return -EIO;
+
+ if (likely(status)) {
+ *status = s;
+ fan_quirk1_handle(status);
+ }
+
+ break;
+
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+ }
+
+ static int fan_get_status_safe(u8 *status)
+ {
+ int rc;
+ u8 s;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+ rc = fan_get_status(&s);
+ if (!rc)
+ fan_update_desired_level(s);
+ mutex_unlock(&fan_mutex);
+
+ if (status)
+ *status = s;
+
+ return rc;
+ }
+
+ static int fan_get_speed(unsigned int *speed)
+ {
+ u8 hi, lo;
+
+ switch (fan_status_access_mode) {
+ case TPACPI_FAN_RD_TPEC:
+ /* all except 570, 600e/x, 770e, 770x */
+ if (unlikely(!acpi_ec_read(fan_rpm_offset, &lo) ||
+ !acpi_ec_read(fan_rpm_offset + 1, &hi)))
+ return -EIO;
+
+ if (likely(speed))
+ *speed = (hi << 8) | lo;
+
+ break;
+
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+ }
+
+ static int fan_set_level(int level)
+ {
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_SFAN:
+ if (level >= 0 && level <= 7) {
+ if (!acpi_evalf(sfan_handle, NULL, NULL, "vd", level))
+ return -EIO;
+ } else
+ return -EINVAL;
+ break;
+
+ case TPACPI_FAN_WR_ACPI_FANS:
+ case TPACPI_FAN_WR_TPEC:
+ if (!(level & TP_EC_FAN_AUTO) &&
+ !(level & TP_EC_FAN_FULLSPEED) &&
+ ((level < 0) || (level > 7)))
+ return -EINVAL;
+
+ /* safety net should the EC not support AUTO
+ * or FULLSPEED mode bits and just ignore them */
+ if (level & TP_EC_FAN_FULLSPEED)
+ level |= 7; /* safety min speed 7 */
+ else if (level & TP_EC_FAN_AUTO)
+ level |= 4; /* safety min speed 4 */
+
+ if (!acpi_ec_write(fan_status_offset, level))
+ return -EIO;
+ else
+ tp_features.fan_ctrl_status_undef = 0;
+ break;
+
+ default:
+ return -ENXIO;
+ }
+ return 0;
+ }
+
+ static int fan_set_level_safe(int level)
+ {
+ int rc;
+
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+
+ if (level == TPACPI_FAN_LAST_LEVEL)
+ level = fan_control_desired_level;
+
+ rc = fan_set_level(level);
+ if (!rc)
+ fan_update_desired_level(level);
+
+ mutex_unlock(&fan_mutex);
+ return rc;
+ }
+
+ static int fan_set_enable(void)
+ {
+ u8 s;
+ int rc;
+
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_FANS:
+ case TPACPI_FAN_WR_TPEC:
+ rc = fan_get_status(&s);
+ if (rc < 0)
+ break;
+
+ /* Don't go out of emergency fan mode */
+ if (s != 7) {
+ s &= 0x07;
+ s |= TP_EC_FAN_AUTO | 4; /* min fan speed 4 */
+ }
+
+ if (!acpi_ec_write(fan_status_offset, s))
+ rc = -EIO;
+ else {
+ tp_features.fan_ctrl_status_undef = 0;
+ rc = 0;
+ }
+ break;
+
+ case TPACPI_FAN_WR_ACPI_SFAN:
+ rc = fan_get_status(&s);
+ if (rc < 0)
+ break;
+
+ s &= 0x07;
+
+ /* Set fan to at least level 4 */
+ s |= 4;
+
+ if (!acpi_evalf(sfan_handle, NULL, NULL, "vd", s))
+ rc = -EIO;
+ else
+ rc = 0;
+ break;
+
+ default:
+ rc = -ENXIO;
+ }
+
+ mutex_unlock(&fan_mutex);
+ return rc;
+ }
+
+ static int fan_set_disable(void)
+ {
+ int rc;
+
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+
+ rc = 0;
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_FANS:
+ case TPACPI_FAN_WR_TPEC:
+ if (!acpi_ec_write(fan_status_offset, 0x00))
+ rc = -EIO;
+ else {
+ fan_control_desired_level = 0;
+ tp_features.fan_ctrl_status_undef = 0;
+ }
+ break;
+
+ case TPACPI_FAN_WR_ACPI_SFAN:
+ if (!acpi_evalf(sfan_handle, NULL, NULL, "vd", 0x00))
+ rc = -EIO;
+ else
+ fan_control_desired_level = 0;
+ break;
+
+ default:
+ rc = -ENXIO;
+ }
+
+
+ mutex_unlock(&fan_mutex);
+ return rc;
+ }
+
+ static int fan_set_speed(int speed)
+ {
+ int rc;
+
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+
+ rc = 0;
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_FANS:
+ if (speed >= 0 && speed <= 65535) {
+ if (!acpi_evalf(fans_handle, NULL, NULL, "vddd",
+ speed, speed, speed))
+ rc = -EIO;
+ } else
+ rc = -EINVAL;
+ break;
+
+ default:
+ rc = -ENXIO;
+ }
+
+ mutex_unlock(&fan_mutex);
+ return rc;
+ }
+
+ static void fan_watchdog_reset(void)
+ {
+ static int fan_watchdog_active;
+
+ if (fan_control_access_mode == TPACPI_FAN_WR_NONE)
+ return;
+
+ if (fan_watchdog_active)
+ cancel_delayed_work(&fan_watchdog_task);
+
+ if (fan_watchdog_maxinterval > 0 &&
+ tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
+ fan_watchdog_active = 1;
+ if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task,
+ msecs_to_jiffies(fan_watchdog_maxinterval
+ * 1000))) {
+ printk(TPACPI_ERR
+ "failed to queue the fan watchdog, "
+ "watchdog will not trigger\n");
+ }
+ } else
+ fan_watchdog_active = 0;
+ }
+
+ static void fan_watchdog_fire(struct work_struct *ignored)
+ {
+ int rc;
+
+ if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+ return;
+
+ printk(TPACPI_NOTICE "fan watchdog: enabling fan\n");
+ rc = fan_set_enable();
+ if (rc < 0) {
+ printk(TPACPI_ERR "fan watchdog: error %d while enabling fan, "
+ "will try again later...\n", -rc);
+ /* reschedule for later */
+ fan_watchdog_reset();
+ }
+ }
+
+ /*
+ * SYSFS fan layout: hwmon compatible (device)
+ *
+ * pwm*_enable:
+ * 0: "disengaged" mode
+ * 1: manual mode
+ * 2: native EC "auto" mode (recommended, hardware default)
+ *
+ * pwm*: set speed in manual mode, ignored otherwise.
+ * 0 is level 0; 255 is level 7. Intermediate points done with linear
+ * interpolation.
+ *
+ * fan*_input: tachometer reading, RPM
+ *
+ *
+ * SYSFS fan layout: extensions
+ *
+ * fan_watchdog (driver):
+ * fan watchdog interval in seconds, 0 disables (default), max 120
+ */
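+
+ /*
+  * Illustrative sketch (editor's note; hypothetical helpers, assuming
+  * the layout described above): pwm1 writes scale 0-255 down to a
+  * 0-7 level by keeping the top three bits, and reads scale back up
+  * with (level * 255) / 7, so 0 <-> 0 and 7 <-> 255 round-trip
+  * exactly.
+  */
+ static inline u8 tpacpi_pwm_to_level(u8 pwm)
+ {
+ 	return (pwm >> 5) & 0x07;	/* e.g. 128 -> level 4 */
+ }
+
+ static inline u8 tpacpi_level_to_pwm(u8 level)
+ {
+ 	return (level * 255) / 7;	/* e.g. level 4 -> 145 */
+ }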
+
+ /* sysfs fan pwm1_enable ----------------------------------------------- */
+ static ssize_t fan_pwm1_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res, mode;
+ u8 status;
+
+ res = fan_get_status_safe(&status);
+ if (res)
+ return res;
+
+ if (status & TP_EC_FAN_FULLSPEED) {
+ mode = 0;
+ } else if (status & TP_EC_FAN_AUTO) {
+ mode = 2;
+ } else
+ mode = 1;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", mode);
+ }
+
+ static ssize_t fan_pwm1_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+ int res, level;
+
+ if (parse_strtoul(buf, 2, &t))
+ return -EINVAL;
+
+ switch (t) {
+ case 0:
+ level = TP_EC_FAN_FULLSPEED;
+ break;
+ case 1:
+ level = TPACPI_FAN_LAST_LEVEL;
+ break;
+ case 2:
+ level = TP_EC_FAN_AUTO;
+ break;
+ case 3:
+ /* reserved for software-controlled auto mode */
+ return -ENOSYS;
+ default:
+ return -EINVAL;
+ }
+
+ res = fan_set_level_safe(level);
+ if (res == -ENXIO)
+ return -EINVAL;
+ else if (res < 0)
+ return res;
+
+ fan_watchdog_reset();
+
+ return count;
+ }
+
+ static struct device_attribute dev_attr_fan_pwm1_enable =
+ __ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
+ fan_pwm1_enable_show, fan_pwm1_enable_store);
+
+ /* sysfs fan pwm1 ------------------------------------------------------ */
+ static ssize_t fan_pwm1_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res;
+ u8 status;
+
+ res = fan_get_status_safe(&status);
+ if (res)
+ return res;
+
+ if ((status &
+ (TP_EC_FAN_AUTO | TP_EC_FAN_FULLSPEED)) != 0)
+ status = fan_control_desired_level;
+
+ if (status > 7)
+ status = 7;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", (status * 255) / 7);
+ }
+
+ static ssize_t fan_pwm1_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ unsigned long s;
+ int rc;
+ u8 status, newlevel;
+
+ if (parse_strtoul(buf, 255, &s))
+ return -EINVAL;
+
+ /* scale down from 0-255 to 0-7 */
+ newlevel = (s >> 5) & 0x07;
+
+ if (mutex_lock_killable(&fan_mutex))
+ return -ERESTARTSYS;
+
+ rc = fan_get_status(&status);
+ if (!rc && (status &
+ (TP_EC_FAN_AUTO | TP_EC_FAN_FULLSPEED)) == 0) {
+ rc = fan_set_level(newlevel);
+ if (rc == -ENXIO)
+ rc = -EINVAL;
+ else if (!rc) {
+ fan_update_desired_level(newlevel);
+ fan_watchdog_reset();
+ }
+ }
+
+ mutex_unlock(&fan_mutex);
+ return (rc)? rc : count;
+ }
+
+ static struct device_attribute dev_attr_fan_pwm1 =
+ __ATTR(pwm1, S_IWUSR | S_IRUGO,
+ fan_pwm1_show, fan_pwm1_store);
+
+ /* sysfs fan fan1_input ------------------------------------------------ */
+ static ssize_t fan_fan1_input_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ int res;
+ unsigned int speed;
+
+ res = fan_get_speed(&speed);
+ if (res < 0)
+ return res;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", speed);
+ }
+
+ static struct device_attribute dev_attr_fan_fan1_input =
+ __ATTR(fan1_input, S_IRUGO,
+ fan_fan1_input_show, NULL);
+
+ /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
+ static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%u\n", fan_watchdog_maxinterval);
+ }
+
+ static ssize_t fan_fan_watchdog_store(struct device_driver *drv,
+ const char *buf, size_t count)
+ {
+ unsigned long t;
+
+ if (parse_strtoul(buf, 120, &t))
+ return -EINVAL;
+
+ if (!fan_control_allowed)
+ return -EPERM;
+
+ fan_watchdog_maxinterval = t;
+ fan_watchdog_reset();
+
+ return count;
+ }
+
+ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO,
+ fan_fan_watchdog_show, fan_fan_watchdog_store);
+
+ /* --------------------------------------------------------------------- */
+ static struct attribute *fan_attributes[] = {
+ &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr,
+ &dev_attr_fan_fan1_input.attr,
+ NULL
+ };
+
+ static const struct attribute_group fan_attr_group = {
+ .attrs = fan_attributes,
+ };
+
+ static int __init fan_init(struct ibm_init_struct *iibm)
+ {
+ int rc;
+
+ vdbg_printk(TPACPI_DBG_INIT, "initializing fan subdriver\n");
+
+ mutex_init(&fan_mutex);
+ fan_status_access_mode = TPACPI_FAN_NONE;
+ fan_control_access_mode = TPACPI_FAN_WR_NONE;
+ fan_control_commands = 0;
+ fan_watchdog_maxinterval = 0;
+ tp_features.fan_ctrl_status_undef = 0;
+ fan_control_desired_level = 7;
+
+ TPACPI_ACPIHANDLE_INIT(fans);
+ TPACPI_ACPIHANDLE_INIT(gfan);
+ TPACPI_ACPIHANDLE_INIT(sfan);
+
+ if (gfan_handle) {
+ /* 570, 600e/x, 770e, 770x */
+ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN;
+ } else {
+ /* all other ThinkPads: note that even old-style
+ * ThinkPad ECs support the fan control register */
+ if (likely(acpi_ec_read(fan_status_offset,
+ &fan_control_initial_status))) {
+ fan_status_access_mode = TPACPI_FAN_RD_TPEC;
+ fan_quirk1_detect();
+ } else {
+ printk(TPACPI_ERR
+ "ThinkPad ACPI EC access misbehaving, "
+ "fan status and control unavailable\n");
+ return 1;
+ }
+ }
+
+ if (sfan_handle) {
+ /* 570, 770x-JL */
+ fan_control_access_mode = TPACPI_FAN_WR_ACPI_SFAN;
+ fan_control_commands |=
+ TPACPI_FAN_CMD_LEVEL | TPACPI_FAN_CMD_ENABLE;
+ } else {
+ if (!gfan_handle) {
+ /* gfan without sfan means no fan control */
+ /* all other models implement TP EC 0x2f control */
+
+ if (fans_handle) {
+ /* X31, X40, X41 */
+ fan_control_access_mode =
+ TPACPI_FAN_WR_ACPI_FANS;
+ fan_control_commands |=
+ TPACPI_FAN_CMD_SPEED |
+ TPACPI_FAN_CMD_LEVEL |
+ TPACPI_FAN_CMD_ENABLE;
+ } else {
+ fan_control_access_mode = TPACPI_FAN_WR_TPEC;
+ fan_control_commands |=
+ TPACPI_FAN_CMD_LEVEL |
+ TPACPI_FAN_CMD_ENABLE;
+ }
+ }
+ }
+
+ vdbg_printk(TPACPI_DBG_INIT, "fan is %s, modes %d, %d\n",
+ str_supported(fan_status_access_mode != TPACPI_FAN_NONE ||
+ fan_control_access_mode != TPACPI_FAN_WR_NONE),
+ fan_status_access_mode, fan_control_access_mode);
+
+ /* fan control master switch */
+ if (!fan_control_allowed) {
+ fan_control_access_mode = TPACPI_FAN_WR_NONE;
+ fan_control_commands = 0;
+ dbg_printk(TPACPI_DBG_INIT,
+ "fan control features disabled by parameter\n");
+ }
+
+ /* update fan_control_desired_level */
+ if (fan_status_access_mode != TPACPI_FAN_NONE)
+ fan_get_status_safe(NULL);
+
+ if (fan_status_access_mode != TPACPI_FAN_NONE ||
+ fan_control_access_mode != TPACPI_FAN_WR_NONE) {
+ rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+ &fan_attr_group);
+ if (rc < 0)
+ return rc;
+
+ rc = driver_create_file(&tpacpi_hwmon_pdriver.driver,
+ &driver_attr_fan_watchdog);
+ if (rc < 0) {
+ sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+ &fan_attr_group);
+ return rc;
+ }
+ return 0;
+ } else
+ return 1;
+ }
+
+ static void fan_exit(void)
+ {
+ vdbg_printk(TPACPI_DBG_EXIT,
+ "cancelling any pending fan watchdog tasks\n");
+
+ /* FIXME: can we really do this unconditionally? */
+ sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group);
+ driver_remove_file(&tpacpi_hwmon_pdriver.driver,
+ &driver_attr_fan_watchdog);
+
+ cancel_delayed_work(&fan_watchdog_task);
+ flush_workqueue(tpacpi_wq);
+ }
+
+ static void fan_suspend(pm_message_t state)
+ {
+ int rc;
+
+ if (!fan_control_allowed)
+ return;
+
+ /* Store fan status in cache */
+ fan_control_resume_level = 0;
+ rc = fan_get_status_safe(&fan_control_resume_level);
+ if (rc < 0)
+ printk(TPACPI_NOTICE
+ "failed to read fan level for later "
+ "restore during resume: %d\n", rc);
+
+ /* if it is undefined, don't attempt to restore it.
+ * KEEP THIS LAST */
+ if (tp_features.fan_ctrl_status_undef)
+ fan_control_resume_level = 0;
+ }
+
+ static void fan_resume(void)
+ {
+ u8 current_level = 7;
+ bool do_set = false;
+ int rc;
+
+ /* DSDT *always* updates status on resume */
+ tp_features.fan_ctrl_status_undef = 0;
+
+ if (!fan_control_allowed ||
+ !fan_control_resume_level ||
+ (fan_get_status_safe(&current_level) < 0))
+ return;
+
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_SFAN:
+ /* never decrease fan level */
+ do_set = (fan_control_resume_level > current_level);
+ break;
+ case TPACPI_FAN_WR_ACPI_FANS:
+ case TPACPI_FAN_WR_TPEC:
+ /* never decrease fan level, scale is:
+ * TP_EC_FAN_FULLSPEED > 7 >= TP_EC_FAN_AUTO
+ *
+ * We expect the firmware to set either 7 or AUTO, but we
+ * handle FULLSPEED out of paranoia.
+ *
+ * So we can safely restore only FULLSPEED or 7; anything
+ * else could slow the fan. Restoring AUTO is useless: at
+ * best it is exactly what the DSDT already set (it is the
+ * slowest level the firmware uses).
+ *
+ * Always keep in mind that the DSDT *will* have set the
+ * fans to what the vendor supposes is the best level. We
+ * muck with it only to speed the fan up.
+ */
+ if (fan_control_resume_level != 7 &&
+ !(fan_control_resume_level & TP_EC_FAN_FULLSPEED))
+ return;
+ else
+ do_set = !(current_level & TP_EC_FAN_FULLSPEED) &&
+ (current_level != fan_control_resume_level);
+ break;
+ default:
+ return;
+ }
+ if (do_set) {
+ printk(TPACPI_NOTICE
+ "restoring fan level to 0x%02x\n",
+ fan_control_resume_level);
+ rc = fan_set_level_safe(fan_control_resume_level);
+ if (rc < 0)
+ printk(TPACPI_NOTICE
+ "failed to restore fan level: %d\n", rc);
+ }
+ }
+
+ static int fan_read(char *p)
+ {
+ int len = 0;
+ int rc;
+ u8 status;
+ unsigned int speed = 0;
+
+ switch (fan_status_access_mode) {
+ case TPACPI_FAN_RD_ACPI_GFAN:
+ /* 570, 600e/x, 770e, 770x */
+ rc = fan_get_status_safe(&status);
+ if (rc < 0)
+ return rc;
+
+ len += sprintf(p + len, "status:\t\t%s\n"
+ "level:\t\t%d\n",
+ (status != 0) ? "enabled" : "disabled", status);
+ break;
+
+ case TPACPI_FAN_RD_TPEC:
+ /* all except 570, 600e/x, 770e, 770x */
+ rc = fan_get_status_safe(&status);
+ if (rc < 0)
+ return rc;
+
+ len += sprintf(p + len, "status:\t\t%s\n",
+ (status != 0) ? "enabled" : "disabled");
+
+ rc = fan_get_speed(&speed);
+ if (rc < 0)
+ return rc;
+
+ len += sprintf(p + len, "speed:\t\t%d\n", speed);
+
+ if (status & TP_EC_FAN_FULLSPEED)
+ /* Disengaged mode takes precedence */
+ len += sprintf(p + len, "level:\t\tdisengaged\n");
+ else if (status & TP_EC_FAN_AUTO)
+ len += sprintf(p + len, "level:\t\tauto\n");
+ else
+ len += sprintf(p + len, "level:\t\t%d\n", status);
+ break;
+
+ case TPACPI_FAN_NONE:
+ default:
+ len += sprintf(p + len, "status:\t\tnot supported\n");
+ }
+
+ if (fan_control_commands & TPACPI_FAN_CMD_LEVEL) {
+ len += sprintf(p + len, "commands:\tlevel <level>");
+
+ switch (fan_control_access_mode) {
+ case TPACPI_FAN_WR_ACPI_SFAN:
+ len += sprintf(p + len, " (<level> is 0-7)\n");
+ break;
+
+ default:
+ len += sprintf(p + len, " (<level> is 0-7, "
+ "auto, disengaged, full-speed)\n");
+ break;
+ }
+ }
+
+ if (fan_control_commands & TPACPI_FAN_CMD_ENABLE)
+ len += sprintf(p + len, "commands:\tenable, disable\n"
+ "commands:\twatchdog <timeout> (<timeout> "
+ "is 0 (off), 1-120 (seconds))\n");
+
+ if (fan_control_commands & TPACPI_FAN_CMD_SPEED)
+ len += sprintf(p + len, "commands:\tspeed <speed>"
+ " (<speed> is 0-65535)\n");
+
+ return len;
+ }
+
+ static int fan_write_cmd_level(const char *cmd, int *rc)
+ {
+ int level;
+
+ if (strlencmp(cmd, "level auto") == 0)
+ level = TP_EC_FAN_AUTO;
+ else if ((strlencmp(cmd, "level disengaged") == 0) ||
+ (strlencmp(cmd, "level full-speed") == 0))
+ level = TP_EC_FAN_FULLSPEED;
+ else if (sscanf(cmd, "level %d", &level) != 1)
+ return 0;
+
+ *rc = fan_set_level_safe(level);
+ if (*rc == -ENXIO)
+ printk(TPACPI_ERR "level command accepted for unsupported "
+ "access mode %d", fan_control_access_mode);
+
+ return 1;
+ }
+
+ static int fan_write_cmd_enable(const char *cmd, int *rc)
+ {
+ if (strlencmp(cmd, "enable") != 0)
+ return 0;
+
+ *rc = fan_set_enable();
+ if (*rc == -ENXIO)
+ printk(TPACPI_ERR "enable command accepted for unsupported "
+ "access mode %d", fan_control_access_mode);
+
+ return 1;
+ }
+
+ static int fan_write_cmd_disable(const char *cmd, int *rc)
+ {
+ if (strlencmp(cmd, "disable") != 0)
+ return 0;
+
+ *rc = fan_set_disable();
+ if (*rc == -ENXIO)
+ printk(TPACPI_ERR "disable command accepted for unsupported "
+ "access mode %d", fan_control_access_mode);
+
+ return 1;
+ }
+
+ static int fan_write_cmd_speed(const char *cmd, int *rc)
+ {
+ int speed;
+
+ /* TODO:
+ * Support speed <low> <medium> <high> ? */
+
+ if (sscanf(cmd, "speed %d", &speed) != 1)
+ return 0;
+
+ *rc = fan_set_speed(speed);
+ if (*rc == -ENXIO)
+ printk(TPACPI_ERR "speed command accepted for unsupported "
+ "access mode %d", fan_control_access_mode);
+
+ return 1;
+ }
+
+ static int fan_write_cmd_watchdog(const char *cmd, int *rc)
+ {
+ int interval;
+
+ if (sscanf(cmd, "watchdog %d", &interval) != 1)
+ return 0;
+
+ if (interval < 0 || interval > 120)
+ *rc = -EINVAL;
+ else
+ fan_watchdog_maxinterval = interval;
+
+ return 1;
+ }
+
+ static int fan_write(char *buf)
+ {
+ char *cmd;
+ int rc = 0;
+
+ while (!rc && (cmd = next_cmd(&buf))) {
+ if (!((fan_control_commands & TPACPI_FAN_CMD_LEVEL) &&
+ fan_write_cmd_level(cmd, &rc)) &&
+ !((fan_control_commands & TPACPI_FAN_CMD_ENABLE) &&
+ (fan_write_cmd_enable(cmd, &rc) ||
+ fan_write_cmd_disable(cmd, &rc) ||
+ fan_write_cmd_watchdog(cmd, &rc))) &&
+ !((fan_control_commands & TPACPI_FAN_CMD_SPEED) &&
+ fan_write_cmd_speed(cmd, &rc))
+ )
+ rc = -EINVAL;
+ else if (!rc)
+ fan_watchdog_reset();
+ }
+
+ return rc;
+ }
+
+ static struct ibm_struct fan_driver_data = {
+ .name = "fan",
+ .read = fan_read,
+ .write = fan_write,
+ .exit = fan_exit,
+ .suspend = fan_suspend,
+ .resume = fan_resume,
+ };
+
+ /****************************************************************************
+ ****************************************************************************
+ *
+ * Infrastructure
+ *
+ ****************************************************************************
+ ****************************************************************************/
+
+ /* sysfs name ---------------------------------------------------------- */
+ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
+ }
+
+ static struct device_attribute dev_attr_thinkpad_acpi_pdev_name =
+ __ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
+
+ /* --------------------------------------------------------------------- */
+
+ /* /proc support */
+ static struct proc_dir_entry *proc_dir;
+
+ /*
+ * Module and infrastructure probe, init and exit handling
+ */
+
+ static int force_load;
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUG
+ static const char * __init str_supported(int is_supported)
+ {
+ static char text_unsupported[] __initdata = "not supported";
+
+ return (is_supported)? &text_unsupported[4] : &text_unsupported[0];
+ }
+ #endif /* CONFIG_THINKPAD_ACPI_DEBUG */
+
+ static void ibm_exit(struct ibm_struct *ibm)
+ {
+ dbg_printk(TPACPI_DBG_EXIT, "removing %s\n", ibm->name);
+
+ list_del_init(&ibm->all_drivers);
+
+ if (ibm->flags.acpi_notify_installed) {
+ dbg_printk(TPACPI_DBG_EXIT,
+ "%s: acpi_remove_notify_handler\n", ibm->name);
+ BUG_ON(!ibm->acpi);
+ acpi_remove_notify_handler(*ibm->acpi->handle,
+ ibm->acpi->type,
+ dispatch_acpi_notify);
+ ibm->flags.acpi_notify_installed = 0;
+ }
+
+ if (ibm->flags.proc_created) {
+ dbg_printk(TPACPI_DBG_EXIT,
+ "%s: remove_proc_entry\n", ibm->name);
+ remove_proc_entry(ibm->name, proc_dir);
+ ibm->flags.proc_created = 0;
+ }
+
+ if (ibm->flags.acpi_driver_registered) {
+ dbg_printk(TPACPI_DBG_EXIT,
+ "%s: acpi_bus_unregister_driver\n", ibm->name);
+ BUG_ON(!ibm->acpi);
+ acpi_bus_unregister_driver(ibm->acpi->driver);
+ kfree(ibm->acpi->driver);
+ ibm->acpi->driver = NULL;
+ ibm->flags.acpi_driver_registered = 0;
+ }
+
+ if (ibm->flags.init_called && ibm->exit) {
+ ibm->exit();
+ ibm->flags.init_called = 0;
+ }
+
+ dbg_printk(TPACPI_DBG_INIT, "finished removing %s\n", ibm->name);
+ }
+
+ static int __init ibm_init(struct ibm_init_struct *iibm)
+ {
+ int ret;
+ struct ibm_struct *ibm = iibm->data;
+ struct proc_dir_entry *entry;
+
+ BUG_ON(ibm == NULL);
+
+ INIT_LIST_HEAD(&ibm->all_drivers);
+
+ if (ibm->flags.experimental && !experimental)
+ return 0;
+
+ dbg_printk(TPACPI_DBG_INIT,
+ "probing for %s\n", ibm->name);
+
+ if (iibm->init) {
+ ret = iibm->init(iibm);
+ if (ret > 0)
+ return 0; /* probe failed */
+ if (ret)
+ return ret;
+
+ ibm->flags.init_called = 1;
+ }
+
+ if (ibm->acpi) {
+ if (ibm->acpi->hid) {
+ ret = register_tpacpi_subdriver(ibm);
+ if (ret)
+ goto err_out;
+ }
+
+ if (ibm->acpi->notify) {
+ ret = setup_acpi_notify(ibm);
+ if (ret == -ENODEV) {
+ printk(TPACPI_NOTICE "disabling subdriver %s\n",
+ ibm->name);
+ ret = 0;
+ goto err_out;
+ }
+ if (ret < 0)
+ goto err_out;
+ }
+ }
+
+ dbg_printk(TPACPI_DBG_INIT,
+ "%s installed\n", ibm->name);
+
+ if (ibm->read) {
+ entry = create_proc_entry(ibm->name,
+ S_IFREG | S_IRUGO | S_IWUSR,
+ proc_dir);
+ if (!entry) {
+ printk(TPACPI_ERR "unable to create proc entry %s\n",
+ ibm->name);
+ ret = -ENODEV;
+ goto err_out;
+ }
+ entry->owner = THIS_MODULE;
+ entry->data = ibm;
+ entry->read_proc = &dispatch_procfs_read;
+ if (ibm->write)
+ entry->write_proc = &dispatch_procfs_write;
+ ibm->flags.proc_created = 1;
+ }
+
+ list_add_tail(&ibm->all_drivers, &tpacpi_all_drivers);
+
+ return 0;
+
+ err_out:
+ dbg_printk(TPACPI_DBG_INIT,
+ "%s: at error exit path with result %d\n",
+ ibm->name, ret);
+
+ ibm_exit(ibm);
+ return (ret < 0)? ret : 0;
+ }
+
+ /* Probing */
+
+ /* returns 0 - probe ok, or < 0 - probe error.
+ * Probe ok doesn't mean a ThinkPad was found.
+ * On error, kfree() cleanup on tp->* is not performed, caller must do it */
+ static int __must_check __init get_thinkpad_model_data(
+ struct thinkpad_id_data *tp)
+ {
+ const struct dmi_device *dev = NULL;
+ char ec_fw_string[18];
+ char const *s;
+
+ if (!tp)
+ return -EINVAL;
+
+ memset(tp, 0, sizeof(*tp));
+
+ if (dmi_name_in_vendors("IBM"))
+ tp->vendor = PCI_VENDOR_ID_IBM;
+ else if (dmi_name_in_vendors("LENOVO"))
+ tp->vendor = PCI_VENDOR_ID_LENOVO;
+ else
+ return 0;
+
+ s = dmi_get_system_info(DMI_BIOS_VERSION);
+ tp->bios_version_str = kstrdup(s, GFP_KERNEL);
+ if (s && !tp->bios_version_str)
+ return -ENOMEM;
+ if (!tp->bios_version_str)
+ return 0;
+ tp->bios_model = tp->bios_version_str[0]
+ | (tp->bios_version_str[1] << 8);
+
+ /*
+ * ThinkPad T23 or newer, A31 or newer, R50e or newer,
+ * X32 or newer, all Z series; some models must have an
+ * up-to-date BIOS or they will not be detected.
+ *
+ * See http://thinkwiki.org/wiki/List_of_DMI_IDs
+ */
+ while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, NULL, dev))) {
+ if (sscanf(dev->name,
+ "IBM ThinkPad Embedded Controller -[%17c",
+ ec_fw_string) == 1) {
+ ec_fw_string[sizeof(ec_fw_string) - 1] = 0;
+ ec_fw_string[strcspn(ec_fw_string, " ]")] = 0;
+
+ tp->ec_version_str = kstrdup(ec_fw_string, GFP_KERNEL);
+ if (!tp->ec_version_str)
+ return -ENOMEM;
+ tp->ec_model = ec_fw_string[0]
+ | (ec_fw_string[1] << 8);
+ break;
+ }
+ }
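+
+ /*
+  * Editor's note: the two EC type characters pack little-endian into
+  * ec_model, so a firmware string starting with "1Y" yields
+  * '1' | ('Y' << 8) == 0x5931 -- the TP-1Y value matched in
+  * fan_quirk1_detect() above.
+  */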
+
+ s = dmi_get_system_info(DMI_PRODUCT_VERSION);
+ if (s && !strnicmp(s, "ThinkPad", 8)) {
+ tp->model_str = kstrdup(s, GFP_KERNEL);
+ if (!tp->model_str)
+ return -ENOMEM;
+ }
+
+ s = dmi_get_system_info(DMI_PRODUCT_NAME);
+ tp->nummodel_str = kstrdup(s, GFP_KERNEL);
+ if (s && !tp->nummodel_str)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ static int __init probe_for_thinkpad(void)
+ {
+ int is_thinkpad;
+
+ if (acpi_disabled)
+ return -ENODEV;
+
+ /*
+ * Non-ancient models have better DMI tagging, but very old models
+ * don't.
+ */
+ is_thinkpad = (thinkpad_id.model_str != NULL);
+
+ /* ec is required because many other handles are relative to it */
+ TPACPI_ACPIHANDLE_INIT(ec);
+ if (!ec_handle) {
+ if (is_thinkpad)
+ printk(TPACPI_ERR
+ "Not yet supported ThinkPad detected!\n");
+ return -ENODEV;
+ }
+
+ /*
+ * Risks a regression on very old machines, but reduces potential
+ * false positives a damn great deal
+ */
+ if (!is_thinkpad)
+ is_thinkpad = (thinkpad_id.vendor == PCI_VENDOR_ID_IBM);
+
+ if (!is_thinkpad && !force_load)
+ return -ENODEV;
+
+ return 0;
+ }
+
+
+ /* Module init, exit, parameters */
+
+ static struct ibm_init_struct ibms_init[] __initdata = {
+ {
+ .init = thinkpad_acpi_driver_init,
+ .data = &thinkpad_acpi_driver_data,
+ },
+ {
+ .init = hotkey_init,
+ .data = &hotkey_driver_data,
+ },
+ {
+ .init = bluetooth_init,
+ .data = &bluetooth_driver_data,
+ },
+ {
+ .init = wan_init,
+ .data = &wan_driver_data,
+ },
+ {
+ .init = uwb_init,
+ .data = &uwb_driver_data,
+ },
+ #ifdef CONFIG_THINKPAD_ACPI_VIDEO
+ {
+ .init = video_init,
+ .data = &video_driver_data,
+ },
+ #endif
+ {
+ .init = light_init,
+ .data = &light_driver_data,
+ },
+ #ifdef CONFIG_THINKPAD_ACPI_DOCK
+ {
+ .init = dock_init,
+ .data = &dock_driver_data[0],
+ },
+ {
+ .init = dock_init2,
+ .data = &dock_driver_data[1],
+ },
+ #endif
+ #ifdef CONFIG_THINKPAD_ACPI_BAY
+ {
+ .init = bay_init,
+ .data = &bay_driver_data,
+ },
+ #endif
+ {
+ .init = cmos_init,
+ .data = &cmos_driver_data,
+ },
+ {
+ .init = led_init,
+ .data = &led_driver_data,
+ },
+ {
+ .init = beep_init,
+ .data = &beep_driver_data,
+ },
+ {
+ .init = thermal_init,
+ .data = &thermal_driver_data,
+ },
+ {
+ .data = &ecdump_driver_data,
+ },
+ {
+ .init = brightness_init,
+ .data = &brightness_driver_data,
+ },
+ {
+ .data = &volume_driver_data,
+ },
+ {
+ .init = fan_init,
+ .data = &fan_driver_data,
+ },
+ };
+
+ static int __init set_ibm_param(const char *val, struct kernel_param *kp)
+ {
+ unsigned int i;
+ struct ibm_struct *ibm;
+
+ if (!kp || !kp->name || !val)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ibms_init); i++) {
+ ibm = ibms_init[i].data;
+ WARN_ON(ibm == NULL);
+
+ if (!ibm || !ibm->name)
+ continue;
+
+ if (strcmp(ibm->name, kp->name) == 0 && ibm->write) {
+ if (strlen(val) > sizeof(ibms_init[i].param) - 2)
+ return -ENOSPC;
+ strcpy(ibms_init[i].param, val);
+ strcat(ibms_init[i].param, ",");
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+ }
+
+ module_param(experimental, int, 0);
+ MODULE_PARM_DESC(experimental,
+ "Enables experimental features when non-zero");
+
+ module_param_named(debug, dbg_level, uint, 0);
+ MODULE_PARM_DESC(debug, "Sets debug level bit-mask");
+
+ module_param(force_load, bool, 0);
+ MODULE_PARM_DESC(force_load,
+ "Attempts to load the driver even on a "
+ "mis-identified ThinkPad when true");
+
+ module_param_named(fan_control, fan_control_allowed, bool, 0);
+ MODULE_PARM_DESC(fan_control,
+ "Enables setting fan parameters features when true");
+
+ module_param_named(brightness_mode, brightness_mode, int, 0);
+ MODULE_PARM_DESC(brightness_mode,
+ "Selects brightness control strategy: "
+ "0=auto, 1=EC, 2=CMOS, 3=both");
+
+ module_param(brightness_enable, uint, 0);
+ MODULE_PARM_DESC(brightness_enable,
+ "Enables backlight control when 1, disables when 0");
+
+ module_param(hotkey_report_mode, uint, 0);
+ MODULE_PARM_DESC(hotkey_report_mode,
+ "used for backwards compatibility with userspace, "
+ "see documentation");
+
+ #define TPACPI_PARAM(feature) \
+ module_param_call(feature, set_ibm_param, NULL, NULL, 0); \
+ MODULE_PARM_DESC(feature, "Simulates thinkpad-acpi procfs command " \
+ "at module load, see documentation")
+
+ TPACPI_PARAM(hotkey);
+ TPACPI_PARAM(bluetooth);
+ TPACPI_PARAM(video);
+ TPACPI_PARAM(light);
+ #ifdef CONFIG_THINKPAD_ACPI_DOCK
+ TPACPI_PARAM(dock);
+ #endif
+ #ifdef CONFIG_THINKPAD_ACPI_BAY
+ TPACPI_PARAM(bay);
+ #endif /* CONFIG_THINKPAD_ACPI_BAY */
+ TPACPI_PARAM(cmos);
+ TPACPI_PARAM(led);
+ TPACPI_PARAM(beep);
+ TPACPI_PARAM(ecdump);
+ TPACPI_PARAM(brightness);
+ TPACPI_PARAM(volume);
+ TPACPI_PARAM(fan);
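+
+ /*
+  * Editor's note (illustrative usage, values hypothetical): these
+  * parameters accept the same strings as the procfs interface, e.g.
+  *
+  *   modprobe thinkpad_acpi fan=enable brightness="level 4"
+  */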
+
+ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
+ module_param(dbg_wlswemul, uint, 0);
+ MODULE_PARM_DESC(dbg_wlswemul, "Enables WLSW emulation");
+ module_param_named(wlsw_state, tpacpi_wlsw_emulstate, bool, 0);
+ MODULE_PARM_DESC(wlsw_state,
+ "Initial state of the emulated WLSW switch");
+
+ module_param(dbg_bluetoothemul, uint, 0);
+ MODULE_PARM_DESC(dbg_bluetoothemul, "Enables bluetooth switch emulation");
+ module_param_named(bluetooth_state, tpacpi_bluetooth_emulstate, bool, 0);
+ MODULE_PARM_DESC(bluetooth_state,
+ "Initial state of the emulated bluetooth switch");
+
+ module_param(dbg_wwanemul, uint, 0);
+ MODULE_PARM_DESC(dbg_wwanemul, "Enables WWAN switch emulation");
+ module_param_named(wwan_state, tpacpi_wwan_emulstate, bool, 0);
+ MODULE_PARM_DESC(wwan_state,
+ "Initial state of the emulated WWAN switch");
+
+ module_param(dbg_uwbemul, uint, 0);
+ MODULE_PARM_DESC(dbg_uwbemul, "Enables UWB switch emulation");
+ module_param_named(uwb_state, tpacpi_uwb_emulstate, bool, 0);
+ MODULE_PARM_DESC(uwb_state,
+ "Initial state of the emulated UWB switch");
+ #endif
+
+ static void thinkpad_acpi_module_exit(void)
+ {
+ struct ibm_struct *ibm, *itmp;
+
+ tpacpi_lifecycle = TPACPI_LIFE_EXITING;
+
+ list_for_each_entry_safe_reverse(ibm, itmp,
+ &tpacpi_all_drivers,
+ all_drivers) {
+ ibm_exit(ibm);
+ }
+
+ dbg_printk(TPACPI_DBG_INIT, "finished subdriver exit path...\n");
+
+ if (tpacpi_inputdev) {
+ if (tp_features.input_device_registered)
+ input_unregister_device(tpacpi_inputdev);
+ else
+ input_free_device(tpacpi_inputdev);
+ }
+
+ if (tpacpi_hwmon)
+ hwmon_device_unregister(tpacpi_hwmon);
+
+ if (tp_features.sensors_pdev_attrs_registered)
+ device_remove_file(&tpacpi_sensors_pdev->dev,
+ &dev_attr_thinkpad_acpi_pdev_name);
+ if (tpacpi_sensors_pdev)
+ platform_device_unregister(tpacpi_sensors_pdev);
+ if (tpacpi_pdev)
+ platform_device_unregister(tpacpi_pdev);
+
+ if (tp_features.sensors_pdrv_attrs_registered)
+ tpacpi_remove_driver_attributes(&tpacpi_hwmon_pdriver.driver);
+ if (tp_features.platform_drv_attrs_registered)
+ tpacpi_remove_driver_attributes(&tpacpi_pdriver.driver);
+
+ if (tp_features.sensors_pdrv_registered)
+ platform_driver_unregister(&tpacpi_hwmon_pdriver);
+
+ if (tp_features.platform_drv_registered)
+ platform_driver_unregister(&tpacpi_pdriver);
+
+ if (proc_dir)
+ remove_proc_entry(TPACPI_PROC_DIR, acpi_root_dir);
+
+ if (tpacpi_wq)
+ destroy_workqueue(tpacpi_wq);
+
+ kfree(thinkpad_id.bios_version_str);
+ kfree(thinkpad_id.ec_version_str);
+ kfree(thinkpad_id.model_str);
+ }
+
+
+ static int __init thinkpad_acpi_module_init(void)
+ {
+ int ret, i;
+
+ tpacpi_lifecycle = TPACPI_LIFE_INIT;
+
+ /* Parameter checking */
+ if (hotkey_report_mode > 2)
+ return -EINVAL;
+
+ /* Driver-level probe */
+
+ ret = get_thinkpad_model_data(&thinkpad_id);
+ if (ret) {
+ printk(TPACPI_ERR
+ "unable to get DMI data: %d\n", ret);
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ ret = probe_for_thinkpad();
+ if (ret) {
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+
+ /* Driver initialization */
+
+ TPACPI_ACPIHANDLE_INIT(ecrd);
+ TPACPI_ACPIHANDLE_INIT(ecwr);
+
+ tpacpi_wq = create_singlethread_workqueue(TPACPI_WORKQUEUE_NAME);
+ if (!tpacpi_wq) {
+ thinkpad_acpi_module_exit();
+ return -ENOMEM;
+ }
+
+ proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir);
+ if (!proc_dir) {
+ printk(TPACPI_ERR
+ "unable to create proc dir " TPACPI_PROC_DIR);
+ thinkpad_acpi_module_exit();
+ return -ENODEV;
+ }
+ proc_dir->owner = THIS_MODULE;
+
+ ret = platform_driver_register(&tpacpi_pdriver);
+ if (ret) {
+ printk(TPACPI_ERR
+ "unable to register main platform driver\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ tp_features.platform_drv_registered = 1;
+
+ ret = platform_driver_register(&tpacpi_hwmon_pdriver);
+ if (ret) {
+ printk(TPACPI_ERR
+ "unable to register hwmon platform driver\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ tp_features.sensors_pdrv_registered = 1;
+
+ ret = tpacpi_create_driver_attributes(&tpacpi_pdriver.driver);
+ if (!ret) {
+ tp_features.platform_drv_attrs_registered = 1;
+ ret = tpacpi_create_driver_attributes(
+ &tpacpi_hwmon_pdriver.driver);
+ }
+ if (ret) {
+ printk(TPACPI_ERR
+ "unable to create sysfs driver attributes\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ tp_features.sensors_pdrv_attrs_registered = 1;
+
+
+ /* Device initialization */
+ tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1,
+ NULL, 0);
+ if (IS_ERR(tpacpi_pdev)) {
+ ret = PTR_ERR(tpacpi_pdev);
+ tpacpi_pdev = NULL;
+ printk(TPACPI_ERR "unable to register platform device\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ tpacpi_sensors_pdev = platform_device_register_simple(
+ TPACPI_HWMON_DRVR_NAME,
+ -1, NULL, 0);
+ if (IS_ERR(tpacpi_sensors_pdev)) {
+ ret = PTR_ERR(tpacpi_sensors_pdev);
+ tpacpi_sensors_pdev = NULL;
+ printk(TPACPI_ERR
+ "unable to register hwmon platform device\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ ret = device_create_file(&tpacpi_sensors_pdev->dev,
+ &dev_attr_thinkpad_acpi_pdev_name);
+ if (ret) {
+ printk(TPACPI_ERR
+ "unable to create sysfs hwmon device attributes\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ tp_features.sensors_pdev_attrs_registered = 1;
+ tpacpi_hwmon = hwmon_device_register(&tpacpi_sensors_pdev->dev);
+ if (IS_ERR(tpacpi_hwmon)) {
+ ret = PTR_ERR(tpacpi_hwmon);
+ tpacpi_hwmon = NULL;
+ printk(TPACPI_ERR "unable to register hwmon device\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ mutex_init(&tpacpi_inputdev_send_mutex);
+ tpacpi_inputdev = input_allocate_device();
+ if (!tpacpi_inputdev) {
+ printk(TPACPI_ERR "unable to allocate input device\n");
+ thinkpad_acpi_module_exit();
+ return -ENOMEM;
+ } else {
+ /* Prepare input device, but don't register */
+ tpacpi_inputdev->name = "ThinkPad Extra Buttons";
+ tpacpi_inputdev->phys = TPACPI_DRVR_NAME "/input0";
+ tpacpi_inputdev->id.bustype = BUS_HOST;
+ tpacpi_inputdev->id.vendor = (thinkpad_id.vendor) ?
+ thinkpad_id.vendor :
+ PCI_VENDOR_ID_IBM;
+ tpacpi_inputdev->id.product = TPACPI_HKEY_INPUT_PRODUCT;
+ tpacpi_inputdev->id.version = TPACPI_HKEY_INPUT_VERSION;
+ }
+ for (i = 0; i < ARRAY_SIZE(ibms_init); i++) {
+ ret = ibm_init(&ibms_init[i]);
+ if (ret >= 0 && *ibms_init[i].param)
+ ret = ibms_init[i].data->write(ibms_init[i].param);
+ if (ret < 0) {
+ thinkpad_acpi_module_exit();
+ return ret;
+ }
+ }
+ ret = input_register_device(tpacpi_inputdev);
+ if (ret < 0) {
+ printk(TPACPI_ERR "unable to register input device\n");
+ thinkpad_acpi_module_exit();
+ return ret;
+ } else {
+ tp_features.input_device_registered = 1;
+ }
+
+ tpacpi_lifecycle = TPACPI_LIFE_RUNNING;
+ return 0;
+ }
+
+ /* Please remove this in year 2009 */
+ MODULE_ALIAS("ibm_acpi");
+
+ MODULE_ALIAS(TPACPI_DRVR_SHORTNAME);
+
+ /*
+ * DMI matching for module autoloading
+ *
+ * See http://thinkwiki.org/wiki/List_of_DMI_IDs
+ * See http://thinkwiki.org/wiki/BIOS_Upgrade_Downloads
+ *
+ * Only models listed in thinkwiki will be supported, so add yours
+ * if it is not there yet.
+ */
+ #define IBM_BIOS_MODULE_ALIAS(__type) \
+ MODULE_ALIAS("dmi:bvnIBM:bvr" __type "ET??WW")
+
+ /* Non-ancient thinkpads */
+ MODULE_ALIAS("dmi:bvnIBM:*:svnIBM:*:pvrThinkPad*:rvnIBM:*");
+ MODULE_ALIAS("dmi:bvnLENOVO:*:svnLENOVO:*:pvrThinkPad*:rvnLENOVO:*");
+
+ /* Ancient thinkpad BIOSes have to be identified by
+ * BIOS type or model number, and there are far fewer
+ * BIOS types than model numbers... */
+ IBM_BIOS_MODULE_ALIAS("I[B,D,H,I,M,N,O,T,W,V,Y,Z]");
+ IBM_BIOS_MODULE_ALIAS("1[0,3,6,8,A-G,I,K,M-P,S,T]");
+ IBM_BIOS_MODULE_ALIAS("K[U,X-Z]");
+
+ MODULE_AUTHOR("Borislav Deianov, Henrique de Moraes Holschuh");
+ MODULE_DESCRIPTION(TPACPI_DESC);
+ MODULE_VERSION(TPACPI_VERSION);
+ MODULE_LICENSE("GPL");
+
+ module_init(thinkpad_acpi_module_init);
+ module_exit(thinkpad_acpi_module_exit);
if (!zfcp_data.gid_pn_cache)
goto out_gid_cache;
+ zfcp_data.gpn_ft_cache = zfcp_cache_create(
+ sizeof(struct ct_iu_gpn_ft_req), "zfcp_gpn");
+ if (!zfcp_data.gpn_ft_cache)
+ goto out_gpn_cache;
+
zfcp_data.work_queue = create_singlethread_workqueue("zfcp_wq");
- INIT_LIST_HEAD(&zfcp_data.adapter_list_head);
sema_init(&zfcp_data.config_sema, 1);
rwlock_init(&zfcp_data.config_lock);
#include "zfcp_ext.h"
+ enum rscn_address_format {
+ RSCN_PORT_ADDRESS = 0x0,
+ RSCN_AREA_ADDRESS = 0x1,
+ RSCN_DOMAIN_ADDRESS = 0x2,
+ RSCN_FABRIC_ADDRESS = 0x3,
+ };
+
+ static u32 rscn_range_mask[] = {
+ [RSCN_PORT_ADDRESS] = 0xFFFFFF,
+ [RSCN_AREA_ADDRESS] = 0xFFFF00,
+ [RSCN_DOMAIN_ADDRESS] = 0xFF0000,
+ [RSCN_FABRIC_ADDRESS] = 0x000000,
+ };
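+
+ /*
+  * Illustrative sketch (editor's note; hypothetical helper, assuming
+  * only the table above): an RSCN covers every N_Port D_ID that
+  * matches the event's D_ID under the mask for its address format,
+  * e.g. an area-scoped RSCN for 0x0A0B00 covers 0x0A0B00-0x0A0BFF.
+  */
+ static inline int rscn_covers_did(u32 rscn_did, u32 fmt, u32 port_did)
+ {
+ 	return (rscn_did & rscn_range_mask[fmt]) ==
+ 	       (port_did & rscn_range_mask[fmt]);
+ }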
+
-struct ct_iu_gpn_ft_req {
- struct ct_hdr header;
- u8 flags;
- u8 domain_id_scope;
- u8 area_id_scope;
- u8 fc4_type;
-} __attribute__ ((packed));
-
struct gpn_ft_resp_acc {
u8 control;
u8 port_id[3];
return err;
}
- static void bbc_i2c_cleanup(void)
+ static int __devexit bbc_i2c_remove(struct of_device *op)
{
- struct bbc_i2c_bus *bp = all_bbc_i2c;
+ struct bbc_i2c_bus *bp = dev_get_drvdata(&op->dev);
- bbc_envctrl_cleanup();
+ bbc_envctrl_cleanup(bp);
- while (bp != NULL) {
- struct bbc_i2c_bus *next = bp->next;
+ free_irq(op->irqs[0], bp);
- free_irq(bp->bus_edev->irqs[0], bp);
+ if (bp->i2c_bussel_reg)
+ of_iounmap(&op->resource[0], bp->i2c_bussel_reg, 1);
+ if (bp->i2c_control_regs)
+ of_iounmap(&op->resource[1], bp->i2c_control_regs, 2);
- if (bp->i2c_bussel_reg)
- iounmap(bp->i2c_bussel_reg);
- if (bp->i2c_control_regs)
- iounmap(bp->i2c_control_regs);
+ kfree(bp);
- kfree(bp);
+ return 0;
+ }
- bp = next;
- }
- all_bbc_i2c = NULL;
+ static const struct of_device_id bbc_i2c_match[] = {
+ {
+ .name = "i2c",
+ .compatible = "SUNW,bbc-i2c",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, bbc_i2c_match);
+
+ static struct of_platform_driver bbc_i2c_driver = {
++ .owner = THIS_MODULE,
+ .name = "bbc_i2c",
+ .match_table = bbc_i2c_match,
+ .probe = bbc_i2c_probe,
+ .remove = __devexit_p(bbc_i2c_remove),
+ };
+
+ static int __init bbc_i2c_init(void)
+ {
+ return of_register_driver(&bbc_i2c_driver, &of_bus_type);
+ }
+
+ static void __exit bbc_i2c_exit(void)
+ {
+ of_unregister_driver(&bbc_i2c_driver);
}
module_init(bbc_i2c_init);
}
misc_deregister(&d7s_miscdev);
- d7s_free();
+ of_iounmap(&op->resource[0], p->regs, sizeof(u8));
+ kfree(p);
+
+ return 0;
+ }
+
+ static const struct of_device_id d7s_match[] = {
+ {
+ .name = "display7seg",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, d7s_match);
+
+ static struct of_platform_driver d7s_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = d7s_match,
+ .probe = d7s_probe,
+ .remove = __devexit_p(d7s_remove),
+ };
+
+ static int __init d7s_init(void)
+ {
+ return of_register_driver(&d7s_driver, &of_bus_type);
+ }
+
+ static void __exit d7s_exit(void)
+ {
+ of_unregister_driver(&d7s_driver);
}
module_init(d7s_init);
kthread_stop(kenvctrld_task);
- iounmap(i2c);
+ of_iounmap(&op->resource[0], i2c, 0x2);
misc_deregister(&envctrl_dev);
- for (i = 0; i < ENVCTRL_MAX_CPU * 2; i++)
- kfree(i2c_childlist[i].tables);
+ for (index = 0; index < ENVCTRL_MAX_CPU * 2; index++)
+ kfree(i2c_childlist[index].tables);
+
+ return 0;
+ }
+
+ static const struct of_device_id envctrl_match[] = {
+ {
+ .name = "i2c",
+ .compatible = "i2cpcf,8584",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, envctrl_match);
+
+ static struct of_platform_driver envctrl_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = envctrl_match,
+ .probe = envctrl_probe,
+ .remove = __devexit_p(envctrl_remove),
+ };
+
+ static int __init envctrl_init(void)
+ {
+ return of_register_driver(&envctrl_driver, &of_bus_type);
+ }
+
+ static void __exit envctrl_exit(void)
+ {
+ of_unregister_driver(&envctrl_driver);
}
module_init(envctrl_init);
return 0;
}
+ static const struct of_device_id flash_match[] = {
+ {
+ .name = "flashprom",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, flash_match);
+
+ static struct of_platform_driver flash_driver = {
++ .owner = THIS_MODULE,
+ .name = "flash",
+ .match_table = flash_match,
+ .probe = flash_probe,
+ .remove = __devexit_p(flash_remove),
+ };
+
+ static int __init flash_init(void)
+ {
+ return of_register_driver(&flash_driver, &of_bus_type);
+ }
+
static void __exit flash_cleanup(void)
{
- misc_deregister(&flash_dev);
+ of_unregister_driver(&flash_driver);
}
module_init(flash_init);
}
- static int __init ts102_uctrl_init(void)
+ static int __devinit uctrl_probe(struct of_device *op,
+ const struct of_device_id *match)
{
- struct uctrl_driver *driver = &drv;
- int len;
- struct linux_prom_irqs tmp_irq[2];
- unsigned int vaddr[2] = { 0, 0 };
- int tmpnode, uctrlnode = prom_getchild(prom_root_node);
- int err;
+ struct uctrl_driver *p;
+ int err = -ENOMEM;
- tmpnode = prom_searchsiblings(uctrlnode, "obio");
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ printk(KERN_ERR "uctrl: Unable to allocate device struct.\n");
+ goto out;
+ }
- if (tmpnode)
- uctrlnode = prom_getchild(tmpnode);
+ p->regs = of_ioremap(&op->resource[0], 0,
+ resource_size(&op->resource[0]),
+ "uctrl");
+ if (!p->regs) {
+ printk(KERN_ERR "uctrl: Unable to map registers.\n");
+ goto out_free;
+ }
- uctrlnode = prom_searchsiblings(uctrlnode, "uctrl");
+ p->irq = op->irqs[0];
+ err = request_irq(p->irq, uctrl_interrupt, 0, "uctrl", p);
+ if (err) {
+ printk(KERN_ERR "uctrl: Unable to register irq.\n");
+ goto out_iounmap;
+ }
- if (!uctrlnode)
- return -ENODEV;
+ err = misc_register(&uctrl_dev);
+ if (err) {
+ printk(KERN_ERR "uctrl: Unable to register misc device.\n");
+ goto out_free_irq;
+ }
- /* the prom mapped it for us */
- len = prom_getproperty(uctrlnode, "address", (void *) vaddr,
- sizeof(vaddr));
- driver->regs = (struct uctrl_regs *)vaddr[0];
+ sbus_writel(UCTRL_INTR_RXNE_REQ|UCTRL_INTR_RXNE_MSK, &p->regs->uctrl_intr);
+ printk(KERN_INFO "%s: uctrl regs[0x%p] (irq %d)\n",
+ op->node->full_name, p->regs, p->irq);
+ uctrl_get_event_status(p);
+ uctrl_get_external_status(p);
- len = prom_getproperty(uctrlnode, "intr", (char *) tmp_irq,
- sizeof(tmp_irq));
+ dev_set_drvdata(&op->dev, p);
+ global_driver = p;
- /* Flush device */
- READUCTLDATA(len);
+ out:
+ return err;
- if(!driver->irq)
- driver->irq = tmp_irq[0].pri;
+ out_free_irq:
+ free_irq(p->irq, p);
- err = request_irq(driver->irq, uctrl_interrupt, 0, "uctrl", driver);
- if (err) {
- printk("%s: unable to register irq %d\n",
- __func__, driver->irq);
- return err;
- }
+ out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, resource_size(&op->resource[0]));
- if (misc_register(&uctrl_dev)) {
- printk("%s: unable to get misc minor %d\n",
- __func__, uctrl_dev.minor);
- free_irq(driver->irq, driver);
- return -ENODEV;
- }
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static int __devexit uctrl_remove(struct of_device *op)
+ {
+ struct uctrl_driver *p = dev_get_drvdata(&op->dev);
- driver->regs->uctrl_intr = UCTRL_INTR_RXNE_REQ|UCTRL_INTR_RXNE_MSK;
- printk("uctrl: 0x%p (irq %d)\n", driver->regs, driver->irq);
- uctrl_get_event_status();
- uctrl_get_external_status();
- return 0;
+ if (p) {
+ misc_deregister(&uctrl_dev);
+ free_irq(p->irq, p);
+ of_iounmap(&op->resource[0], p->regs, resource_size(&op->resource[0]));
+ kfree(p);
+ }
+ return 0;
}
- static void __exit ts102_uctrl_cleanup(void)
+ static const struct of_device_id uctrl_match[] = {
+ {
+ .name = "uctrl",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, uctrl_match);
+
+ static struct of_platform_driver uctrl_driver = {
++ .owner = THIS_MODULE,
+ .name = "uctrl",
+ .match_table = uctrl_match,
+ .probe = uctrl_probe,
+ .remove = __devexit_p(uctrl_remove),
+ };
+
+
+ static int __init uctrl_init(void)
{
- struct uctrl_driver *driver = &drv;
+ return of_register_driver(&uctrl_driver, &of_bus_type);
+ }
- misc_deregister(&uctrl_dev);
- if (driver->irq)
- free_irq(driver->irq, driver);
- if (driver->regs)
- driver->regs = NULL;
+ static void __exit uctrl_exit(void)
+ {
+ of_unregister_driver(&uctrl_driver);
}
- module_init(ts102_uctrl_init);
- module_exit(ts102_uctrl_cleanup);
+ module_init(uctrl_init);
+ module_exit(uctrl_exit);
MODULE_LICENSE("GPL");
}
static const struct scsi_dh_devlist rdac_dev_list[] = {
- {"IBM", "1722"},
- {"IBM", "1724"},
- {"IBM", "1726"},
- {"IBM", "1742"},
- {"IBM", "1814"},
- {"IBM", "1815"},
- {"IBM", "1818"},
- {"IBM", "3526"},
- {"SGI", "TP9400"},
- {"SGI", "TP9500"},
- {"SGI", "IS"},
- {"STK", "OPENstorage D280"},
- {"SUN", "CSM200_R"},
- {"SUN", "LCSM100_F"},
- {"DELL", "MD3000"},
- {"DELL", "MD3000i"},
- {"LSI", "INF-01-00"},
- {"ENGENIO", "INF-01-00"},
- {NULL, NULL},
+ {"IBM", "1722", 0},
+ {"IBM", "1724", 0},
+ {"IBM", "1726", 0},
+ {"IBM", "1742", 0},
+ {"IBM", "1814", 0},
+ {"IBM", "1815", 0},
+ {"IBM", "1818", 0},
+ {"IBM", "3526", 0},
+ {"SGI", "TP9400", 0},
+ {"SGI", "TP9500", 0},
+ {"SGI", "IS", 0},
+ {"STK", "OPENstorage D280", 0},
+ {"SUN", "CSM200_R", 0},
+ {"SUN", "LCSM100_F", 0},
+ {"DELL", "MD3000", 0},
+ {"DELL", "MD3000i", 0},
- {"LSI", "INF-01-00"},
- {"ENGENIO", "INF-01-00"},
++ {"LSI", "INF-01-00", 0},
++ {"ENGENIO", "INF-01-00", 0},
+ {NULL, NULL, 0},
};
static int rdac_bus_attach(struct scsi_device *sdev);
static int init_timeout = 5;
static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT;
static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2;
-
+/* host data buffer size */
+#define buff_size 4096
static struct scsi_transport_template *ibmvscsi_transport_template;
extern int ql2xextended_error_logging;
extern int ql2xqfullrampup;
extern int ql2xiidmaenable;
+ extern int ql2xmaxqueues;
+extern int ql2xqfulltracking;
extern int qla2x00_loop_reset(scsi_qla_host_t *);
extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
qla2x00_adjust_sdev_qdepth_up(struct scsi_device *sdev, void *data)
{
fc_port_t *fcport = data;
+ struct scsi_qla_host *vha = fcport->vha;
+ struct qla_hw_data *ha = vha->hw;
+ struct req_que *req = NULL;
+ if (!ql2xqfulltracking)
+ return;
+
- if (fcport->ha->max_q_depth <= sdev->queue_depth)
+ req = ha->req_q_map[vha->req_ques[0]];
+ if (!req)
+ return;
+ if (req->max_q_depth <= sdev->queue_depth)
return;
if (sdev->ordered_tags)
fc_port_t *fcport;
struct scsi_device *sdev;
+ if (!ql2xqfulltracking)
+ return;
+
sdev = sp->cmd->device;
- if (sdev->queue_depth >= ha->max_q_depth)
+ if (sdev->queue_depth >= req->max_q_depth)
return;
fcport = sp->fcport;
config SCSI_QLA_ISCSI
tristate "QLogic ISP4XXX host adapter family support"
- depends on PCI && SCSI && NET
+ depends on PCI && SCSI
select SCSI_ISCSI_ATTRS
---help---
- This driver supports the QLogic 40xx (ISP4XXX) iSCSI host
+ This driver supports the QLogic 40xx (ISP4XXX) iSCSI host
adapter family.
#define RESET_FIRMWARE_TOV 30
#define LOGOUT_TOV 10
#define IOCB_TOV_MARGIN 10
-#define RELOGIN_TOV 18
+ #define ISNS_DEREG_TOV 5
#define MAX_RESET_HA_RETRIES 2
#define DF_RELOGIN 0 /* Relogin to device */
#define DF_NO_RELOGIN 1 /* Do not relogin if IOCTL
* logged it out */
-#define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */
-#define DF_FO_MASKED 3
+#define DF_SCAN_ISSUED 2
+#define DF_OFFLINE 3 /* Offline Device */
- #define DF_REMOVE 4 /* FW DDB is destroyed */
++#define DF_DELETED 4 /* Device has been removed */
+
+/*
+ * Asynchronous Event Queue structure
+ */
+struct aen {
+ uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
+};
+struct ql4_aen_log {
+ int count;
+ struct aen entry[MAX_AEN_ENTRIES];
+};
#include "ql4_fw.h"
#include "ql4_nvram.h"
#define DPC_RELOGIN_DEVICE 3 /* 0x00000008 */
#define DPC_RESET_HA_DESTROY_DDB_LIST 4 /* 0x00000010 */
#define DPC_RESET_HA_INTR 5 /* 0x00000020 */
+ #define DPC_ISNS_RESTART 7 /* 0x00000080 */
#define DPC_AEN 9 /* 0x00000200 */
#define DPC_GET_DHCP_IP_ADDR 15 /* 0x00008000 */
-
- struct Scsi_Host *host; /* pointer to host data */
- uint32_t tot_ddbs;
+#define DPC_OFFLINE_DEVICE 16 /* 0x00010000 */
- #define DPC_REMOVE_DEVICE 17 /* 0x00020000 */
++#define DPC_DELETE_DEVICE 17 /* 0x00020000 */
uint16_t iocb_cnt;
uint16_t iocb_hiwat;
#include "ql4_glbl.h"
#include "ql4_dbg.h"
#include "ql4_inline.h"
+#include "ql4_os.h"
-static struct ddb_entry * qla4xxx_alloc_ddb(struct scsi_qla_host *ha,
- uint32_t fw_ddb_index);
+/* Link auto-negotiation normally takes roughly 2s. */
+/* If we don't have link within 3 times that period, quit. */
- #define QLA4XXX_LINK_UP_DELAY 6
++#define QLA4XXX_LINK_UP_DELAY 6
+
+/*
+ * QLogic ISP4xxx Hardware Support Function Prototypes.
+ */
static void ql4xxx_set_mac_number(struct scsi_qla_host *ha)
{
list_del_init(&ddb_entry->list);
/* Remove device pointer from index mapping arrays */
- if (!QL_DDB_STATE_REMOVED(ddb_entry)) {
- ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = NULL;
- ha->tot_ddbs--;
- }
- ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] =
- (struct ddb_entry *) INVALID_ENTRY;
++ ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = NULL;
+ ha->tot_ddbs--;
/* Free memory and scsi-ml struct for device entry */
qla4xxx_destroy_sess(ddb_entry);
ddb_entry->port = le16_to_cpu(fw_ddb_entry->port);
ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp);
memcpy(ddb_entry->isid, fw_ddb_entry->isid, sizeof(ddb_entry->isid));
+
memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0],
- min(sizeof(ddb_entry->iscsi_name),
- sizeof(fw_ddb_entry->iscsi_name)));
+ min(sizeof(ddb_entry->iscsi_name),
+ sizeof(fw_ddb_entry->iscsi_name)));
memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0],
- min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr)));
-
- DEBUG2(printk("scsi%ld: %s: ddb[%d] - State= %x status= %d.\n",
- ha->host_no, __func__, fw_ddb_index,
- ddb_entry->fw_ddb_device_state, status));
-
- exit_update_ddb:
- if (fw_ddb_entry)
- dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
- fw_ddb_entry, fw_ddb_entry_dma);
-
- return status;
+ min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr)));
}
/**
}
}
- if (ddb_state != DDB_DS_SESSION_ACTIVE)
- goto next_one;
- /*
- * if fw_ddb with session active state found,
- * add to ddb_list
- */
- DEBUG2(printk("scsi%ld: %s: DDB[%d] added to list\n",
- ha->host_no, __func__, fw_ddb_index));
-
- /* Add DDB to internal our ddb list. */
- ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt);
- if (ddb_entry == NULL) {
- DEBUG2(printk("scsi%ld: %s: Unable to allocate memory "
- "for device at fw_ddb_index %d\n",
- ha->host_no, __func__, fw_ddb_index));
- return QLA_ERROR;
- }
- /* Fill in the device structure */
- if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) ==
- QLA_ERROR) {
- ha->fw_ddb_index_map[fw_ddb_index] =
- (struct ddb_entry *)INVALID_ENTRY;
-
-
- DEBUG2(printk("scsi%ld: %s: update_ddb_entry failed "
- "for fw_ddb_index %d.\n",
- ha->host_no, __func__, fw_ddb_index));
- return QLA_ERROR;
+ if (!(le16_to_cpu(fw_ddb_entry->options) & DDB_OPT_DISC_SESSION) &&
+ (ddb_state != DDB_DS_UNASSIGNED) &&
+ (strlen(fw_ddb_entry->iscsi_name) != 0)){
+ ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
+ if (ddb_entry == NULL) {
+ DEBUG2(dev_info(&ha->pdev->dev,"%s alloc_ddb %d"
+ "failed\n", __func__, fw_ddb_index));
+ goto exit_ddb_list;
+ }
+ ddb_entry->fw_ddb_index = fw_ddb_index;
+ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
+ ddb_entry->tcp_source_port_num = src_port;
+ ddb_entry->connection_id = conn_id;
+ qla4xxx_fill_ddb(ddb_entry, fw_ddb_entry);
+ ddb_entry->fw_ddb_device_state = ddb_state;
+
++ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
++ atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
++ dev_info(&ha->pdev->dev,
++ "scsi%ld: %s: ddb[%d] os[%d] marked ONLINE\n",
++ ha->host_no, __func__, ddb_entry->fw_ddb_index,
++ ddb_entry->os_target_id);
++ } else {
++ atomic_set(&ddb_entry->state, DDB_STATE_MISSING);
++ dev_info(&ha->pdev->dev,
++ "scsi%ld: %s: ddb[%d] os[%d] marked MISSING\n",
++ ha->host_no, __func__, ddb_entry->fw_ddb_index,
++ ddb_entry->os_target_id);
++ }
+ DEBUG6(dev_info(&ha->pdev->dev, "%s: DDB[%d] osIdx = %d State %04x"
+ " ConnErr %08x %d.%d.%d.%d:%04d \"%s\"\n", __func__,
+ fw_ddb_index, ddb_entry->os_target_id, ddb_state, conn_err,
+ fw_ddb_entry->ip_addr[0], fw_ddb_entry->ip_addr[1],
+ fw_ddb_entry->ip_addr[2], fw_ddb_entry->ip_addr[3],
+ le16_to_cpu(fw_ddb_entry->port),
+ fw_ddb_entry->iscsi_name));
}
-next_one:
/* We know we've reached the last device when
* next_fw_ddb_index is 0 */
if (next_fw_ddb_index == 0)
/* Update the device information for all devices. */
list_for_each_entry_safe(ddb_entry, detemp, &ha->ddb_list, list) {
- if (!QL_DDB_STATE_REMOVED(ddb_entry) &&
- (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index,
- fw_ddb_entry, fw_ddb_entry_dma, NULL, NULL,
- &ddb_entry->fw_ddb_device_state, NULL,
- &ddb_entry->tcp_source_port_num,
- &ddb_entry->connection_id) == QLA_SUCCESS)) {
- qla4xxx_update_ddb_entry(ha, ddb_entry,
- ddb_entry->fw_ddb_index);
- if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
- atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
- DEBUG2(printk ("scsi%ld: %s: ddb index [%d] marked "
- "ONLINE\n", ha->host_no, __func__,
- ddb_entry->fw_ddb_index));
- } else if (atomic_read(&ddb_entry->state) == DDB_STATE_ONLINE)
- qla4xxx_mark_device_missing(ha, ddb_entry);
++ if (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index,
++ fw_ddb_entry, fw_ddb_entry_dma, NULL, NULL,
++ &ddb_entry->fw_ddb_device_state, NULL,
++ &ddb_entry->tcp_source_port_num,
++ &ddb_entry->connection_id) == QLA_SUCCESS) {
+
+ qla4xxx_fill_ddb(ddb_entry, fw_ddb_entry);
+
+ if (ddb_entry->fw_ddb_device_state ==
+ DDB_DS_SESSION_ACTIVE) {
+ atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
+ dev_info(&ha->pdev->dev,
+ "%s: ddb[%d] os[%d] marked ONLINE\n",
+ __func__, ddb_entry->fw_ddb_index,
+ ddb_entry->os_target_id);
+ } else if (atomic_read(&ddb_entry->state) ==
+ DDB_STATE_ONLINE)
+ qla4xxx_mark_device_missing(ha, ddb_entry);
+ }
}
+ dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
+ fw_ddb_entry, fw_ddb_entry_dma);
return status;
}
return;
}
- if (!new_tgt && (ddb_entry->fw_ddb_index != fw_ddb_index)) {
- /* Target has been bound to a new fw_ddb_index */
- qla4xxx_free_ddb(ha, ddb_entry);
+ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, fw_ddb_entry,
+ fw_ddb_entry_dma, NULL, NULL, &ddb_state, NULL, &src_port,
+ &conn_id) == QLA_ERROR) {
+ DEBUG2(dev_info(&ha->pdev->dev, "%s getddb %d failed\n",
+ __func__, fw_ddb_index));
+ return;
+ }
+
+ list_for_each_entry(ddb_entry, &ha->ddb_list, list) {
- if ((memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name,
- ISCSI_NAME_SIZE) == 0) &&
- (ddb_entry->tpgt ==
- le32_to_cpu(fw_ddb_entry->tgt_portal_grp)) &&
- (memcmp(ddb_entry->isid, fw_ddb_entry->isid,
- sizeof(ddb_entry->isid)) == 0) &&
- !QL_DDB_STATE_REMOVED(ddb_entry)) {
++ if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name,
++ ISCSI_NAME_SIZE) == 0) {
+ found = 1;
+
+ DEBUG6(dev_info(&ha->pdev->dev, "%s found target ddb = 0x%p"
+ " sess 0x%p conn 0x%p state 0x%x nidx 0x%x oidx 0x%x\n",
+ __func__, ddb_entry, ddb_entry->sess, ddb_entry->conn,
+ ddb_entry->state, fw_ddb_index, ddb_entry->fw_ddb_index));
+ break;
+ }
+ }
+
+ if (!found) {
ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
+
if (ddb_entry == NULL) {
- DEBUG2(printk(KERN_WARNING
- "scsi%ld: Unable to allocate memory"
- " to add fw_ddb_index %d\n",
- ha->host_no, fw_ddb_index));
- return;
+ DEBUG2(dev_info(&ha->pdev->dev, "%s NULL DDB %d\n",
+ __func__, fw_ddb_index));
+ goto exit_dyn_add;
}
- }
- if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) ==
- QLA_ERROR) {
- ha->fw_ddb_index_map[fw_ddb_index] =
- (struct ddb_entry *)INVALID_ENTRY;
- DEBUG2(printk(KERN_WARNING
- "scsi%ld: failed to add new device at index "
- "[%d]\n Unable to retrieve fw ddb entry\n",
- ha->host_no, fw_ddb_index));
- qla4xxx_free_ddb(ha, ddb_entry);
- return;
- }
- if (qla4xxx_add_sess(ddb_entry)) {
- DEBUG2(printk(KERN_WARNING
- "scsi%ld: failed to add new device at index "
- "[%d]\n Unable to add connection and session\n",
- ha->host_no, fw_ddb_index));
- qla4xxx_free_ddb(ha, ddb_entry);
+ ddb_entry->fw_ddb_index = fw_ddb_index;
+ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
+ ddb_entry->tcp_source_port_num = src_port;
+ ddb_entry->connection_id = conn_id;
+ qla4xxx_fill_ddb(ddb_entry, fw_ddb_entry);
+ ddb_entry->fw_ddb_device_state = ddb_state;
+
+ if (probe)
+ goto exit_dyn_add;
+
+ if (qla4xxx_add_sess(ddb_entry, 1)) {
+ DEBUG2(dev_info(&ha->pdev->dev,
+ "%s: failed to add new ddb %d\n",
+ __func__, fw_ddb_index));
+ qla4xxx_free_ddb(ha, ddb_entry);
+ } else {
+ DEBUG6(dev_info(&ha->pdev->dev,
+ "%s added ddb 0x%p sess 0x%p"
+ " conn 0x%p state 0x%x\n",
+ __func__, ddb_entry,
+ ddb_entry->sess, ddb_entry->conn,
+ ddb_entry->state));
+ }
+ } else if (ddb_entry->fw_ddb_index != fw_ddb_index) {
+ /* Target has been bound to a new fw_ddb_index */
+ ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = NULL;
+ ddb_entry->fw_ddb_index = fw_ddb_index;
+ ddb_entry->fw_ddb_device_state = ddb_state;
+ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
+ atomic_set(&ddb_entry->port_down_timer,
+ ha->port_down_retry_count);
+ atomic_set(&ddb_entry->relogin_retry_count, 0);
+ atomic_set(&ddb_entry->relogin_timer, 0);
+ clear_bit(DF_RELOGIN, &ddb_entry->flags);
+ clear_bit(DF_NO_RELOGIN, &ddb_entry->flags);
+ atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
+
+ dev_info(&ha->pdev->dev,
+ "scsi%ld: %s: ddb[%d] os[%d] marked ONLINE sess:%p conn:%p\n",
+ ha->host_no, __func__, ddb_entry->fw_ddb_index,
+ ddb_entry->os_target_id, ddb_entry->sess, ddb_entry->conn);
+
+ if (!probe)
+ qla4xxx_conn_start(ddb_entry->conn);
+ DEBUG6(dev_info(&ha->pdev->dev, "%s calling conn_start ddb 0x%p sess 0x%p"
+ " conn 0x%p state 0x%x\n", __func__, ddb_entry, ddb_entry->sess,
+ ddb_entry->conn, ddb_entry->state));
}
+exit_dyn_add:
+ dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), fw_ddb_entry,
+ fw_ddb_entry_dma);
+ return;
}
/**
return ddb_entry;
}
+/*
+ * The MBOX_CMD_CLEAR_DATABASE_ENTRY (0x31) mailbox command does not
+ * result in an AEN, so we need to process it separately.
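+ *
+ * If the command cleared a known DDB, the DDB is flagged DF_DELETED and
+ * the DPC worker is queued to tear it down.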
+ */
+static inline void qla4xxx_check_for_clear_ddb(struct scsi_qla_host *ha,
+ uint32_t *mbox_cmd)
+{
+ uint32_t fw_ddb_index;
+ struct ddb_entry *ddb_entry = NULL;
+
+ if (mbox_cmd[0] == MBOX_CMD_CLEAR_DATABASE_ENTRY) {
+
+ fw_ddb_index = mbox_cmd[1];
+
+ if (fw_ddb_index < MAX_DDB_ENTRIES)
+ ddb_entry = ha->fw_ddb_index_map[fw_ddb_index];
+
+ if (ddb_entry) {
+ dev_info(&ha->pdev->dev, "%s: ddb[%d] os[%d] freed\n",
+ __func__, ddb_entry->fw_ddb_index,
+ ddb_entry->os_target_id);
- set_bit(DF_REMOVE, &ddb_entry->flags);
- set_bit(DPC_REMOVE_DEVICE, &ha->dpc_flags);
++ set_bit(DF_DELETED, &ddb_entry->flags);
++ set_bit(DPC_DELETE_DEVICE, &ha->dpc_flags);
+ queue_work(ha->dpc_thread, &ha->dpc_work);
+ }
+ }
+}
+
static inline void
__qla4xxx_enable_intrs(struct scsi_qla_host *ha)
{
avail_dsds = COMMAND_SEG;
cur_dsd = (struct data_seg_a64 *) & (cmd_entry->dataseg[0]);
+ if (srb->flags & SRB_SCSI_PASSTHRU) {
+ cur_dsd->base.addrLow = cpu_to_le32(LSDW(srb->dma_handle));
+ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(srb->dma_handle));
+ cur_dsd->count = cpu_to_le32(srb->dma_len);
+ return;
+ }
+
- if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
- /* No data being transferred */
- cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
- return;
- }
-
scsi_for_each_sg(cmd, sg, tot_dsds, i) {
dma_addr_t sle_dma;
cmd_entry->hdr.entryCount = req_cnt;
/* Set data transfer direction control flags
- * NOTE: Look at data_direction bits iff there is data to be
- * transferred, as the data direction bit is sometimed filled
- * in when there is no data to be transferred */
+ * NOTE: Look at data_direction bits iff there is data to be
+ * transferred, as the data direction bit is sometimes filled
+ * in when there is no data to be transferred */
cmd_entry->control_flags = CF_NO_DATA;
-
- cmd_entry->ttlByteCnt = cpu_to_le32(scsi_bufflen(cmd));
-
if (scsi_bufflen(cmd)) {
if (cmd->sc_data_direction == DMA_TO_DEVICE)
cmd_entry->control_flags = CF_WRITE;
wmb();
/*
- * Check to see if adapter is online before placing request on
+ * Check to see if adapter is online before placing request on
- * request queue. If a reset occurs and a request is in the queue,
- * the firmware will still attempt to process the request, retrieving
- * garbage for pointers.
- */
+ * request queue. If a reset occurs and a request is in the queue,
+ * the firmware will still attempt to process the request, retrieving
+ * garbage for pointers.
+ */
if (!test_bit(AF_ONLINE, &ha->flags)) {
DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! "
"Do not issue command.\n",
.slave_alloc = qla4xxx_slave_alloc,
.slave_destroy = qla4xxx_slave_destroy,
- .target_destroy = qla4xxx_target_destroy,
-
- .scan_finished = iscsi_scan_finished,
- .scan_start = qla4xxx_scan_start,
-
.this_id = -1,
.cmd_per_lun = 3,
.use_clustering = ENABLE_CLUSTERING,
break;
case ISCSI_PARAM_CONN_ADDRESS:
/* TODO: what are the ipv6 bits */
- len = sprintf(buf, "%u.%u.%u.%u",
- NIPQUAD(ddb_entry->ip_addr));
- len = sprintf(buf, "%pI4\n", &ddb_entry->ip_addr);
++ len = sprintf(buf, "%pI4", &ddb_entry->ip_addr);
break;
default:
return -ENOSYS;
{
if (!ddb_entry->sess)
return;
+ free_osindex(ddb_entry->ha, ddb_entry->os_target_id);
if (ddb_entry->conn) {
- atomic_set(&ddb_entry->state, DDB_STATE_DEAD);
+ QL_ISCSI_IF_DESTROY_SESSION_DONE(ddb_entry);
+ QL_ISCSI_DESTROY_CONN(ddb_entry);
- if (!QL_DDB_STATE_REMOVED(ddb_entry))
- iscsi_remove_session(ddb_entry->sess);
+ iscsi_remove_session(ddb_entry->sess);
}
iscsi_free_session(ddb_entry->sess);
}
struct ddb_entry *ddb_entry)
{
atomic_set(&ddb_entry->state, DDB_STATE_MISSING);
- DEBUG3(printk("scsi%d:%d:%d: index [%d] marked MISSING\n",
- ha->host_no, ddb_entry->bus, ddb_entry->target,
- ddb_entry->fw_ddb_index));
- iscsi_block_session(ddb_entry->sess);
- iscsi_conn_error_event(ddb_entry->conn, ISCSI_ERR_CONN_FAILED);
+
+ dev_info(&ha->pdev->dev, "%s: ddb[%d] os[%d] marked MISSING\n",
+ __func__, ddb_entry->fw_ddb_index, ddb_entry->os_target_id);
+
- if (ddb_entry->conn)
- qla4xxx_conn_stop(ddb_entry->conn, STOP_CONN_RECOVER);
++ qla4xxx_conn_stop(ddb_entry->conn, STOP_CONN_RECOVER);
}
static struct srb* qla4xxx_get_new_srb(struct scsi_qla_host *ha,
struct srb *srb;
int rval;
- if (!sess) {
- cmd->result = DID_IMM_RETRY << 16;
- goto qc_fail_command;
- }
-
- rval = iscsi_session_chkready(sess);
- if (rval) {
- cmd->result = rval;
- goto qc_fail_command;
- }
-
if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
- if ((atomic_read(&ddb_entry->state) == DDB_STATE_DEAD) ||
- QL_DDB_STATE_REMOVED(ddb_entry)) {
+ if (atomic_read(&ddb_entry->state) == DDB_STATE_DEAD) {
cmd->result = DID_NO_CONNECT << 16;
goto qc_fail_command;
}
list_for_each_entry_safe(ddb_entry, dtemp, &ha->ddb_list, list) {
/* Count down time between sending relogins */
if (adapter_up(ha) &&
- !test_bit(DF_RELOGIN, &ddb_entry->flags) &&
- !QL_DDB_STATE_REMOVED(ddb_entry) &&
- atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
- !test_bit(DF_RELOGIN, &ddb_entry->flags) &&
- atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
++ !test_bit(DF_RELOGIN, &ddb_entry->flags) &&
++ atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
if (atomic_read(&ddb_entry->retry_relogin_timer) !=
- INVALID_ENTRY) {
+ INVALID_ENTRY) {
if (atomic_read(&ddb_entry->retry_relogin_timer)
- == 0) {
+ == 0) {
atomic_set(&ddb_entry->
retry_relogin_timer,
INVALID_ENTRY);
/* Wait for relogin to timeout */
if (atomic_read(&ddb_entry->relogin_timer) &&
- (atomic_dec_and_test(&ddb_entry->relogin_timer) != 0)) {
- (atomic_dec_and_test(&ddb_entry->relogin_timer) != 0)) {
++ (atomic_dec_and_test(&ddb_entry->relogin_timer) != 0)) {
/*
* If the relogin times out and the device is
* still NOT ONLINE then try and relogin again.
*/
if (atomic_read(&ddb_entry->state) !=
- DDB_STATE_ONLINE &&
- !QL_DDB_STATE_REMOVED(ddb_entry) &&
- ddb_entry->fw_ddb_device_state ==
- DDB_DS_SESSION_FAILED) {
- DDB_STATE_ONLINE &&
- ddb_entry->fw_ddb_device_state ==
- DDB_DS_SESSION_FAILED) {
++ DDB_STATE_ONLINE &&
++ ddb_entry->fw_ddb_device_state ==
++ DDB_DS_SESSION_FAILED) {
/* Reset retry relogin timer */
atomic_inc(&ddb_entry->relogin_retry_count);
DEBUG2(printk("scsi%ld: index[%d] relogin"
/* Wakeup the dpc routine for this adapter, if needed. */
if ((start_dpc ||
- test_bit(DPC_RESET_HA, &ha->dpc_flags) ||
- test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags) ||
- test_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags) ||
- test_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags) ||
- test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags) ||
- test_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags) ||
- test_bit(DPC_AEN, &ha->dpc_flags)) &&
- ha->dpc_thread) {
+ test_bit(DPC_RESET_HA, &ha->dpc_flags) ||
+ test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags) ||
+ test_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags) ||
+ test_bit(DPC_RESET_HA_DESTROY_DDB_LIST, &ha->dpc_flags) ||
+ test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags) ||
+ test_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags) ||
- test_bit(DPC_REMOVE_DEVICE, &ha->dpc_flags) ||
+ QL_DPC_OFFLINE_SET(ha) ||
+ test_bit(DPC_AEN, &ha->dpc_flags)) &&
+ ha->dpc_thread) {
DEBUG2(printk("scsi%ld: %s: scheduling dpc routine"
- " - dpc flags = 0x%lx\n",
- ha->host_no, __func__, ha->dpc_flags));
+ " - dpc flags = 0x%lx\n",
+ ha->host_no, __func__, ha->dpc_flags));
queue_work(ha->dpc_thread, &ha->dpc_work);
}
if (test_and_clear_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags))
qla4xxx_get_dhcp_ip_address(ha);
+ qla4xxx_check_dev_offline(ha);
+
- qla4xxx_remove_device(ha);
++ if (test_and_clear_bit(DPC_DELETE_DEVICE, &ha->dpc_flags)) {
++ list_for_each_entry_safe(ddb_entry, dtemp,
++ &ha->ddb_list, list) {
++ if (test_and_clear_bit(DF_DELETED,
++ &ddb_entry->flags)) {
++ if (atomic_read(&ddb_entry->state) ==
++ DDB_STATE_DEAD) {
++ dev_info(&ha->pdev->dev,
++ "%s: ddb[%d] os[%d] - "
++ "delete\n",
++ __func__,
++ ddb_entry->fw_ddb_index,
++ ddb_entry->os_target_id);
++ } else {
++ dev_info(&ha->pdev->dev,
++ "%s: ddb[%d] os[%d] - "
++ "ddb state not dead but"
++ " marked for delete\n",
++ __func__,
++ ddb_entry->fw_ddb_index,
++ ddb_entry->os_target_id);
++ }
++ }
++ }
++ }
+
/* ---- relogin device? --- */
if (adapter_up(ha) &&
- test_and_clear_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags)) {
+ test_and_clear_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags)) {
list_for_each_entry_safe(ddb_entry, dtemp,
&ha->ddb_list, list) {
if (test_and_clear_bit(DF_RELOGIN, &ddb_entry->flags) &&
- !QL_DDB_STATE_REMOVED(ddb_entry) &&
- atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE)
+ (atomic_read(&ddb_entry->state) !=
+ DDB_STATE_ONLINE))
qla4xxx_relogin_device(ha, ddb_entry);
/*
if (ret)
goto probe_failed;
- printk(KERN_INFO
- " QLogic iSCSI HBA Driver version: %s\n"
- " QLogic ISP%04x @ %s, host#=%ld, fw=%02d.%02d.%02d.%02d\n",
- qla4xxx_version_str, ha->pdev->device, pci_name(ha->pdev),
- ha->host_no, ha->firmware_version[0], ha->firmware_version[1],
- ha->patch_number, ha->build_number);
- scsi_scan_host(host);
+ if ((ret = QL_ISCSI_REGISTER_HOST(host, qla4xxx_scsi_transport)))
+ goto remove_host;
+
+ /* Update transport device information for all devices. */
+ list_for_each_entry_safe(ddb_entry, ddbtemp, &ha->ddb_list, list) {
+
+ if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE)
+ set_bit(DF_SCAN_ISSUED, &ddb_entry->flags);
+
+ if (qla4xxx_add_sess(ddb_entry,
+ test_bit(DF_SCAN_ISSUED, &ddb_entry->flags))) {
+ QL_ISCSI_UNREGISTER_HOST(host, qla4xxx_scsi_transport);
+ goto remove_host;
+ }
+ if (!test_bit(DF_SCAN_ISSUED, &ddb_entry->flags))
+ qla4xxx_mark_device_missing(ha, ddb_entry);
+ }
+
+ dev_info(&ha->pdev->dev, " QLogic iSCSI HBA Driver version: %s\n"
+ " QLogic ISP%04x @ %s, pdev = %p host#=%ld,"
+ " fw=%02d.%02d.%02d.%02d\n", qla4xxx_version_str,
+ ha->pdev->device, pci_name(ha->pdev), pdev,
+ ha->host_no, ha->firmware_version[0], ha->firmware_version[1],
+ ha->patch_number, ha->build_number);
+
+ /* Insert new entry into the list of adapters. */
+ klist_add_tail(&ha->node, &qla4xxx_hostlist);
+ ha->instance = atomic_inc_return(&qla4xxx_hba_count) - 1;
+
+ if (qla4xxx_ioctl_init(ha)) {
+ dev_info(&ha->pdev->dev, "ioctl init failed\n");
+ QL_ISCSI_UNREGISTER_HOST(host, qla4xxx_scsi_transport);
+ goto remove_host;
+ }
+
+ DEBUG2(dev_info(&ha->pdev->dev, "listhead=%p, done adding ha=%p i=%d\n",
+ &qla4xxx_hostlist, &ha->node, ha->instance));
+
- set_bit(AF_INIT_DONE, &ha->flags);
++// set_bit(AF_INIT_DONE, &ha->flags);
+ dev_info(&ha->pdev->dev, "%s: AF_INIT_DONE\n", __func__);
+
return 0;
+remove_host:
+ qla4xxx_free_ddb_list(ha);
+ scsi_remove_host(host);
+
probe_failed:
qla4xxx_free_adapter(ha);
scsi_host_put(ha->host);
static int qla4xxx_slave_alloc(struct scsi_device *sdev)
{
- struct iscsi_cls_session *sess = starget_to_session(sdev->sdev_target);
- struct ddb_entry *ddb = sess->dd_data;
+ struct iscsi_cls_session *sess = QL_ISCSI_SDEV_TO_SESS(sdev);
- sdev->hostdata = ddb;
- sdev->tagged_supported = 1;
- scsi_activate_tcq(sdev, QL4_DEF_QDEPTH);
- return 0;
+ if (sess) {
+ sdev->hostdata = sess->dd_data;
- QL_SET_SDEV_HOSTDATA(sdev, sess);
+ return 0;
+ }
+ return FAILED;
}
static int qla4xxx_slave_configure(struct scsi_device *sdev)
--- /dev/null
+/*
+ * QLogic iSCSI HBA Driver
+ * Copyright (c) 2003-2006 QLogic Corporation
+ *
+ * See LICENSE.qla4xxx for copyright and licensing details.
+ */
+
+/*
+ * This file encapsulates RHEL5 Specific Code
+ */
+
+#ifndef __QLA4x_OS_H
+#define __QLA4x_OS_H
+
+/* Common across all O.S platforms */
+#define IOCB_CMD_TIMEOUT 30
+#define RELOGIN_TOV 18
+#define RECOVERY_TIMEOUT 20 /* ddb state MISSING -> DEAD */
+
+#define QL_IOCB_CMD_TIMEOUT(cmd)
+
+#define QL_SET_DDB_OFFLINE(ha, ddb_entry)
+
+#define QL_SESS_RECOVERY_TO(ddb_entry) ddb_entry->ha->port_down_retry_count
+
+#define QL_DPC_OFFLINE_SET(ha) 0
+
+#define QL_ISCSI_CONN_TO_SESS(conn) iscsi_dev_to_session(conn->dev.parent)
+
+#define QL_ISCSI_SDEV_TO_SESS(sdev) starget_to_session(sdev->sdev_target)
+
+#define QL_ISCSI_ADD_SESS(ddb_entry) \
+ iscsi_add_session(ddb_entry->sess, ddb_entry->os_target_id)
+
+#define QL_ISCSI_REGISTER_HOST(host, trans) 0
+#define QL_ISCSI_UNREGISTER_HOST(host, trans)
+
+#define QL_ISCSI_SESSION_ID(ddb_entry) ddb_entry->sess->sid
+#define QL_ISCSI_IF_DESTROY_SESSION_DONE(ddb_entry)
+#define QL_ISCSI_DESTROY_CONN(ddb_entry)
+#define QL_ISCSI_CREATE_CONN(ddb_entry) \
+ iscsi_create_conn(ddb_entry->sess, 0, 0)
+#define QL_ISCSI_CREATE_SESS_DONE(ddb_entry) \
+ iscsi_unblock_session(ddb_entry->sess)
+#define QL_ISCSI_ALLOC_SESSION(ha, trans) \
+ iscsi_alloc_session(ha->host, trans, sizeof(struct ddb_entry))
- #define QL_SET_SDEV_HOSTDATA(sdev, sess)
+
- #define QL_DDB_STATE_REMOVED(ddb_entry) 0
+
+#define QL_MISC_INIT 0
+#define QL_MISC_EXIT
+
+#define qla4xxx_check_dev_offline(ha)
+#define qla4xxx_proc_info NULL
- #define qla4xxx_target_destroy NULL
+
+#define QL_SET_SCSI_RESID(cmd, residual) scsi_set_resid(cmd, residual)
+#define QL_SCSI_BUFFLEN(cmd) scsi_bufflen(cmd)
+
+#define QL_DPC_DATA_TO_HA(work) \
+ container_of((struct work_struct *)work, struct scsi_qla_host, dpc_work)
+
+#define QL_INIT_WORK(ha, dpc_func) INIT_WORK(&ha->dpc_work, dpc_func)
+
+#define QL_REQ_IRQ_FLAGS (IRQF_DISABLED | IRQF_SHARED)
+
+#define QL_DECLARE_INTR_HANDLER(intr_func, irq, dev_id, regs) \
+ irqreturn_t intr_func(int irq, void *dev_id)
+
+#define QL_DECLARE_DPC(dpc_func, data) \
+ void dpc_func(struct work_struct *data)
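+
+/*
+ * For example, QL_DECLARE_INTR_HANDLER(qla4xxx_intr_handler, irq, dev_id,
+ * regs) expands to
+ *     irqreturn_t qla4xxx_intr_handler(int irq, void *dev_id)
+ * (the regs argument is unused on this kernel), and a DPC routine declared
+ * with QL_DECLARE_DPC(dpc_func, data) recovers its adapter with
+ *     struct scsi_qla_host *ha = QL_DPC_DATA_TO_HA(data);
+ */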
+
+#define QL_INIT_SESSION_DATASIZE(sessiondata_size)
++// .sessiondata_size = sizeof(struct ddb_entry),
+
+#define QL_INIT_HOST_TEMPLATE(host_template)
++// .host_template = &qla4xxx_driver_template,
+
+QL_DECLARE_INTR_HANDLER(qla4xxx_intr_handler, irq, dev_id, regs);
+
+static inline struct kmem_cache *ql_kmem_cache_create(void)
+{
+ return (kmem_cache_create("qla4xxx_srbs", sizeof(struct srb), 0,
+ SLAB_HWCACHE_ALIGN, NULL));
+}
+
+static inline void qla4xxx_scan_target(struct ddb_entry * ddb_entry)
+{
+ scsi_scan_target(&ddb_entry->sess->dev, 0,
+ ddb_entry->sess->target_id, SCAN_WILD_CARD, 0);
+}
+
+static void ql4_get_aen_log(struct scsi_qla_host *ha, struct ql4_aen_log *aenl)
+{
+ if (aenl) {
+ memcpy(aenl, &ha->aen_log, sizeof (ha->aen_log));
+ ha->aen_log.count = 0;
+ }
+}
+
+static inline int qla4xxx_ioctl_init(struct scsi_qla_host *ha)
+{
+ ha->ql4mbx = qla4xxx_mailbox_command;
+ ha->ql4cmd = qla4xxx_send_command_to_isp;
+ ha->ql4getaenlog = ql4_get_aen_log;
+ return 0;
+}
+
+static inline void qla4xxx_ioctl_exit(struct scsi_qla_host *ha)
+{
+ return;
+}
+
+static inline void qla4xxx_srb_free_dma(struct scsi_qla_host *ha,
+ struct srb *srb)
+{
+ struct scsi_cmnd *cmd = srb->cmd;
+
+ if (srb->flags & SRB_DMA_VALID) {
+ scsi_dma_unmap(cmd);
+ srb->flags &= ~SRB_DMA_VALID;
+ }
+
+ cmd->SCp.ptr = NULL;
+}
+
- static inline void qla4xxx_remove_device(struct scsi_qla_host *ha)
- {
- struct ddb_entry *ddb_entry, *dtemp;
-
- if (test_and_clear_bit(DPC_REMOVE_DEVICE, &ha->dpc_flags)) {
- list_for_each_entry_safe(ddb_entry, dtemp,
- &ha->ddb_list, list) {
- if (test_and_clear_bit(DF_REMOVE, &ddb_entry->flags)) {
- dev_info(&ha->pdev->dev,
- "%s: ddb[%d] os[%d] - removed\n",
- __func__, ddb_entry->fw_ddb_index,
- ddb_entry->os_target_id);
- qla4xxx_free_ddb(ha, ddb_entry);
- }
- }
- }
- }
-
+#endif /* _QLA4x_OS_H */
* See LICENSE.qla4xxx for copyright and licensing details.
*/
- #define QLA4XXX_DRIVER_VERSION "5.01.00-k8_sles11-04"
-#define QLA4XXX_DRIVER_VERSION "5.01.00-k8"
++#define QLA4XXX_DRIVER_VERSION "5.01.00-k8_sles11-03"
}
#endif
+#ifdef CONFIG_SCSI_NETLINK
+/**
+ * scsi_post_sense_event - called to post a 'Sense Code' event
+ *
+ * @sdev: SCSI device the sense code occurred on
+ * @sshdr: SCSI sense code
+ *
+ * This function returns nothing; if the event cannot be posted, the
+ * failure is logged via sdev_printk().
+ *
+ */
+static void scsi_post_sense_event(struct scsi_device *sdev,
+ struct scsi_sense_hdr *sshdr)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct scsi_nl_sense_msg *msg;
+ u32 len, skblen;
+ int err;
+
+ if (!scsi_nl_sock) {
+ err = -ENOENT;
+ goto send_fail;
+ }
+
+ len = SCSI_NL_MSGALIGN(sizeof(*msg));
+ skblen = NLMSG_SPACE(len);
+
+ skb = alloc_skb(skblen, GFP_ATOMIC);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto send_fail;
+ }
+
+ nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+ skblen - sizeof(*nlh), 0);
+ if (!nlh) {
+ err = -ENOBUFS;
+ goto send_fail_skb;
+ }
+ msg = NLMSG_DATA(nlh);
+
+ INIT_SCSI_NL_HDR(&msg->snlh, SCSI_NL_TRANSPORT_ML,
+ ML_NL_SCSI_SENSE, len);
+ msg->host_no = sdev->host->host_no;
+ msg->channel = sdev->channel;
+ msg->id = sdev->id;
+ msg->lun = sdev->lun;
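+ /* pack response code, sense key, ASC and ASCQ into one 32-bit word */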
+ msg->sense = (sshdr->response_code << 24) | (sshdr->sense_key << 16) |
+ (sshdr->asc << 8) | sshdr->ascq;
+
+ err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_ML_EVENTS,
- GFP_ATOMIC);
++ GFP_KERNEL);
+ if (err && (err != -ESRCH))
+ /* nlmsg_multicast already kfree_skb'd */
+ goto send_fail;
+
+ return;
+
+send_fail_skb:
+ kfree_skb(skb);
+send_fail:
+ sdev_printk(KERN_WARNING, sdev,
+ "Dropped SCSI Msg %02x/%02x/%02x/%02x: err %d\n",
+ sshdr->response_code, sshdr->sense_key,
+ sshdr->asc, sshdr->ascq, err);
+ return;
+}
+#else
+static inline void scsi_post_sense_event(struct scsi_device *sdev,
+ struct scsi_sense_hdr *sshdr) {}
+#endif
+
/**
* scsi_check_sense - Examine scsi cmd sense
* @scmd: Cmd to have sense checked.
#include <asm/irq.h>
#include "8250.h"
+
+ #ifdef CONFIG_SPARC
+ #include "suncore.h"
+ #endif
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+/*
+ * kdb_serial_line records the serial line number of the first serial console.
+ * NOTE: The kernel ignores characters on the serial line unless a user space
+ * program has opened the line first. To enter kdb before user space has opened
+ * the serial line, you can use the 'kdb=early' flag to lilo and set the
+ * appropriate breakpoints.
+ */
+
+static int kdb_serial_line = -1;
+static const char *kdb_serial_ptr = kdb_serial_str;
+#else
+#define KDB_8250() 0
+#endif /* CONFIG_KDB */
/*
* Configuration:
channel->ch_bd->bd_ops->disable_receiver(channel);
}
++static void jsm_tty_enable_ms(struct uart_port *port)
++{
++ /* Nothing needed */
++}
++
static void jsm_tty_break(struct uart_port *port, int break_state)
{
unsigned long lock_flags;
.start_tx = jsm_tty_start_tx,
.send_xchar = jsm_tty_send_xchar,
.stop_rx = jsm_tty_stop_rx,
++ .enable_ms = jsm_tty_enable_ms,
.break_ctl = jsm_tty_break,
.startup = jsm_tty_open,
.shutdown = jsm_tty_close,
}
EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown);
+#ifdef CONFIG_KDB_USB
+void *
+usb_hcd_get_kdb_poll_func(struct usb_device *udev)
+{
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+
+ if (hcd && hcd->driver)
+ return (void *)(hcd->driver->kdb_poll_char);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL (usb_hcd_get_kdb_poll_func);
+#endif /* CONFIG_KDB_USB */
+
/*-------------------------------------------------------------------------*/
- #if defined(CONFIG_USB_MON)
+ #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
struct usb_mon_operations *mon_ops;
--- /dev/null
+ /*
+ * driver/usb/gadget/fsl_qe_udc.c
+ *
+ * Copyright (c) 2006-2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Xie Xiaobo <X.Xie@freescale.com>
+ * Li Yang <leoli@freescale.com>
+ * Based on bareboard code from Shlomi Gridish.
+ *
+ * Description:
+ * Freescale QE/CPM USB Peripheral Controller Driver
+ * The controller can be found on MPC8360, MPC8272, etc.
+ * MPC8360 Rev 1.1 may need a QE microcode update
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+ #undef USB_TRACE
+
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+ #include <linux/ioport.h>
+ #include <linux/types.h>
+ #include <linux/errno.h>
+ #include <linux/err.h>
+ #include <linux/slab.h>
+ #include <linux/list.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
+ #include <linux/moduleparam.h>
+ #include <linux/of_platform.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/usb/ch9.h>
+ #include <linux/usb/gadget.h>
+ #include <linux/usb/otg.h>
+ #include <asm/qe.h>
+ #include <asm/cpm.h>
+ #include <asm/dma.h>
+ #include <asm/reg.h>
+ #include "fsl_qe_udc.h"
+
+ #define DRIVER_DESC "Freescale QE/CPM USB Device Controller driver"
+ #define DRIVER_AUTHOR "Xie XiaoBo"
+ #define DRIVER_VERSION "1.0"
+
+ #define DMA_ADDR_INVALID (~(dma_addr_t)0)
+
+ static const char driver_name[] = "fsl_qe_udc";
+ static const char driver_desc[] = DRIVER_DESC;
+
+ /* ep names are important in the gadget framework; they must follow the
+ * convention expected by ep_match() */
+ static const char *const ep_name[] = {
+ "ep0-control", /* everyone has ep0 */
+ /* 3 configurable endpoints */
+ "ep1",
+ "ep2",
+ "ep3",
+ };
+
+ static struct usb_endpoint_descriptor qe_ep0_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bEndpointAddress = 0,
+ .bmAttributes = USB_ENDPOINT_XFER_CONTROL,
+ .wMaxPacketSize = USB_MAX_CTRL_PAYLOAD,
+ };
+
+ /* it is initialized in probe() */
+ static struct qe_udc *udc_controller;
+
+ /********************************************************************
+ * Internal Used Function Start
+ ********************************************************************/
+ /*-----------------------------------------------------------------
+ * done() - retire a request; caller blocked irqs
+ *--------------------------------------------------------------*/
+ static void done(struct qe_ep *ep, struct qe_req *req, int status)
+ {
+ struct qe_udc *udc = ep->udc;
+ unsigned char stopped = ep->stopped;
+
+ /* req->queue is the list head that ep_queue() used to add the
+ * request to the tail of the endpoint queue, so drop the request
+ * from ep->queue here
+ */
+ list_del_init(&req->queue);
+
+ /* req.status should be set as -EINPROGRESS in ep_queue() */
+ if (req->req.status == -EINPROGRESS)
+ req->req.status = status;
+ else
+ status = req->req.status;
+
+ if (req->mapped) {
+ dma_unmap_single(udc->gadget.dev.parent,
+ req->req.dma, req->req.length,
+ ep_is_in(ep)
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE);
+ req->req.dma = DMA_ADDR_INVALID;
+ req->mapped = 0;
+ } else
+ dma_sync_single_for_cpu(udc->gadget.dev.parent,
+ req->req.dma, req->req.length,
+ ep_is_in(ep)
+ ? DMA_TO_DEVICE
+ : DMA_FROM_DEVICE);
+
+ if (status && (status != -ESHUTDOWN))
+ dev_vdbg(udc->dev, "complete %s req %p stat %d len %u/%u\n",
+ ep->ep.name, &req->req, status,
+ req->req.actual, req->req.length);
+
+ /* don't modify queue heads during completion callback */
+ ep->stopped = 1;
+ spin_unlock(&udc->lock);
+
+ /* this complete() should be a function implemented by the gadget
+ * layer, e.g. fsg->bulk_in_complete() */
+ if (req->req.complete)
+ req->req.complete(&ep->ep, &req->req);
+
+ spin_lock(&udc->lock);
+
+ ep->stopped = stopped;
+ }
+
+ /*-----------------------------------------------------------------
+ * nuke(): delete all requests related to this ep
+ *--------------------------------------------------------------*/
+ static void nuke(struct qe_ep *ep, int status)
+ {
+ /* Retire any requests still linked on this ep's queue */
+ while (!list_empty(&ep->queue)) {
+ struct qe_req *req = NULL;
+ req = list_entry(ep->queue.next, struct qe_req, queue);
+
+ done(ep, req, status);
+ }
+ }
+
+ /*---------------------------------------------------------------------------*
+ * USB and Endpoint manipulate process, include parameter and register *
+ *---------------------------------------------------------------------------*/
+ /* @value: 1 -- set stall, 0 -- clear stall */
+ static int qe_eprx_stall_change(struct qe_ep *ep, int value)
+ {
+ u16 tem_usep;
+ u8 epnum = ep->epnum;
+ struct qe_udc *udc = ep->udc;
+
+ tem_usep = in_be16(&udc->usb_regs->usb_usep[epnum]);
+ tem_usep = tem_usep & ~USB_RHS_MASK;
+ if (value == 1)
+ tem_usep |= USB_RHS_STALL;
+ else if (ep->dir == USB_DIR_IN)
+ tem_usep |= USB_RHS_IGNORE_OUT;
+
+ out_be16(&udc->usb_regs->usb_usep[epnum], tem_usep);
+ return 0;
+ }
+
+ static int qe_eptx_stall_change(struct qe_ep *ep, int value)
+ {
+ u16 tem_usep;
+ u8 epnum = ep->epnum;
+ struct qe_udc *udc = ep->udc;
+
+ tem_usep = in_be16(&udc->usb_regs->usb_usep[epnum]);
+ tem_usep = tem_usep & ~USB_THS_MASK;
+ if (value == 1)
+ tem_usep |= USB_THS_STALL;
+ else if (ep->dir == USB_DIR_OUT)
+ tem_usep |= USB_THS_IGNORE_IN;
+
+ out_be16(&udc->usb_regs->usb_usep[epnum], tem_usep);
+
+ return 0;
+ }
+
+ static int qe_ep0_stall(struct qe_udc *udc)
+ {
+ qe_eptx_stall_change(&udc->eps[0], 1);
+ qe_eprx_stall_change(&udc->eps[0], 1);
+ udc_controller->ep0_state = WAIT_FOR_SETUP;
+ udc_controller->ep0_dir = 0;
+ return 0;
+ }
+
+ static int qe_eprx_nack(struct qe_ep *ep)
+ {
+ u8 epnum = ep->epnum;
+ struct qe_udc *udc = ep->udc;
+
+ if (ep->state == EP_STATE_IDLE) {
+ /* Set the ep's nack */
+ clrsetbits_be16(&udc->usb_regs->usb_usep[epnum],
+ USB_RHS_MASK, USB_RHS_NACK);
+
+ /* Mask Rx and Busy interrupts */
+ clrbits16(&udc->usb_regs->usb_usbmr,
+ (USB_E_RXB_MASK | USB_E_BSY_MASK));
+
+ ep->state = EP_STATE_NACK;
+ }
+ return 0;
+ }
+
+ static int qe_eprx_normal(struct qe_ep *ep)
+ {
+ struct qe_udc *udc = ep->udc;
+
+ if (ep->state == EP_STATE_NACK) {
+ clrsetbits_be16(&udc->usb_regs->usb_usep[ep->epnum],
+ USB_RTHS_MASK, USB_THS_IGNORE_IN);
+
+ /* Unmask RX interrupts */
+ out_be16(&udc->usb_regs->usb_usber,
+ USB_E_BSY_MASK | USB_E_RXB_MASK);
+ setbits16(&udc->usb_regs->usb_usbmr,
+ (USB_E_RXB_MASK | USB_E_BSY_MASK));
+
+ ep->state = EP_STATE_IDLE;
+ ep->has_data = 0;
+ }
+
+ return 0;
+ }
+
+ static int qe_ep_cmd_stoptx(struct qe_ep *ep)
+ {
+ if (ep->udc->soc_type == PORT_CPM)
+ cpm_command(CPM_USB_STOP_TX | (ep->epnum << CPM_USB_EP_SHIFT),
+ CPM_USB_STOP_TX_OPCODE);
+ else
+ qe_issue_cmd(QE_USB_STOP_TX, QE_CR_SUBBLOCK_USB,
+ ep->epnum, 0);
+
+ return 0;
+ }
+
+ static int qe_ep_cmd_restarttx(struct qe_ep *ep)
+ {
+ if (ep->udc->soc_type == PORT_CPM)
+ cpm_command(CPM_USB_RESTART_TX | (ep->epnum <<
+ CPM_USB_EP_SHIFT), CPM_USB_RESTART_TX_OPCODE);
+ else
+ qe_issue_cmd(QE_USB_RESTART_TX, QE_CR_SUBBLOCK_USB,
+ ep->epnum, 0);
+
+ return 0;
+ }
+
+ static int qe_ep_flushtxfifo(struct qe_ep *ep)
+ {
+ struct qe_udc *udc = ep->udc;
+ int i;
+
+ i = (int)ep->epnum;
+
+ qe_ep_cmd_stoptx(ep);
+ out_8(&udc->usb_regs->usb_uscom,
+ USB_CMD_FLUSH_FIFO | (USB_CMD_EP_MASK & (ep->epnum)));
+ out_be16(&udc->ep_param[i]->tbptr, in_be16(&udc->ep_param[i]->tbase));
+ out_be32(&udc->ep_param[i]->tstate, 0);
+ out_be16(&udc->ep_param[i]->tbcnt, 0);
+
+ ep->c_txbd = ep->txbase;
+ ep->n_txbd = ep->txbase;
+ qe_ep_cmd_restarttx(ep);
+ return 0;
+ }
+
+ static int qe_ep_filltxfifo(struct qe_ep *ep)
+ {
+ struct qe_udc *udc = ep->udc;
+
+ out_8(&udc->usb_regs->usb_uscom,
+ USB_CMD_STR_FIFO | (USB_CMD_EP_MASK & (ep->epnum)));
+ return 0;
+ }
+
+ static int qe_epbds_reset(struct qe_udc *udc, int pipe_num)
+ {
+ struct qe_ep *ep;
+ u32 bdring_len;
+ struct qe_bd __iomem *bd;
+ int i;
+
+ ep = &udc->eps[pipe_num];
+
+ if (ep->dir == USB_DIR_OUT)
+ bdring_len = USB_BDRING_LEN_RX;
+ else
+ bdring_len = USB_BDRING_LEN;
+
+ bd = ep->rxbase;
+ for (i = 0; i < (bdring_len - 1); i++) {
+ out_be32((u32 __iomem *)bd, R_E | R_I);
+ bd++;
+ }
+ out_be32((u32 __iomem *)bd, R_E | R_I | R_W);
+
+ bd = ep->txbase;
+ for (i = 0; i < USB_BDRING_LEN_TX - 1; i++) {
+ out_be32(&bd->buf, 0);
+ out_be32((u32 __iomem *)bd, 0);
+ bd++;
+ }
+ out_be32((u32 __iomem *)bd, T_W);
+
+ return 0;
+ }
+
+ static int qe_ep_reset(struct qe_udc *udc, int pipe_num)
+ {
+ struct qe_ep *ep;
+ u16 tmpusep;
+
+ ep = &udc->eps[pipe_num];
+ tmpusep = in_be16(&udc->usb_regs->usb_usep[pipe_num]);
+ tmpusep &= ~USB_RTHS_MASK;
+
+ switch (ep->dir) {
+ case USB_DIR_BOTH:
+ qe_ep_flushtxfifo(ep);
+ break;
+ case USB_DIR_OUT:
+ tmpusep |= USB_THS_IGNORE_IN;
+ break;
+ case USB_DIR_IN:
+ qe_ep_flushtxfifo(ep);
+ tmpusep |= USB_RHS_IGNORE_OUT;
+ break;
+ default:
+ break;
+ }
+ out_be16(&udc->usb_regs->usb_usep[pipe_num], tmpusep);
+
+ qe_epbds_reset(udc, pipe_num);
+
+ return 0;
+ }
+
+ static int qe_ep_toggledata01(struct qe_ep *ep)
+ {
+ ep->data01 ^= 0x1;
+ return 0;
+ }
+
+ static int qe_ep_bd_init(struct qe_udc *udc, unsigned char pipe_num)
+ {
+ struct qe_ep *ep = &udc->eps[pipe_num];
+ unsigned long tmp_addr = 0;
+ struct usb_ep_para __iomem *epparam;
+ int i;
+ struct qe_bd __iomem *bd;
+ int bdring_len;
+
+ if (ep->dir == USB_DIR_OUT)
+ bdring_len = USB_BDRING_LEN_RX;
+ else
+ bdring_len = USB_BDRING_LEN;
+
+ epparam = udc->ep_param[pipe_num];
+ /* alloc multi-ram for BD rings and set the ep parameters */
+ tmp_addr = cpm_muram_alloc(sizeof(struct qe_bd) * (bdring_len +
+ USB_BDRING_LEN_TX), QE_ALIGNMENT_OF_BD);
+ if (IS_ERR_VALUE(tmp_addr))
+ return -ENOMEM;
+
+ out_be16(&epparam->rbase, (u16)tmp_addr);
+ out_be16(&epparam->tbase, (u16)(tmp_addr +
+ (sizeof(struct qe_bd) * bdring_len)));
+
+ out_be16(&epparam->rbptr, in_be16(&epparam->rbase));
+ out_be16(&epparam->tbptr, in_be16(&epparam->tbase));
+
+ ep->rxbase = cpm_muram_addr(tmp_addr);
+ ep->txbase = cpm_muram_addr(tmp_addr + (sizeof(struct qe_bd)
+ * bdring_len));
+ ep->n_rxbd = ep->rxbase;
+ ep->e_rxbd = ep->rxbase;
+ ep->n_txbd = ep->txbase;
+ ep->c_txbd = ep->txbase;
+ ep->data01 = 0; /* data0 */
+
+ /* Init TX and RX bds */
+ bd = ep->rxbase;
+ for (i = 0; i < bdring_len - 1; i++) {
+ out_be32(&bd->buf, 0);
+ out_be32((u32 __iomem *)bd, 0);
+ bd++;
+ }
+ out_be32(&bd->buf, 0);
+ out_be32((u32 __iomem *)bd, R_W);
+
+ bd = ep->txbase;
+ for (i = 0; i < USB_BDRING_LEN_TX - 1; i++) {
+ out_be32(&bd->buf, 0);
+ out_be32((u32 __iomem *)bd, 0);
+ bd++;
+ }
+ out_be32(&bd->buf, 0);
+ out_be32((u32 __iomem *)bd, T_W);
+
+ return 0;
+ }
+
+ static int qe_ep_rxbd_update(struct qe_ep *ep)
+ {
+ unsigned int size;
+ int i;
+ unsigned int tmp;
+ struct qe_bd __iomem *bd;
+ unsigned int bdring_len;
+
+ if (ep->rxbase == NULL)
+ return -EINVAL;
+
+ bd = ep->rxbase;
+
+ ep->rxframe = kmalloc(sizeof(*ep->rxframe), GFP_ATOMIC);
+ if (ep->rxframe == NULL) {
+ dev_err(ep->udc->dev, "malloc rxframe failed\n");
+ return -ENOMEM;
+ }
+
+ qe_frame_init(ep->rxframe);
+
+ if (ep->dir == USB_DIR_OUT)
+ bdring_len = USB_BDRING_LEN_RX;
+ else
+ bdring_len = USB_BDRING_LEN;
+
+ size = (ep->ep.maxpacket + USB_CRC_SIZE + 2) * (bdring_len + 1);
+ ep->rxbuffer = kzalloc(size, GFP_ATOMIC);
+ if (ep->rxbuffer == NULL) {
+ dev_err(ep->udc->dev, "malloc rxbuffer failed,size=%d\n",
+ size);
+ kfree(ep->rxframe);
+ return -ENOMEM;
+ }
+
+ ep->rxbuf_d = virt_to_phys((void *)ep->rxbuffer);
+ if (ep->rxbuf_d == DMA_ADDR_INVALID) {
+ ep->rxbuf_d = dma_map_single(udc_controller->gadget.dev.parent,
+ ep->rxbuffer,
+ size,
+ DMA_FROM_DEVICE);
+ ep->rxbufmap = 1;
+ } else {
+ dma_sync_single_for_device(udc_controller->gadget.dev.parent,
+ ep->rxbuf_d, size,
+ DMA_FROM_DEVICE);
+ ep->rxbufmap = 0;
+ }
+
+ size = ep->ep.maxpacket + USB_CRC_SIZE + 2;
+ tmp = ep->rxbuf_d;
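+ /* advance the buffer's DMA address to the next 4-byte boundary */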
+ tmp = (u32)(((tmp >> 2) << 2) + 4);
+
+ for (i = 0; i < bdring_len - 1; i++) {
+ out_be32(&bd->buf, tmp);
+ out_be32((u32 __iomem *)bd, (R_E | R_I));
+ tmp = tmp + size;
+ bd++;
+ }
+ out_be32(&bd->buf, tmp);
+ out_be32((u32 __iomem *)bd, (R_E | R_I | R_W));
+
+ return 0;
+ }
+
+ static int qe_ep_register_init(struct qe_udc *udc, unsigned char pipe_num)
+ {
+ struct qe_ep *ep = &udc->eps[pipe_num];
+ struct usb_ep_para __iomem *epparam;
+ u16 usep, logepnum;
+ u16 tmp;
+ u8 rtfcr = 0;
+
+ epparam = udc->ep_param[pipe_num];
+
+ usep = 0;
+ logepnum = (ep->desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ usep |= (logepnum << USB_EPNUM_SHIFT);
+
+ switch (ep->desc->bmAttributes & 0x03) {
+ case USB_ENDPOINT_XFER_BULK:
+ usep |= USB_TRANS_BULK;
+ break;
+ case USB_ENDPOINT_XFER_ISOC:
+ usep |= USB_TRANS_ISO;
+ break;
+ case USB_ENDPOINT_XFER_INT:
+ usep |= USB_TRANS_INT;
+ break;
+ default:
+ usep |= USB_TRANS_CTR;
+ break;
+ }
+
+ switch (ep->dir) {
+ case USB_DIR_OUT:
+ usep |= USB_THS_IGNORE_IN;
+ break;
+ case USB_DIR_IN:
+ usep |= USB_RHS_IGNORE_OUT;
+ break;
+ default:
+ break;
+ }
+ out_be16(&udc->usb_regs->usb_usep[pipe_num], usep);
+
+ rtfcr = 0x30;
+ out_8(&epparam->rbmr, rtfcr);
+ out_8(&epparam->tbmr, rtfcr);
+
+ tmp = (u16)(ep->ep.maxpacket + USB_CRC_SIZE);
+ /* MRBLR must be divisible by 4; round up to the next multiple of 4 */
+ tmp = (u16)(((tmp >> 2) << 2) + 4);
+ out_be16(&epparam->mrblr, tmp);
+
+ return 0;
+ }
+
+ static int qe_ep_init(struct qe_udc *udc,
+ unsigned char pipe_num,
+ const struct usb_endpoint_descriptor *desc)
+ {
+ struct qe_ep *ep = &udc->eps[pipe_num];
+ unsigned long flags;
+ int reval = 0;
+ u16 max = 0;
+
+ max = le16_to_cpu(desc->wMaxPacketSize);
+
+ /* Validate the max packet size for this endpoint;
+ * refer to USB 2.0 spec, table 9-13.
+ */
+ if (pipe_num != 0) {
+ switch (desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
+ case USB_ENDPOINT_XFER_BULK:
+ if (strstr(ep->ep.name, "-iso")
+ || strstr(ep->ep.name, "-int"))
+ goto en_done;
+ switch (udc->gadget.speed) {
+ case USB_SPEED_HIGH:
+ if ((max == 128) || (max == 256) || (max == 512))
+ break;
+ default:
+ switch (max) {
+ case 4:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ break;
+ default:
+ case USB_SPEED_LOW:
+ goto en_done;
+ }
+ }
+ break;
+ case USB_ENDPOINT_XFER_INT:
+ if (strstr(ep->ep.name, "-iso")) /* bulk is ok */
+ goto en_done;
+ switch (udc->gadget.speed) {
+ case USB_SPEED_HIGH:
+ if (max <= 1024)
+ break;
+ case USB_SPEED_FULL:
+ if (max <= 64)
+ break;
+ default:
+ if (max <= 8)
+ break;
+ goto en_done;
+ }
+ break;
+ case USB_ENDPOINT_XFER_ISOC:
+ if (strstr(ep->ep.name, "-bulk")
+ || strstr(ep->ep.name, "-int"))
+ goto en_done;
+ switch (udc->gadget.speed) {
+ case USB_SPEED_HIGH:
+ if (max <= 1024)
+ break;
+ case USB_SPEED_FULL:
+ if (max <= 1023)
+ break;
+ default:
+ goto en_done;
+ }
+ break;
+ case USB_ENDPOINT_XFER_CONTROL:
+ if (strstr(ep->ep.name, "-iso")
+ || strstr(ep->ep.name, "-int"))
+ goto en_done;
+ switch (udc->gadget.speed) {
+ case USB_SPEED_HIGH:
+ case USB_SPEED_FULL:
+ switch (max) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ break;
+ default:
+ goto en_done;
+ }
+ case USB_SPEED_LOW:
+ switch (max) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ goto en_done;
+ }
+ default:
+ goto en_done;
+ }
+ break;
+
+ default:
+ goto en_done;
+ }
+ } /* if pipe_num != 0 */
+
+ spin_lock_irqsave(&udc->lock, flags);
+
+ /* initialize ep structure */
+ ep->ep.maxpacket = max;
+ ep->tm = (u8)(desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK);
+ ep->desc = desc;
+ ep->stopped = 0;
+ ep->init = 1;
+
+ if (pipe_num == 0) {
+ ep->dir = USB_DIR_BOTH;
+ udc->ep0_dir = USB_DIR_OUT;
+ udc->ep0_state = WAIT_FOR_SETUP;
+ } else {
+ switch (desc->bEndpointAddress & USB_ENDPOINT_DIR_MASK) {
+ case USB_DIR_OUT:
+ ep->dir = USB_DIR_OUT;
+ break;
+ case USB_DIR_IN:
+ ep->dir = USB_DIR_IN;
+ default:
+ break;
+ }
+ }
+
+ /* hardware special operation */
+ qe_ep_bd_init(udc, pipe_num);
+ if ((ep->tm == USBP_TM_CTL) || (ep->dir == USB_DIR_OUT)) {
+ reval = qe_ep_rxbd_update(ep);
+ if (reval)
+ goto en_done1;
+ }
+
+ if ((ep->tm == USBP_TM_CTL) || (ep->dir == USB_DIR_IN)) {
+ ep->txframe = kmalloc(sizeof(*ep->txframe), GFP_ATOMIC);
+ if (ep->txframe == NULL) {
+ dev_err(udc->dev, "malloc txframe failed\n");
+ goto en_done2;
+ }
+ qe_frame_init(ep->txframe);
+ }
+
+ qe_ep_register_init(udc, pipe_num);
+
+ /* Now HW will be NAKing transfers to that EP,
+ * until a buffer is queued to it. */
+ spin_unlock_irqrestore(&udc->lock, flags);
+
+ return 0;
+ en_done2:
+ kfree(ep->rxbuffer);
+ kfree(ep->rxframe);
+ en_done1:
+ spin_unlock_irqrestore(&udc->lock, flags);
+ en_done:
+ dev_err(udc->dev, "failed to initialize %s\n", ep->ep.name);
+ return -ENODEV;
+ }
+
+ static inline void qe_usb_enable(void)
+ {
+ setbits8(&udc_controller->usb_regs->usb_usmod, USB_MODE_EN);
+ }
+
+ static inline void qe_usb_disable(void)
+ {
+ clrbits8(&udc_controller->usb_regs->usb_usmod, USB_MODE_EN);
+ }
+
+ /*----------------------------------------------------------------------------*
+ * USB and EP basic manipulate function end *
+ *----------------------------------------------------------------------------*/
+
+
+ /******************************************************************************
+ UDC transmit and receive process
+ ******************************************************************************/
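+ /* Re-arm the BD at e_rxbd (mark it empty with interrupt enabled,
+ * preserving the wrap bit) and advance e_rxbd around the ring. */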
+ static void recycle_one_rxbd(struct qe_ep *ep)
+ {
+ u32 bdstatus;
+
+ bdstatus = in_be32((u32 __iomem *)ep->e_rxbd);
+ bdstatus = R_I | R_E | (bdstatus & R_W);
+ out_be32((u32 __iomem *)ep->e_rxbd, bdstatus);
+
+ if (bdstatus & R_W)
+ ep->e_rxbd = ep->rxbase;
+ else
+ ep->e_rxbd++;
+ }
+
+ static void recycle_rxbds(struct qe_ep *ep, unsigned char stopatnext)
+ {
+ u32 bdstatus;
+ struct qe_bd __iomem *bd, *nextbd;
+ unsigned char stop = 0;
+
+ nextbd = ep->n_rxbd;
+ bd = ep->e_rxbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+
+ while (!(bdstatus & R_E) && !(bdstatus & BD_LENGTH_MASK) && !stop) {
+ bdstatus = R_E | R_I | (bdstatus & R_W);
+ out_be32((u32 __iomem *)bd, bdstatus);
+
+ if (bdstatus & R_W)
+ bd = ep->rxbase;
+ else
+ bd++;
+
+ bdstatus = in_be32((u32 __iomem *)bd);
+ if (stopatnext && (bd == nextbd))
+ stop = 1;
+ }
+
+ ep->e_rxbd = bd;
+ }
+
+ static void ep_recycle_rxbds(struct qe_ep *ep)
+ {
+ struct qe_bd __iomem *bd = ep->n_rxbd;
+ u32 bdstatus;
+ u8 epnum = ep->epnum;
+ struct qe_udc *udc = ep->udc;
+
+ bdstatus = in_be32((u32 __iomem *)bd);
+ if (!(bdstatus & R_E) && !(bdstatus & BD_LENGTH_MASK)) {
+ bd = ep->rxbase +
+ ((in_be16(&udc->ep_param[epnum]->rbptr) -
+ in_be16(&udc->ep_param[epnum]->rbase))
+ >> 3);
+ bdstatus = in_be32((u32 __iomem *)bd);
+
+ if (bdstatus & R_W)
+ bd = ep->rxbase;
+ else
+ bd++;
+
+ ep->e_rxbd = bd;
+ recycle_rxbds(ep, 0);
+ ep->e_rxbd = ep->n_rxbd;
+ } else
+ recycle_rxbds(ep, 1);
+
+ if (in_be16(&udc->usb_regs->usb_usber) & USB_E_BSY_MASK)
+ out_be16(&udc->usb_regs->usb_usber, USB_E_BSY_MASK);
+
+ if (ep->has_data <= 0 && (!list_empty(&ep->queue)))
+ qe_eprx_normal(ep);
+
+ ep->localnack = 0;
+ }
+
+ static void setup_received_handle(struct qe_udc *udc,
+ struct usb_ctrlrequest *setup);
+ static int qe_ep_rxframe_handle(struct qe_ep *ep);
+ static void ep0_req_complete(struct qe_udc *udc, struct qe_req *req);
+ /* when BD PID is setup, handle the packet */
+ static int ep0_setup_handle(struct qe_udc *udc)
+ {
+ struct qe_ep *ep = &udc->eps[0];
+ struct qe_frame *pframe;
+ unsigned int fsize;
+ u8 *cp;
+
+ pframe = ep->rxframe;
+ if ((frame_get_info(pframe) & PID_SETUP)
+ && (udc->ep0_state == WAIT_FOR_SETUP)) {
+ fsize = frame_get_length(pframe);
+ if (unlikely(fsize != 8))
+ return -EINVAL;
+ cp = (u8 *)&udc->local_setup_buff;
+ memcpy(cp, pframe->data, fsize);
+ ep->data01 = 1;
+
+ /* handle the usb command base on the usb_ctrlrequest */
+ setup_received_handle(udc, &udc->local_setup_buff);
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ static int qe_ep0_rx(struct qe_udc *udc)
+ {
+ struct qe_ep *ep = &udc->eps[0];
+ struct qe_frame *pframe;
+ struct qe_bd __iomem *bd;
+ u32 bdstatus, length;
+ u32 vaddr;
+
+ pframe = ep->rxframe;
+
+ if (ep->dir == USB_DIR_IN) {
+ dev_err(udc->dev, "ep0 not a control endpoint\n");
+ return -EINVAL;
+ }
+
+ bd = ep->n_rxbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+
+ while (!(bdstatus & R_E) && length) {
+ if ((bdstatus & R_F) && (bdstatus & R_L)
+ && !(bdstatus & R_ERROR)) {
+ if (length == USB_CRC_SIZE) {
+ udc->ep0_state = WAIT_FOR_SETUP;
+ dev_vdbg(udc->dev,
+ "receive a ZLP in status phase\n");
+ } else {
+ qe_frame_clean(pframe);
+ vaddr = (u32)phys_to_virt(in_be32(&bd->buf));
+ frame_set_data(pframe, (u8 *)vaddr);
+ frame_set_length(pframe,
+ (length - USB_CRC_SIZE));
+ frame_set_status(pframe, FRAME_OK);
+ switch (bdstatus & R_PID) {
+ case R_PID_SETUP:
+ frame_set_info(pframe, PID_SETUP);
+ break;
+ case R_PID_DATA1:
+ frame_set_info(pframe, PID_DATA1);
+ break;
+ default:
+ frame_set_info(pframe, PID_DATA0);
+ break;
+ }
+
+ if ((bdstatus & R_PID) == R_PID_SETUP)
+ ep0_setup_handle(udc);
+ else
+ qe_ep_rxframe_handle(ep);
+ }
+ } else {
+ dev_err(udc->dev, "The receive frame with error!\n");
+ }
+
+ /* note: don't clear the rxbd's buffer address */
+ recycle_one_rxbd(ep);
+
+ /* Get next BD */
+ if (bdstatus & R_W)
+ bd = ep->rxbase;
+ else
+ bd++;
+
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+
+ }
+
+ ep->n_rxbd = bd;
+
+ return 0;
+ }
+
+ static int qe_ep_rxframe_handle(struct qe_ep *ep)
+ {
+ struct qe_frame *pframe;
+ u8 framepid = 0;
+ unsigned int fsize;
+ u8 *cp;
+ struct qe_req *req;
+
+ pframe = ep->rxframe;
+
+ if (frame_get_info(pframe) & PID_DATA1)
+ framepid = 0x1;
+
+ if (framepid != ep->data01) {
+ dev_err(ep->udc->dev, "the data01 error!\n");
+ return -EIO;
+ }
+
+ fsize = frame_get_length(pframe);
+ if (list_empty(&ep->queue)) {
+ dev_err(ep->udc->dev, "the %s have no requeue!\n", ep->name);
+ } else {
+ req = list_entry(ep->queue.next, struct qe_req, queue);
+
+ cp = (u8 *)(req->req.buf) + req->req.actual;
+ if (cp) {
+ memcpy(cp, pframe->data, fsize);
+ req->req.actual += fsize;
+ if ((fsize < ep->ep.maxpacket) ||
+ (req->req.actual >= req->req.length)) {
+ if (ep->epnum == 0)
+ ep0_req_complete(ep->udc, req);
+ else
+ done(ep, req, 0);
+ if (list_empty(&ep->queue) && ep->epnum != 0)
+ qe_eprx_nack(ep);
+ }
+ }
+ }
+
+ qe_ep_toggledata01(ep);
+
+ return 0;
+ }
+
+ static void ep_rx_tasklet(unsigned long data)
+ {
+ struct qe_udc *udc = (struct qe_udc *)data;
+ struct qe_ep *ep;
+ struct qe_frame *pframe;
+ struct qe_bd __iomem *bd;
+ unsigned long flags;
+ u32 bdstatus, length;
+ u32 vaddr, i;
+
+ spin_lock_irqsave(&udc->lock, flags);
+
+ for (i = 1; i < USB_MAX_ENDPOINTS; i++) {
+ ep = &udc->eps[i];
+
+ if (ep->dir == USB_DIR_IN || ep->enable_tasklet == 0) {
+ dev_dbg(udc->dev,
+ "This is a transmit ep or disable tasklet!\n");
+ continue;
+ }
+
+ pframe = ep->rxframe;
+ bd = ep->n_rxbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+
+ while (!(bdstatus & R_E) && length) {
+ if (list_empty(&ep->queue)) {
+ qe_eprx_nack(ep);
+ dev_dbg(udc->dev,
+ "The rxep have noreq %d\n",
+ ep->has_data);
+ break;
+ }
+
+ if ((bdstatus & R_F) && (bdstatus & R_L)
+ && !(bdstatus & R_ERROR)) {
+ qe_frame_clean(pframe);
+ vaddr = (u32)phys_to_virt(in_be32(&bd->buf));
+ frame_set_data(pframe, (u8 *)vaddr);
+ frame_set_length(pframe,
+ (length - USB_CRC_SIZE));
+ frame_set_status(pframe, FRAME_OK);
+ switch (bdstatus & R_PID) {
+ case R_PID_DATA1:
+ frame_set_info(pframe, PID_DATA1);
+ break;
+ case R_PID_SETUP:
+ frame_set_info(pframe, PID_SETUP);
+ break;
+ default:
+ frame_set_info(pframe, PID_DATA0);
+ break;
+ }
+ /* handle the rx frame */
+ qe_ep_rxframe_handle(ep);
+ } else {
+ dev_err(udc->dev,
+ "error in received frame\n");
+ }
+ /* note: don't clear the rxbd's buffer address */
+ /* clear the length */
+ out_be32((u32 __iomem *)bd, bdstatus & BD_STATUS_MASK);
+ ep->has_data--;
+ if (!(ep->localnack))
+ recycle_one_rxbd(ep);
+
+ /* Get next BD */
+ if (bdstatus & R_W)
+ bd = ep->rxbase;
+ else
+ bd++;
+
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+ }
+
+ ep->n_rxbd = bd;
+
+ if (ep->localnack)
+ ep_recycle_rxbds(ep);
+
+ ep->enable_tasklet = 0;
+ } /* for i=1 */
+
+ spin_unlock_irqrestore(&udc->lock, flags);
+ }
+
+ static int qe_ep_rx(struct qe_ep *ep)
+ {
+ struct qe_udc *udc;
+ struct qe_frame *pframe;
+ struct qe_bd __iomem *bd;
+ u16 swoffs, ucoffs, emptybds;
+
+ udc = ep->udc;
+ pframe = ep->rxframe;
+
+ if (ep->dir == USB_DIR_IN) {
+ dev_err(udc->dev, "transmit ep in rx function\n");
+ return -EINVAL;
+ }
+
+ bd = ep->n_rxbd;
+
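+ /* count the empty BDs between the driver's position (swoffs) and
+ * the controller's rbptr (ucoffs), modulo the RX ring length */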
+ swoffs = (u16)(bd - ep->rxbase);
+ ucoffs = (u16)((in_be16(&udc->ep_param[ep->epnum]->rbptr) -
+ in_be16(&udc->ep_param[ep->epnum]->rbase)) >> 3);
+ if (swoffs < ucoffs)
+ emptybds = USB_BDRING_LEN_RX - ucoffs + swoffs;
+ else
+ emptybds = swoffs - ucoffs;
+
+ if (emptybds < MIN_EMPTY_BDS) {
+ qe_eprx_nack(ep);
+ ep->localnack = 1;
+ dev_vdbg(udc->dev, "%d empty bds, send NACK\n", emptybds);
+ }
+ ep->has_data = USB_BDRING_LEN_RX - emptybds;
+
+ if (list_empty(&ep->queue)) {
+ qe_eprx_nack(ep);
+ dev_vdbg(udc->dev, "The rxep have no req queued with %d BDs\n",
+ ep->has_data);
+ return 0;
+ }
+
+ tasklet_schedule(&udc->rx_tasklet);
+ ep->enable_tasklet = 1;
+
+ return 0;
+ }
+
+ /* send the data in a frame, regardless of tx_req */
+ static int qe_ep_tx(struct qe_ep *ep, struct qe_frame *frame)
+ {
+ struct qe_udc *udc = ep->udc;
+ struct qe_bd __iomem *bd;
+ u16 saveusbmr;
+ u32 bdstatus, pidmask;
+ u32 paddr;
+
+ if (ep->dir == USB_DIR_OUT) {
+ dev_err(udc->dev, "receive ep passed to tx function\n");
+ return -EINVAL;
+ }
+
+ /* Disable the Tx interrupt */
+ saveusbmr = in_be16(&udc->usb_regs->usb_usbmr);
+ out_be16(&udc->usb_regs->usb_usbmr,
+ saveusbmr & ~(USB_E_TXB_MASK | USB_E_TXE_MASK));
+
+ bd = ep->n_txbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+
+ if (!(bdstatus & (T_R | BD_LENGTH_MASK))) {
+ if (frame_get_length(frame) == 0) {
+ frame_set_data(frame, udc->nullbuf);
+ frame_set_length(frame, 2);
+ frame->info |= (ZLP | NO_CRC);
+ dev_vdbg(udc->dev, "the frame size = 0\n");
+ }
+ paddr = virt_to_phys((void *)frame->data);
+ out_be32(&bd->buf, paddr);
+ bdstatus = (bdstatus&T_W);
+ if (!(frame_get_info(frame) & NO_CRC))
+ bdstatus |= T_R | T_I | T_L | T_TC
+ | frame_get_length(frame);
+ else
+ bdstatus |= T_R | T_I | T_L | frame_get_length(frame);
+
+ /* if the packet is a ZLP in status phase */
+ if ((ep->epnum == 0) && (udc->ep0_state == DATA_STATE_NEED_ZLP))
+ ep->data01 = 0x1;
+
+ if (ep->data01) {
+ pidmask = T_PID_DATA1;
+ frame->info |= PID_DATA1;
+ } else {
+ pidmask = T_PID_DATA0;
+ frame->info |= PID_DATA0;
+ }
+ bdstatus |= T_CNF;
+ bdstatus |= pidmask;
+ out_be32((u32 __iomem *)bd, bdstatus);
+ qe_ep_filltxfifo(ep);
+
+ /* enable the TX interrupt */
+ out_be16(&udc->usb_regs->usb_usbmr, saveusbmr);
+
+ qe_ep_toggledata01(ep);
+ if (bdstatus & T_W)
+ ep->n_txbd = ep->txbase;
+ else
+ ep->n_txbd++;
+
+ return 0;
+ } else {
+ out_be16(&udc->usb_regs->usb_usbmr, saveusbmr);
+ dev_vdbg(udc->dev, "The tx bd is not ready!\n");
+ return -EBUSY;
+ }
+ }
+
+ /* once a bd has been transmitted, this function handles the
+ * tx_req; ep0 is not included */
+ static int txcomplete(struct qe_ep *ep, unsigned char restart)
+ {
+ if (ep->tx_req != NULL) {
+ if (!restart) {
+ int asent = ep->last;
+ ep->sent += asent;
+ ep->last -= asent;
+ } else {
+ ep->last = 0;
+ }
+
+ /* the request has been transmitted completely */
+ if ((ep->tx_req->req.length - ep->sent) <= 0) {
+ ep->tx_req->req.actual = (unsigned int)ep->sent;
+ done(ep, ep->tx_req, 0);
+ ep->tx_req = NULL;
+ ep->last = 0;
+ ep->sent = 0;
+ }
+ }
+
+ /* we should gain a new tx_req for this endpoint */
+ if (ep->tx_req == NULL) {
+ if (!list_empty(&ep->queue)) {
+ ep->tx_req = list_entry(ep->queue.next, struct qe_req,
+ queue);
+ ep->last = 0;
+ ep->sent = 0;
+ }
+ }
+
+ return 0;
+ }
+
+ /* give a frame and a tx_req, send some data */
+ static int qe_usb_senddata(struct qe_ep *ep, struct qe_frame *frame)
+ {
+ unsigned int size;
+ u8 *buf;
+
+ qe_frame_clean(frame);
+ size = min_t(u32, (ep->tx_req->req.length - ep->sent),
+ ep->ep.maxpacket);
+ buf = (u8 *)ep->tx_req->req.buf + ep->sent;
+ if (buf && size) {
+ ep->last = size;
+ frame_set_data(frame, buf);
+ frame_set_length(frame, size);
+ frame_set_status(frame, FRAME_OK);
+ frame_set_info(frame, 0);
+ return qe_ep_tx(ep, frame);
+ }
+ return -EIO;
+ }
+
+ /* given a frame struct, send a ZLP */
+ static int sendnulldata(struct qe_ep *ep, struct qe_frame *frame, uint infor)
+ {
+ struct qe_udc *udc = ep->udc;
+
+ if (frame == NULL)
+ return -ENODEV;
+
+ qe_frame_clean(frame);
+ frame_set_data(frame, (u8 *)udc->nullbuf);
+ frame_set_length(frame, 2);
+ frame_set_status(frame, FRAME_OK);
+ frame_set_info(frame, (ZLP | NO_CRC | infor));
+
+ return qe_ep_tx(ep, frame);
+ }
+
+ static int frame_create_tx(struct qe_ep *ep, struct qe_frame *frame)
+ {
+ struct qe_req *req = ep->tx_req;
+ int reval;
+
+ if (req == NULL)
+ return -ENODEV;
+
+ if ((req->req.length - ep->sent) > 0)
+ reval = qe_usb_senddata(ep, frame);
+ else
+ reval = sendnulldata(ep, frame, 0);
+
+ return reval;
+ }
+
+ /* if direction is DIR_IN, the status phase is Device->Host
+ * if direction is DIR_OUT, the status phase is Host->Device
+ * in the status phase, the udc creates a request and gathers the status */
+ static int ep0_prime_status(struct qe_udc *udc, int direction)
+ {
+
+ struct qe_ep *ep = &udc->eps[0];
+
+ if (direction == USB_DIR_IN) {
+ udc->ep0_state = DATA_STATE_NEED_ZLP;
+ udc->ep0_dir = USB_DIR_IN;
+ sendnulldata(ep, ep->txframe, SETUP_STATUS | NO_REQ);
+ } else {
+ udc->ep0_dir = USB_DIR_OUT;
+ udc->ep0_state = WAIT_FOR_OUT_STATUS;
+ }
+
+ return 0;
+ }
+
+ /* a request completed on ep0, whether a gadget request or a udc request */
+ static void ep0_req_complete(struct qe_udc *udc, struct qe_req *req)
+ {
+ struct qe_ep *ep = &udc->eps[0];
+ /* the usb and ep status have already been set in ch9setaddress() */
+
+ switch (udc->ep0_state) {
+ case DATA_STATE_XMIT:
+ done(ep, req, 0);
+ /* receive status phase */
+ if (ep0_prime_status(udc, USB_DIR_OUT))
+ qe_ep0_stall(udc);
+ break;
+
+ case DATA_STATE_NEED_ZLP:
+ done(ep, req, 0);
+ udc->ep0_state = WAIT_FOR_SETUP;
+ break;
+
+ case DATA_STATE_RECV:
+ done(ep, req, 0);
+ /* send status phase */
+ if (ep0_prime_status(udc, USB_DIR_IN))
+ qe_ep0_stall(udc);
+ break;
+
+ case WAIT_FOR_OUT_STATUS:
+ done(ep, req, 0);
+ udc->ep0_state = WAIT_FOR_SETUP;
+ break;
+
+ case WAIT_FOR_SETUP:
+ dev_vdbg(udc->dev, "Unexpected interrupt\n");
+ break;
+
+ default:
+ qe_ep0_stall(udc);
+ break;
+ }
+ }
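The ep0 request-completion flow above, condensed into a standalone transition function; the state names are placeholders mirroring the driver's, and the stall paths are collapsed into the default case.

#include <stdio.h>

enum st { XMIT, NEED_ZLP, RECV, WAIT_OUT_STATUS, WAIT_SETUP };

/* next ep0 state once a request completes in state s; XMIT also
 * primes an OUT status phase and RECV primes an IN status ZLP */
static enum st ep0_next(enum st s)
{
	switch (s) {
	case XMIT:            return WAIT_OUT_STATUS;
	case RECV:            return NEED_ZLP;
	case NEED_ZLP:
	case WAIT_OUT_STATUS: return WAIT_SETUP;
	default:              return WAIT_SETUP; /* the driver stalls here instead */
	}
}

int main(void)
{
	printf("%d\n", ep0_next(XMIT)); /* 3: wait for the OUT status phase */
	return 0;
}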
+
+ static int ep0_txcomplete(struct qe_ep *ep, unsigned char restart)
+ {
+ struct qe_req *tx_req = NULL;
+ struct qe_frame *frame = ep->txframe;
+
+ if ((frame_get_info(frame) & (ZLP | NO_REQ)) == (ZLP | NO_REQ)) {
+ if (!restart)
+ ep->udc->ep0_state = WAIT_FOR_SETUP;
+ else
+ sendnulldata(ep, ep->txframe, SETUP_STATUS | NO_REQ);
+ return 0;
+ }
+
+ tx_req = ep->tx_req;
+ if (tx_req != NULL) {
+ if (!restart) {
+ int asent = ep->last;
+ ep->sent += asent;
+ ep->last -= asent;
+ } else {
+ ep->last = 0;
+ }
+
+ /* the request has been transmitted completely */
+ if ((ep->tx_req->req.length - ep->sent) <= 0) {
+ ep->tx_req->req.actual = (unsigned int)ep->sent;
+ ep0_req_complete(ep->udc, ep->tx_req);
+ ep->tx_req = NULL;
+ ep->last = 0;
+ ep->sent = 0;
+ }
+ } else {
+ dev_vdbg(ep->udc->dev, "the ep0_controller have no req\n");
+ }
+
+ return 0;
+ }
+
+ static int ep0_txframe_handle(struct qe_ep *ep)
+ {
+ /* if there was an error, transmit again */
+ if (frame_get_status(ep->txframe) & FRAME_ERROR) {
+ qe_ep_flushtxfifo(ep);
+ dev_vdbg(ep->udc->dev, "The EP0 transmit data have error!\n");
+ if (frame_get_info(ep->txframe) & PID_DATA0)
+ ep->data01 = 0;
+ else
+ ep->data01 = 1;
+
+ ep0_txcomplete(ep, 1);
+ } else
+ ep0_txcomplete(ep, 0);
+
+ frame_create_tx(ep, ep->txframe);
+ return 0;
+ }
+
+ static int qe_ep0_txconf(struct qe_ep *ep)
+ {
+ struct qe_bd __iomem *bd;
+ struct qe_frame *pframe;
+ u32 bdstatus;
+
+ bd = ep->c_txbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ while (!(bdstatus & T_R) && (bdstatus & ~T_W)) {
+ pframe = ep->txframe;
+
+ /* clear and recycle the BD */
+ out_be32((u32 __iomem *)bd, bdstatus & T_W);
+ out_be32(&bd->buf, 0);
+ if (bdstatus & T_W)
+ ep->c_txbd = ep->txbase;
+ else
+ ep->c_txbd++;
+
+ if (ep->c_txbd == ep->n_txbd) {
+ if (bdstatus & DEVICE_T_ERROR) {
+ frame_set_status(pframe, FRAME_ERROR);
+ if (bdstatus & T_TO)
+ pframe->status |= TX_ER_TIMEOUT;
+ if (bdstatus & T_UN)
+ pframe->status |= TX_ER_UNDERUN;
+ }
+ ep0_txframe_handle(ep);
+ }
+
+ bd = ep->c_txbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ }
+
+ return 0;
+ }
+
+ static int ep_txframe_handle(struct qe_ep *ep)
+ {
+ if (frame_get_status(ep->txframe) & FRAME_ERROR) {
+ qe_ep_flushtxfifo(ep);
+ dev_vdbg(ep->udc->dev, "The EP0 transmit data have error!\n");
+ if (frame_get_info(ep->txframe) & PID_DATA0)
+ ep->data01 = 0;
+ else
+ ep->data01 = 1;
+
+ txcomplete(ep, 1);
+ } else
+ txcomplete(ep, 0);
+
+ frame_create_tx(ep, ep->txframe); /* send the data */
+ return 0;
+ }
+
+ /* confirm the already transmitted BDs */
+ static int qe_ep_txconf(struct qe_ep *ep)
+ {
+ struct qe_bd __iomem *bd;
+ struct qe_frame *pframe = NULL;
+ u32 bdstatus;
+ unsigned char breakonrxinterrupt = 0;
+
+ bd = ep->c_txbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ while (!(bdstatus & T_R) && (bdstatus & ~T_W)) {
+ pframe = ep->txframe;
+ if (bdstatus & DEVICE_T_ERROR) {
+ frame_set_status(pframe, FRAME_ERROR);
+ if (bdstatus & T_TO)
+ pframe->status |= TX_ER_TIMEOUT;
+ if (bdstatus & T_UN)
+ pframe->status |= TX_ER_UNDERUN;
+ }
+
+ /* clear and recycle the BD */
+ out_be32((u32 __iomem *)bd, bdstatus & T_W);
+ out_be32(&bd->buf, 0);
+ if (bdstatus & T_W)
+ ep->c_txbd = ep->txbase;
+ else
+ ep->c_txbd++;
+
+ /* handle the tx frame */
+ ep_txframe_handle(ep);
+ bd = ep->c_txbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ }
+ if (breakonrxinterrupt)
+ return -EIO;
+ else
+ return 0;
+ }
+
+ /* Add a request in queue, and try to transmit a packet */
+ static int ep_req_send(struct qe_ep *ep, struct qe_req *req)
+ {
+ int reval = 0;
+
+ if (ep->tx_req == NULL) {
+ ep->sent = 0;
+ ep->last = 0;
+ txcomplete(ep, 0); /* can gain a new tx_req */
+ reval = frame_create_tx(ep, ep->txframe);
+ }
+ return reval;
+ }
+
+ /* copy received data from the BD ring into the current request */
+ static int ep_req_rx(struct qe_ep *ep, struct qe_req *req)
+ {
+ struct qe_udc *udc = ep->udc;
+ struct qe_frame *pframe = NULL;
+ struct qe_bd __iomem *bd;
+ u32 bdstatus, length;
+ u32 vaddr, fsize;
+ u8 *cp;
+ u8 finish_req = 0;
+ u8 framepid;
+
+ if (list_empty(&ep->queue)) {
+ dev_vdbg(udc->dev, "the req already finish!\n");
+ return 0;
+ }
+ pframe = ep->rxframe;
+
+ bd = ep->n_rxbd;
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+
+ while (!(bdstatus & R_E) && length) {
+ if (finish_req)
+ break;
+ if ((bdstatus & R_F) && (bdstatus & R_L)
+ && !(bdstatus & R_ERROR)) {
+ qe_frame_clean(pframe);
+ vaddr = (u32)phys_to_virt(in_be32(&bd->buf));
+ frame_set_data(pframe, (u8 *)vaddr);
+ frame_set_length(pframe, (length - USB_CRC_SIZE));
+ frame_set_status(pframe, FRAME_OK);
+ switch (bdstatus & R_PID) {
+ case R_PID_DATA1:
+ frame_set_info(pframe, PID_DATA1); break;
+ default:
+ frame_set_info(pframe, PID_DATA0); break;
+ }
+ /* handle the rx frame */
+
+ if (frame_get_info(pframe) & PID_DATA1)
+ framepid = 0x1;
+ else
+ framepid = 0;
+
+ if (framepid != ep->data01) {
+ dev_vdbg(udc->dev, "the data01 error!\n");
+ } else {
+ fsize = frame_get_length(pframe);
+
+ cp = (u8 *)(req->req.buf) + req->req.actual;
+ if (cp) {
+ memcpy(cp, pframe->data, fsize);
+ req->req.actual += fsize;
+ if ((fsize < ep->ep.maxpacket)
+ || (req->req.actual >=
+ req->req.length)) {
+ finish_req = 1;
+ done(ep, req, 0);
+ if (list_empty(&ep->queue))
+ qe_eprx_nack(ep);
+ }
+ }
+ qe_ep_toggledata01(ep);
+ }
+ } else {
+ dev_err(udc->dev, "The receive frame with error!\n");
+ }
+
+ /* note: don't clear the rxbd's buffer address;
+ * only clear the length */
+ out_be32((u32 __iomem *)bd, (bdstatus & BD_STATUS_MASK));
+ ep->has_data--;
+
+ /* Get next BD */
+ if (bdstatus & R_W)
+ bd = ep->rxbase;
+ else
+ bd++;
+
+ bdstatus = in_be32((u32 __iomem *)bd);
+ length = bdstatus & BD_LENGTH_MASK;
+ }
+
+ ep->n_rxbd = bd;
+ ep_recycle_rxbds(ep);
+
+ return 0;
+ }
+
+ /* only add the request in queue */
+ static int ep_req_receive(struct qe_ep *ep, struct qe_req *req)
+ {
+ if (ep->state == EP_STATE_NACK) {
+ if (ep->has_data <= 0) {
+ /* Enable rx and unmask rx interrupt */
+ qe_eprx_normal(ep);
+ } else {
+ /* Copy the existing BD data */
+ ep_req_rx(ep, req);
+ }
+ }
+
+ return 0;
+ }
+
+ /********************************************************************
+ Internal Used Function End
+ ********************************************************************/
+
+ /*-----------------------------------------------------------------------
+ Endpoint Management Functions For Gadget
+ -----------------------------------------------------------------------*/
+ static int qe_ep_enable(struct usb_ep *_ep,
+ const struct usb_endpoint_descriptor *desc)
+ {
+ struct qe_udc *udc;
+ struct qe_ep *ep;
+ int retval = 0;
+ unsigned char epnum;
+
+ ep = container_of(_ep, struct qe_ep, ep);
+
+ /* catch various bogus parameters */
+ if (!_ep || !desc || ep->desc || _ep->name == ep_name[0] ||
+ (desc->bDescriptorType != USB_DT_ENDPOINT))
+ return -EINVAL;
+
+ udc = ep->udc;
+ if (!udc->driver || (udc->gadget.speed == USB_SPEED_UNKNOWN))
+ return -ESHUTDOWN;
+
+ epnum = (u8)desc->bEndpointAddress & 0xF;
+
+ retval = qe_ep_init(udc, epnum, desc);
+ if (retval != 0) {
+ cpm_muram_free(cpm_muram_offset(ep->rxbase));
+ dev_dbg(udc->dev, "enable ep%d failed\n", ep->epnum);
+ return -EINVAL;
+ }
+ dev_dbg(udc->dev, "enable ep%d successful\n", ep->epnum);
+ return 0;
+ }
+
+ static int qe_ep_disable(struct usb_ep *_ep)
+ {
+ struct qe_udc *udc;
+ struct qe_ep *ep;
+ unsigned long flags;
+ unsigned int size;
+
+ ep = container_of(_ep, struct qe_ep, ep);
+ udc = ep->udc;
+
+ if (!_ep || !ep->desc) {
+ dev_dbg(udc->dev, "%s not enabled\n", _ep ? ep->ep.name : NULL);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&udc->lock, flags);
+ /* Nuke all pending requests (does flush) */
+ nuke(ep, -ESHUTDOWN);
+ ep->desc = NULL;
+ ep->stopped = 1;
+ spin_unlock_irqrestore(&udc->lock, flags);
+
+ cpm_muram_free(cpm_muram_offset(ep->rxbase));
+
+ if (ep->dir == USB_DIR_OUT)
+ size = (ep->ep.maxpacket + USB_CRC_SIZE + 2) *
+ (USB_BDRING_LEN_RX + 1);
+ else
+ size = (ep->ep.maxpacket + USB_CRC_SIZE + 2) *
+ (USB_BDRING_LEN + 1);
+
+ if (ep->dir != USB_DIR_IN) {
+ kfree(ep->rxframe);
+ if (ep->rxbufmap) {
+ dma_unmap_single(udc_controller->gadget.dev.parent,
+ ep->rxbuf_d, size,
+ DMA_FROM_DEVICE);
+ ep->rxbuf_d = DMA_ADDR_INVALID;
+ } else {
+ dma_sync_single_for_cpu(
+ udc_controller->gadget.dev.parent,
+ ep->rxbuf_d, size,
+ DMA_FROM_DEVICE);
+ }
+ kfree(ep->rxbuffer);
+ }
+
+ if (ep->dir != USB_DIR_OUT)
+ kfree(ep->txframe);
+
+ dev_dbg(udc->dev, "disabled %s OK\n", _ep->name);
+ return 0;
+ }
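The size freed above follows the ring-buffer formula used at allocation time: each of the ring's N+1 slots holds maxpacket bytes plus the 2-byte USB CRC plus 2 extra bytes, per the driver's formula. A standalone check of the arithmetic, assuming a 64-byte endpoint and a 16-entry ring:

#include <stdio.h>

static unsigned ring_bytes(unsigned maxpacket, unsigned ring_len)
{
	const unsigned crc = 2;        /* USB_CRC_SIZE */

	return (maxpacket + crc + 2) * (ring_len + 1);
}

int main(void)
{
	printf("%u bytes\n", ring_bytes(64, 16)); /* 68 * 17 = 1156 */
	return 0;
}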
+
+ static struct usb_request *qe_alloc_request(struct usb_ep *_ep, gfp_t gfp_flags)
+ {
+ struct qe_req *req;
+
+ req = kzalloc(sizeof(*req), gfp_flags);
+ if (!req)
+ return NULL;
+
+ req->req.dma = DMA_ADDR_INVALID;
+
+ INIT_LIST_HEAD(&req->queue);
+
+ return &req->req;
+ }
+
+ static void qe_free_request(struct usb_ep *_ep, struct usb_request *_req)
+ {
+ struct qe_req *req;
+
+ req = container_of(_req, struct qe_req, req);
+
+ if (_req)
+ kfree(req);
+ }
+
+ /* queues (submits) an I/O request to an endpoint */
+ static int qe_ep_queue(struct usb_ep *_ep, struct usb_request *_req,
+ gfp_t gfp_flags)
+ {
+ struct qe_ep *ep = container_of(_ep, struct qe_ep, ep);
+ struct qe_req *req = container_of(_req, struct qe_req, req);
+ struct qe_udc *udc;
+ unsigned long flags;
+ int reval;
+
+ udc = ep->udc;
+ /* catch various bogus parameters */
+ if (!_req || !req->req.complete || !req->req.buf
+ || !list_empty(&req->queue)) {
+ dev_dbg(udc->dev, "bad params\n");
+ return -EINVAL;
+ }
+ if (!_ep || (!ep->desc && ep_index(ep))) {
+ dev_dbg(udc->dev, "bad ep\n");
+ return -EINVAL;
+ }
+
+ if (!udc->driver || udc->gadget.speed == USB_SPEED_UNKNOWN)
+ return -ESHUTDOWN;
+
+ req->ep = ep;
+
+ /* map virtual address to hardware */
+ if (req->req.dma == DMA_ADDR_INVALID) {
+ req->req.dma = dma_map_single(ep->udc->gadget.dev.parent,
+ req->req.buf,
+ req->req.length,
+ ep_is_in(ep)
+ ? DMA_TO_DEVICE :
+ DMA_FROM_DEVICE);
+ req->mapped = 1;
+ } else {
+ dma_sync_single_for_device(ep->udc->gadget.dev.parent,
+ req->req.dma, req->req.length,
+ ep_is_in(ep)
+ ? DMA_TO_DEVICE :
+ DMA_FROM_DEVICE);
+ req->mapped = 0;
+ }
+
+ req->req.status = -EINPROGRESS;
+ req->req.actual = 0;
+
+ list_add_tail(&req->queue, &ep->queue);
+ dev_vdbg(udc->dev, "gadget have request in %s! %d\n",
+ ep->name, req->req.length);
+ spin_lock_irqsave(&udc->lock, flags);
+ /* push the request to device */
+ if (ep_is_in(ep))
+ reval = ep_req_send(ep, req);
+
+ /* EP0 */
+ if (ep_index(ep) == 0 && req->req.length > 0) {
+ if (ep_is_in(ep))
+ udc->ep0_state = DATA_STATE_XMIT;
+ else
+ udc->ep0_state = DATA_STATE_RECV;
+ }
+
+ if (ep->dir == USB_DIR_OUT)
+ reval = ep_req_receive(ep, req);
+
+ spin_unlock_irqrestore(&udc->lock, flags);
+
+ return 0;
+ }
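A hedged model of the map-or-sync decision in the queue path above: a request arriving with DMA_ADDR_INVALID is mapped here and flagged so the completion path unmaps it, while a pre-mapped request is only synced. The dma calls are printf stand-ins, not the kernel DMA API.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define DMA_ADDR_INVALID ((uint64_t)~0ULL)

struct xfer { uint64_t dma; bool mapped; };

static void prepare_for_hw(struct xfer *x)
{
	if (x->dma == DMA_ADDR_INVALID) {
		x->dma = 0x1000;       /* pretend dma_map_single() result */
		x->mapped = true;      /* we own the mapping; unmap on done */
		puts("mapped buffer for device");
	} else {
		x->mapped = false;     /* caller owns it; just sync caches */
		puts("synced pre-mapped buffer");
	}
}

int main(void)
{
	struct xfer a = { DMA_ADDR_INVALID, false };
	struct xfer b = { 0x2000, false };

	prepare_for_hw(&a);
	prepare_for_hw(&b);
	return 0;
}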
+
+ /* dequeues (cancels, unlinks) an I/O request from an endpoint */
+ static int qe_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req)
+ {
+ struct qe_ep *ep = container_of(_ep, struct qe_ep, ep);
+ struct qe_req *req;
+ unsigned long flags;
+
+ if (!_ep || !_req)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ep->udc->lock, flags);
+
+ /* make sure it's actually queued on this endpoint */
+ list_for_each_entry(req, &ep->queue, queue) {
+ if (&req->req == _req)
+ break;
+ }
+
+ if (&req->req != _req) {
+ spin_unlock_irqrestore(&ep->udc->lock, flags);
+ return -EINVAL;
+ }
+
+ done(ep, req, -ECONNRESET);
+
+ spin_unlock_irqrestore(&ep->udc->lock, flags);
+ return 0;
+ }
+
+ /*-----------------------------------------------------------------
+ * modify the endpoint halt feature
+ * @ep: the non-isochronous endpoint being stalled
+ * @value: 1--set halt 0--clear halt
+ * Returns zero, or a negative error code.
+ *----------------------------------------------------------------*/
+ static int qe_ep_set_halt(struct usb_ep *_ep, int value)
+ {
+ struct qe_ep *ep;
+ unsigned long flags;
+ int status = -EOPNOTSUPP;
+ struct qe_udc *udc;
+
+ ep = container_of(_ep, struct qe_ep, ep);
+ if (!_ep || !ep->desc) {
+ status = -EINVAL;
+ goto out;
+ }
+
+ udc = ep->udc;
+ /* Attempting to halt an IN ep will fail if any transfer requests
+ * are still queued */
+ if (value && ep_is_in(ep) && !list_empty(&ep->queue)) {
+ status = -EAGAIN;
+ goto out;
+ }
+
+ status = 0;
+ spin_lock_irqsave(&ep->udc->lock, flags);
+ qe_eptx_stall_change(ep, value);
+ qe_eprx_stall_change(ep, value);
+ spin_unlock_irqrestore(&ep->udc->lock, flags);
+
+ if (ep->epnum == 0) {
+ udc->ep0_state = WAIT_FOR_SETUP;
+ udc->ep0_dir = 0;
+ }
+
+ /* set data toggle to DATA0 on clear halt */
+ if (value == 0)
+ ep->data01 = 0;
+ out:
+ dev_vdbg(udc->dev, "%s %s halt stat %d\n", ep->ep.name,
+ value ? "set" : "clear", status);
+
+ return status;
+ }
+
+ static struct usb_ep_ops qe_ep_ops = {
+ .enable = qe_ep_enable,
+ .disable = qe_ep_disable,
+
+ .alloc_request = qe_alloc_request,
+ .free_request = qe_free_request,
+
+ .queue = qe_ep_queue,
+ .dequeue = qe_ep_dequeue,
+
+ .set_halt = qe_ep_set_halt,
+ };
+
+ /*------------------------------------------------------------------------
+ Gadget Driver Layer Operations
+ ------------------------------------------------------------------------*/
+
+ /* Get the current frame number */
+ static int qe_get_frame(struct usb_gadget *gadget)
+ {
+ u16 tmp;
+
+ tmp = in_be16(&udc_controller->usb_param->frame_n);
+ if (tmp & 0x8000)
+ tmp = tmp & 0x07ff;
+ else
+ tmp = -EINVAL;
+
+ return (int)tmp;
+ }
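The decode above assumes bit 15 of frame_n flags a valid sample and bits 10..0 carry the 11-bit USB frame number; a standalone model:

#include <stdint.h>
#include <stdio.h>
#include <errno.h>

static int decode_frame(uint16_t raw)
{
	return (raw & 0x8000) ? (int)(raw & 0x07ff) : -EINVAL;
}

int main(void)
{
	printf("%d\n", decode_frame(0x8123)); /* valid -> 0x123 == 291 */
	printf("%d\n", decode_frame(0x0123)); /* stale -> -EINVAL */
	return 0;
}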
+
+ /* Tries to wake up the host connected to this gadget
+ *
+ * Return : 0-success
+ * Negative-this feature not enabled by host or not supported by device hw
+ */
+ static int qe_wakeup(struct usb_gadget *gadget)
+ {
+ return -ENOTSUPP;
+ }
+
+ /* Notify the controller that VBUS is powered; called by whatever
+ detects VBUS sessions */
+ static int qe_vbus_session(struct usb_gadget *gadget, int is_active)
+ {
+ return -ENOTSUPP;
+ }
+
+ /* constrain controller's VBUS power usage
+ * This call is used by gadget drivers during SET_CONFIGURATION calls,
+ * reporting how much power the device may consume. For example, this
+ * could affect how quickly batteries are recharged.
+ *
+ * Returns zero on success, else negative errno.
+ */
+ static int qe_vbus_draw(struct usb_gadget *gadget, unsigned mA)
+ {
+ return -ENOTSUPP;
+ }
+
+ /* Change Data+ pullup status
+ * this func is used by usb_gadget_connect/disconnect
+ */
+ static int qe_pullup(struct usb_gadget *gadget, int is_on)
+ {
+ return -ENOTSUPP;
+ }
+
+ /* defined in usb_gadget.h */
+ static struct usb_gadget_ops qe_gadget_ops = {
+ .get_frame = qe_get_frame,
+ .wakeup = qe_wakeup,
+ /* .set_selfpowered = qe_set_selfpowered,*/ /* always selfpowered */
+ .vbus_session = qe_vbus_session,
+ .vbus_draw = qe_vbus_draw,
+ .pullup = qe_pullup,
+ };
+
+ /*-------------------------------------------------------------------------
+ USB ep0 Setup process in BUS Enumeration
+ -------------------------------------------------------------------------*/
+ static int udc_reset_ep_queue(struct qe_udc *udc, u8 pipe)
+ {
+ struct qe_ep *ep = &udc->eps[pipe];
+
+ nuke(ep, -ECONNRESET);
+ ep->tx_req = NULL;
+ return 0;
+ }
+
+ static int reset_queues(struct qe_udc *udc)
+ {
+ u8 pipe;
+
+ for (pipe = 0; pipe < USB_MAX_ENDPOINTS; pipe++)
+ udc_reset_ep_queue(udc, pipe);
+
+ /* report disconnect; the driver is already quiesced */
+ spin_unlock(&udc->lock);
+ udc->driver->disconnect(&udc->gadget);
+ spin_lock(&udc->lock);
+
+ return 0;
+ }
+
+ static void ch9setaddress(struct qe_udc *udc, u16 value, u16 index,
+ u16 length)
+ {
+ /* Save the new address to device struct */
+ udc->device_address = (u8) value;
+ /* Update usb state */
+ udc->usb_state = USB_STATE_ADDRESS;
+
+ /* Status phase , send a ZLP */
+ if (ep0_prime_status(udc, USB_DIR_IN))
+ qe_ep0_stall(udc);
+ }
+
+ static void ownercomplete(struct usb_ep *_ep, struct usb_request *_req)
+ {
+ struct qe_req *req = container_of(_req, struct qe_req, req);
+
+ req->req.buf = NULL;
+ kfree(req);
+ }
+
+ static void ch9getstatus(struct qe_udc *udc, u8 request_type, u16 value,
+ u16 index, u16 length)
+ {
+ u16 usb_status = 0;
+ struct qe_req *req;
+ struct qe_ep *ep;
+ int status = 0;
+
+ ep = &udc->eps[0];
+ if ((request_type & USB_RECIP_MASK) == USB_RECIP_DEVICE) {
+ /* Get device status */
+ usb_status = 1 << USB_DEVICE_SELF_POWERED;
+ } else if ((request_type & USB_RECIP_MASK) == USB_RECIP_INTERFACE) {
+ /* Get interface status */
+ /* We don't have interface information in udc driver */
+ usb_status = 0;
+ } else if ((request_type & USB_RECIP_MASK) == USB_RECIP_ENDPOINT) {
+ /* Get endpoint status */
+ int pipe = index & USB_ENDPOINT_NUMBER_MASK;
+ struct qe_ep *target_ep = &udc->eps[pipe];
+ u16 usep;
+
+ /* stall if endpoint doesn't exist */
+ if (!target_ep->desc)
+ goto stall;
+
+ usep = in_be16(&udc->usb_regs->usb_usep[pipe]);
+ if (index & USB_DIR_IN) {
+ if (target_ep->dir != USB_DIR_IN)
+ goto stall;
+ if ((usep & USB_THS_MASK) == USB_THS_STALL)
+ usb_status = 1 << USB_ENDPOINT_HALT;
+ } else {
+ if (target_ep->dir != USB_DIR_OUT)
+ goto stall;
+ if ((usep & USB_RHS_MASK) == USB_RHS_STALL)
+ usb_status = 1 << USB_ENDPOINT_HALT;
+ }
+ }
+
+ req = container_of(qe_alloc_request(&ep->ep, GFP_KERNEL),
+ struct qe_req, req);
+ req->req.length = 2;
+ req->req.buf = udc->statusbuf;
+ *(u16 *)req->req.buf = cpu_to_le16(usb_status);
+ req->req.status = -EINPROGRESS;
+ req->req.actual = 0;
+ req->req.complete = ownercomplete;
+
+ udc->ep0_dir = USB_DIR_IN;
+
+ /* data phase */
+ status = qe_ep_queue(&ep->ep, &req->req, GFP_ATOMIC);
+
+ if (status == 0)
+ return;
+ stall:
+ dev_err(udc->dev, "Can't respond to getstatus request \n");
+ qe_ep0_stall(udc);
+ }
+
+ /* only handles the setup request, assuming the device is in a normal state */
+ static void setup_received_handle(struct qe_udc *udc,
+ struct usb_ctrlrequest *setup)
+ {
+ /* Fix endianness (udc->local_setup_buff is CPU-endian now) */
+ u16 wValue = le16_to_cpu(setup->wValue);
+ u16 wIndex = le16_to_cpu(setup->wIndex);
+ u16 wLength = le16_to_cpu(setup->wLength);
+
+ /* clear the previous request in the ep0 */
+ udc_reset_ep_queue(udc, 0);
+
+ if (setup->bRequestType & USB_DIR_IN)
+ udc->ep0_dir = USB_DIR_IN;
+ else
+ udc->ep0_dir = USB_DIR_OUT;
+
+ switch (setup->bRequest) {
+ case USB_REQ_GET_STATUS:
+ /* Data+Status phase from udc */
+ if ((setup->bRequestType & (USB_DIR_IN | USB_TYPE_MASK))
+ != (USB_DIR_IN | USB_TYPE_STANDARD))
+ break;
+ ch9getstatus(udc, setup->bRequestType, wValue, wIndex,
+ wLength);
+ return;
+
+ case USB_REQ_SET_ADDRESS:
+ /* Status phase from udc */
+ if (setup->bRequestType != (USB_DIR_OUT | USB_TYPE_STANDARD |
+ USB_RECIP_DEVICE))
+ break;
+ ch9setaddress(udc, wValue, wIndex, wLength);
+ return;
+
+ case USB_REQ_CLEAR_FEATURE:
+ case USB_REQ_SET_FEATURE:
+ /* Requests with no data phase, status phase from udc */
+ if ((setup->bRequestType & USB_TYPE_MASK)
+ != USB_TYPE_STANDARD)
+ break;
+
+ if ((setup->bRequestType & USB_RECIP_MASK)
+ == USB_RECIP_ENDPOINT) {
+ int pipe = wIndex & USB_ENDPOINT_NUMBER_MASK;
+ struct qe_ep *ep;
+
+ if (wValue != 0 || wLength != 0
+ || pipe > USB_MAX_ENDPOINTS)
+ break;
+ ep = &udc->eps[pipe];
+
+ spin_unlock(&udc->lock);
+ qe_ep_set_halt(&ep->ep,
+ (setup->bRequest == USB_REQ_SET_FEATURE)
+ ? 1 : 0);
+ spin_lock(&udc->lock);
+ }
+
+ ep0_prime_status(udc, USB_DIR_IN);
+
+ return;
+
+ default:
+ break;
+ }
+
+ if (wLength) {
+ /* Data phase from gadget, status phase from udc */
+ if (setup->bRequestType & USB_DIR_IN) {
+ udc->ep0_state = DATA_STATE_XMIT;
+ udc->ep0_dir = USB_DIR_IN;
+ } else {
+ udc->ep0_state = DATA_STATE_RECV;
+ udc->ep0_dir = USB_DIR_OUT;
+ }
+ spin_unlock(&udc->lock);
+ if (udc->driver->setup(&udc->gadget,
+ &udc->local_setup_buff) < 0)
+ qe_ep0_stall(udc);
+ spin_lock(&udc->lock);
+ } else {
+ /* No data phase, IN status from gadget */
+ udc->ep0_dir = USB_DIR_IN;
+ spin_unlock(&udc->lock);
+ if (udc->driver->setup(&udc->gadget,
+ &udc->local_setup_buff) < 0)
+ qe_ep0_stall(udc);
+ spin_lock(&udc->lock);
+ udc->ep0_state = DATA_STATE_NEED_ZLP;
+ }
+ }
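The dispatch above relies on the standard ch9 layout of bmRequestType (USB 2.0 spec, chapter 9): bit 7 is the direction, bits 6:5 the type, bits 4:0 the recipient. A small decode example using the spec's mask values:

#include <stdint.h>
#include <stdio.h>

#define DIR_IN     0x80                /* USB_DIR_IN */
#define TYPE_MASK  0x60                /* 0 standard, 1 class, 2 vendor */
#define RECIP_MASK 0x1f                /* 0 device, 1 interface, 2 endpoint */

int main(void)
{
	uint8_t rt = 0x82;             /* IN | standard | endpoint */

	printf("in=%d type=%d recip=%d\n",
	       !!(rt & DIR_IN), (rt >> 5) & 0x3, rt & RECIP_MASK);
	return 0;
}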
+
+ /*-------------------------------------------------------------------------
+ USB Interrupt handlers
+ -------------------------------------------------------------------------*/
+ static void suspend_irq(struct qe_udc *udc)
+ {
+ udc->resume_state = udc->usb_state;
+ udc->usb_state = USB_STATE_SUSPENDED;
+
+ /* report suspend to the driver; serial.c does not support this */
+ if (udc->driver->suspend)
+ udc->driver->suspend(&udc->gadget);
+ }
+
+ static void resume_irq(struct qe_udc *udc)
+ {
+ udc->usb_state = udc->resume_state;
+ udc->resume_state = 0;
+
+ /* report resume to the driver; serial.c does not support this */
+ if (udc->driver->resume)
+ udc->driver->resume(&udc->gadget);
+ }
+
+ static void idle_irq(struct qe_udc *udc)
+ {
+ u8 usbs;
+
+ usbs = in_8(&udc->usb_regs->usb_usbs);
+ if (usbs & USB_IDLE_STATUS_MASK) {
+ if ((udc->usb_state) != USB_STATE_SUSPENDED)
+ suspend_irq(udc);
+ } else {
+ if (udc->usb_state == USB_STATE_SUSPENDED)
+ resume_irq(udc);
+ }
+ }
+
+ static int reset_irq(struct qe_udc *udc)
+ {
+ unsigned char i;
+
+ qe_usb_disable();
+ out_8(&udc->usb_regs->usb_usadr, 0);
+
+ for (i = 0; i < USB_MAX_ENDPOINTS; i++) {
+ if (udc->eps[i].init)
+ qe_ep_reset(udc, i);
+ }
+
+ reset_queues(udc);
+ udc->usb_state = USB_STATE_DEFAULT;
+ udc->ep0_state = WAIT_FOR_SETUP;
+ udc->ep0_dir = USB_DIR_OUT;
+ qe_usb_enable();
+ return 0;
+ }
+
+ static int bsy_irq(struct qe_udc *udc)
+ {
+ return 0;
+ }
+
+ static int txe_irq(struct qe_udc *udc)
+ {
+ return 0;
+ }
+
+ /* the ep0 tx interrupt is also handled here */
+ static int tx_irq(struct qe_udc *udc)
+ {
+ struct qe_ep *ep;
+ struct qe_bd __iomem *bd;
+ int i, res = 0;
+
+ if ((udc->usb_state == USB_STATE_ADDRESS)
+ && (in_8(&udc->usb_regs->usb_usadr) == 0))
+ out_8(&udc->usb_regs->usb_usadr, udc->device_address);
+
+ for (i = (USB_MAX_ENDPOINTS-1); ((i >= 0) && (res == 0)); i--) {
+ ep = &udc->eps[i];
+ if (ep && ep->init && (ep->dir != USB_DIR_OUT)) {
+ bd = ep->c_txbd;
+ if (!(in_be32((u32 __iomem *)bd) & T_R)
+ && (in_be32(&bd->buf))) {
+ /* confirm the transmitted bd */
+ if (ep->epnum == 0)
+ res = qe_ep0_txconf(ep);
+ else
+ res = qe_ep_txconf(ep);
+ }
+ }
+ }
+ return res;
+ }
+
+
+ /* setup packet rx is handled in this function too */
+ static void rx_irq(struct qe_udc *udc)
+ {
+ struct qe_ep *ep;
+ struct qe_bd __iomem *bd;
+ int i;
+
+ for (i = 0; i < USB_MAX_ENDPOINTS; i++) {
+ ep = &udc->eps[i];
+ if (ep && ep->init && (ep->dir != USB_DIR_IN)) {
+ bd = ep->n_rxbd;
+ if (!(in_be32((u32 __iomem *)bd) & R_E)
+ && (in_be32(&bd->buf))) {
+ if (ep->epnum == 0) {
+ qe_ep0_rx(udc);
+ } else {
+ /* non-setup packet receive */
+ qe_ep_rx(ep);
+ }
+ }
+ }
+ }
+ }
+
+ static irqreturn_t qe_udc_irq(int irq, void *_udc)
+ {
+ struct qe_udc *udc = (struct qe_udc *)_udc;
+ u16 irq_src;
+ irqreturn_t status = IRQ_NONE;
+ unsigned long flags;
+
+ spin_lock_irqsave(&udc->lock, flags);
+
+ irq_src = in_be16(&udc->usb_regs->usb_usber) &
+ in_be16(&udc->usb_regs->usb_usbmr);
+ /* Clear notification bits */
+ out_be16(&udc->usb_regs->usb_usber, irq_src);
+ /* USB Interrupt */
+ if (irq_src & USB_E_IDLE_MASK) {
+ idle_irq(udc);
+ irq_src &= ~USB_E_IDLE_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ if (irq_src & USB_E_TXB_MASK) {
+ tx_irq(udc);
+ irq_src &= ~USB_E_TXB_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ if (irq_src & USB_E_RXB_MASK) {
+ rx_irq(udc);
+ irq_src &= ~USB_E_RXB_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ if (irq_src & USB_E_RESET_MASK) {
+ reset_irq(udc);
+ irq_src &= ~USB_E_RESET_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ if (irq_src & USB_E_BSY_MASK) {
+ bsy_irq(udc);
+ irq_src &= ~USB_E_BSY_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ if (irq_src & USB_E_TXE_MASK) {
+ txe_irq(udc);
+ irq_src &= ~USB_E_TXE_MASK;
+ status = IRQ_HANDLED;
+ }
+
+ spin_unlock_irqrestore(&udc->lock, flags);
+
+ return status;
+ }
+
+ /*-------------------------------------------------------------------------
+ Gadget driver register and unregister.
+ --------------------------------------------------------------------------*/
+ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
+ {
+ int retval;
+ unsigned long flags = 0;
+
+ /* standard operations */
+ if (!udc_controller)
+ return -ENODEV;
+
+ if (!driver || (driver->speed != USB_SPEED_FULL
+ && driver->speed != USB_SPEED_HIGH)
+ || !driver->bind || !driver->disconnect
+ || !driver->setup)
+ return -EINVAL;
+
+ if (udc_controller->driver)
+ return -EBUSY;
+
+ /* a lock is needed; it is unclear whether this or another lock should be used */
+ spin_lock_irqsave(&udc_controller->lock, flags);
+
+ driver->driver.bus = NULL;
+ /* hook up the driver */
+ udc_controller->driver = driver;
+ udc_controller->gadget.dev.driver = &driver->driver;
+ udc_controller->gadget.speed = (enum usb_device_speed)(driver->speed);
+ spin_unlock_irqrestore(&udc_controller->lock, flags);
+
+ retval = driver->bind(&udc_controller->gadget);
+ if (retval) {
+ dev_err(udc_controller->dev, "bind to %s --> %d",
+ driver->driver.name, retval);
+ udc_controller->gadget.dev.driver = NULL;
+ udc_controller->driver = NULL;
+ return retval;
+ }
+
+ /* Enable IRQ reg and Set usbcmd reg EN bit */
+ qe_usb_enable();
+
+ out_be16(&udc_controller->usb_regs->usb_usber, 0xffff);
+ out_be16(&udc_controller->usb_regs->usb_usbmr, USB_E_DEFAULT_DEVICE);
+ udc_controller->usb_state = USB_STATE_ATTACHED;
+ udc_controller->ep0_state = WAIT_FOR_SETUP;
+ udc_controller->ep0_dir = USB_DIR_OUT;
+ dev_info(udc_controller->dev, "%s bind to driver %s \n",
+ udc_controller->gadget.name, driver->driver.name);
+ return 0;
+ }
+ EXPORT_SYMBOL(usb_gadget_register_driver);
+
+ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
+ {
+ struct qe_ep *loop_ep;
+ unsigned long flags;
+
+ if (!udc_controller)
+ return -ENODEV;
+
+ if (!driver || driver != udc_controller->driver)
+ return -EINVAL;
+
+ /* stop usb controller, disable intr */
+ qe_usb_disable();
+
+ /* in fact, not needed */
+ udc_controller->usb_state = USB_STATE_ATTACHED;
+ udc_controller->ep0_state = WAIT_FOR_SETUP;
+ udc_controller->ep0_dir = 0;
+
+ /* standard operation */
+ spin_lock_irqsave(&udc_controller->lock, flags);
+ udc_controller->gadget.speed = USB_SPEED_UNKNOWN;
+ nuke(&udc_controller->eps[0], -ESHUTDOWN);
+ list_for_each_entry(loop_ep, &udc_controller->gadget.ep_list,
+ ep.ep_list)
+ nuke(loop_ep, -ESHUTDOWN);
+ spin_unlock_irqrestore(&udc_controller->lock, flags);
+
+ /* report disconnect; the controller is already quiesced */
+ driver->disconnect(&udc_controller->gadget);
+
+ /* unbind gadget and unhook driver. */
+ driver->unbind(&udc_controller->gadget);
+ udc_controller->gadget.dev.driver = NULL;
+ udc_controller->driver = NULL;
+
+ dev_info(udc_controller->dev, "unregistered gadget driver '%s'\r\n",
+ driver->driver.name);
+ return 0;
+ }
+ EXPORT_SYMBOL(usb_gadget_unregister_driver);
+
+ /* udc structure's alloc and setup, include ep-param alloc */
+ static struct qe_udc __devinit *qe_udc_config(struct of_device *ofdev)
+ {
+ struct qe_udc *udc;
+ struct device_node *np = ofdev->node;
+ unsigned int tmp_addr = 0;
+ struct usb_device_para __iomem *usbpram;
+ unsigned int i;
+ u64 size;
+ u32 offset;
+
+ udc = kzalloc(sizeof(*udc), GFP_KERNEL);
+ if (udc == NULL) {
+ dev_err(&ofdev->dev, "malloc udc failed\n");
+ goto cleanup;
+ }
+
+ udc->dev = &ofdev->dev;
+
+ /* get default address of usb parameter in MURAM from device tree */
+ offset = *of_get_address(np, 1, &size, NULL);
+ udc->usb_param = cpm_muram_addr(offset);
+ memset_io(udc->usb_param, 0, size);
+
+ usbpram = udc->usb_param;
+ out_be16(&usbpram->frame_n, 0);
+ out_be32(&usbpram->rstate, 0);
+
+ tmp_addr = cpm_muram_alloc((USB_MAX_ENDPOINTS *
+ sizeof(struct usb_ep_para)),
+ USB_EP_PARA_ALIGNMENT);
+ if (IS_ERR_VALUE(tmp_addr))
+ goto cleanup;
+
+ for (i = 0; i < USB_MAX_ENDPOINTS; i++) {
+ out_be16(&usbpram->epptr[i], (u16)tmp_addr);
+ udc->ep_param[i] = cpm_muram_addr(tmp_addr);
+ tmp_addr += 32;
+ }
+
+ memset_io(udc->ep_param[0], 0,
+ USB_MAX_ENDPOINTS * sizeof(struct usb_ep_para));
+
+ udc->resume_state = USB_STATE_NOTATTACHED;
+ udc->usb_state = USB_STATE_POWERED;
+ udc->ep0_dir = 0;
+
+ spin_lock_init(&udc->lock);
+ return udc;
+
+ cleanup:
+ kfree(udc);
+ return NULL;
+ }
+
+ /* USB Controller register init */
+ static int __devinit qe_udc_reg_init(struct qe_udc *udc)
+ {
+ struct usb_ctlr __iomem *qe_usbregs;
+ qe_usbregs = udc->usb_regs;
+
+ /* Init the usb register */
+ out_8(&qe_usbregs->usb_usmod, 0x01);
+ out_be16(&qe_usbregs->usb_usbmr, 0);
+ out_8(&qe_usbregs->usb_uscom, 0);
+ out_be16(&qe_usbregs->usb_usber, USBER_ALL_CLEAR);
+
+ return 0;
+ }
+
+ static int __devinit qe_ep_config(struct qe_udc *udc, unsigned char pipe_num)
+ {
+ struct qe_ep *ep = &udc->eps[pipe_num];
+
+ ep->udc = udc;
+ strcpy(ep->name, ep_name[pipe_num]);
+ ep->ep.name = ep_name[pipe_num];
+
+ ep->ep.ops = &qe_ep_ops;
+ ep->stopped = 1;
+ ep->ep.maxpacket = (unsigned short) ~0;
+ ep->desc = NULL;
+ ep->dir = 0xff;
+ ep->epnum = (u8)pipe_num;
+ ep->sent = 0;
+ ep->last = 0;
+ ep->init = 0;
+ ep->rxframe = NULL;
+ ep->txframe = NULL;
+ ep->tx_req = NULL;
+ ep->state = EP_STATE_IDLE;
+ ep->has_data = 0;
+
+ /* the queue lists any req for this ep */
+ INIT_LIST_HEAD(&ep->queue);
+
+ /* gadget.ep_list is used for ep_autoconfig, so ep0 is excluded */
+ if (pipe_num != 0)
+ list_add_tail(&ep->ep.ep_list, &udc->gadget.ep_list);
+
+ ep->gadget = &udc->gadget;
+
+ return 0;
+ }
+
+ /*-----------------------------------------------------------------------
+ * UDC device Driver operation functions *
+ *----------------------------------------------------------------------*/
+ static void qe_udc_release(struct device *dev)
+ {
+ int i = 0;
+
+ complete(udc_controller->done);
+ cpm_muram_free(cpm_muram_offset(udc_controller->ep_param[0]));
+ for (i = 0; i < USB_MAX_ENDPOINTS; i++)
+ udc_controller->ep_param[i] = NULL;
+
+ kfree(udc_controller);
+ udc_controller = NULL;
+ }
+
+ /* Driver probe functions */
+ static int __devinit qe_udc_probe(struct of_device *ofdev,
+ const struct of_device_id *match)
+ {
+ struct device_node *np = ofdev->node;
+ struct qe_ep *ep;
+ unsigned int ret = 0;
+ unsigned int i;
+ const void *prop;
+
+ prop = of_get_property(np, "mode", NULL);
+ if (!prop || strcmp(prop, "peripheral"))
+ return -ENODEV;
+
+ /* Initialize the udc structure, including the QH member and other members */
+ udc_controller = qe_udc_config(ofdev);
+ if (!udc_controller) {
+ dev_err(&ofdev->dev, "failed to initialize\n");
+ return -ENOMEM;
+ }
+
+ udc_controller->soc_type = (unsigned long)match->data;
+ udc_controller->usb_regs = of_iomap(np, 0);
+ if (!udc_controller->usb_regs) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ /* initialize the usb hw regs except the per-EP regs;
+ * leave the usbintr reg untouched */
+ qe_udc_reg_init(udc_controller);
+
+ /* here come the standard probe operations:
+ * set the qe_udc->gadget fields */
+ udc_controller->gadget.ops = &qe_gadget_ops;
+
+ /* gadget.ep0 is a pointer */
+ udc_controller->gadget.ep0 = &udc_controller->eps[0].ep;
+
+ INIT_LIST_HEAD(&udc_controller->gadget.ep_list);
+
+ /* modify in register gadget process */
+ udc_controller->gadget.speed = USB_SPEED_UNKNOWN;
+
+ /* name: Identifies the controller hardware type. */
+ udc_controller->gadget.name = driver_name;
+
+ device_initialize(&udc_controller->gadget.dev);
+
+ dev_set_name(&udc_controller->gadget.dev, "gadget");
+
+ udc_controller->gadget.dev.release = qe_udc_release;
+ udc_controller->gadget.dev.parent = &ofdev->dev;
+
+ /* initialize qe_ep struct */
+ for (i = 0; i < USB_MAX_ENDPOINTS ; i++) {
+ /* the ep type isn't decided here, so
+ * qe_ep_init() should be called in ep_enable() */
+
+ /* setup the qe_ep struct and link ep.ep.list
+ * into gadget.ep_list */
+ qe_ep_config(udc_controller, (unsigned char)i);
+ }
+
+ /* ep0 initialization in here */
+ ret = qe_ep_init(udc_controller, 0, &qe_ep0_desc);
+ if (ret)
+ goto err2;
+
+ /* create a buffer for ZLP sends; it must remain zeroed */
+ udc_controller->nullbuf = kzalloc(256, GFP_KERNEL);
+ if (udc_controller->nullbuf == NULL) {
+ dev_err(udc_controller->dev, "cannot alloc nullbuf\n");
+ ret = -ENOMEM;
+ goto err3;
+ }
+
+ /* buffer for data of get_status request */
+ udc_controller->statusbuf = kzalloc(2, GFP_KERNEL);
+ if (udc_controller->statusbuf == NULL) {
+ ret = -ENOMEM;
+ goto err4;
+ }
+
+ udc_controller->nullp = virt_to_phys((void *)udc_controller->nullbuf);
+ if (udc_controller->nullp == DMA_ADDR_INVALID) {
+ udc_controller->nullp = dma_map_single(
+ udc_controller->gadget.dev.parent,
+ udc_controller->nullbuf,
+ 256,
+ DMA_TO_DEVICE);
+ udc_controller->nullmap = 1;
+ } else {
+ dma_sync_single_for_device(udc_controller->gadget.dev.parent,
+ udc_controller->nullp, 256,
+ DMA_TO_DEVICE);
+ }
+
+ tasklet_init(&udc_controller->rx_tasklet, ep_rx_tasklet,
+ (unsigned long)udc_controller);
+ /* request irq and disable DR */
+ udc_controller->usb_irq = irq_of_parse_and_map(np, 0);
+
+ ret = request_irq(udc_controller->usb_irq, qe_udc_irq, 0,
+ driver_name, udc_controller);
+ if (ret) {
+ dev_err(udc_controller->dev, "cannot request irq %d err %d \n",
+ udc_controller->usb_irq, ret);
+ goto err5;
+ }
+
+ ret = device_add(&udc_controller->gadget.dev);
+ if (ret)
+ goto err6;
+
+ dev_info(udc_controller->dev,
+ "%s USB controller initialized as device\n",
+ (udc_controller->soc_type == PORT_QE) ? "QE" : "CPM");
+ return 0;
+
+ err6:
+ free_irq(udc_controller->usb_irq, udc_controller);
+ err5:
+ if (udc_controller->nullmap) {
+ dma_unmap_single(udc_controller->gadget.dev.parent,
+ udc_controller->nullp, 256,
+ DMA_TO_DEVICE);
+ udc_controller->nullp = DMA_ADDR_INVALID;
+ } else {
+ dma_sync_single_for_cpu(udc_controller->gadget.dev.parent,
+ udc_controller->nullp, 256,
+ DMA_TO_DEVICE);
+ }
+ kfree(udc_controller->statusbuf);
+ err4:
+ kfree(udc_controller->nullbuf);
+ err3:
+ ep = &udc_controller->eps[0];
+ cpm_muram_free(cpm_muram_offset(ep->rxbase));
+ kfree(ep->rxframe);
+ kfree(ep->rxbuffer);
+ kfree(ep->txframe);
+ err2:
+ iounmap(udc_controller->usb_regs);
+ err1:
+ kfree(udc_controller);
+
+ return ret;
+ }
+
+ #ifdef CONFIG_PM
+ static int qe_udc_suspend(struct of_device *dev, pm_message_t state)
+ {
+ return -ENOTSUPP;
+ }
+
+ static int qe_udc_resume(struct of_device *dev)
+ {
+ return -ENOTSUPP;
+ }
+ #endif
+
+ static int __devexit qe_udc_remove(struct of_device *ofdev)
+ {
+ struct qe_ep *ep;
+ unsigned int size;
+
+ DECLARE_COMPLETION(done);
+
+ if (!udc_controller)
+ return -ENODEV;
+
+ udc_controller->done = &done;
+ tasklet_disable(&udc_controller->rx_tasklet);
+
+ if (udc_controller->nullmap) {
+ dma_unmap_single(udc_controller->gadget.dev.parent,
+ udc_controller->nullp, 256,
+ DMA_TO_DEVICE);
+ udc_controller->nullp = DMA_ADDR_INVALID;
+ } else {
+ dma_sync_single_for_cpu(udc_controller->gadget.dev.parent,
+ udc_controller->nullp, 256,
+ DMA_TO_DEVICE);
+ }
+ kfree(udc_controller->statusbuf);
+ kfree(udc_controller->nullbuf);
+
+ ep = &udc_controller->eps[0];
+ cpm_muram_free(cpm_muram_offset(ep->rxbase));
+ size = (ep->ep.maxpacket + USB_CRC_SIZE + 2) * (USB_BDRING_LEN + 1);
+
+ kfree(ep->rxframe);
+ if (ep->rxbufmap) {
+ dma_unmap_single(udc_controller->gadget.dev.parent,
+ ep->rxbuf_d, size,
+ DMA_FROM_DEVICE);
+ ep->rxbuf_d = DMA_ADDR_INVALID;
+ } else {
+ dma_sync_single_for_cpu(udc_controller->gadget.dev.parent,
+ ep->rxbuf_d, size,
+ DMA_FROM_DEVICE);
+ }
+
+ kfree(ep->rxbuffer);
+ kfree(ep->txframe);
+
+ free_irq(udc_controller->usb_irq, udc_controller);
+
+ tasklet_kill(&udc_controller->rx_tasklet);
+
+ iounmap(udc_controller->usb_regs);
+
+ device_unregister(&udc_controller->gadget.dev);
+ /* wait for release() of gadget.dev to free udc */
+ wait_for_completion(&done);
+
+ return 0;
+ }
+
+ /*-------------------------------------------------------------------------*/
+ static struct of_device_id __devinitdata qe_udc_match[] = {
+ {
+ .compatible = "fsl,mpc8360-qe-usb",
+ .data = (void *)PORT_QE,
+ },
+ {
+ .compatible = "fsl,mpc8272-cpm-usb",
+ .data = (void *)PORT_CPM,
+ },
+ {},
+ };
+
+ MODULE_DEVICE_TABLE(of, qe_udc_match);
+
+ static struct of_platform_driver udc_driver = {
++ .owner = THIS_MODULE,
+ .name = (char *)driver_name,
+ .match_table = qe_udc_match,
+ .probe = qe_udc_probe,
+ .remove = __devexit_p(qe_udc_remove),
+ #ifdef CONFIG_PM
+ .suspend = qe_udc_suspend,
+ .resume = qe_udc_resume,
+ #endif
+ };
+
+ static int __init qe_udc_init(void)
+ {
+ printk(KERN_INFO "%s: %s, %s\n", driver_name, driver_desc,
+ DRIVER_VERSION);
+ return of_register_platform_driver(&udc_driver);
+ }
+
+ static void __exit qe_udc_exit(void)
+ {
+ of_unregister_platform_driver(&udc_driver);
+ }
+
+ module_init(qe_udc_init);
+ module_exit(qe_udc_exit);
+
+ MODULE_DESCRIPTION(DRIVER_DESC);
+ MODULE_AUTHOR(DRIVER_AUTHOR);
+ MODULE_LICENSE("GPL");
+
ehci->periodic_size;
}
+#ifdef CONFIG_KDB_USB
+
+int
+ehci_kdb_poll_char(struct urb *urb)
+{
+ struct ehci_hcd *ehci;
+
+ /* just to make sure */
+ if (!urb || !urb->dev || !urb->dev->bus)
+ return -1;
+
+ ehci = (struct ehci_hcd *) hcd_to_ehci(bus_to_hcd(urb->dev->bus));
+
+ /* make sure */
+ if (!ehci)
+ return -1;
+
+ if (!HC_IS_RUNNING (ehci_to_hcd(ehci)->state))
+ return -1;
+
+ /*
+ * If ehci->lock is held coming into this routine, it could
+ * mean KDB was entered while the HC driver was in the midst
+ * of processing URBs. Therefore it could be dangerous to
+ * processes URBs from this poll routine. And, we can't wait on
+ * the lock since we are in KDB and kernel threads (including the
+ * one holding the lock) are suspended.
+ * So, we punt and return an error. Keyboards attached to this
+ * HC will not be useable from KDB at this time.
+ */
+ if (spin_is_locked(&ehci->lock))
+ return -EBUSY;
+
+ /* processes the URB */
+ if (qh_completions_kdb(ehci, urb->hcpriv, urb))
+ return 0;
+
+ return -1;
+}
+
+#endif /* CONFIG_KDB_USB */
+
/*-------------------------------------------------------------------------*/
- #define DRIVER_INFO DRIVER_VERSION " " DRIVER_DESC
-
- MODULE_DESCRIPTION (DRIVER_INFO);
+ MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_AUTHOR (DRIVER_AUTHOR);
MODULE_LICENSE ("GPL");
/*-------------------------------------------------------------------------*/
+#ifdef CONFIG_KDB_USB
+
+int
+ohci_kdb_poll_char(struct urb *urb)
+{
+ struct ohci_hcd *ohci;
+ struct ohci_regs * regs;
+
+ /* just to make sure */
+ if (!urb || !urb->dev || !urb->dev->bus)
+ return -1;
+
+ ohci = (struct ohci_hcd *) hcd_to_ohci(bus_to_hcd(urb->dev->bus));
+
+ /* make sure */
+ if (!ohci || !ohci->hcca)
+ return -1;
+
+ if (!HC_IS_RUNNING (ohci_to_hcd(ohci)->state))
+ return -1;
+
+ /*
+ * If ohci->lock is held coming into this routine, it could
+ * mean KDB was entered while the HC driver was in the midst
+ * of processing URBs. Therefore it could be dangerous to
+ * process URBs from this poll routine. And, we can't wait on
+ * the lock since we are in KDB and kernel threads (including the
+ * one holding the lock) are suspended.
+ * So, we punt and return an error. Keyboards attached to this
+ * HC will not be usable from KDB at this time.
+ */
+ if (spin_is_locked(&ohci->lock))
+ return -EBUSY;
+
+ regs = ohci->regs;
+
+ /* if the urb is not currently in progress resubmit it */
+ if (urb->status != -EINPROGRESS) {
+
+ if (usb_submit_urb (urb, GFP_ATOMIC))
+ return -1;
+
+ /* make sure the HC registers are set correctly */
+ ohci_writel (ohci, OHCI_INTR_WDH, &regs->intrenable);
+ ohci_writel (ohci, OHCI_INTR_WDH, &regs->intrstatus);
+ ohci_writel (ohci, OHCI_INTR_MIE, &regs->intrenable);
+
+ // flush those pci writes
+ (void) ohci_readl (ohci, &ohci->regs->control);
+ }
+
+ if (ohci->hcca->done_head) {
+ dl_done_list_kdb (ohci, urb);
+ ohci_writel (ohci, OHCI_INTR_WDH, &regs->intrstatus);
+ // flush the pci write
+ (void) ohci_readl (ohci, &ohci->regs->control);
+
+ return 0;
+ }
+
+ return -1;
+}
+
+#endif /* CONFIG_KDB_USB */
+
+/*-------------------------------------------------------------------------*/
+
- #define DRIVER_INFO DRIVER_VERSION " " DRIVER_DESC
-
MODULE_AUTHOR (DRIVER_AUTHOR);
- MODULE_DESCRIPTION (DRIVER_INFO);
+ MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE ("GPL");
#ifdef CONFIG_PCI
#include <asm/fb.h>
#include <asm/irq.h>
#include <asm/system.h>
- #ifdef CONFIG_ATARI
- #include <asm/atariints.h>
- #endif
- #ifdef CONFIG_MAC
- #include <asm/macints.h>
- #endif
- #if defined(__mc68000__)
- #include <asm/machdep.h>
- #include <asm/setup.h>
- #endif
#include "fbcon.h"
+#ifdef CONFIG_BOOTSPLASH
+#include "../bootsplash/bootsplash.h"
+#endif
#ifdef FBCONDEBUG
# define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __func__ , ## args)
--- /dev/null
+ /* cpwd.c - driver implementation for hardware watchdog
+ * timers found on Sun Microsystems CP1400 and CP1500 boards.
+ *
+ * This device supports both the generic Linux watchdog
+ * interface and Solaris-compatible ioctls as best it is
+ * able.
+ *
+ * NOTE: CP1400 systems appear to have a defective intr_mask
+ * register on the PLD, preventing the disabling of
+ * timer interrupts. We use a timer to periodically
+ * reset 'stopped' watchdogs on affected platforms.
+ *
+ * Copyright (c) 2000 Eric Brower (ebrower@usa.net)
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/errno.h>
+ #include <linux/major.h>
+ #include <linux/init.h>
+ #include <linux/miscdevice.h>
+ #include <linux/interrupt.h>
+ #include <linux/ioport.h>
+ #include <linux/timer.h>
+ #include <linux/smp_lock.h>
+ #include <linux/io.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+
+ #include <asm/irq.h>
+ #include <asm/uaccess.h>
+
+ #include <asm/watchdog.h>
+
+ #define DRIVER_NAME "cpwd"
+ #define PFX DRIVER_NAME ": "
+
+ #define WD_OBPNAME "watchdog"
+ #define WD_BADMODEL "SUNW,501-5336"
+ #define WD_BTIMEOUT (jiffies + (HZ * 1000))
+ #define WD_BLIMIT 0xFFFF
+
+ #define WD0_MINOR 212
+ #define WD1_MINOR 213
+ #define WD2_MINOR 214
+
+ /* Internal driver definitions. */
+ #define WD0_ID 0
+ #define WD1_ID 1
+ #define WD2_ID 2
+ #define WD_NUMDEVS 3
+
+ #define WD_INTR_OFF 0
+ #define WD_INTR_ON 1
+
+ #define WD_STAT_INIT 0x01 /* Watchdog timer is initialized */
+ #define WD_STAT_BSTOP 0x02 /* Watchdog timer is brokenstopped */
+ #define WD_STAT_SVCD 0x04 /* Watchdog interrupt occurred */
+
+ /* Register value definitions
+ */
+ #define WD0_INTR_MASK 0x01 /* Watchdog device interrupt masks */
+ #define WD1_INTR_MASK 0x02
+ #define WD2_INTR_MASK 0x04
+
+ #define WD_S_RUNNING 0x01 /* Watchdog device status running */
+ #define WD_S_EXPIRED 0x02 /* Watchdog device status expired */
+
+ struct cpwd {
+ void __iomem *regs;
+ spinlock_t lock;
+
+ unsigned int irq;
+
+ unsigned long timeout;
+ bool enabled;
+ bool reboot;
+ bool broken;
+ bool initialized;
+
+ struct {
+ struct miscdevice misc;
+ void __iomem *regs;
+ u8 intr_mask;
+ u8 runstatus;
+ u16 timeout;
+ } devs[WD_NUMDEVS];
+ };
+
+ static struct cpwd *cpwd_device;
+
+ /* Sun uses Altera PLD EPF8820ATC144-4
+ * providing three hardware watchdogs:
+ *
+ * 1) RIC - sends an interrupt when triggered
+ * 2) XIR - asserts XIR_B_RESET when triggered, resets CPU
+ * 3) POR - asserts POR_B_RESET when triggered, resets CPU, backplane, board
+ *
+ *** Timer register block definition (struct wd_timer_regblk)
+ *
+ * dcntr and limit registers (halfword access):
+ * -------------------
+ * | 15 | ...| 1 | 0 |
+ * -------------------
+ * |- counter val -|
+ * -------------------
+ * dcntr - Current 16-bit downcounter value.
+ * When downcounter reaches '0' watchdog expires.
+ * Reading this register resets downcounter with 'limit' value.
+ * limit - 16-bit countdown value in 1/10th second increments.
+ * Writing this register begins countdown with input value.
+ * Reading from this register does not affect counter.
+ * NOTES: After watchdog reset, dcntr and limit contain '1'
+ *
+ * status register (byte access):
+ * ---------------------------
+ * | 7 | ... | 2 | 1 | 0 |
+ * --------------+------------
+ * |- UNUSED -| EXP | RUN |
+ * ---------------------------
+ * status- Bit 0 - Watchdog is running
+ * Bit 1 - Watchdog has expired
+ *
+ *** PLD register block definition (struct wd_pld_regblk)
+ *
+ * intr_mask register (byte access):
+ * ---------------------------------
+ * | 7 | ... | 3 | 2 | 1 | 0 |
+ * +-------------+------------------
+ * |- UNUSED -| WD3 | WD2 | WD1 |
+ * ---------------------------------
+ * WD3 - 1 == Interrupt disabled for watchdog 3
+ * WD2 - 1 == Interrupt disabled for watchdog 2
+ * WD1 - 1 == Interrupt disabled for watchdog 1
+ *
+ * pld_status register (byte access):
+ * UNKNOWN, MAGICAL MYSTERY REGISTER
+ *
+ */
+ #define WD_TIMER_REGSZ 16
+ #define WD0_OFF 0
+ #define WD1_OFF (WD_TIMER_REGSZ * 1)
+ #define WD2_OFF (WD_TIMER_REGSZ * 2)
+ #define PLD_OFF (WD_TIMER_REGSZ * 3)
+
+ #define WD_DCNTR 0x00
+ #define WD_LIMIT 0x04
+ #define WD_STATUS 0x08
+
+ #define PLD_IMASK (PLD_OFF + 0x00)
+ #define PLD_STATUS (PLD_OFF + 0x04)
+
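Given the layout above, each sub-device's register window sits at a fixed stride from the mapped base, which is how the probe routine later derives p->devs[i].regs. A standalone walk of the offsets (the base address is illustrative):

#include <stdio.h>

#define TIMER_REGSZ 16
#define REG_DCNTR   0x00
#define REG_LIMIT   0x04
#define REG_STATUS  0x08

int main(void)
{
	unsigned long base = 0;        /* stands in for the ioremapped regs */
	int id;

	for (id = 0; id < 3; id++)
		printf("wd%d: dcntr=0x%02lx limit=0x%02lx status=0x%02lx\n",
		       id,
		       base + id * TIMER_REGSZ + REG_DCNTR,
		       base + id * TIMER_REGSZ + REG_LIMIT,
		       base + id * TIMER_REGSZ + REG_STATUS);
	printf("pld: imask=0x%02x status=0x%02x\n",
	       (unsigned)(3 * TIMER_REGSZ + 0x00),
	       (unsigned)(3 * TIMER_REGSZ + 0x04));
	return 0;
}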
+ static struct timer_list cpwd_timer;
+
+ static int wd0_timeout = 0;
+ static int wd1_timeout = 0;
+ static int wd2_timeout = 0;
+
+ module_param (wd0_timeout, int, 0);
+ MODULE_PARM_DESC(wd0_timeout, "Default watchdog0 timeout in 1/10secs");
+ module_param (wd1_timeout, int, 0);
+ MODULE_PARM_DESC(wd1_timeout, "Default watchdog1 timeout in 1/10secs");
+ module_param (wd2_timeout, int, 0);
+ MODULE_PARM_DESC(wd2_timeout, "Default watchdog2 timeout in 1/10secs");
+
+ MODULE_AUTHOR("Eric Brower <ebrower@usa.net>");
+ MODULE_DESCRIPTION("Hardware watchdog driver for Sun Microsystems CP1400/1500");
+ MODULE_LICENSE("GPL");
+ MODULE_SUPPORTED_DEVICE("watchdog");
+
+ static void cpwd_writew(u16 val, void __iomem *addr)
+ {
+ writew(cpu_to_le16(val), addr);
+ }
+ static u16 cpwd_readw(void __iomem *addr)
+ {
+ u16 val = readw(addr);
+
+ return le16_to_cpu(val);
+ }
+
+ static void cpwd_writeb(u8 val, void __iomem *addr)
+ {
+ writeb(val, addr);
+ }
+
+ static u8 cpwd_readb(void __iomem *addr)
+ {
+ return readb(addr);
+ }
+
+ /* Enable or disable watchdog interrupts
+ * Because of the CP1400 defect this should only be
+ * called during initialization or by wd_[start|stop]timer()
+ *
+ * index - sub-device index, or -1 for 'all'
+ * enable - non-zero to enable interrupts, zero to disable
+ */
+ static void cpwd_toggleintr(struct cpwd *p, int index, int enable)
+ {
+ unsigned char curregs = cpwd_readb(p->regs + PLD_IMASK);
+ unsigned char setregs =
+ (index == -1) ?
+ (WD0_INTR_MASK | WD1_INTR_MASK | WD2_INTR_MASK) :
+ (p->devs[index].intr_mask);
+
+ if (enable == WD_INTR_ON)
+ curregs &= ~setregs;
+ else
+ curregs |= setregs;
+
+ cpwd_writeb(curregs, p->regs + PLD_IMASK);
+ }
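Note the inverted sense documented in the register comment earlier: a set bit in PLD_IMASK disables that watchdog's interrupt, so enabling clears bits and disabling sets them. A one-byte model:

#include <stdio.h>

#define WD0_MASK 0x01
#define WD1_MASK 0x02
#define WD2_MASK 0x04

static unsigned char toggle(unsigned char cur, unsigned char sel, int enable)
{
	/* set bit == interrupt masked, so enable clears the bit */
	return enable ? (unsigned char)(cur & ~sel)
		      : (unsigned char)(cur | sel);
}

int main(void)
{
	unsigned char imask = WD0_MASK | WD1_MASK | WD2_MASK; /* all disabled */

	imask = toggle(imask, WD0_MASK, 1);         /* enable wd0 */
	printf("imask=0x%02x\n", (unsigned)imask);  /* 0x06 */
	return 0;
}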
+
+ /* Restarts timer with maximum limit value and
+ * does not unset 'brokenstop' value.
+ */
+ static void cpwd_resetbrokentimer(struct cpwd *p, int index)
+ {
+ cpwd_toggleintr(p, index, WD_INTR_ON);
+ cpwd_writew(WD_BLIMIT, p->devs[index].regs + WD_LIMIT);
+ }
+
+ /* Timer method called to reset stopped watchdogs--
+ * because of the PLD bug on CP1400, we cannot mask
+ * interrupts within the PLD, so we must continually
+ * reset the timers ad infinitum.
+ */
+ static void cpwd_brokentimer(unsigned long data)
+ {
+ struct cpwd *p = (struct cpwd *) data;
+ int id, tripped = 0;
+
+ /* kill a running timer instance, in case we
+ * were called directly instead of by kernel timer
+ */
+ if (timer_pending(&cpwd_timer))
+ del_timer(&cpwd_timer);
+
+ for (id = 0; id < WD_NUMDEVS; id++) {
+ if (p->devs[id].runstatus & WD_STAT_BSTOP) {
+ ++tripped;
+ cpwd_resetbrokentimer(p, id);
+ }
+ }
+
+ if (tripped) {
+ /* there is at least one timer brokenstopped-- reschedule */
+ cpwd_timer.expires = WD_BTIMEOUT;
+ add_timer(&cpwd_timer);
+ }
+ }
+
+ /* Reset countdown timer with 'limit' value and continue countdown.
+ * This will not start a stopped timer.
+ */
+ static void cpwd_pingtimer(struct cpwd *p, int index)
+ {
+ if (cpwd_readb(p->devs[index].regs + WD_STATUS) & WD_S_RUNNING)
+ cpwd_readw(p->devs[index].regs + WD_DCNTR);
+ }
+
+ /* Stop a running watchdog timer-- the timer actually keeps
+ * running, but the interrupt is masked so that no action is
+ * taken upon expiration.
+ */
+ static void cpwd_stoptimer(struct cpwd *p, int index)
+ {
+ if (cpwd_readb(p->devs[index].regs + WD_STATUS) & WD_S_RUNNING) {
+ cpwd_toggleintr(p, index, WD_INTR_OFF);
+
+ if (p->broken) {
+ p->devs[index].runstatus |= WD_STAT_BSTOP;
+ cpwd_brokentimer((unsigned long) p);
+ }
+ }
+ }
+
+ /* Start a watchdog timer with the specified limit value
+ * If the watchdog is running, it will be restarted with
+ * the provided limit value.
+ *
+ * This function will enable interrupts on the specified
+ * watchdog.
+ */
+ static void cpwd_starttimer(struct cpwd *p, int index)
+ {
+ if (p->broken)
+ p->devs[index].runstatus &= ~WD_STAT_BSTOP;
+
+ p->devs[index].runstatus &= ~WD_STAT_SVCD;
+
+ cpwd_writew(p->devs[index].timeout, p->devs[index].regs + WD_LIMIT);
+ cpwd_toggleintr(p, index, WD_INTR_ON);
+ }
+
+ static int cpwd_getstatus(struct cpwd *p, int index)
+ {
+ unsigned char stat = cpwd_readb(p->devs[index].regs + WD_STATUS);
+ unsigned char intr = cpwd_readb(p->devs[index].regs + PLD_IMASK);
+ unsigned char ret = WD_STOPPED;
+
+ /* determine STOPPED */
+ if (!stat)
+ return ret;
+
+ /* determine EXPIRED vs FREERUN vs RUNNING */
+ else if (WD_S_EXPIRED & stat) {
+ ret = WD_EXPIRED;
+ } else if (WD_S_RUNNING & stat) {
+ if (intr & p->devs[index].intr_mask) {
+ ret = WD_FREERUN;
+ } else {
+ /* Fudge WD_EXPIRED status for defective CP1400--
+ * IF timer is running
+ * AND brokenstop is set
+ * AND an interrupt has been serviced
+ * we are WD_EXPIRED.
+ *
+ * IF timer is running
+ * AND brokenstop is set
+ * AND no interrupt has been serviced
+ * we are WD_FREERUN.
+ */
+ if (p->broken &&
+ (p->devs[index].runstatus & WD_STAT_BSTOP)) {
+ if (p->devs[index].runstatus & WD_STAT_SVCD) {
+ ret = WD_EXPIRED;
+ } else {
+ /* we could as well pretend we are expired */
+ ret = WD_FREERUN;
+ }
+ } else {
+ ret = WD_RUNNING;
+ }
+ }
+ }
+
+ /* determine SERVICED */
+ if (p->devs[index].runstatus & WD_STAT_SVCD)
+ ret |= WD_SERVICED;
+
+ return ret;
+ }
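A pure-function model of the status decision above, including the CP1400 fudge: a running timer with its interrupt masked reads as free-running, unless it was brokenstopped and its interrupt already serviced, in which case it reads as expired. The return codes are placeholders for the WD_* values.

#include <stdbool.h>
#include <stdio.h>

enum { STOPPED, EXPIRED, FREERUN, RUNNING }; /* placeholders for WD_* */

static int wd_status(bool running, bool expired, bool intr_masked,
		     bool broken, bool bstopped, bool serviced)
{
	if (!running && !expired)
		return STOPPED;
	if (expired)
		return EXPIRED;
	if (intr_masked)
		return FREERUN;
	if (broken && bstopped)
		return serviced ? EXPIRED : FREERUN;
	return RUNNING;
}

int main(void)
{
	/* CP1400 workaround path: brokenstopped + serviced reads EXPIRED */
	printf("%d\n", wd_status(true, false, false, true, true, true));
	return 0;
}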
+
+ static irqreturn_t cpwd_interrupt(int irq, void *dev_id)
+ {
+ struct cpwd *p = dev_id;
+
+ /* Only WD0 will interrupt-- others are NMI and we won't
+ * see them here....
+ */
+ spin_lock_irq(&p->lock);
+
+ cpwd_stoptimer(p, WD0_ID);
+ p->devs[WD0_ID].runstatus |= WD_STAT_SVCD;
+
+ spin_unlock_irq(&p->lock);
+
+ return IRQ_HANDLED;
+ }
+
+ static int cpwd_open(struct inode *inode, struct file *f)
+ {
+ struct cpwd *p = cpwd_device;
+
+ lock_kernel();
+ switch (iminor(inode)) {
+ case WD0_MINOR:
+ case WD1_MINOR:
+ case WD2_MINOR:
+ break;
+
+ default:
+ unlock_kernel();
+ return -ENODEV;
+ }
+
+ /* Register IRQ on first open of device */
+ if (!p->initialized) {
+ if (request_irq(p->irq, &cpwd_interrupt,
+ IRQF_SHARED, DRIVER_NAME, p)) {
+ printk(KERN_ERR PFX "Cannot register IRQ %d\n",
+ p->irq);
+ unlock_kernel();
+ return -EBUSY;
+ }
+ p->initialized = true;
+ }
+
+ unlock_kernel();
+
+ return nonseekable_open(inode, f);
+ }
+
+ static int cpwd_release(struct inode *inode, struct file *file)
+ {
+ return 0;
+ }
+
+ static int cpwd_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
+ static struct watchdog_info info = {
+ .options = WDIOF_SETTIMEOUT,
+ .firmware_version = 1,
+ .identity = DRIVER_NAME,
+ };
+ void __user *argp = (void __user *)arg;
+ int index = iminor(inode) - WD0_MINOR;
+ struct cpwd *p = cpwd_device;
+ int setopt = 0;
+
+ switch (cmd) {
+ /* Generic Linux IOCTLs */
+ case WDIOC_GETSUPPORT:
+ if (copy_to_user(argp, &info, sizeof(struct watchdog_info)))
+ return -EFAULT;
+ break;
+
+ case WDIOC_GETSTATUS:
+ case WDIOC_GETBOOTSTATUS:
+ if (put_user(0, (int __user *)argp))
+ return -EFAULT;
+ break;
+
+ case WDIOC_KEEPALIVE:
+ cpwd_pingtimer(p, index);
+ break;
+
+ case WDIOC_SETOPTIONS:
+ if (copy_from_user(&setopt, argp, sizeof(unsigned int)))
+ return -EFAULT;
+
+ if (setopt & WDIOS_DISABLECARD) {
+ if (p->enabled)
+ return -EINVAL;
+ cpwd_stoptimer(p, index);
+ } else if (setopt & WDIOS_ENABLECARD) {
+ cpwd_starttimer(p, index);
+ } else {
+ return -EINVAL;
+ }
+ break;
+
+ /* Solaris-compatible IOCTLs */
+ case WIOCGSTAT:
+ setopt = cpwd_getstatus(p, index);
+ if (copy_to_user(argp, &setopt, sizeof(unsigned int)))
+ return -EFAULT;
+ break;
+
+ case WIOCSTART:
+ cpwd_starttimer(p, index);
+ break;
+
+ case WIOCSTOP:
+ if (p->enabled)
+ return -EINVAL;
+
+ cpwd_stoptimer(p, index);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ static long cpwd_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+ {
+ int rval = -ENOIOCTLCMD;
+
+ switch (cmd) {
+ /* solaris ioctls are specific to this driver */
+ case WIOCSTART:
+ case WIOCSTOP:
+ case WIOCGSTAT:
+ lock_kernel();
+ rval = cpwd_ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+ unlock_kernel();
+ break;
+
+ /* everything else is handled by the generic compat layer */
+ default:
+ break;
+ }
+
+ return rval;
+ }
+
+ static ssize_t cpwd_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct cpwd *p = cpwd_device;
+ int index = iminor(inode) - WD0_MINOR;
+
+ if (count) {
+ cpwd_pingtimer(p, index);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ static ssize_t cpwd_read(struct file * file, char __user *buffer,
+ size_t count, loff_t *ppos)
+ {
+ return -EINVAL;
+ }
+
+ static const struct file_operations cpwd_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = cpwd_ioctl,
+ .compat_ioctl = cpwd_compat_ioctl,
+ .open = cpwd_open,
+ .write = cpwd_write,
+ .read = cpwd_read,
+ .release = cpwd_release,
+ };
+
+ static int __devinit cpwd_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct device_node *options;
+ const char *str_prop;
+ const void *prop_val;
+ int i, err = -EINVAL;
+ struct cpwd *p;
+
+ if (cpwd_device)
+ return -EINVAL;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!p) {
+ printk(KERN_ERR PFX "Unable to allocate struct cpwd.\n");
+ goto out;
+ }
+
+ p->irq = op->irqs[0];
+
+ spin_lock_init(&p->lock);
+
+ p->regs = of_ioremap(&op->resource[0], 0,
+ 4 * WD_TIMER_REGSZ, DRIVER_NAME);
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Unable to map registers.\n");
+ goto out_free;
+ }
+
+ options = of_find_node_by_path("/options");
+ err = -ENODEV;
+ if (!options) {
+ printk(KERN_ERR PFX "Unable to find /options node.\n");
+ goto out_iounmap;
+ }
+
+ prop_val = of_get_property(options, "watchdog-enable?", NULL);
+ p->enabled = (prop_val ? true : false);
+
+ prop_val = of_get_property(options, "watchdog-reboot?", NULL);
+ p->reboot = (prop_val ? true : false);
+
+ str_prop = of_get_property(options, "watchdog-timeout", NULL);
+ if (str_prop)
+ p->timeout = simple_strtoul(str_prop, NULL, 10);
+
+ /* CP1400s seem to have broken PLD implementations-- the
+ * interrupt_mask register cannot be written, so no timer
+ * interrupts can be masked within the PLD.
+ */
+ str_prop = of_get_property(op->node, "model", NULL);
+ p->broken = (str_prop && !strcmp(str_prop, WD_BADMODEL));
+
+ if (!p->enabled)
+ cpwd_toggleintr(p, -1, WD_INTR_OFF);
+
+ for (i = 0; i < WD_NUMDEVS; i++) {
+ static const char *cpwd_names[] = { "RIC", "XIR", "POR" };
+ static int *parms[] = { &wd0_timeout,
+ &wd1_timeout,
+ &wd2_timeout };
+ struct miscdevice *mp = &p->devs[i].misc;
+
+ mp->minor = WD0_MINOR + i;
+ mp->name = cpwd_names[i];
+ mp->fops = &cpwd_fops;
+
+ p->devs[i].regs = p->regs + (i * WD_TIMER_REGSZ);
+ p->devs[i].intr_mask = (WD0_INTR_MASK << i);
+ p->devs[i].runstatus &= ~WD_STAT_BSTOP;
+ p->devs[i].runstatus |= WD_STAT_INIT;
+ p->devs[i].timeout = p->timeout;
+ if (*parms[i])
+ p->devs[i].timeout = *parms[i];
+
+ err = misc_register(&p->devs[i].misc);
+ if (err) {
+ printk(KERN_ERR "Could not register misc device for "
+ "dev %d\n", i);
+ goto out_unregister;
+ }
+ }
+
+ if (p->broken) {
+ init_timer(&cpwd_timer);
+ cpwd_timer.function = cpwd_brokentimer;
+ cpwd_timer.data = (unsigned long) p;
+ cpwd_timer.expires = WD_BTIMEOUT;
+
+ printk(KERN_INFO PFX "PLD defect workaround enabled for "
+ "model " WD_BADMODEL ".\n");
+ }
+
+ dev_set_drvdata(&op->dev, p);
+ cpwd_device = p;
+ err = 0;
+
+ out:
+ return err;
+
+ out_unregister:
+ for (i--; i >= 0; i--)
+ misc_deregister(&p->devs[i].misc);
+
+ out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, 4 * WD_TIMER_REGSZ);
+
+ out_free:
+ kfree(p);
+ goto out;
+ }
+
+ static int __devexit cpwd_remove(struct of_device *op)
+ {
+ struct cpwd *p = dev_get_drvdata(&op->dev);
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ misc_deregister(&p->devs[i].misc);
+
+ if (!p->enabled) {
+ cpwd_stoptimer(p, i);
+ if (p->devs[i].runstatus & WD_STAT_BSTOP)
+ cpwd_resetbrokentimer(p, i);
+ }
+ }
+
+ if (p->broken)
+ del_timer_sync(&cpwd_timer);
+
+ if (p->initialized)
+ free_irq(p->irq, p);
+
+ of_iounmap(&op->resource[0], p->regs, 4 * WD_TIMER_REGSZ);
+ kfree(p);
+
+ cpwd_device = NULL;
+
+ return 0;
+ }
+
+ static const struct of_device_id cpwd_match[] = {
+ {
+ .name = "watchdog",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, cpwd_match);
+
+ static struct of_platform_driver cpwd_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = cpwd_match,
+ .probe = cpwd_probe,
+ .remove = __devexit_p(cpwd_remove),
+ };
+
+ static int __init cpwd_init(void)
+ {
+ return of_register_driver(&cpwd_driver, &of_bus_type);
+ }
+
+ static void __exit cpwd_exit(void)
+ {
+ of_unregister_driver(&cpwd_driver);
+ }
+
+ module_init(cpwd_init);
+ module_exit(cpwd_exit);
--- /dev/null
+ /* riowd.c - driver for hw watchdog inside Super I/O of RIO
+ *
+ * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/types.h>
+ #include <linux/fs.h>
+ #include <linux/errno.h>
+ #include <linux/init.h>
+ #include <linux/miscdevice.h>
+ #include <linux/smp_lock.h>
+ #include <linux/watchdog.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+
+ #include <asm/io.h>
+ #include <asm/uaccess.h>
+
+
+ /* RIO uses the NatSemi Super I/O power management logical device
+ * as its watchdog.
+ *
+ * When the watchdog triggers, it asserts a line to the BBC (Boot Bus
+ * Controller) of the machine. The BBC can only be configured to
+ * trigger a power-on reset when the signal is asserted. The BBC
+ * can be configured to ignore the signal entirely as well.
+ *
+ * The only Super I/O device register we care about is at index
+ * 0x05 (WDTO_INDEX) which is the watchdog time-out in minutes (1-255).
+ * If set to zero, this disables the watchdog. When set, the system
+ * must periodically (before the watchdog expires) clear (write zero)
+ * and re-set the watchdog, or it will trigger.
+ *
+ * There are two other indexed watchdog registers inside this Super I/O
+ * logical device, but they are unused. The first, at index 0x06 is
+ * the watchdog control and can be used to make the watchdog timer re-set
+ * when the PS/2 mouse or serial lines show activity. The second, at
+ * index 0x07 is merely a sampling of the line from the watchdog to the
+ * BBC.
+ *
+ * The watchdog device generates no interrupts.
+ */
+
+ MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
+ MODULE_DESCRIPTION("Hardware watchdog driver for Sun RIO");
+ MODULE_SUPPORTED_DEVICE("watchdog");
+ MODULE_LICENSE("GPL");
+
+ #define DRIVER_NAME "riowd"
+ #define PFX DRIVER_NAME ": "
+
+ struct riowd {
+ void __iomem *regs;
+ spinlock_t lock;
+ };
+
+ static struct riowd *riowd_device;
+
+ #define WDTO_INDEX 0x05
+
+ static int riowd_timeout = 1; /* in minutes */
+ module_param(riowd_timeout, int, 0);
+ MODULE_PARM_DESC(riowd_timeout, "Watchdog timeout in minutes");
+
+ static void riowd_writereg(struct riowd *p, u8 val, int index)
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&p->lock, flags);
+ writeb(index, p->regs + 0);
+ writeb(val, p->regs + 1);
+ spin_unlock_irqrestore(&p->lock, flags);
+ }
+
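Built on this accessor, the clear-then-re-set sequence described in the header comment looks like the minimal sketch below; riowd_example_rearm is not part of the driver and assumes probe has already run so riowd_device is valid.

/* Minimal sketch, not in the original driver: disarm the dog by
 * writing zero, then arm it for 'minutes' minutes.
 */
static void __maybe_unused riowd_example_rearm(u8 minutes)
{
	struct riowd *p = riowd_device;

	riowd_writereg(p, 0, WDTO_INDEX);	/* clear: watchdog stops */
	riowd_writereg(p, minutes, WDTO_INDEX);	/* re-set: counts down again */
}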
+ static int riowd_open(struct inode *inode, struct file *filp)
+ {
+ cycle_kernel_lock();
+ return nonseekable_open(inode, filp);
+ }
+
+ static int riowd_release(struct inode *inode, struct file *filp)
+ {
+ return 0;
+ }
+
+ static int riowd_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+ {
+ static struct watchdog_info info = {
+ .options = WDIOF_SETTIMEOUT,
+ .firmware_version = 1,
+ .identity = DRIVER_NAME,
+ };
+ void __user *argp = (void __user *)arg;
+ struct riowd *p = riowd_device;
+ unsigned int options;
+ int new_margin;
+
+ switch (cmd) {
+ case WDIOC_GETSUPPORT:
+ if (copy_to_user(argp, &info, sizeof(info)))
+ return -EFAULT;
+ break;
+
+ case WDIOC_GETSTATUS:
+ case WDIOC_GETBOOTSTATUS:
+ if (put_user(0, (int __user *)argp))
+ return -EFAULT;
+ break;
+
+ case WDIOC_KEEPALIVE:
+ riowd_writereg(p, riowd_timeout, WDTO_INDEX);
+ break;
+
+ case WDIOC_SETOPTIONS:
+ if (copy_from_user(&options, argp, sizeof(options)))
+ return -EFAULT;
+
+ if (options & WDIOS_DISABLECARD)
+ riowd_writereg(p, 0, WDTO_INDEX);
+ else if (options & WDIOS_ENABLECARD)
+ riowd_writereg(p, riowd_timeout, WDTO_INDEX);
+ else
+ return -EINVAL;
+
+ break;
+
+ case WDIOC_SETTIMEOUT:
+ if (get_user(new_margin, (int __user *)argp))
+ return -EFAULT;
+ if ((new_margin < 60) || (new_margin > (255 * 60)))
+ return -EINVAL;
+ riowd_timeout = (new_margin + 59) / 60;
+ riowd_writereg(p, riowd_timeout, WDTO_INDEX);
+ /* fall through */
+
+ case WDIOC_GETTIMEOUT:
+ return put_user(riowd_timeout * 60, (int __user *)argp);
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
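One detail worth noting above: the hardware counts whole minutes, so WDIOC_SETTIMEOUT rounds the requested seconds up and then falls through to WDIOC_GETTIMEOUT, so the caller sees the effective value. A hedged caller-side sketch (device path assumed):

/* Hedged sketch: request 90 s; the driver rounds up to 2 minutes and
 * the fall-through writes the effective 120 s back into 'timeout'.
 */
#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/watchdog.h>

int main(void)
{
	int timeout = 90;
	int fd = open("/dev/watchdog", O_WRONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) == 0)
		printf("effective timeout: %d seconds\n", timeout);
	return 0;
}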
+ static ssize_t riowd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+ {
+ struct riowd *p = riowd_device;
+
+ if (count) {
+ riowd_writereg(p, riowd_timeout, WDTO_INDEX);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ static const struct file_operations riowd_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .ioctl = riowd_ioctl,
+ .open = riowd_open,
+ .write = riowd_write,
+ .release = riowd_release,
+ };
+
+ static struct miscdevice riowd_miscdev = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &riowd_fops
+ };
+
+ static int __devinit riowd_probe(struct of_device *op,
+ const struct of_device_id *match)
+ {
+ struct riowd *p;
+ int err = -EINVAL;
+
+ if (riowd_device)
+ goto out;
+
+ err = -ENOMEM;
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ goto out;
+
+ spin_lock_init(&p->lock);
+
+ p->regs = of_ioremap(&op->resource[0], 0, 2, DRIVER_NAME);
+ if (!p->regs) {
+ printk(KERN_ERR PFX "Cannot map registers.\n");
+ goto out_free;
+ }
+
+ err = misc_register(&riowd_miscdev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot register watchdog misc device.\n");
+ goto out_iounmap;
+ }
+
+ printk(KERN_INFO PFX "Hardware watchdog [%i minutes], "
+ "regs at %p\n", riowd_timeout, p->regs);
+
+ dev_set_drvdata(&op->dev, p);
+ riowd_device = p;
+ err = 0;
+ goto out;
+
+ out_iounmap:
+ of_iounmap(&op->resource[0], p->regs, 2);
+
+ out_free:
+ kfree(p);
+
+ out:
+ return err;
+ }
+
+ static int __devexit riowd_remove(struct of_device *op)
+ {
+ struct riowd *p = dev_get_drvdata(&op->dev);
+
+ misc_deregister(&riowd_miscdev);
+ of_iounmap(&op->resource[0], p->regs, 2);
+ kfree(p);
+
+ return 0;
+ }
+
+ static const struct of_device_id riowd_match[] = {
+ {
+ .name = "pmc",
+ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, riowd_match);
+
+ static struct of_platform_driver riowd_driver = {
++ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ .match_table = riowd_match,
+ .probe = riowd_probe,
+ .remove = __devexit_p(riowd_remove),
+ };
+
+ static int __init riowd_init(void)
+ {
+ return of_register_driver(&riowd_driver, &of_bus_type);
+ }
+
+ static void __exit riowd_exit(void)
+ {
+ of_unregister_driver(&riowd_driver);
+ }
+
+ module_init(riowd_init);
+ module_exit(riowd_exit);
bool
default n
+ config FILE_LOCKING
+ bool "Enable POSIX file locking API" if EMBEDDED
+ default y
+ help
+ This option enables standard file locking support, required
+ for filesystems like NFS and for the flock() system
+ call. Disabling this option saves about 11k.
+
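For context, the API this option gates looks like the hedged fragment below; lock_config is a hypothetical helper, and with FILE_LOCKING disabled the locking syscalls fail rather than silently succeed.

/* Hypothetical userspace helper showing what CONFIG_FILE_LOCKING=n
 * disables: flock() (and fcntl() byte-range locks) stop working.
 */
#include <fcntl.h>
#include <sys/file.h>

int lock_config(const char *path)
{
	int fd = open(path, O_RDWR);

	if (fd < 0)
		return -1;
	return flock(fd, LOCK_EX);	/* fails when the option is off */
}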
+config FS_NFS4ACL
+ bool
+ default n
+
source "fs/xfs/Kconfig"
source "fs/gfs2/Kconfig"
-
- config OCFS2_FS
- tristate "OCFS2 file system support"
- depends on NET && SYSFS
- select CONFIGFS_FS
- select JBD2
- select CRC32
- help
- OCFS2 is a general purpose extent based shared disk cluster file
- system with many similarities to ext3. It supports 64 bit inode
- numbers, and has automatically extending metadata groups which may
- also make it attractive for non-clustered use.
-
- You'll want to install the ocfs2-tools package in order to at least
- get "mount.ocfs2".
-
- Project web page: http://oss.oracle.com/projects/ocfs2
- Tools web page: http://oss.oracle.com/projects/ocfs2-tools
- OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
-
- For more information on OCFS2, see the file
- <file:Documentation/filesystems/ocfs2.txt>.
-
- config OCFS2_FS_O2CB
- tristate "O2CB Kernelspace Clustering"
- depends on OCFS2_FS
- default y
- help
- OCFS2 includes a simple kernelspace clustering package, the OCFS2
- Cluster Base. It only requires a very small userspace component
- to configure it. This comes with the standard ocfs2-tools package.
- O2CB is limited to maintaining a cluster for OCFS2 file systems.
- It cannot manage any other cluster applications.
-
- It is always safe to say Y here, as the clustering method is
- run-time selectable.
-
- config OCFS2_FS_USERSPACE_CLUSTER
- tristate "OCFS2 Userspace Clustering"
- depends on OCFS2_FS && DLM
- default y
- help
- This option will allow OCFS2 to use userspace clustering services
- in conjunction with the DLM in fs/dlm. If you are using a
- userspace cluster manager, say Y here.
-
- It is safe to say Y, as the clustering method is run-time
- selectable.
-
- config OCFS2_FS_STATS
- bool "OCFS2 statistics"
- depends on OCFS2_FS
- default y
- help
- This option allows some fs statistics to be captured. Enabling
- this option may increase the memory consumption.
-
- config OCFS2_DEBUG_MASKLOG
- bool "OCFS2 logging support"
- depends on OCFS2_FS
- default y
- help
- The ocfs2 filesystem has an extensive logging system. The system
- allows selection of events to log via files in /sys/o2cb/logmask/.
- This option will enlarge your kernel, but it allows debugging of
- ocfs2 filesystem issues.
-
- config OCFS2_DEBUG_FS
- bool "OCFS2 expensive checks"
- depends on OCFS2_FS
- default n
- help
- This option will enable expensive consistency checks. Enable
- this option for debugging only as it is likely to decrease
- performance of the filesystem.
-
- config OCFS2_COMPAT_JBD
- bool "Use JBD for compatibility"
- depends on OCFS2_FS
- default n
- select JBD
- help
- The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
- is backwards compatible with JBD. It is safe to say N here.
- However, if you really want to use the original JBD, say Y here.
-
- config OCFS2_FS_POSIX_ACL
- bool "OCFS2 POSIX Access Control Lists"
- depends on OCFS2_FS
- select FS_POSIX_ACL
- default n
- help
- Posix Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
+ source "fs/ocfs2/Kconfig"
+ source "fs/btrfs/Kconfig"
endif # BLOCK
- config DNOTIFY
- bool "Dnotify support"
- default y
- help
- Dnotify is a directory-based per-fd file change notification system
- that uses signals to communicate events to user-space. There exist
- superior alternatives, but some applications may still rely on
- dnotify.
-
- If unsure, say Y.
-
- config INOTIFY
- bool "Inotify file change notification support"
- default y
- ---help---
- Say Y here to enable inotify support. Inotify is a file change
- notification system and a replacement for dnotify. Inotify fixes
- numerous shortcomings in dnotify and introduces several new features
- including multiple file events, one-shot support, and unmount
- notification.
-
- For more information, see <file:Documentation/filesystems/inotify.txt>
-
- If unsure, say Y.
-
- config INOTIFY_USER
- bool "Inotify support for userspace"
- depends on INOTIFY
- default y
- ---help---
- Say Y here to enable inotify support for userspace, including the
- associated system calls. Inotify allows monitoring of both files and
- directories via a single open fd. Events are read from the file
- descriptor, which is also select()- and poll()-able.
-
- For more information, see <file:Documentation/filesystems/inotify.txt>
-
- If unsure, say Y.
+ source "fs/notify/Kconfig"
+config DMAPI
+ tristate "DMAPI support"
+ help
+ The Data Management API is a system interface used to implement
+ the interface defined in the X/Open document:
+ "Systems Management: Data Storage Management (XDSM) API",
+ dated February 1997. This interface is used by hierarchical
+ storage management systems.
+
+ If any DMAPI-capable filesystem is built into the kernel, then
+ DMAPI must also be built into the kernel.
+
+config DMAPI_DEBUG
+ bool "DMAPI debugging support"
+ depends on DMAPI
+ help
+ If you don't know whether you need it, then you don't need it:
+ answer N.
+
config QUOTA
bool "Quota support"
help
depends on NFSD || NFS_FS
default y
- config SUNRPC
- tristate
-
- config SUNRPC_GSS
- tristate
-
- config SUNRPC_XPRT_RDMA
- tristate
- depends on SUNRPC && INFINIBAND && EXPERIMENTAL
- default SUNRPC && INFINIBAND
- help
- This option enables an RPC client transport capability that
- allows the NFS client to mount servers via an RDMA-enabled
- transport.
-
- To compile RPC client RDMA transport support as a module,
- choose M here: the module will be called xprtrdma.
-
- If unsure, say N.
-
- config SUNRPC_SWAP
- def_bool n
- depends on SUNRPC
- select NETVM
-
- config RPCSEC_GSS_KRB5
- tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
- select SUNRPC_GSS
- select CRYPTO
- select CRYPTO_MD5
- select CRYPTO_DES
- select CRYPTO_CBC
- help
- Choose Y here to enable Secure RPC using the Kerberos version 5
- GSS-API mechanism (RFC 1964).
-
- Secure RPC calls with Kerberos require an auxiliary user-space
- daemon which may be found in the Linux nfs-utils package
- available from http://linux-nfs.org/. In addition, user-space
- Kerberos support should be installed.
-
- If unsure, say N.
-
- config RPCSEC_GSS_SPKM3
- tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
- select SUNRPC_GSS
- select CRYPTO
- select CRYPTO_MD5
- select CRYPTO_DES
- select CRYPTO_CAST5
- select CRYPTO_CBC
- help
- Choose Y here to enable Secure RPC using the SPKM3 public key
- GSS-API mechansim (RFC 2025).
-
- Secure RPC calls with SPKM3 require an auxiliary userspace
- daemon which may be found in the Linux nfs-utils package
- available from http://linux-nfs.org/.
-
- If unsure, say N.
-
- config SMB_FS
- tristate "SMB file system support (OBSOLETE, please use CIFS)"
- depends on INET
- select NLS
- help
- SMB (Server Message Block) is the protocol Windows for Workgroups
- (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
- files and printers over local networks. Saying Y here allows you to
- mount their file systems (often called "shares" in this context) and
- access them just like any other Unix directory. Currently, this
- works only if the Windows machines use TCP/IP as the underlying
- transport protocol, and not NetBEUI. For details, read
- <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
- available from <http://www.tldp.org/docs.html#howto>.
-
- Note: if you just want your box to act as an SMB *server* and make
- files and printing services available to Windows clients (which need
- to have a TCP/IP stack), you don't need to say Y here; you can use
- the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
- for that.
-
- General information about how to connect Linux, Windows machines and
- Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
- To compile the SMB support as a module, choose M here:
- the module will be called smbfs. Most people say N, however.
-
- config SMB_NLS_DEFAULT
- bool "Use a default NLS"
- depends on SMB_FS
- help
- Enabling this will make smbfs use nls translations by default. You
- need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
- settings and you need to give the default nls for the SMB server as
- CONFIG_SMB_NLS_REMOTE.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
-
- config SMB_NLS_REMOTE
- string "Default Remote NLS Option"
- depends on SMB_NLS_DEFAULT
- default "cp437"
- help
- This setting allows you to specify a default value for which
- codepage the server uses. If this field is left blank no
- translations will be done by default. The local codepage/charset
- default to CONFIG_NLS_DEFAULT.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
-
- config CIFS
- tristate "CIFS support (advanced network filesystem, SMBFS successor)"
- depends on INET
- select NLS
- help
- This is the client VFS module for the Common Internet File System
- (CIFS) protocol which is the successor to the Server Message Block
- (SMB) protocol, the native file sharing mechanism for most early
- PC operating systems. The CIFS protocol is fully supported by
- file servers such as Windows 2000 (including Windows 2003, NT 4
- and Windows XP) as well by Samba (which provides excellent CIFS
- server support for Linux and many other operating systems). Limited
- support for OS/2 and Windows ME and similar servers is provided as
- well.
-
- The cifs module provides an advanced network file system
- client for mounting to CIFS compliant servers. It includes
- support for DFS (hierarchical name space), secure per-user
- session establishment via Kerberos or NTLM or NTLMv2,
- safe distributed caching (oplock), optional packet
- signing, Unicode and other internationalization improvements.
- If you need to mount to Samba or Windows from this machine, say Y.
-
- config CIFS_STATS
- bool "CIFS statistics"
- depends on CIFS
- help
- Enabling this option will cause statistics for each server share
- mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
- config CIFS_STATS2
- bool "Extended statistics"
- depends on CIFS_STATS
- help
- Enabling this option will allow more detailed statistics on SMB
- request timing to be displayed in /proc/fs/cifs/DebugData and also
- allow optional logging of slow responses to dmesg (depending on the
- value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
- These additional statistics may have a minor effect on performance
- and memory utilization.
-
- Unless you are a developer or are doing network performance analysis
- or tuning, say N.
-
- config CIFS_WEAK_PW_HASH
- bool "Support legacy servers which use weaker LANMAN security"
- depends on CIFS
- help
- Modern CIFS servers including Samba and most Windows versions
- (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
- security mechanisms. These hash the password more securely
- than the mechanisms used in the older LANMAN version of the
- SMB protocol but LANMAN based authentication is needed to
- establish sessions with some old SMB servers.
-
- Enabling this option allows the cifs module to mount to older
- LANMAN based servers such as OS/2 and Windows 95, but such
- mounts may be less secure than mounts using NTLM or more recent
- security mechanisms if you are on a public network. Unless you
- have a need to access old SMB servers (and are on a private
- network) you probably want to say N. Even if this support
- is enabled in the kernel build, LANMAN authentication will not be
- used automatically. At runtime LANMAN mounts are disabled but
- can be set to required (or optional) either in
- /proc/fs/cifs (see fs/cifs/README for more detail) or via an
- option on the mount command. This support is disabled by
- default in order to reduce the possibility of a downgrade
- attack.
-
- If unsure, say N.
-
- config CIFS_UPCALL
- bool "Kerberos/SPNEGO advanced session setup"
- depends on CIFS && KEYS
- help
- Enables an upcall mechanism for CIFS which accesses
- userspace helper utilities to provide SPNEGO packaged (RFC 4178)
- Kerberos tickets which are needed to mount to certain secure servers
- (for which more secure Kerberos authentication is required). If
- unsure, say N.
-
- config CIFS_XATTR
- bool "CIFS extended attributes"
- depends on CIFS
- help
- Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details). CIFS maps the name of
- extended attributes beginning with the user namespace prefix
- to SMB/CIFS EAs. EAs are stored on Windows servers without the
- user namespace prefix, but their names are seen by Linux cifs clients
- prefaced by the user namespace prefix. The system namespace
- (used by some filesystems to store ACLs) is not supported at
- this time.
-
- If unsure, say N.
-
- config CIFS_POSIX
- bool "CIFS POSIX Extensions"
- depends on CIFS_XATTR
- help
- Enabling this option will cause the cifs client to attempt to
- negotiate a newer dialect with servers, such as Samba 3.0.5
- or later, that optionally can handle more POSIX like (rather
- than Windows like) file behavior. It also enables
- support for POSIX ACLs (getfacl and setfacl) to servers
- (such as Samba 3.10 and later) which can negotiate
- CIFS POSIX ACL support. If unsure, say N.
-
- config CIFS_DEBUG2
- bool "Enable additional CIFS debugging routines"
- depends on CIFS
- help
- Enabling this option adds a few more debugging routines
- to the cifs code which slightly increases the size of
- the cifs module and can cause additional logging of debug
- messages in some error paths, slowing performance. This
- option can be turned off unless you are debugging
- cifs problems. If unsure, say N.
-
- config CIFS_EXPERIMENTAL
- bool "CIFS Experimental Features (EXPERIMENTAL)"
- depends on CIFS && EXPERIMENTAL
- help
- Enables cifs features under testing. These features are
- experimental and currently include DFS support and directory
- change notification ie fcntl(F_DNOTIFY), as well as the upcall
- mechanism which will be used for Kerberos session negotiation
- and uid remapping. Some of these features also may depend on
- setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
- (which is disabled by default). See the file fs/cifs/README
- for more details. If unsure, say N.
-
- config CIFS_DFS_UPCALL
- bool "DFS feature support (EXPERIMENTAL)"
- depends on CIFS_EXPERIMENTAL
- depends on KEYS
- help
- Enables an upcall mechanism for CIFS which contacts userspace
- helper utilities to provide server name resolution (host names to
- IP addresses) which is needed for implicit mounts of DFS junction
- points. If unsure, say N.
-
- config NCP_FS
- tristate "NCP file system support (to mount NetWare volumes)"
- depends on IPX!=n || INET
- help
- NCP (NetWare Core Protocol) is a protocol that runs over IPX and is
- used by Novell NetWare clients to talk to file servers. It is to
- IPX what NFS is to TCP/IP, if that helps. Saying Y here allows you
- to mount NetWare file server volumes and to access them just like
- any other Unix directory. For details, please read the file
- <file:Documentation/filesystems/ncpfs.txt> in the kernel source and
- the IPX-HOWTO from <http://www.tldp.org/docs.html#howto>.
-
- You do not have to say Y here if you want your Linux box to act as a
- file *server* for Novell NetWare clients.
-
- General information about how to connect Linux, Windows machines and
- Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
- To compile this as a module, choose M here: the module will be called
- ncpfs. Say N unless you are connected to a Novell network.
-
+ source "net/sunrpc/Kconfig"
+ source "fs/smbfs/Kconfig"
+ source "fs/cifs/Kconfig"
source "fs/ncpfs/Kconfig"
-
- config CODA_FS
- tristate "Coda file system support (advanced network fs)"
- depends on INET
- help
- Coda is an advanced network file system, similar to NFS in that it
- enables you to mount file systems of a remote server and access them
- with regular Unix commands as if they were sitting on your hard
- disk. Coda has several advantages over NFS: support for
- disconnected operation (e.g. for laptops), read/write server
- replication, security model for authentication and encryption,
- persistent client caches and write back caching.
-
- If you say Y here, your Linux box will be able to act as a Coda
- *client*. You will need user level code as well, both for the
- client and server. Servers are currently user level, i.e. they need
- no kernel support. Please read
- <file:Documentation/filesystems/coda.txt> and check out the Coda
- home page <http://www.coda.cs.cmu.edu/>.
-
- To compile the coda client support as a module, choose M here: the
- module will be called coda.
-
- config AFS_FS
- tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
- depends on INET && EXPERIMENTAL
- select AF_RXRPC
- help
- If you say Y here, you will get an experimental Andrew File System
- driver. It currently only supports unsecured read-only AFS access.
-
- See <file:Documentation/filesystems/afs.txt> for more information.
-
- If unsure, say N.
-
- config AFS_DEBUG
- bool "AFS dynamic debugging"
- depends on AFS_FS
- help
- Say Y here to make runtime controllable debugging messages appear.
-
- See <file:Documentation/filesystems/afs.txt> for more information.
-
- If unsure, say N.
-
- config 9P_FS
- tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
- depends on INET && NET_9P && EXPERIMENTAL
- help
- If you say Y here, you will get experimental support for
- Plan 9 resource sharing via the 9P2000 protocol.
-
- See <http://v9fs.sf.net> for more information.
-
- If unsure, say N.
-
- config NOVFS
- tristate "Novell Netware Filesystem support (novfs) (EXPERIMENTAL)"
- depends on INET && EXPERIMENTAL
- help
- If you say Y here, you will get an experimental Novell Netware
- filesystem driver.
-
- If unsure, say N.
+ source "fs/coda/Kconfig"
+ source "fs/afs/Kconfig"
+ source "fs/9p/Kconfig"
++source "fs/novfs/Kconfig"
endif # NETWORK_FILESYSTEMS
obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
obj-$(CONFIG_QUOTACTL) += quota.o
+obj-$(CONFIG_DMAPI) += dmapi/
+
- obj-$(CONFIG_DNOTIFY) += dnotify.o
-
obj-$(CONFIG_PROC_FS) += proc/
obj-y += partitions/
obj-$(CONFIG_SYSFS) += sysfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
+ obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
+obj-$(CONFIG_NOVFS) += novfs/
EXPORT_SYMBOL(submit_bh);
EXPORT_SYMBOL(sync_dirty_buffer);
EXPORT_SYMBOL(unlock_buffer);
++
++DEFINE_TRACE(fs_buffer_wait_start);
++DEFINE_TRACE(fs_buffer_wait_end);
force_sig(SIGKILL, task);
}
+ static struct TCP_Server_Info *
+ cifs_get_tcp_session(struct smb_vol *volume_info)
+ {
+ struct TCP_Server_Info *tcp_ses = NULL;
- struct sockaddr addr;
++ struct sockaddr_storage addr;
+ struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr;
+ struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr;
+ int rc;
+
- memset(&addr, 0, sizeof(struct sockaddr));
++ memset(&addr, 0, sizeof(struct sockaddr_storage));
+
+ if (volume_info->UNCip && volume_info->UNC) {
+ rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
+ &sin_server->sin_addr.s_addr);
+
+ if (rc <= 0) {
+ /* not ipv4 address, try ipv6 */
+ rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
+ &sin_server6->sin6_addr.in6_u);
+ if (rc > 0)
- addr.sa_family = AF_INET6;
++ addr.ss_family = AF_INET6;
+ } else {
- addr.sa_family = AF_INET;
++ addr.ss_family = AF_INET;
+ }
+
+ if (rc <= 0) {
+ /* we failed to translate the address */
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
+ volume_info->UNCip));
+ } else if (volume_info->UNCip) {
+ /* BB using ip addr as tcp_ses name to connect to the
+ DFS root below */
+ cERROR(1, ("Connecting to DFS root not implemented yet"));
+ rc = -EINVAL;
+ goto out_err;
+ } else /* which tcp_ses DFS root would we connect to? */ {
+ cERROR(1,
+ ("CIFS mount error: No UNC path (e.g. -o "
+ "unc=//192.168.1.100/public) specified"));
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ /* see if we already have a matching tcp_ses */
+ tcp_ses = cifs_find_tcp_session(&addr);
+ if (tcp_ses)
+ return tcp_ses;
+
+ tcp_ses = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
+ if (!tcp_ses) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+
+ tcp_ses->hostname = extract_hostname(volume_info->UNC);
+ if (IS_ERR(tcp_ses->hostname)) {
+ rc = PTR_ERR(tcp_ses->hostname);
+ goto out_err;
+ }
+
+ tcp_ses->noblocksnd = volume_info->noblocksnd;
+ tcp_ses->noautotune = volume_info->noautotune;
+ atomic_set(&tcp_ses->inFlight, 0);
+ init_waitqueue_head(&tcp_ses->response_q);
+ init_waitqueue_head(&tcp_ses->request_q);
+ INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
+ mutex_init(&tcp_ses->srv_mutex);
+ memcpy(tcp_ses->workstation_RFC1001_name,
+ volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+ memcpy(tcp_ses->server_RFC1001_name,
+ volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+ tcp_ses->sequence_number = 0;
+ INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
+ INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
+
+ /*
+ * At this point we are the only ones with a pointer to
+ * the struct; since the kernel thread is not created yet,
+ * there is no need to spinlock this init of tcpStatus or srv_count.
+ */
+ tcp_ses->tcpStatus = CifsNew;
+ ++tcp_ses->srv_count;
+
- if (addr.sa_family == AF_INET6) {
++ if (addr.ss_family == AF_INET6) {
+ cFYI(1, ("attempting ipv6 connect"));
+ /* BB should we allow ipv6 on port 139? */
+ /* no other OS has been observed in the wild doing 139 over v6 */
+ memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
+ sizeof(struct sockaddr_in6));
+ sin_server6->sin6_port = htons(volume_info->port);
+ rc = ipv6_connect(tcp_ses);
+ } else {
+ memcpy(&tcp_ses->addr.sockAddr, sin_server,
+ sizeof(struct sockaddr_in));
+ sin_server->sin_port = htons(volume_info->port);
+ rc = ipv4_connect(tcp_ses);
+ }
+ if (rc < 0) {
+ cERROR(1, ("Error connecting to socket. Aborting operation"));
+ goto out_err;
+ }
+
+ /*
+ * since we're in a cifs function already, we know that
+ * this will succeed. No need for try_module_get().
+ */
+ __module_get(THIS_MODULE);
+ tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread,
+ tcp_ses, "cifsd");
+ if (IS_ERR(tcp_ses->tsk)) {
+ rc = PTR_ERR(tcp_ses->tsk);
+ cERROR(1, ("error %d create cifsd thread", rc));
+ module_put(THIS_MODULE);
+ goto out_err;
+ }
+
+ /* thread spawned, put it on the list */
+ write_lock(&cifs_tcp_ses_lock);
+ list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
+ write_unlock(&cifs_tcp_ses_lock);
+
+ return tcp_ses;
+
+ out_err:
+ if (tcp_ses) {
+ kfree(tcp_ses->hostname);
+ if (tcp_ses->ssocket)
+ sock_release(tcp_ses->ssocket);
+ kfree(tcp_ses);
+ }
+ return ERR_PTR(rc);
+ }
+
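The interdiff above (struct sockaddr to struct sockaddr_storage) fixes a real overflow: a 28-byte sockaddr_in6 does not fit in a 16-byte sockaddr, so the IPv6 path scribbled past the stack variable. An illustrative compile-time check, not part of the patch, makes the invariant explicit:

/* Illustrative only, not in the patch: document why sockaddr_storage
 * is required to back casts to both sockaddr_in and sockaddr_in6.
 */
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/in6.h>

static inline void cifs_addr_size_check(void)
{
	/* sockaddr_in6 would overflow a plain sockaddr... */
	BUILD_BUG_ON(sizeof(struct sockaddr_in6) <= sizeof(struct sockaddr));
	/* ...but always fits in sockaddr_storage. */
	BUILD_BUG_ON(sizeof(struct sockaddr_in6) >
		     sizeof(struct sockaddr_storage));
}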
static struct cifsSesInfo *
cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
{
goto out;
retval = search_binary_handler(bprm, regs);
- if (retval >= 0) {
- trace_fs_exec(filename);
- /* execve success */
- security_bprm_free(bprm);
- acct_update_integrals(current);
- free_bprm(bprm);
- return retval;
- }
+ if (retval < 0)
+ goto out;
- out:
- if (bprm->security)
- security_bprm_free(bprm);
+ /* execve succeeded */
+ mutex_unlock(&current->cred_exec_mutex);
++ trace_fs_exec(filename);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+ return retval;
- out_mm:
+ out:
if (bprm->mm)
mmput(bprm->mm);
--- /dev/null
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __DMAPI_KERN_H__
+#define __DMAPI_KERN_H__
+
+#include <linux/fs.h>
+
+union sys_dmapi_uarg {
+ void *p;
+ __u64 u;
+};
+typedef union sys_dmapi_uarg sys_dmapi_u;
+
+struct sys_dmapi_args {
+ sys_dmapi_u uarg1, uarg2, uarg3, uarg4, uarg5, uarg6, uarg7, uarg8,
+ uarg9, uarg10, uarg11;
+};
+typedef struct sys_dmapi_args sys_dmapi_args_t;
+
+#define DM_Uarg(uap,i) uap->uarg##i.u
+#define DM_Parg(uap,i) uap->uarg##i.p
+
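The two accessor macros paste the slot number into the member name, so DM_Uarg(uap, 1) expands to uap->uarg1.u and DM_Parg(uap, 2) to uap->uarg2.p. A hedged, hypothetical handler fragment:

/* Hypothetical handler fragment, for illustration only: pull one
 * integer argument and one pointer argument out of the arg pack.
 */
static int dm_example_handler(sys_dmapi_args_t *uap)
{
	__u64 sid = DM_Uarg(uap, 1);	/* expands to uap->uarg1.u */
	void *bufp = DM_Parg(uap, 2);	/* expands to uap->uarg2.p */

	return (sid && bufp) ? 0 : -EINVAL;
}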
+#ifdef __KERNEL__
+
+struct dm_handle_t;
+
+/* The first group of definitions and prototypes define the filesystem's
+ interface into the DMAPI code.
+*/
+
+
+/* Definitions used for the flags field on dm_send_data_event(),
+ dm_send_unmount_event(), and dm_send_namesp_event() calls.
+*/
+
+#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
+#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
+
+/* Possible code levels reported by dm_code_level(). */
+
+#define DM_CLVL_INIT 0 /* DMAPI prior to X/Open compliance */
+#define DM_CLVL_XOPEN 1 /* X/Open compliant DMAPI */
+
+
+/*
+ * Filesystem operations accessed by the DMAPI core.
+ */
+struct filesystem_dmapi_operations {
+ int (*get_fsys_vector)(struct super_block *sb, void *addr);
+ int (*fh_to_inode)(struct super_block *sb, struct inode **ip,
+ dm_fid_t *fid);
- const struct file_operations * (*get_invis_ops)(struct inode *ip);
+ int (*inode_to_fh)(struct inode *ip, dm_fid_t *fid,
+ dm_fsid_t *fsid );
+ void (*get_fsid)(struct super_block *sb, dm_fsid_t *fsid);
+#define HAVE_DM_QUEUE_FLUSH
+ int (*flushing)(struct inode *ip);
+};
+
+
+/* Prototypes used outside of the DMI module/directory. */
+
+int dm_send_data_event(
+ dm_eventtype_t event,
+ struct inode *ip,
+ dm_right_t vp_right,
+ dm_off_t off,
+ size_t len,
+ int flags);
+
+int dm_send_destroy_event(
+ struct inode *ip,
+ dm_right_t vp_right);
+
+int dm_send_mount_event(
+ struct super_block *sb,
+ dm_right_t vfsp_right,
+ struct inode *ip,
+ dm_right_t vp_right,
+ struct inode *rootip,
+ dm_right_t rootvp_right,
+ char *name1,
+ char *name2);
+
+int dm_send_namesp_event(
+ dm_eventtype_t event,
+ struct super_block *sb,
+ struct inode *ip1,
+ dm_right_t vp1_right,
+ struct inode *ip2,
+ dm_right_t vp2_right,
+ const char *name1,
+ const char *name2,
+ mode_t mode,
+ int retcode,
+ int flags);
+
+void dm_send_unmount_event(
+ struct super_block *sbp,
+ struct inode *ip,
+ dm_right_t sbp_right,
+ mode_t mode,
+ int retcode,
+ int flags);
+
+int dm_code_level(void);
+
+int dm_ip_to_handle (
+ struct inode *ip,
+ dm_handle_t *handlep);
+
+#define HAVE_DM_RELEASE_THREADS_ERRNO
+int dm_release_threads(
+ struct super_block *sb,
+ struct inode *inode,
+ int errno);
+
+void dmapi_register(
+ struct file_system_type *fstype,
+ struct filesystem_dmapi_operations *dmapiops);
+
+void dmapi_unregister(
+ struct file_system_type *fstype);
+
+int dmapi_registered(
+ struct file_system_type *fstype,
+ struct filesystem_dmapi_operations **dmapiops);
+
+
+/* The following prototypes and definitions are used by DMAPI as its
+ interface into the filesystem code. Communication between DMAPI and the
+ filesystem are established as follows:
+ 1. DMAPI uses the VFS_DMAPI_FSYS_VECTOR to ask for the addresses
+ of all the functions within the filesystem that it may need to call.
+ 2. The filesystem returns an array of function name/address pairs which
+ DMAPI builds into a function vector.
+ The VFS_DMAPI_FSYS_VECTOR call is only made one time for a particular
+ filesystem type. From then on, DMAPI uses its function vector to call the
+ filesystem functions directly. Functions in the array which DMAPI doesn't
+ recognize are ignored. A dummy function which returns ENOSYS is used for
+ any function that DMAPI needs but which was not provided by the filesystem.
+ If XFS doesn't recognize the VFS_DMAPI_FSYS_VECTOR, DMAPI assumes that it
+ doesn't have the X/Open support code; in this case DMAPI uses the XFS code
+ originally bundled within DMAPI.
+
+ The goal of this interface is to allow incremental changes to be made to
+ both the filesystem and to DMAPI while minimizing inter-patch dependencies,
+ and to eventually allow DMAPI to support multiple filesystem types at the
+ same time should that become necessary.
+*/
+
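Putting the handshake in concrete terms, a filesystem would register its operations at module init roughly as in this sketch; everything prefixed myfs_ is hypothetical.

/* Sketch only: how a filesystem advertises its DMAPI operations.
 * All myfs_* names are hypothetical.
 */
static struct filesystem_dmapi_operations myfs_dmapi_ops = {
	.get_fsys_vector = myfs_get_fsys_vector,
	.fh_to_inode	 = myfs_fh_to_inode,
	.inode_to_fh	 = myfs_inode_to_fh,
	.get_fsid	 = myfs_get_fsid,
	.flushing	 = myfs_flushing,
};

static int __init myfs_dmapi_init(void)
{
	dmapi_register(&myfs_type, &myfs_dmapi_ops);
	return 0;
}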
+typedef enum {
+ DM_FSYS_CLEAR_INHERIT = 0,
+ DM_FSYS_CREATE_BY_HANDLE = 1,
+ DM_FSYS_DOWNGRADE_RIGHT = 2,
+ DM_FSYS_GET_ALLOCINFO_RVP = 3,
+ DM_FSYS_GET_BULKALL_RVP = 4,
+ DM_FSYS_GET_BULKATTR_RVP = 5,
+ DM_FSYS_GET_CONFIG = 6,
+ DM_FSYS_GET_CONFIG_EVENTS = 7,
+ DM_FSYS_GET_DESTROY_DMATTR = 8,
+ DM_FSYS_GET_DIOINFO = 9,
+ DM_FSYS_GET_DIRATTRS_RVP = 10,
+ DM_FSYS_GET_DMATTR = 11,
+ DM_FSYS_GET_EVENTLIST = 12,
+ DM_FSYS_GET_FILEATTR = 13,
+ DM_FSYS_GET_REGION = 14,
+ DM_FSYS_GETALL_DMATTR = 15,
+ DM_FSYS_GETALL_INHERIT = 16,
+ DM_FSYS_INIT_ATTRLOC = 17,
+ DM_FSYS_MKDIR_BY_HANDLE = 18,
+ DM_FSYS_PROBE_HOLE = 19,
+ DM_FSYS_PUNCH_HOLE = 20,
+ DM_FSYS_READ_INVIS_RVP = 21,
+ DM_FSYS_RELEASE_RIGHT = 22,
+ DM_FSYS_REMOVE_DMATTR = 23,
+ DM_FSYS_REQUEST_RIGHT = 24,
+ DM_FSYS_SET_DMATTR = 25,
+ DM_FSYS_SET_EVENTLIST = 26,
+ DM_FSYS_SET_FILEATTR = 27,
+ DM_FSYS_SET_INHERIT = 28,
+ DM_FSYS_SET_REGION = 29,
+ DM_FSYS_SYMLINK_BY_HANDLE = 30,
+ DM_FSYS_SYNC_BY_HANDLE = 31,
+ DM_FSYS_UPGRADE_RIGHT = 32,
+ DM_FSYS_WRITE_INVIS_RVP = 33,
+ DM_FSYS_OBJ_REF_HOLD = 34,
+ DM_FSYS_MAX = 35
+} dm_fsys_switch_t;
+
+
+#define DM_FSYS_OBJ 0x1 /* object refers to a fsys handle */
+
+
+/*
+ * Prototypes for filesystem-specific functions.
+ */
+
+typedef int (*dm_fsys_clear_inherit_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep);
+
+typedef int (*dm_fsys_create_by_handle_t)(
+ struct inode *ip,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname);
+
+typedef int (*dm_fsys_downgrade_right_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type); /* DM_FSYS_OBJ or zero */
+
+typedef int (*dm_fsys_get_allocinfo_rvp_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_off_t __user *offp,
+ u_int nelem,
+ dm_extent_t __user *extentp,
+ u_int __user *nelemp,
+ int *rvalp);
+
+typedef int (*dm_fsys_get_bulkall_rvp_t)(
+ struct inode *ip, /* root inode */
+ dm_right_t right,
+ u_int mask,
+ dm_attrname_t __user *attrnamep,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp,
+ int *rvalp);
+
+typedef int (*dm_fsys_get_bulkattr_rvp_t)(
+ struct inode *ip, /* root inode */
+ dm_right_t right,
+ u_int mask,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp,
+ int *rvalp);
+
+typedef int (*dm_fsys_get_config_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_config_t flagname,
+ dm_size_t __user *retvalp);
+
+typedef int (*dm_fsys_get_config_events_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int nelem,
+ dm_eventset_t __user *eventsetp,
+ u_int __user *nelemp);
+
+typedef int (*dm_fsys_get_destroy_dmattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_attrname_t *attrnamep,
+ char **valuepp,
+ int *vlenp);
+
+typedef int (*dm_fsys_get_dioinfo_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_dioinfo_t __user *diop);
+
+typedef int (*dm_fsys_get_dirattrs_rvp_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int mask,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp,
+ int *rvalp);
+
+typedef int (*dm_fsys_get_dmattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp);
+
+typedef int (*dm_fsys_get_eventlist_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type,
+ u_int nelem,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int *nelemp); /* in kernel space! */
+
+typedef int (*dm_fsys_get_fileattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int mask,
+ dm_stat_t __user *statp);
+
+typedef int (*dm_fsys_get_region_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int nelem,
+ dm_region_t __user *regbufp,
+ u_int __user *nelemp);
+
+typedef int (*dm_fsys_getall_dmattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp);
+
+typedef int (*dm_fsys_getall_inherit_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int nelem,
+ dm_inherit_t __user *inheritbufp,
+ u_int __user *nelemp);
+
+typedef int (*dm_fsys_init_attrloc_t)(
+ struct inode *ip, /* sometimes root inode */
+ dm_right_t right,
+ dm_attrloc_t __user *locp);
+
+typedef int (*dm_fsys_mkdir_by_handle_t)(
+ struct inode *ip,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname);
+
+typedef int (*dm_fsys_probe_hole_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len,
+ dm_off_t __user *roffp,
+ dm_size_t __user *rlenp);
+
+typedef int (*dm_fsys_punch_hole_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len);
+
+typedef int (*dm_fsys_read_invis_rvp_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp,
+ int *rvp);
+
+typedef int (*dm_fsys_release_right_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type);
+
+typedef int (*dm_fsys_remove_dmattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ int setdtime,
+ dm_attrname_t __user *attrnamep);
+
+typedef int (*dm_fsys_request_right_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type, /* DM_FSYS_OBJ or zero */
+ u_int flags,
+ dm_right_t newright);
+
+typedef int (*dm_fsys_set_dmattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ int setdtime,
+ size_t buflen,
+ void __user *bufp);
+
+typedef int (*dm_fsys_set_eventlist_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int maxevent);
+
+typedef int (*dm_fsys_set_fileattr_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int mask,
+ dm_fileattr_t __user *attrp);
+
+typedef int (*dm_fsys_set_inherit_t)(
+ struct inode *ip,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ mode_t mode);
+
+typedef int (*dm_fsys_set_region_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int nelem,
+ dm_region_t __user *regbufp,
+ dm_boolean_t __user *exactflagp);
+
+typedef int (*dm_fsys_symlink_by_handle_t)(
+ struct inode *ip,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname,
+ char __user *path);
+
+typedef int (*dm_fsys_sync_by_handle_t)(
+ struct inode *ip,
+ dm_right_t right);
+
+typedef int (*dm_fsys_upgrade_right_t)(
+ struct inode *ip,
+ dm_right_t right,
+ u_int type); /* DM_FSYS_OBJ or zero */
+
+typedef int (*dm_fsys_write_invis_rvp_t)(
+ struct inode *ip,
+ dm_right_t right,
+ int flags,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp,
+ int *rvp);
+
+typedef void (*dm_fsys_obj_ref_hold_t)(
+ struct inode *ip);
+
+
+/* Structure definitions used by the VFS_DMAPI_FSYS_VECTOR call. */
+
+typedef struct {
+ dm_fsys_switch_t func_no; /* function number */
+ union {
+ dm_fsys_clear_inherit_t clear_inherit;
+ dm_fsys_create_by_handle_t create_by_handle;
+ dm_fsys_downgrade_right_t downgrade_right;
+ dm_fsys_get_allocinfo_rvp_t get_allocinfo_rvp;
+ dm_fsys_get_bulkall_rvp_t get_bulkall_rvp;
+ dm_fsys_get_bulkattr_rvp_t get_bulkattr_rvp;
+ dm_fsys_get_config_t get_config;
+ dm_fsys_get_config_events_t get_config_events;
+ dm_fsys_get_destroy_dmattr_t get_destroy_dmattr;
+ dm_fsys_get_dioinfo_t get_dioinfo;
+ dm_fsys_get_dirattrs_rvp_t get_dirattrs_rvp;
+ dm_fsys_get_dmattr_t get_dmattr;
+ dm_fsys_get_eventlist_t get_eventlist;
+ dm_fsys_get_fileattr_t get_fileattr;
+ dm_fsys_get_region_t get_region;
+ dm_fsys_getall_dmattr_t getall_dmattr;
+ dm_fsys_getall_inherit_t getall_inherit;
+ dm_fsys_init_attrloc_t init_attrloc;
+ dm_fsys_mkdir_by_handle_t mkdir_by_handle;
+ dm_fsys_probe_hole_t probe_hole;
+ dm_fsys_punch_hole_t punch_hole;
+ dm_fsys_read_invis_rvp_t read_invis_rvp;
+ dm_fsys_release_right_t release_right;
+ dm_fsys_remove_dmattr_t remove_dmattr;
+ dm_fsys_request_right_t request_right;
+ dm_fsys_set_dmattr_t set_dmattr;
+ dm_fsys_set_eventlist_t set_eventlist;
+ dm_fsys_set_fileattr_t set_fileattr;
+ dm_fsys_set_inherit_t set_inherit;
+ dm_fsys_set_region_t set_region;
+ dm_fsys_symlink_by_handle_t symlink_by_handle;
+ dm_fsys_sync_by_handle_t sync_by_handle;
+ dm_fsys_upgrade_right_t upgrade_right;
+ dm_fsys_write_invis_rvp_t write_invis_rvp;
+ dm_fsys_obj_ref_hold_t obj_ref_hold;
+ } u_fc;
+} fsys_function_vector_t;
+
+struct dm_fcntl_vector {
+ int code_level;
+ int count; /* Number of functions in the vector */
+ fsys_function_vector_t *vecp;
+};
+typedef struct dm_fcntl_vector dm_fcntl_vector_t;
+
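The filesystem side of VFS_DMAPI_FSYS_VECTOR then fills one of these vectors with function-number/address pairs, roughly as in the hedged sketch below (myfs_* names again hypothetical):

/* Sketch only: the array DMAPI turns into its per-filesystem dispatch
 * table. Entries DMAPI does not recognize are ignored; missing ones
 * fall back to a dummy returning ENOSYS.
 */
static fsys_function_vector_t myfs_vec[] = {
	{ .func_no = DM_FSYS_PUNCH_HOLE,
	  .u_fc.punch_hole = myfs_punch_hole },
	{ .func_no = DM_FSYS_GET_DMATTR,
	  .u_fc.get_dmattr = myfs_get_dmattr },
};

static int myfs_get_fsys_vector(struct super_block *sb, void *addr)
{
	dm_fcntl_vector_t *vecrq = addr;

	vecrq->code_level = DM_CLVL_XOPEN;
	vecrq->count = ARRAY_SIZE(myfs_vec);
	vecrq->vecp = myfs_vec;
	return 0;
}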
+struct dm_fcntl_mapevent {
+ size_t length; /* length of transfer */
+ dm_eventtype_t max_event; /* Maximum (WRITE or READ) event */
+ int error; /* returned error code */
+};
+typedef struct dm_fcntl_mapevent dm_fcntl_mapevent_t;
+
+#endif /* __KERNEL__ */
+
+
+/* The following definitions are needed both by the kernel and by the
+ library routines.
+*/
+
+#define DM_MAX_HANDLE_SIZE 56 /* maximum size for a file handle */
+
+
+/*
+ * Opcodes for dmapi ioctl.
+ */
+
+#define DM_CLEAR_INHERIT 1
+#define DM_CREATE_BY_HANDLE 2
+#define DM_CREATE_SESSION 3
+#define DM_CREATE_USEREVENT 4
+#define DM_DESTROY_SESSION 5
+#define DM_DOWNGRADE_RIGHT 6
+#define DM_FD_TO_HANDLE 7
+#define DM_FIND_EVENTMSG 8
+#define DM_GET_ALLOCINFO 9
+#define DM_GET_BULKALL 10
+#define DM_GET_BULKATTR 11
+#define DM_GET_CONFIG 12
+#define DM_GET_CONFIG_EVENTS 13
+#define DM_GET_DIOINFO 14
+#define DM_GET_DIRATTRS 15
+#define DM_GET_DMATTR 16
+#define DM_GET_EVENTLIST 17
+#define DM_GET_EVENTS 18
+#define DM_GET_FILEATTR 19
+#define DM_GET_MOUNTINFO 20
+#define DM_GET_REGION 21
+#define DM_GETALL_DISP 22
+#define DM_GETALL_DMATTR 23
+#define DM_GETALL_INHERIT 24
+#define DM_GETALL_SESSIONS 25
+#define DM_GETALL_TOKENS 26
+#define DM_INIT_ATTRLOC 27
+#define DM_MKDIR_BY_HANDLE 28
+#define DM_MOVE_EVENT 29
+#define DM_OBJ_REF_HOLD 30
+#define DM_OBJ_REF_QUERY 31
+#define DM_OBJ_REF_RELE 32
+#define DM_PATH_TO_FSHANDLE 33
+#define DM_PATH_TO_HANDLE 34
+#define DM_PENDING 35
+#define DM_PROBE_HOLE 36
+#define DM_PUNCH_HOLE 37
+#define DM_QUERY_RIGHT 38
+#define DM_QUERY_SESSION 39
+#define DM_READ_INVIS 40
+#define DM_RELEASE_RIGHT 41
+#define DM_REMOVE_DMATTR 42
+#define DM_REQUEST_RIGHT 43
+#define DM_RESPOND_EVENT 44
+#define DM_SEND_MSG 45
+#define DM_SET_DISP 46
+#define DM_SET_DMATTR 47
+#define DM_SET_EVENTLIST 48
+#define DM_SET_FILEATTR 49
+#define DM_SET_INHERIT 50
+#define DM_SET_REGION 51
+#define DM_SET_RETURN_ON_DESTROY 52
+#define DM_SYMLINK_BY_HANDLE 53
+#define DM_SYNC_BY_HANDLE 54
+#define DM_UPGRADE_RIGHT 55
+#define DM_WRITE_INVIS 56
+#define DM_OPEN_BY_HANDLE 57
+
+#endif /* __DMAPI_KERN_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "dmapi.h"
+#include "dmapi_kern.h"
+#include "dmapi_private.h"
+
+/* LOOKUP_POSITIVE was removed in Linux 2.6 */
+#ifndef LOOKUP_POSITIVE
+#define LOOKUP_POSITIVE 0
+#endif
+
+dm_fsreg_t *dm_registers; /* head of filesystem registration list */
+int dm_fsys_cnt; /* number of filesystems on dm_registers list */
+lock_t dm_reg_lock = SPIN_LOCK_UNLOCKED; /* lock for dm_registers */
+
+
+
+#ifdef CONFIG_PROC_FS
+static int
+fsreg_read_pfs(char *buffer, char **start, off_t offset,
+ int count, int *eof, void *data)
+{
+ int len;
+ int i;
+ dm_fsreg_t *fsrp = (dm_fsreg_t*)data;
+ char statebuf[30];
+
+#define CHKFULL if(len >= count) break;
+#define ADDBUF(a,b) len += sprintf(buffer + len, a, b); CHKFULL;
+
+ switch (fsrp->fr_state) {
+ case DM_STATE_MOUNTING: sprintf(statebuf, "mounting"); break;
+ case DM_STATE_MOUNTED: sprintf(statebuf, "mounted"); break;
+ case DM_STATE_UNMOUNTING: sprintf(statebuf, "unmounting"); break;
+ case DM_STATE_UNMOUNTED: sprintf(statebuf, "unmounted"); break;
+ default:
+ sprintf(statebuf, "unknown:%d", (int)fsrp->fr_state);
+ break;
+ }
+
+ len = 0;
+ while (1) {
+ ADDBUF("fsrp=0x%p\n", fsrp);
+ ADDBUF("fr_next=0x%p\n", fsrp->fr_next);
+ ADDBUF("fr_sb=0x%p\n", fsrp->fr_sb);
+ ADDBUF("fr_tevp=0x%p\n", fsrp->fr_tevp);
+ ADDBUF("fr_fsid=%c\n", '?');
+ ADDBUF("fr_msg=0x%p\n", fsrp->fr_msg);
+ ADDBUF("fr_msgsize=%d\n", fsrp->fr_msgsize);
+ ADDBUF("fr_state=%s\n", statebuf);
+ ADDBUF("fr_dispq=%c\n", '?');
+ ADDBUF("fr_dispcnt=%d\n", fsrp->fr_dispcnt);
+
+ ADDBUF("fr_evt_dispq.eq_head=0x%p\n", fsrp->fr_evt_dispq.eq_head);
+ ADDBUF("fr_evt_dispq.eq_tail=0x%p\n", fsrp->fr_evt_dispq.eq_tail);
+ ADDBUF("fr_evt_dispq.eq_count=%d\n", fsrp->fr_evt_dispq.eq_count);
+
+ ADDBUF("fr_queue=%c\n", '?');
+ ADDBUF("fr_lock=%c\n", '?');
+ ADDBUF("fr_hdlcnt=%d\n", fsrp->fr_hdlcnt);
+ ADDBUF("fr_vfscnt=%d\n", fsrp->fr_vfscnt);
+ ADDBUF("fr_unmount=%d\n", fsrp->fr_unmount);
+
+ len += sprintf(buffer + len, "fr_rattr=");
+ CHKFULL;
+ for(i = 0; i <= DM_ATTR_NAME_SIZE; ++i){
+ ADDBUF("%c", fsrp->fr_rattr.an_chars[i]);
+ }
+ CHKFULL;
+ len += sprintf(buffer + len, "\n");
+ CHKFULL;
+
+ for(i = 0; i < DM_EVENT_MAX; i++){
+ if( fsrp->fr_sessp[i] != NULL ){
+ ADDBUF("fr_sessp[%d]=", i);
+ ADDBUF("0x%p\n", fsrp->fr_sessp[i]);
+ }
+ }
+ CHKFULL;
+
+ break;
+ }
+
+ if (offset >= len) {
+ *start = buffer;
+ *eof = 1;
+ return 0;
+ }
+ *start = buffer + offset;
+ if ((len -= offset) > count)
+ return count;
+ *eof = 1;
+
+ return len;
+}
+#endif
+
+
+/* Returns a pointer to the filesystem structure for the filesystem
+ referenced by fsidp. The caller is responsible for obtaining dm_reg_lock
+ before calling this routine.
+*/
+
+static dm_fsreg_t *
+dm_find_fsreg(
+ dm_fsid_t *fsidp)
+{
+ dm_fsreg_t *fsrp;
+
+ for (fsrp = dm_registers; fsrp; fsrp = fsrp->fr_next) {
+ if (!memcmp(&fsrp->fr_fsid, fsidp, sizeof(*fsidp)))
+ break;
+ }
+ return(fsrp);
+}
+
+
+/* Given a fsid_t, dm_find_fsreg_and_lock() finds the dm_fsreg_t structure
+ for that filesystem if one exists, and returns a pointer to the structure
+ after obtaining its 'fr_lock' so that the caller can safely modify the
+ dm_fsreg_t. The caller is responsible for releasing 'fr_lock'.
+*/
+
+static dm_fsreg_t *
+dm_find_fsreg_and_lock(
+ dm_fsid_t *fsidp,
+ unsigned long *lcp) /* address of returned lock cookie */
+{
+ dm_fsreg_t *fsrp;
+
+ for (;;) {
+ *lcp = mutex_spinlock(&dm_reg_lock);
+
+ if ((fsrp = dm_find_fsreg(fsidp)) == NULL) {
+ mutex_spinunlock(&dm_reg_lock, *lcp);
+ return(NULL);
+ }
+ if (spin_trylock(&fsrp->fr_lock)) {
+ nested_spinunlock(&dm_reg_lock);
+ return(fsrp); /* success */
+ }
+
+ /* If the second lock is not available, drop the first and
+ start over. This gives the CPU a chance to process any
+ interrupts, and also allows processes which want a fr_lock
+ for a different filesystem to proceed.
+ */
+
+ mutex_spinunlock(&dm_reg_lock, *lcp);
+ }
+}
+
+
+/* dm_add_fsys_entry() is called when a DM_EVENT_MOUNT event is about to be
+ sent. It creates a dm_fsreg_t structure for the filesystem and stores a
+ pointer to a copy of the mount event within that structure so that it is
+ available for subsequent dm_get_mountinfo() calls.
+*/
+
+int
+dm_add_fsys_entry(
+ struct super_block *sb,
+ dm_tokevent_t *tevp)
+{
+ dm_fsreg_t *fsrp;
+ int msgsize;
+ void *msg;
+ unsigned long lc; /* lock cookie */
+ dm_fsid_t fsid;
+ struct filesystem_dmapi_operations *dops;
+
+ dops = dm_fsys_ops(sb);
+ ASSERT(dops);
+ dops->get_fsid(sb, &fsid);
+
+ /* Allocate and initialize a dm_fsreg_t structure for the filesystem. */
+
+ msgsize = tevp->te_allocsize - offsetof(dm_tokevent_t, te_event);
+ msg = kmalloc(msgsize, GFP_KERNEL);
+ if (msg == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+ memcpy(msg, &tevp->te_event, msgsize);
+
+ fsrp = kmem_cache_alloc(dm_fsreg_cachep, GFP_KERNEL);
+ if (fsrp == NULL) {
+ kfree(msg);
+ printk("%s/%d: kmem_cache_alloc(dm_fsreg_cachep) returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+ memset(fsrp, 0, sizeof(*fsrp));
+
+ fsrp->fr_sb = sb;
+ fsrp->fr_tevp = tevp;
+ memcpy(&fsrp->fr_fsid, &fsid, sizeof(fsid));
+ fsrp->fr_msg = msg;
+ fsrp->fr_msgsize = msgsize;
+ fsrp->fr_state = DM_STATE_MOUNTING;
+ sv_init(&fsrp->fr_dispq, SV_DEFAULT, "fr_dispq");
+ sv_init(&fsrp->fr_queue, SV_DEFAULT, "fr_queue");
+ spinlock_init(&fsrp->fr_lock, "fr_lock");
+
+ /* If no other mounted DMAPI filesystem already has this same
+ fsid_t, then add this filesystem to the list.
+ */
+
+ lc = mutex_spinlock(&dm_reg_lock);
+
+ if (!dm_find_fsreg(&fsid)) {
+ fsrp->fr_next = dm_registers;
+ dm_registers = fsrp;
+ dm_fsys_cnt++;
+ mutex_spinunlock(&dm_reg_lock, lc);
+#ifdef CONFIG_PROC_FS
+ {
+ char buf[100];
+ struct proc_dir_entry *entry;
+
+ sprintf(buf, DMAPI_DBG_PROCFS "/fsreg/0x%p", fsrp);
+		entry = create_proc_read_entry(buf, 0, NULL, fsreg_read_pfs, fsrp);
+		if (entry)
+			entry->owner = THIS_MODULE;
+ }
+#endif
+ return(0);
+ }
+
+ /* A fsid_t collision occurred, so prevent this new filesystem from
+ mounting.
+ */
+
+ mutex_spinunlock(&dm_reg_lock, lc);
+
+ sv_destroy(&fsrp->fr_dispq);
+ sv_destroy(&fsrp->fr_queue);
+ spinlock_destroy(&fsrp->fr_lock);
+ kfree(msg);
+ kmem_cache_free(dm_fsreg_cachep, fsrp);
+ return(-EBUSY);
+}
+
+
+/* dm_change_fsys_entry() is called whenever a filesystem's mount state is
+ about to change. The state is changed to DM_STATE_MOUNTED after a
+ successful DM_EVENT_MOUNT event or after a failed unmount. It is changed
+ to DM_STATE_UNMOUNTING after a successful DM_EVENT_PREUNMOUNT event.
+ Finally, the state is changed to DM_STATE_UNMOUNTED after a successful
+ unmount. It stays in this state until the DM_EVENT_UNMOUNT event is
+ queued, at which point the filesystem entry is removed.
+*/
+
+void
+dm_change_fsys_entry(
+ struct super_block *sb,
+ dm_fsstate_t newstate)
+{
+ dm_fsreg_t *fsrp;
+ int seq_error;
+ unsigned long lc; /* lock cookie */
+ dm_fsid_t fsid;
+ struct filesystem_dmapi_operations *dops;
+
+ /* Find the filesystem referenced by the sb's fsid_t. This should
+ always succeed.
+ */
+
+ dops = dm_fsys_ops(sb);
+ ASSERT(dops);
+ dops->get_fsid(sb, &fsid);
+
+ if ((fsrp = dm_find_fsreg_and_lock(&fsid, &lc)) == NULL) {
+ panic("dm_change_fsys_entry: can't find DMAPI fsrp for "
+ "sb %p\n", sb);
+ }
+
+ /* Make sure that the new state is acceptable given the current state
+ of the filesystem. Any error here is a major DMAPI/filesystem
+ screwup.
+ */
+
+ seq_error = 0;
+ switch (newstate) {
+ case DM_STATE_MOUNTED:
+ if (fsrp->fr_state != DM_STATE_MOUNTING &&
+ fsrp->fr_state != DM_STATE_UNMOUNTING) {
+ seq_error++;
+ }
+ break;
+ case DM_STATE_UNMOUNTING:
+ if (fsrp->fr_state != DM_STATE_MOUNTED)
+ seq_error++;
+ break;
+ case DM_STATE_UNMOUNTED:
+ if (fsrp->fr_state != DM_STATE_UNMOUNTING)
+ seq_error++;
+ break;
+ default:
+ seq_error++;
+ break;
+ }
+ if (seq_error) {
+ panic("dm_change_fsys_entry: DMAPI sequence error: old state "
+ "%d, new state %d, fsrp %p\n", fsrp->fr_state,
+ newstate, fsrp);
+ }
+
+ /* If the old state was DM_STATE_UNMOUNTING, then processes could be
+ sleeping in dm_handle_to_ip() waiting for their DM_NO_TOKEN handles
+ to be translated to inodes. Wake them up so that they either
+ continue (new state is DM_STATE_MOUNTED) or fail (new state is
+ DM_STATE_UNMOUNTED).
+ */
+
+ if (fsrp->fr_state == DM_STATE_UNMOUNTING) {
+ if (fsrp->fr_hdlcnt)
+ sv_broadcast(&fsrp->fr_queue);
+ }
+
+ /* Change the filesystem's mount state to its new value. */
+
+ fsrp->fr_state = newstate;
+ fsrp->fr_tevp = NULL; /* not valid after DM_STATE_MOUNTING */
+
+ /* If the new state is DM_STATE_UNMOUNTING, wait until any application
+ threads currently in the process of making VFS_VGET and VFS_ROOT
+ calls are done before we let this unmount thread continue the
+ unmount. (We want to make sure that the unmount will see these
+ inode references during its scan.)
+ */
+
+ if (newstate == DM_STATE_UNMOUNTING) {
+ while (fsrp->fr_vfscnt) {
+ fsrp->fr_unmount++;
+ sv_wait(&fsrp->fr_queue, 1, &fsrp->fr_lock, lc);
+ lc = mutex_spinlock(&fsrp->fr_lock);
+ fsrp->fr_unmount--;
+ }
+ }
+
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+}
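+
+/* For reference, the state transitions enforced above form a simple
+   cycle with one back-edge for a failed unmount:
+
+	MOUNTING --> MOUNTED --> UNMOUNTING --> UNMOUNTED
+	                ^             |
+	                +-------------+  (unmount failed)
+
+   Any other transition panics, since it indicates a DMAPI/filesystem
+   sequencing bug.
+*/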
+
+
+/* dm_remove_fsys_entry() gets called after a failed mount or after an
+ DM_EVENT_UNMOUNT event has been queued. (The filesystem entry must stay
+ until the DM_EVENT_UNMOUNT reply is queued so that the event can use the
+ 'fr_sessp' list to see which session to send the event to.)
+*/
+
+void
+dm_remove_fsys_entry(
+ struct super_block *sb)
+{
+ dm_fsreg_t **fsrpp;
+ dm_fsreg_t *fsrp;
+ unsigned long lc; /* lock cookie */
+ struct filesystem_dmapi_operations *dops;
+ dm_fsid_t fsid;
+
+ dops = dm_fsys_ops(sb);
+ ASSERT(dops);
+ dops->get_fsid(sb, &fsid);
+
+ /* Find the filesystem referenced by the sb's fsid_t and dequeue
+ it after verifying that the fr_state shows a filesystem that is
+ either mounting or unmounted.
+ */
+
+ lc = mutex_spinlock(&dm_reg_lock);
+
+ fsrpp = &dm_registers;
+ while ((fsrp = *fsrpp) != NULL) {
+ if (!memcmp(&fsrp->fr_fsid, &fsid, sizeof(fsrp->fr_fsid)))
+ break;
+ fsrpp = &fsrp->fr_next;
+ }
+ if (fsrp == NULL) {
+ mutex_spinunlock(&dm_reg_lock, lc);
+ panic("dm_remove_fsys_entry: can't find DMAPI fsrp for "
+ "sb %p\n", sb);
+ }
+
+ nested_spinlock(&fsrp->fr_lock);
+
+ /* Verify that it makes sense to remove this entry. */
+
+ if (fsrp->fr_state != DM_STATE_MOUNTING &&
+ fsrp->fr_state != DM_STATE_UNMOUNTED) {
+ nested_spinunlock(&fsrp->fr_lock);
+ mutex_spinunlock(&dm_reg_lock, lc);
+ panic("dm_remove_fsys_entry: DMAPI sequence error: old state "
+ "%d, fsrp %p\n", fsrp->fr_state, fsrp);
+ }
+
+ *fsrpp = fsrp->fr_next;
+ dm_fsys_cnt--;
+
+ nested_spinunlock(&dm_reg_lock);
+
+ /* Since the filesystem is about to finish unmounting, we must be sure
+ that no inodes are being referenced within the filesystem before we
+ let this event thread continue. If the filesystem is currently in
+ state DM_STATE_MOUNTING, then we know by definition that there can't
+ be any references. If the filesystem is DM_STATE_UNMOUNTED, then
+ any application threads referencing handles with DM_NO_TOKEN should
+ have already been awakened by dm_change_fsys_entry and should be
+ long gone by now. Just in case they haven't yet left, sleep here
+ until they are really gone.
+ */
+
+ while (fsrp->fr_hdlcnt) {
+ fsrp->fr_unmount++;
+ sv_wait(&fsrp->fr_queue, 1, &fsrp->fr_lock, lc);
+ lc = mutex_spinlock(&fsrp->fr_lock);
+ fsrp->fr_unmount--;
+ }
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+
+ /* Release all memory. */
+
+#ifdef CONFIG_PROC_FS
+ {
+ char buf[100];
+ sprintf(buf, DMAPI_DBG_PROCFS "/fsreg/0x%p", fsrp);
+ remove_proc_entry(buf, NULL);
+ }
+#endif
+ dm_fsys_ops_release(sb);
+ sv_destroy(&fsrp->fr_dispq);
+ sv_destroy(&fsrp->fr_queue);
+ spinlock_destroy(&fsrp->fr_lock);
+ kfree(fsrp->fr_msg);
+ kmem_cache_free(dm_fsreg_cachep, fsrp);
+}
+
+
+/* Get an inode for the object referenced by handlep. We cannot use
+ altgetvfs() because it fails if the VFS_OFFLINE bit is set, which means
+ that any call to dm_handle_to_ip() while a umount is in progress would
+ return an error, even if the umount can't possibly succeed because users
+ are in the filesystem. The requests would start to fail as soon as the
+ umount begins, even before the application receives the DM_EVENT_PREUNMOUNT
+ event.
+
+ dm_handle_to_ip() emulates the behavior of lookup() while an unmount is
+ in progress. Any call to dm_handle_to_ip() while the filesystem is in the
+ DM_STATE_UNMOUNTING state will block. If the unmount eventually succeeds,
+ the requests will wake up and fail. If the unmount fails, the requests will
+ wake up and complete normally.
+
+ While a filesystem is in state DM_STATE_MOUNTING, dm_handle_to_ip() will
+ fail all requests. Per the DMAPI spec, the only handles in the filesystem
+ which are valid during a mount event are the handles within the event
+ itself.
+*/
+
+struct inode *
+dm_handle_to_ip(
+ dm_handle_t *handlep,
+ short *typep)
+{
+ dm_fsreg_t *fsrp;
+ short type;
+ unsigned long lc; /* lock cookie */
+ int error = 0;
+ dm_fid_t *fidp;
+ struct super_block *sb;
+ struct inode *ip;
+ int filetype;
+ struct filesystem_dmapi_operations *dmapiops;
+
+ if ((fsrp = dm_find_fsreg_and_lock(&handlep->ha_fsid, &lc)) == NULL)
+ return NULL;
+
+ fidp = (dm_fid_t*)&handlep->ha_fid;
+ /* If mounting, and we are not asking for a filesystem handle,
+ * then fail the request. (dm_fid_len==0 for fshandle)
+ */
+ if ((fsrp->fr_state == DM_STATE_MOUNTING) &&
+ (fidp->dm_fid_len != 0)) {
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+ return NULL;
+ }
+
+ for (;;) {
+ if (fsrp->fr_state == DM_STATE_MOUNTING)
+ break;
+ if (fsrp->fr_state == DM_STATE_MOUNTED)
+ break;
+ if (fsrp->fr_state == DM_STATE_UNMOUNTED) {
+ if (fsrp->fr_unmount && fsrp->fr_hdlcnt == 0)
+ sv_broadcast(&fsrp->fr_queue);
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+ return NULL;
+ }
+
+ /* Must be DM_STATE_UNMOUNTING. */
+
+ fsrp->fr_hdlcnt++;
+ sv_wait(&fsrp->fr_queue, 1, &fsrp->fr_lock, lc);
+ lc = mutex_spinlock(&fsrp->fr_lock);
+ fsrp->fr_hdlcnt--;
+ }
+
+ fsrp->fr_vfscnt++;
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+
+ /* Now that the mutex is released, wait until we have access to the
+ inode.
+ */
+
+ sb = fsrp->fr_sb;
+ error = -ENOSYS;
+ dmapiops = dm_fsys_ops(sb);
+ ASSERT(dmapiops);
+ if (dmapiops->fh_to_inode)
+ error = dmapiops->fh_to_inode(sb, &ip, (void*)fidp);
+
+ lc = mutex_spinlock(&fsrp->fr_lock);
+
+ fsrp->fr_vfscnt--;
+ if (fsrp->fr_unmount && fsrp->fr_vfscnt == 0)
+ sv_broadcast(&fsrp->fr_queue);
+
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+ if (error || ip == NULL)
+ return NULL;
+
+ filetype = ip->i_mode & S_IFMT;
+ if (fidp->dm_fid_len == 0) {
+ type = DM_TDT_VFS;
+ } else if (filetype == S_IFREG) {
+ type = DM_TDT_REG;
+ } else if (filetype == S_IFDIR) {
+ type = DM_TDT_DIR;
+ } else if (filetype == S_IFLNK) {
+ type = DM_TDT_LNK;
+ } else {
+ type = DM_TDT_OTH;
+ }
+ *typep = type;
+ return ip;
+}
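+
+/* A sketch of typical dm_handle_to_ip() usage; the returned inode holds
+   a reference which the caller must drop with iput() when finished:
+
+	short		type;
+	struct inode	*ip;
+
+	if ((ip = dm_handle_to_ip(&handle, &type)) == NULL)
+		return -EBADF;
+	if (type == DM_TDT_REG)
+		... operate on the regular file ...
+	iput(ip);
+*/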
+
+
+int
+dm_ip_to_handle(
+ struct inode *ip,
+ dm_handle_t *handlep)
+{
+ int error;
+ dm_fid_t fid;
+ dm_fsid_t fsid;
+ int hsize;
+ struct filesystem_dmapi_operations *dops;
+
+ dops = dm_fsys_ops(ip->i_sb);
+ ASSERT(dops);
+
+ error = dops->inode_to_fh(ip, &fid, &fsid);
+ if (error)
+ return error;
+
+ memcpy(&handlep->ha_fsid, &fsid, sizeof(fsid));
+ memcpy(&handlep->ha_fid, &fid, fid.dm_fid_len + sizeof fid.dm_fid_len);
+ hsize = DM_HSIZE(*handlep);
+ memset((char *)handlep + hsize, 0, sizeof(*handlep) - hsize);
+ return 0;
+}
+
+
+/* Given an inode, check if that inode resides in filesystem that supports
+ DMAPI. Returns zero if the inode is in a DMAPI filesystem, otherwise
+ returns an errno.
+*/
+
+int
+dm_check_dmapi_ip(
+ struct inode *ip)
+{
+ dm_handle_t handle;
+ /* REFERENCED */
+ dm_fsreg_t *fsrp;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ if ((error = dm_ip_to_handle(ip, &handle)) != 0)
+ return(error);
+
+ if ((fsrp = dm_find_fsreg_and_lock(&handle.ha_fsid, &lc)) == NULL)
+ return(-EBADF);
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+ return(0);
+}
+
+
+/* Return a pointer to the DM_EVENT_MOUNT event while a mount is still in
+ progress. This is only called by dm_get_config and dm_get_config_events
+ which need to access the filesystem during a mount but which don't have
+ a session and token to use.
+*/
+
+dm_tokevent_t *
+dm_find_mount_tevp_and_lock(
+ dm_fsid_t *fsidp,
+ unsigned long *lcp) /* address of returned lock cookie */
+{
+ dm_fsreg_t *fsrp;
+
+ if ((fsrp = dm_find_fsreg_and_lock(fsidp, lcp)) == NULL)
+ return(NULL);
+
+ if (!fsrp->fr_tevp || fsrp->fr_state != DM_STATE_MOUNTING) {
+ mutex_spinunlock(&fsrp->fr_lock, *lcp);
+ return(NULL);
+ }
+ nested_spinlock(&fsrp->fr_tevp->te_lock);
+ nested_spinunlock(&fsrp->fr_lock);
+ return(fsrp->fr_tevp);
+}
+
+
+/* Wait interruptibly until a session registers disposition for 'event' in
+ filesystem 'sb'. Upon successful exit, both the filesystem's dm_fsreg_t
+ structure and the session's dm_session_t structure are locked. The caller
+ is responsible for unlocking both structures using the returned cookies.
+
+ Warning: The locks can be dropped in any order, but the 'lc2p' cookie MUST
+ BE USED FOR THE FIRST UNLOCK, and the lc1p cookie must be used for the
+ second unlock. If this is not done, the CPU will be interruptible while
+ holding a mutex, which could deadlock the machine!
+*/
+
+static int
+dm_waitfor_disp(
+ struct super_block *sb,
+ dm_tokevent_t *tevp,
+ dm_fsreg_t **fsrpp,
+ unsigned long *lc1p, /* addr of first returned lock cookie */
+ dm_session_t **sessionpp,
+ unsigned long *lc2p) /* addr of 2nd returned lock cookie */
+{
+ dm_eventtype_t event = tevp->te_msg.ev_type;
+ dm_session_t *s;
+ dm_fsreg_t *fsrp;
+ dm_fsid_t fsid;
+ struct filesystem_dmapi_operations *dops;
+
+ dops = dm_fsys_ops(sb);
+ ASSERT(dops);
+
+ dops->get_fsid(sb, &fsid);
+ if ((fsrp = dm_find_fsreg_and_lock(&fsid, lc1p)) == NULL)
+ return -ENOENT;
+
+ /* If no session is registered for this event in the specified
+ filesystem, then sleep interruptibly until one does.
+ */
+
+ for (;;) {
+ int rc = 0;
+
+ /* The dm_find_session_and_lock() call is needed because a
+ session that is in the process of being removed might still
+ be in the dm_fsreg_t structure but won't be in the
+ dm_sessions list.
+ */
+
+ if ((s = fsrp->fr_sessp[event]) != NULL &&
+ dm_find_session_and_lock(s->sn_sessid, &s, lc2p) == 0) {
+ break;
+ }
+
+		/* No one is currently registered.  DM_EVENT_UNMOUNT events
+ don't wait for anyone to register because the unmount is
+ already past the point of no return.
+ */
+
+ if (event == DM_EVENT_UNMOUNT) {
+ mutex_spinunlock(&fsrp->fr_lock, *lc1p);
+ return -ENOENT;
+ }
+
+ /* Wait until a session registers for disposition of this
+ event.
+ */
+
+ fsrp->fr_dispcnt++;
+ dm_link_event(tevp, &fsrp->fr_evt_dispq);
+
+ sv_wait_sig(&fsrp->fr_dispq, 1, &fsrp->fr_lock, *lc1p);
+ rc = signal_pending(current);
+
+ *lc1p = mutex_spinlock(&fsrp->fr_lock);
+ fsrp->fr_dispcnt--;
+ dm_unlink_event(tevp, &fsrp->fr_evt_dispq);
+#ifdef HAVE_DM_QUEUE_FLUSH
+ if (tevp->te_flags & DM_TEF_FLUSH) {
+ mutex_spinunlock(&fsrp->fr_lock, *lc1p);
+ return tevp->te_reply;
+ }
+#endif /* HAVE_DM_QUEUE_FLUSH */
+ if (rc) { /* if signal was received */
+ mutex_spinunlock(&fsrp->fr_lock, *lc1p);
+ return -EINTR;
+ }
+ }
+ *sessionpp = s;
+ *fsrpp = fsrp;
+ return 0;
+}
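+
+/* Per the warning above, whichever lock is dropped first must be
+   dropped using the SECOND cookie, e.g.:
+
+	error = dm_waitfor_disp(sb, tevp, &fsrp, &lc1, &s, &lc2);
+	if (!error) {
+		mutex_spinunlock(&s->sn_qlock, lc2);	<- lc2 first
+		mutex_spinunlock(&fsrp->fr_lock, lc1);	<- then lc1
+	}
+*/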
+
+
+/* Returns the session pointer for the session registered for an event
+ in the given sb. If successful, the session is locked upon return. The
+ caller is responsible for releasing the lock. If no session is currently
+ registered for the event, dm_waitfor_disp_session() will sleep interruptibly
+ until a registration occurs.
+*/
+
+int
+dm_waitfor_disp_session(
+ struct super_block *sb,
+ dm_tokevent_t *tevp,
+ dm_session_t **sessionpp,
+ unsigned long *lcp)
+{
+ dm_fsreg_t *fsrp;
+ unsigned long lc2;
+ int error;
+
+	if (tevp->te_msg.ev_type < 0 || tevp->te_msg.ev_type >= DM_EVENT_MAX)
+ return(-EIO);
+
+ error = dm_waitfor_disp(sb, tevp, &fsrp, lcp, sessionpp, &lc2);
+ if (!error)
+ mutex_spinunlock(&fsrp->fr_lock, lc2); /* rev. cookie order*/
+ return(error);
+}
+
+
+/* Find the session registered for the DM_EVENT_DESTROY event on the specified
+ filesystem, sleeping if necessary until registration occurs. Once found,
+ copy the session's return-on-destroy attribute name, if any, back to the
+ caller.
+*/
+
+int
+dm_waitfor_destroy_attrname(
+ struct super_block *sbp,
+ dm_attrname_t *attrnamep)
+{
+ dm_tokevent_t *tevp;
+ dm_session_t *s;
+ dm_fsreg_t *fsrp;
+ int error;
+ unsigned long lc1; /* first lock cookie */
+ unsigned long lc2; /* second lock cookie */
+ void *msgp;
+
+ tevp = dm_evt_create_tevp(DM_EVENT_DESTROY, 1, (void**)&msgp);
+ error = dm_waitfor_disp(sbp, tevp, &fsrp, &lc1, &s, &lc2);
+ if (!error) {
+ *attrnamep = fsrp->fr_rattr; /* attribute or zeros */
+ mutex_spinunlock(&s->sn_qlock, lc2); /* rev. cookie order */
+ mutex_spinunlock(&fsrp->fr_lock, lc1);
+ }
+ dm_evt_rele_tevp(tevp,0);
+ return(error);
+}
+
+
+/* Unregisters the session for the disposition of all events on all
+ filesystems. This routine is not called until the session has been
+ dequeued from the session list and its session lock has been dropped,
+ but before the actual structure is freed, so it is safe to grab the
+ 'dm_reg_lock' here. If dm_waitfor_disp_session() happens to be called
+ by another thread, it won't find this session on the session list and
+ will wait until a new session registers.
+*/
+
+void
+dm_clear_fsreg(
+ dm_session_t *s)
+{
+ dm_fsreg_t *fsrp;
+ int event;
+ unsigned long lc; /* lock cookie */
+
+ lc = mutex_spinlock(&dm_reg_lock);
+
+ for (fsrp = dm_registers; fsrp != NULL; fsrp = fsrp->fr_next) {
+ nested_spinlock(&fsrp->fr_lock);
+ for (event = 0; event < DM_EVENT_MAX; event++) {
+ if (fsrp->fr_sessp[event] != s)
+ continue;
+ fsrp->fr_sessp[event] = NULL;
+ if (event == DM_EVENT_DESTROY)
+ memset(&fsrp->fr_rattr, 0, sizeof(fsrp->fr_rattr));
+ }
+ nested_spinunlock(&fsrp->fr_lock);
+ }
+
+ mutex_spinunlock(&dm_reg_lock, lc);
+}
+
+
+/*
+ * Return the handle for the object named by path.
+ */
+
+int
+dm_path_to_hdl(
+ char __user *path, /* any path name */
+ void __user *hanp, /* user's data buffer */
+ size_t __user *hlenp) /* set to size of data copied */
+{
+ /* REFERENCED */
+ dm_fsreg_t *fsrp;
+ dm_handle_t handle;
+ size_t hlen;
+ int error;
+ unsigned long lc; /* lock cookie */
+ struct nameidata nd;
+ struct inode *inode;
+ size_t len;
+ char *name;
+ struct filesystem_dmapi_operations *dops;
+
+ /* XXX get things straightened out so getname() works here? */
+ if (!(len = strnlen_user(path, PATH_MAX)))
+ return(-EFAULT);
+ if (len == 1)
+ return(-ENOENT);
+ if (len > PATH_MAX)
+ return(-ENAMETOOLONG);
+ name = kmalloc(len, GFP_KERNEL);
+ if (name == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return(-ENOMEM);
+ }
+ if (copy_from_user(name, path, len)) {
+ kfree(name);
+ return(-EFAULT);
+ }
+
+ error = path_lookup(name, LOOKUP_POSITIVE, &nd);
+ kfree(name);
+ if (error)
+ return error;
+
+ ASSERT(nd.path.dentry);
+ ASSERT(nd.path.dentry->d_inode);
+ inode = igrab(nd.path.dentry->d_inode);
+ path_put(&nd.path);
+
+ dops = dm_fsys_ops(inode->i_sb);
+ if (dops == NULL) {
+ /* No longer in a dmapi-capable filesystem...Toto */
+ iput(inode);
+ return -EINVAL;
+ }
+
+ /* we need the inode */
+ error = dm_ip_to_handle(inode, &handle);
+ iput(inode);
+ if (error)
+ return(error);
+
+ if ((fsrp = dm_find_fsreg_and_lock(&handle.ha_fsid, &lc)) == NULL)
+ return(-EBADF);
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+
+ hlen = DM_HSIZE(handle);
+
+ if (copy_to_user(hanp, &handle, (int)hlen))
+ return(-EFAULT);
+ if (put_user(hlen,hlenp))
+ return(-EFAULT);
+ return 0;
+}
+
+
+/*
+ * Return the handle for the file system containing the object named by path.
+ */
+
+int
+dm_path_to_fshdl(
+ char __user *path, /* any path name */
+ void __user *hanp, /* user's data buffer */
+ size_t __user *hlenp) /* set to size of data copied */
+{
+ /* REFERENCED */
+ dm_fsreg_t *fsrp;
+ dm_handle_t handle;
+ size_t hlen;
+ int error;
+ unsigned long lc; /* lock cookie */
+ struct nameidata nd;
+ struct inode *inode;
+ size_t len;
+ char *name;
+ struct filesystem_dmapi_operations *dops;
+
+ /* XXX get things straightened out so getname() works here? */
+ if(!(len = strnlen_user(path, PATH_MAX)))
+ return(-EFAULT);
+ if (len == 1)
+ return(-ENOENT);
+ if (len > PATH_MAX)
+ return(-ENAMETOOLONG);
+ name = kmalloc(len, GFP_KERNEL);
+ if (name == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return(-ENOMEM);
+ }
+ if (copy_from_user(name, path, len)) {
+ kfree(name);
+ return(-EFAULT);
+ }
+
+ error = path_lookup(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd);
+ kfree(name);
+ if (error)
+ return error;
+
+ ASSERT(nd.path.dentry);
+ ASSERT(nd.path.dentry->d_inode);
+
+ inode = igrab(nd.path.dentry->d_inode);
+ path_put(&nd.path);
+
+ dops = dm_fsys_ops(inode->i_sb);
+ if (dops == NULL) {
+ /* No longer in a dmapi-capable filesystem...Toto */
+ iput(inode);
+ return -EINVAL;
+ }
+
+ error = dm_ip_to_handle(inode, &handle);
+ iput(inode);
+
+ if (error)
+ return(error);
+
+ if ((fsrp = dm_find_fsreg_and_lock(&handle.ha_fsid, &lc)) == NULL)
+ return(-EBADF);
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+
+ hlen = DM_FSHSIZE;
+ if(copy_to_user(hanp, &handle, (int)hlen))
+ return(-EFAULT);
+ if(put_user(hlen,hlenp))
+ return(-EFAULT);
+ return 0;
+}
+
+
+int
+dm_fd_to_hdl(
+ int fd, /* any file descriptor */
+ void __user *hanp, /* user's data buffer */
+ size_t __user *hlenp) /* set to size of data copied */
+{
+ /* REFERENCED */
+ dm_fsreg_t *fsrp;
+ dm_handle_t handle;
+ size_t hlen;
+ int error;
+ unsigned long lc; /* lock cookie */
+	struct file	*filep = fget(fd);
+	struct inode	*ip;
+
+	if (!filep)
+		return(-EBADF);
+	ip = filep->f_dentry->d_inode;
+	if ((error = dm_ip_to_handle(ip, &handle)) != 0) {
+		fput(filep);
+		return(error);
+	}
+
+	if ((fsrp = dm_find_fsreg_and_lock(&handle.ha_fsid, &lc)) == NULL) {
+		fput(filep);
+		return(-EBADF);
+	}
+	mutex_spinunlock(&fsrp->fr_lock, lc);
+
+	hlen = DM_HSIZE(handle);
+	fput(filep);
+	if (copy_to_user(hanp, &handle, (int)hlen))
+		return(-EFAULT);
+	if (put_user(hlen, hlenp))
+		return(-EFAULT);
+ return 0;
+}
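+
+/* For context, the three routines above implement the kernel side of
+   the XDSM library calls dm_path_to_handle(), dm_path_to_fshandle() and
+   dm_fd_to_handle().  A user-level sketch (error handling elided):
+
+	void	*hanp;
+	size_t	hlen;
+
+	if (dm_fd_to_handle(fd, &hanp, &hlen) == 0) {
+		... use the opaque handle ...
+		dm_handle_free(hanp, hlen);
+	}
+*/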
+
+
+/* Enable events on an object. */
+
+int
+dm_set_eventlist(
+ dm_sessid_t sid,
+ void __user *hanp,
+ size_t hlen,
+ dm_token_t token,
+ dm_eventset_t __user *eventsetp,
+ u_int maxevent)
+{
+ dm_fsys_vector_t *fsys_vector;
+ dm_eventset_t eventset;
+ dm_tokdata_t *tdp;
+ int error;
+
+ if (copy_from_user(&eventset, eventsetp, sizeof(eventset)))
+ return(-EFAULT);
+
+ /* Do some minor sanity checking. */
+
+ if (maxevent == 0 || maxevent > DM_EVENT_MAX)
+ return(-EINVAL);
+
+ /* Access the specified object. */
+
+ error = dm_app_get_tdp(sid, hanp, hlen, token, DM_TDT_ANY,
+ DM_RIGHT_EXCL, &tdp);
+ if (error != 0)
+ return(error);
+
+ fsys_vector = dm_fsys_vector(tdp->td_ip);
+ error = fsys_vector->set_eventlist(tdp->td_ip, tdp->td_right,
+ (tdp->td_type == DM_TDT_VFS ? DM_FSYS_OBJ : 0),
+ &eventset, maxevent);
+
+ dm_app_put_tdp(tdp);
+ return(error);
+}
+
+
+/* Return the list of enabled events for an object. */
+
+int
+dm_get_eventlist(
+ dm_sessid_t sid,
+ void __user *hanp,
+ size_t hlen,
+ dm_token_t token,
+ u_int nelem,
+ dm_eventset_t __user *eventsetp,
+ u_int __user *nelemp)
+{
+ dm_fsys_vector_t *fsys_vector;
+ dm_tokdata_t *tdp;
+ dm_eventset_t eventset;
+ u_int elem;
+ int error;
+
+ if (nelem == 0)
+ return(-EINVAL);
+
+ /* Access the specified object. */
+
+ error = dm_app_get_tdp(sid, hanp, hlen, token, DM_TDT_ANY,
+ DM_RIGHT_SHARED, &tdp);
+ if (error != 0)
+ return(error);
+
+ /* Get the object's event list. */
+
+ fsys_vector = dm_fsys_vector(tdp->td_ip);
+ error = fsys_vector->get_eventlist(tdp->td_ip, tdp->td_right,
+ (tdp->td_type == DM_TDT_VFS ? DM_FSYS_OBJ : 0),
+ nelem, &eventset, &elem);
+
+ dm_app_put_tdp(tdp);
+
+ if (error)
+ return(error);
+
+ if (copy_to_user(eventsetp, &eventset, sizeof(eventset)))
+ return(-EFAULT);
+	if (put_user(elem, nelemp))
+ return(-EFAULT);
+ return(0);
+}
+
+
+/* Register for disposition of events. The handle must either be the
+ global handle or must be the handle of a file system. The list of events
+ is pointed to by eventsetp.
+*/
+
+int
+dm_set_disp(
+ dm_sessid_t sid,
+ void __user *hanp,
+ size_t hlen,
+ dm_token_t token,
+ dm_eventset_t __user *eventsetp,
+ u_int maxevent)
+{
+ dm_session_t *s;
+ dm_fsreg_t *fsrp;
+ dm_tokdata_t *tdp;
+ dm_eventset_t eventset;
+ int error;
+ unsigned long lc1; /* first lock cookie */
+ unsigned long lc2; /* second lock cookie */
+ u_int i;
+
+ /* Copy in and validate the event mask. Only the lower maxevent bits
+ are meaningful, so clear any bits set above maxevent.
+ */
+
+ if (maxevent == 0 || maxevent > DM_EVENT_MAX)
+ return(-EINVAL);
+ if (copy_from_user(&eventset, eventsetp, sizeof(eventset)))
+ return(-EFAULT);
+ eventset &= (1 << maxevent) - 1;
+
+ /* If the caller specified the global handle, then the only valid token
+ is DM_NO_TOKEN, and the only valid event in the event mask is
+ DM_EVENT_MOUNT. If it is set, add the session to the list of
+ sessions that want to receive mount events. If it is clear, remove
+ the session from the list. Since DM_EVENT_MOUNT events never block
+	   waiting for a session to register, there is no one to wake up if we
+ do add the session to the list.
+ */
+
+ if (DM_GLOBALHAN(hanp, hlen)) {
+ if (token != DM_NO_TOKEN)
+ return(-EINVAL);
+ if ((error = dm_find_session_and_lock(sid, &s, &lc1)) != 0)
+ return(error);
+ if (eventset == 0) {
+ s->sn_flags &= ~DM_SN_WANTMOUNT;
+ error = 0;
+ } else if (eventset == 1 << DM_EVENT_MOUNT) {
+ s->sn_flags |= DM_SN_WANTMOUNT;
+ error = 0;
+ } else {
+ error = -EINVAL;
+ }
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ return(error);
+ }
+
+ /* Since it's not the global handle, it had better be a filesystem
+ handle. Verify that the first 'maxevent' events in the event list
+ are all valid for a filesystem handle.
+ */
+
+ if (eventset & ~DM_VALID_DISP_EVENTS)
+ return(-EINVAL);
+
+ /* Verify that the session is valid, that the handle is a filesystem
+ handle, and that the filesystem is capable of sending events. (If
+ a dm_fsreg_t structure exists, then the filesystem can issue events.)
+ */
+
+ error = dm_app_get_tdp(sid, hanp, hlen, token, DM_TDT_VFS,
+ DM_RIGHT_EXCL, &tdp);
+ if (error != 0)
+ return(error);
+
+ fsrp = dm_find_fsreg_and_lock(&tdp->td_handle.ha_fsid, &lc1);
+ if (fsrp == NULL) {
+ dm_app_put_tdp(tdp);
+ return(-EINVAL);
+ }
+
+ /* Now that we own 'fsrp->fr_lock', get the lock on the session so that
+ it can't disappear while we add it to the filesystem's event mask.
+ */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc2)) != 0) {
+ mutex_spinunlock(&fsrp->fr_lock, lc1);
+ dm_app_put_tdp(tdp);
+ return(error);
+ }
+
+ /* Update the event disposition array for this filesystem, adding
+ and/or removing the session as appropriate. If this session is
+ dropping registration for DM_EVENT_DESTROY, or is overriding some
+ other session's registration for DM_EVENT_DESTROY, then clear any
+	   attr-on-destroy attribute name also.
+ */
+
+ for (i = 0; i < DM_EVENT_MAX; i++) {
+ if (DMEV_ISSET(i, eventset)) {
+ if (i == DM_EVENT_DESTROY && fsrp->fr_sessp[i] != s)
+ memset(&fsrp->fr_rattr, 0, sizeof(fsrp->fr_rattr));
+ fsrp->fr_sessp[i] = s;
+ } else if (fsrp->fr_sessp[i] == s) {
+ if (i == DM_EVENT_DESTROY)
+ memset(&fsrp->fr_rattr, 0, sizeof(fsrp->fr_rattr));
+ fsrp->fr_sessp[i] = NULL;
+ }
+ }
+ mutex_spinunlock(&s->sn_qlock, lc2); /* reverse cookie order */
+
+ /* Wake up all processes waiting for a disposition on this filesystem
+ in case any of them happen to be waiting for an event which we just
+ added.
+ */
+
+ if (fsrp->fr_dispcnt)
+ sv_broadcast(&fsrp->fr_dispq);
+
+ mutex_spinunlock(&fsrp->fr_lock, lc1);
+
+ dm_app_put_tdp(tdp);
+ return(0);
+}
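+
+/* A user-level sketch of registering a session for disposition of read
+   events on a filesystem handle (error handling elided):
+
+	dm_eventset_t	eventset;
+
+	DMEV_ZERO(eventset);
+	DMEV_SET(DM_EVENT_READ, eventset);
+	dm_set_disp(sid, fshanp, fshlen, DM_NO_TOKEN,
+		    &eventset, DM_EVENT_MAX);
+*/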
+
+
+/*
+ * Register a specific attribute name with a filesystem. The value of
+ * the attribute is to be returned with an asynchronous destroy event.
+ */
+
+int
+dm_set_return_on_destroy(
+ dm_sessid_t sid,
+ void __user *hanp,
+ size_t hlen,
+ dm_token_t token,
+ dm_attrname_t __user *attrnamep,
+ dm_boolean_t enable)
+{
+ dm_attrname_t attrname;
+ dm_tokdata_t *tdp;
+ dm_fsreg_t *fsrp;
+ dm_session_t *s;
+ int error;
+ unsigned long lc1; /* first lock cookie */
+ unsigned long lc2; /* second lock cookie */
+
+ /* If a dm_attrname_t is provided, copy it in and validate it. */
+
+	if (enable && copy_from_user(&attrname, attrnamep, sizeof(attrname)))
+		return(-EFAULT);
+
+ /* Validate the filesystem handle and use it to get the filesystem's
+ disposition structure.
+ */
+
+ error = dm_app_get_tdp(sid, hanp, hlen, token, DM_TDT_VFS,
+ DM_RIGHT_EXCL, &tdp);
+ if (error != 0)
+ return(error);
+
+ fsrp = dm_find_fsreg_and_lock(&tdp->td_handle.ha_fsid, &lc1);
+ if (fsrp == NULL) {
+ dm_app_put_tdp(tdp);
+ return(-EINVAL);
+ }
+
+ /* Now that we own 'fsrp->fr_lock', get the lock on the session so that
+ it can't disappear while we add it to the filesystem's event mask.
+ */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc2)) != 0) {
+ mutex_spinunlock(&fsrp->fr_lock, lc1);
+ dm_app_put_tdp(tdp);
+ return(error);
+ }
+
+ /* A caller cannot disable return-on-destroy if he is not registered
+ for DM_EVENT_DESTROY. Enabling return-on-destroy is an implicit
+ dm_set_disp() for DM_EVENT_DESTROY; we wake up all processes
+ waiting for a disposition in case any was waiting for a
+ DM_EVENT_DESTROY event.
+ */
+
+ error = 0;
+ if (enable) {
+ fsrp->fr_sessp[DM_EVENT_DESTROY] = s;
+ fsrp->fr_rattr = attrname;
+ if (fsrp->fr_dispcnt)
+ sv_broadcast(&fsrp->fr_dispq);
+ } else if (fsrp->fr_sessp[DM_EVENT_DESTROY] != s) {
+ error = -EINVAL;
+ } else {
+ memset(&fsrp->fr_rattr, 0, sizeof(fsrp->fr_rattr));
+ }
+ mutex_spinunlock(&s->sn_qlock, lc2); /* reverse cookie order */
+ mutex_spinunlock(&fsrp->fr_lock, lc1);
+ dm_app_put_tdp(tdp);
+ return(error);
+}
+
+
+int
+dm_get_mountinfo(
+ dm_sessid_t sid,
+ void __user *hanp,
+ size_t hlen,
+ dm_token_t token,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_fsreg_t *fsrp;
+ dm_tokdata_t *tdp;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* Make sure that the caller's buffer is 8-byte aligned. */
+
+ if (((unsigned long)bufp & (sizeof(__u64) - 1)) != 0)
+ return(-EFAULT);
+
+ /* Verify that the handle is a filesystem handle, and that the
+ filesystem is capable of sending events. If not, return an error.
+ */
+
+ error = dm_app_get_tdp(sid, hanp, hlen, token, DM_TDT_VFS,
+ DM_RIGHT_SHARED, &tdp);
+ if (error != 0)
+ return(error);
+
+ /* Find the filesystem entry. This should always succeed as the
+ dm_app_get_tdp call created a filesystem reference. Once we find
+ the entry, drop the lock. The mountinfo message is never modified,
+ the filesystem entry can't disappear, and we don't want to hold a
+ spinlock while doing copyout calls.
+ */
+
+ fsrp = dm_find_fsreg_and_lock(&tdp->td_handle.ha_fsid, &lc);
+ if (fsrp == NULL) {
+ dm_app_put_tdp(tdp);
+ return(-EINVAL);
+ }
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+
+ /* Copy the message into the user's buffer and update his 'rlenp'. */
+
+ if (put_user(fsrp->fr_msgsize, rlenp)) {
+ error = -EFAULT;
+ } else if (fsrp->fr_msgsize > buflen) { /* user buffer not big enough */
+ error = -E2BIG;
+ } else if (copy_to_user(bufp, fsrp->fr_msg, fsrp->fr_msgsize)) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+ dm_app_put_tdp(tdp);
+ return(error);
+}
+
+
+int
+dm_getall_disp(
+ dm_sessid_t sid,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_session_t *s; /* pointer to session given by sid */
+ unsigned long lc1; /* first lock cookie */
+ unsigned long lc2; /* second lock cookie */
+ int totalsize;
+ int msgsize;
+ int fsyscnt;
+ dm_dispinfo_t *prevmsg;
+ dm_fsreg_t *fsrp;
+ int error;
+ char *kbuf;
+
+ /* Because the dm_getall_disp structure contains a __u64 field,
+ make sure that the buffer provided by the caller is aligned so
+ that he can read such fields successfully.
+ */
+
+ if (((unsigned long)bufp & (sizeof(__u64) - 1)) != 0)
+ return(-EFAULT);
+
+ /* Compute the size of a dm_dispinfo structure, rounding up to an
+ 8-byte boundary so that any subsequent structures will also be
+ aligned.
+ */
+
+	msgsize = (sizeof(dm_dispinfo_t) + DM_FSHSIZE + sizeof(__u64) - 1) &
+		  ~((int)sizeof(__u64) - 1);
+
+ /* Loop until we can get the right amount of temp space, being careful
+ not to hold a mutex during the allocation. Usually only one trip.
+ */
+
+ for (;;) {
+ if ((fsyscnt = dm_fsys_cnt) == 0) {
+			if (put_user(0, rlenp))
+ return(-EFAULT);
+ return(0);
+ }
+ kbuf = kmalloc(fsyscnt * msgsize, GFP_KERNEL);
+ if (kbuf == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+
+ lc1 = mutex_spinlock(&dm_reg_lock);
+ if (fsyscnt == dm_fsys_cnt)
+ break;
+
+ mutex_spinunlock(&dm_reg_lock, lc1);
+ kfree(kbuf);
+ }
+
+ /* Find the indicated session and lock it. */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc2)) != 0) {
+ mutex_spinunlock(&dm_reg_lock, lc1);
+ kfree(kbuf);
+ return(error);
+ }
+
+ /* Create a dm_dispinfo structure for each filesystem in which
+ this session has at least one event selected for disposition.
+ */
+
+ totalsize = 0; /* total bytes to transfer to the user */
+ prevmsg = NULL;
+
+ for (fsrp = dm_registers; fsrp; fsrp = fsrp->fr_next) {
+ dm_dispinfo_t *disp;
+ int event;
+ int found;
+
+ disp = (dm_dispinfo_t *)(kbuf + totalsize);
+
+ DMEV_ZERO(disp->di_eventset);
+
+ for (event = 0, found = 0; event < DM_EVENT_MAX; event++) {
+ if (fsrp->fr_sessp[event] != s)
+ continue;
+ DMEV_SET(event, disp->di_eventset);
+ found++;
+ }
+ if (!found)
+ continue;
+
+ disp->_link = 0;
+ disp->di_fshandle.vd_offset = sizeof(dm_dispinfo_t);
+ disp->di_fshandle.vd_length = DM_FSHSIZE;
+
+ memcpy((char *)disp + disp->di_fshandle.vd_offset,
+ &fsrp->fr_fsid, disp->di_fshandle.vd_length);
+
+ if (prevmsg)
+ prevmsg->_link = msgsize;
+
+ prevmsg = disp;
+ totalsize += msgsize;
+ }
+ mutex_spinunlock(&s->sn_qlock, lc2); /* reverse cookie order */
+ mutex_spinunlock(&dm_reg_lock, lc1);
+
+ if (put_user(totalsize, rlenp)) {
+ error = -EFAULT;
+ } else if (totalsize > buflen) { /* no more room */
+ error = -E2BIG;
+ } else if (totalsize && copy_to_user(bufp, kbuf, totalsize)) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+
+ kfree(kbuf);
+ return(error);
+}
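+
+/* The buffer filled in above is a chain of 8-byte-aligned dm_dispinfo_t
+   records; each record's _link field gives the byte offset to the next
+   record (0 ends the chain) and di_fshandle locates the filesystem
+   handle packed directly after the fixed-size structure:
+
+	[dm_dispinfo_t][fs handle][pad]  [dm_dispinfo_t][fs handle][pad]
+	 _link = msgsize --------------^  _link = 0
+*/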
+
+int
+dm_open_by_handle_rvp(
+ unsigned int fd,
+ void __user *hanp,
+ size_t hlen,
+ int flags,
+ int *rvp)
+{
++ const struct cred *cred = current_cred();
+ dm_handle_t handle;
+ int error;
+ short td_type;
+ struct dentry *dentry;
+ struct inode *inodep;
+ int new_fd;
+ struct file *mfilp;
+ struct file *filp;
+
+ if ((error = dm_copyin_handle(hanp, hlen, &handle)) != 0) {
+ return(error);
+ }
+
+ if ((inodep = dm_handle_to_ip(&handle, &td_type)) == NULL) {
+ return(-EBADF);
+ }
+ if ((td_type == DM_TDT_VFS) || (td_type == DM_TDT_OTH)) {
+ iput(inodep);
+ return(-EBADF);
+ }
+
+ if ((new_fd = get_unused_fd()) < 0) {
+ iput(inodep);
+ return(-EMFILE);
+ }
+
- dentry = d_alloc_anon(inodep);
++ dentry = d_obtain_alias(inodep);
-	if (dentry == NULL) {
-		iput(inodep);
-		put_unused_fd(new_fd);
-		return(-ENOMEM);
-	}
++	if (IS_ERR(dentry)) {
++		/* d_obtain_alias() consumed the inode reference */
++		put_unused_fd(new_fd);
++		return(PTR_ERR(dentry));
++	}
+
+ mfilp = fget(fd);
+ if (!mfilp) {
+ dput(dentry);
+ put_unused_fd(new_fd);
+ return(-EBADF);
+ }
+
+ mntget(mfilp->f_vfsmnt);
+
+ /* Create file pointer */
- filp = dentry_open(dentry, mfilp->f_vfsmnt, flags);
++ filp = dentry_open(dentry, mfilp->f_vfsmnt, flags, cred);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ fput(mfilp);
+ return PTR_ERR(filp);
+ }
+
- if (td_type == DM_TDT_REG) {
- struct filesystem_dmapi_operations *dmapiops;
- dmapiops = dm_fsys_ops(inodep->i_sb);
- if (dmapiops && dmapiops->get_invis_ops) {
- /* invisible operation should not change atime */
- filp->f_flags |= O_NOATIME;
- filp->f_op = dmapiops->get_invis_ops(inodep);
- }
- }
++ if (td_type == DM_TDT_REG)
++ filp->f_mode |= FMODE_NOCMTIME;
++
+ fd_install(new_fd, filp);
+ fput(mfilp);
+ *rvp = new_fd;
+ return 0;
+}
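+
+/* A user-level sketch of the corresponding XDSM call; the descriptor it
+   returns performs "invisible" I/O on regular files, i.e. reads and
+   writes that do not trigger the usual ctime/mtime updates:
+
+	int	fd;
+
+	fd = dm_open_by_handle(hanp, hlen, O_RDWR);
+	if (fd >= 0) {
+		... read or write without timestamp updates ...
+		close(fd);
+	}
+*/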
+
+
+#ifdef HAVE_DM_QUEUE_FLUSH
+/* Find the threads that have a reference to our filesystem and force
+ them to return with the specified errno.
+ We look for them in each dm_fsreg_t's fr_evt_dispq.
+*/
+
+int
+dm_release_disp_threads(
+ dm_fsid_t *fsidp,
+ struct inode *inode, /* may be null */
+ int errno)
+{
+ unsigned long lc;
+ dm_fsreg_t *fsrp;
+ dm_tokevent_t *tevp;
+ dm_tokdata_t *tdp;
+ dm_eventq_t *queue;
+ int found_events = 0;
+
+ if ((fsrp = dm_find_fsreg_and_lock(fsidp, &lc)) == NULL){
+ return 0;
+ }
+
+ queue = &fsrp->fr_evt_dispq;
+ for (tevp = queue->eq_head; tevp; tevp = tevp->te_next) {
+ nested_spinlock(&tevp->te_lock);
+ if (inode) {
+ for (tdp = tevp->te_tdp; tdp; tdp = tdp->td_next) {
+ if( tdp->td_ip == inode ) {
+ tevp->te_flags |= DM_TEF_FLUSH;
+ tevp->te_reply = errno;
+ found_events = 1;
+ break;
+ }
+ }
+ }
+ else {
+ tevp->te_flags |= DM_TEF_FLUSH;
+ tevp->te_reply = errno;
+ found_events = 1;
+ }
+ nested_spinunlock(&tevp->te_lock);
+ }
+
+ if (found_events && fsrp->fr_dispcnt)
+ sv_broadcast(&fsrp->fr_dispq);
+ mutex_spinunlock(&fsrp->fr_lock, lc);
+ return 0;
+}
+#endif /* HAVE_DM_QUEUE_FLUSH */
--- /dev/null
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/module.h>
+#endif
+#include "dmapi.h"
+#include "dmapi_kern.h"
+#include "dmapi_private.h"
+
+dm_session_t *dm_sessions = NULL; /* head of session list */
+u_int dm_sessions_active = 0; /* # sessions currently active */
+dm_sessid_t dm_next_sessid = 1; /* next session ID to use */
+lock_t dm_session_lock = SPIN_LOCK_UNLOCKED;/* lock for session list */
+
+dm_token_t dm_next_token = 1; /* next token ID to use */
+dm_sequence_t dm_next_sequence = 1; /* next sequence number to use */
+lock_t dm_token_lock = SPIN_LOCK_UNLOCKED;/* dm_next_token/dm_next_sequence lock */
+
+int dm_max_queued_msgs = 2048; /* max # undelivered msgs/session */
+
+int dm_hash_buckets = 1009; /* prime -- number of buckets */
+
+#define DM_SHASH(sess,inodenum) \
+ ((sess)->sn_sesshash + do_mod((inodenum), dm_hash_buckets))
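+
+/* For example, with the default dm_hash_buckets of 1009, an event on
+   inode number 123456 hashes to bucket 123456 % 1009 == 358; do_mod()
+   is simply a 64-bit-safe modulus.
+*/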
+
+
+#ifdef CONFIG_PROC_FS
+static int
+sessions_read_pfs(char *buffer, char **start, off_t offset,
+ int count, int *eof, void *data)
+{
+ int len;
+ dm_session_t *sessp = (dm_session_t*)data;
+
+#define CHKFULL if(len >= count) break;
+#define ADDBUF(a,b) len += sprintf(buffer + len, a, b); CHKFULL;
+
+ len=0;
+ while(1){
+ ADDBUF("sessp=0x%p\n", sessp);
+ ADDBUF("sn_next=0x%p\n", sessp->sn_next);
+ ADDBUF("sn_sessid=%d\n", sessp->sn_sessid);
+ ADDBUF("sn_flags=%x\n", sessp->sn_flags);
+ ADDBUF("sn_qlock=%c\n", '?');
+ ADDBUF("sn_readerq=%c\n", '?');
+ ADDBUF("sn_writerq=%c\n", '?');
+ ADDBUF("sn_readercnt=%u\n", sessp->sn_readercnt);
+ ADDBUF("sn_writercnt=%u\n", sessp->sn_writercnt);
+
+ ADDBUF("sn_newq.eq_head=0x%p\n", sessp->sn_newq.eq_head);
+ ADDBUF("sn_newq.eq_tail=0x%p\n", sessp->sn_newq.eq_tail);
+ ADDBUF("sn_newq.eq_count=%d\n", sessp->sn_newq.eq_count);
+
+ ADDBUF("sn_delq.eq_head=0x%p\n", sessp->sn_delq.eq_head);
+ ADDBUF("sn_delq.eq_tail=0x%p\n", sessp->sn_delq.eq_tail);
+ ADDBUF("sn_delq.eq_count=%d\n", sessp->sn_delq.eq_count);
+
+ ADDBUF("sn_evt_writerq.eq_head=0x%p\n", sessp->sn_evt_writerq.eq_head);
+ ADDBUF("sn_evt_writerq.eq_tail=0x%p\n", sessp->sn_evt_writerq.eq_tail);
+ ADDBUF("sn_evt_writerq.eq_count=%d\n", sessp->sn_evt_writerq.eq_count);
+
+ ADDBUF("sn_info=\"%s\"\n", sessp->sn_info);
+
+ break;
+ }
+
+ if (offset >= len) {
+ *start = buffer;
+ *eof = 1;
+ return 0;
+ }
+ *start = buffer + offset;
+ if ((len -= offset) > count)
+ return count;
+ *eof = 1;
+
+ return len;
+}
+#endif
+
+
+/* Link a session to the end of the session list. New sessions are always
+ added at the end of the list so that dm_enqueue_mount_event() doesn't
+ miss a session. The caller must have obtained dm_session_lock before
+ calling this routine.
+*/
+
+static void
+link_session(
+ dm_session_t *s)
+{
+ dm_session_t *tmp;
+
+ if ((tmp = dm_sessions) == NULL) {
+ dm_sessions = s;
+ } else {
+ while (tmp->sn_next != NULL)
+ tmp = tmp->sn_next;
+ tmp->sn_next = s;
+ }
+ s->sn_next = NULL;
+ dm_sessions_active++;
+}
+
+
+/* Remove a session from the session list. The caller must have obtained
+ dm_session_lock before calling this routine. unlink_session() should only
+ be used in situations where the session is known to be on the dm_sessions
+ list; otherwise it panics.
+*/
+
+static void
+unlink_session(
+ dm_session_t *s)
+{
+ dm_session_t *tmp;
+
+ if (dm_sessions == s) {
+ dm_sessions = dm_sessions->sn_next;
+ } else {
+ for (tmp = dm_sessions; tmp; tmp = tmp->sn_next) {
+ if (tmp->sn_next == s)
+ break;
+ }
+ if (tmp == NULL) {
+ panic("unlink_session: corrupt DMAPI session list, "
+ "dm_sessions %p, session %p\n",
+ dm_sessions, s);
+ }
+ tmp->sn_next = s->sn_next;
+ }
+ s->sn_next = NULL;
+ dm_sessions_active--;
+}
+
+
+/* Link an event to the end of an event queue. The caller must have obtained
+ the session's sn_qlock before calling this routine.
+*/
+
+void
+dm_link_event(
+ dm_tokevent_t *tevp,
+ dm_eventq_t *queue)
+{
+ if (queue->eq_tail) {
+ queue->eq_tail->te_next = tevp;
+ queue->eq_tail = tevp;
+ } else {
+ queue->eq_head = queue->eq_tail = tevp;
+ }
+ tevp->te_next = NULL;
+ queue->eq_count++;
+}
+
+
+/* Remove an event from an event queue. The caller must have obtained the
+ session's sn_qlock before calling this routine. dm_unlink_event() should
+ only be used in situations where the event is known to be on the queue;
+ otherwise it panics.
+*/
+
+void
+dm_unlink_event(
+ dm_tokevent_t *tevp,
+ dm_eventq_t *queue)
+{
+ dm_tokevent_t *tmp;
+
+ if (queue->eq_head == tevp) {
+ queue->eq_head = tevp->te_next;
+ if (queue->eq_head == NULL)
+ queue->eq_tail = NULL;
+ } else {
+ tmp = queue->eq_head;
+ while (tmp && tmp->te_next != tevp)
+ tmp = tmp->te_next;
+ if (tmp == NULL) {
+ panic("dm_unlink_event: corrupt DMAPI queue %p, "
+ "tevp %p\n", queue, tevp);
+ }
+ tmp->te_next = tevp->te_next;
+ if (tmp->te_next == NULL)
+ queue->eq_tail = tmp;
+ }
+ tevp->te_next = NULL;
+ queue->eq_count--;
+}
+
+/* Link a regular file event to a hash bucket. The caller must have obtained
+ the session's sn_qlock before calling this routine.
+ The tokevent must be for a regular file object--DM_TDT_REG.
+*/
+
+static void
+hash_event(
+ dm_session_t *s,
+ dm_tokevent_t *tevp)
+{
+ dm_sesshash_t *sh;
+ dm_ino_t ino;
+
+ if (s->sn_sesshash == NULL) {
+ s->sn_sesshash = kmalloc(dm_hash_buckets * sizeof(dm_sesshash_t), GFP_KERNEL);
+ if (s->sn_sesshash == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return;
+ }
+ memset(s->sn_sesshash, 0, dm_hash_buckets * sizeof(dm_sesshash_t));
+ }
+
+ ino = (&tevp->te_tdp->td_handle.ha_fid)->dm_fid_ino;
+ sh = DM_SHASH(s, ino);
+
+#ifdef DM_SHASH_DEBUG
+ if (sh->h_next == NULL) {
+ s->sn_buckets_in_use++;
+ if (s->sn_buckets_in_use > s->sn_max_buckets_in_use)
+ s->sn_max_buckets_in_use++;
+ }
+ sh->maxlength++;
+ sh->curlength++;
+ sh->num_adds++;
+#endif
+
+ tevp->te_flags |= DM_TEF_HASHED;
+ tevp->te_hashnext = sh->h_next;
+ sh->h_next = tevp;
+}
+
+
+/* Remove a regular file event from a hash bucket. The caller must have
+ obtained the session's sn_qlock before calling this routine.
+ The tokevent must be for a regular file object--DM_TDT_REG.
+*/
+
+static void
+unhash_event(
+ dm_session_t *s,
+ dm_tokevent_t *tevp)
+{
+ dm_sesshash_t *sh;
+ dm_tokevent_t *tmp;
+ dm_ino_t ino;
+
+ if (s->sn_sesshash == NULL)
+ return;
+
+ ino = (&tevp->te_tdp->td_handle.ha_fid)->dm_fid_ino;
+ sh = DM_SHASH(s, ino);
+
+ if (sh->h_next == tevp) {
+ sh->h_next = tevp->te_hashnext; /* leap frog */
+ } else {
+ tmp = sh->h_next;
+ while (tmp->te_hashnext != tevp) {
+ tmp = tmp->te_hashnext;
+ }
+ tmp->te_hashnext = tevp->te_hashnext; /* leap frog */
+ }
+ tevp->te_hashnext = NULL;
+ tevp->te_flags &= ~DM_TEF_HASHED;
+
+#ifdef DM_SHASH_DEBUG
+ if (sh->h_next == NULL)
+ s->sn_buckets_in_use--;
+ sh->curlength--;
+ sh->num_dels++;
+#endif
+}
+
+
+/* Determine if this is a repeat event. The caller MUST be holding
+ the session lock.
+ The tokevent must be for a regular file object--DM_TDT_REG.
+ Returns:
+ 0 == match not found
+ 1 == match found
+*/
+
+static int
+repeated_event(
+ dm_session_t *s,
+ dm_tokevent_t *tevp)
+{
+ dm_sesshash_t *sh;
+ dm_data_event_t *d_event1;
+ dm_data_event_t *d_event2;
+ dm_tokevent_t *tevph;
+ dm_ino_t ino1;
+ dm_ino_t ino2;
+
+ if ((!s->sn_newq.eq_tail) && (!s->sn_delq.eq_tail)) {
+ return(0);
+ }
+ if (s->sn_sesshash == NULL) {
+ return(0);
+ }
+
+ ino1 = (&tevp->te_tdp->td_handle.ha_fid)->dm_fid_ino;
+ sh = DM_SHASH(s, ino1);
+
+ if (sh->h_next == NULL) {
+ /* bucket is empty, no match here */
+ return(0);
+ }
+
+ d_event1 = (dm_data_event_t *)((char *)&tevp->te_msg + tevp->te_msg.ev_data.vd_offset);
+ tevph = sh->h_next;
+ while (tevph) {
+ /* find something with the same event type and handle type */
+ if ((tevph->te_msg.ev_type == tevp->te_msg.ev_type) &&
+ (tevph->te_tdp->td_type == tevp->te_tdp->td_type)) {
+
+			ino2 = (&tevph->te_tdp->td_handle.ha_fid)->dm_fid_ino;
+ d_event2 = (dm_data_event_t *)((char *)&tevph->te_msg + tevph->te_msg.ev_data.vd_offset);
+
+ /* If the two events are operating on the same file,
+ and the same part of that file, then we have a
+ match.
+ */
+ if ((ino1 == ino2) &&
+ (d_event2->de_offset == d_event1->de_offset) &&
+ (d_event2->de_length == d_event1->de_length)) {
+ /* found a match */
+#ifdef DM_SHASH_DEBUG
+ sh->dup_hits++;
+#endif
+ return(1);
+ }
+ }
+ tevph = tevph->te_hashnext;
+ }
+
+ /* No match found */
+ return(0);
+}
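+
+/* Example: two data events of the same type (say DM_EVENT_READ) on the
+   same inode covering an identical (de_offset, de_length) byte range
+   compare equal above, allowing the enqueue path to treat the second
+   event as a repeat.
+*/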
+
+
+/* Return a pointer to a session given its session ID, or EINVAL if no session
+ has the session ID (per the DMAPI spec). The caller must have obtained
+ dm_session_lock before calling this routine.
+*/
+
+static int
+dm_find_session(
+ dm_sessid_t sid,
+ dm_session_t **sessionpp)
+{
+ dm_session_t *s;
+
+ for (s = dm_sessions; s; s = s->sn_next) {
+ if (s->sn_sessid == sid) {
+ *sessionpp = s;
+ return(0);
+ }
+ }
+ return(-EINVAL);
+}
+
+
+/* Return a pointer to a locked session given its session ID. '*lcp' is
+ used to obtain the session's sn_qlock. Caller is responsible for eventually
+ unlocking it.
+*/
+
+int
+dm_find_session_and_lock(
+ dm_sessid_t sid,
+ dm_session_t **sessionpp,
+ unsigned long *lcp) /* addr of returned lock cookie */
+{
+ int error;
+
+ for (;;) {
+ *lcp = mutex_spinlock(&dm_session_lock);
+
+ if ((error = dm_find_session(sid, sessionpp)) != 0) {
+ mutex_spinunlock(&dm_session_lock, *lcp);
+ return(error);
+ }
+ if (spin_trylock(&(*sessionpp)->sn_qlock)) {
+ nested_spinunlock(&dm_session_lock);
+ return(0); /* success */
+ }
+
+ /* If the second lock is not available, drop the first and
+ start over. This gives the CPU a chance to process any
+ interrupts, and also allows processes which want a sn_qlock
+ for a different session to proceed.
+ */
+
+ mutex_spinunlock(&dm_session_lock, *lcp);
+ }
+}
+
+
+/* Return a pointer to the event on the specified session's sn_delq which
+ contains the given token. The caller must have obtained the session's
+ sn_qlock before calling this routine.
+*/
+
+static int
+dm_find_msg(
+ dm_session_t *s,
+ dm_token_t token,
+ dm_tokevent_t **tevpp)
+{
+ dm_tokevent_t *tevp;
+
+ if (token <= DM_INVALID_TOKEN)
+ return(-EINVAL);
+
+ for (tevp = s->sn_delq.eq_head; tevp; tevp = tevp->te_next) {
+ if (tevp->te_msg.ev_token == token) {
+ *tevpp = tevp;
+ return(0);
+ }
+ }
+ return(-ESRCH);
+}
+
+
+/* Given a session ID and token, find the tevp on the specified session's
+ sn_delq which corresponds to that session ID/token pair. If a match is
+ found, lock the tevp's te_lock and return a pointer to the tevp.
+ '*lcp' is used to obtain the tevp's te_lock. The caller is responsible
+ for eventually unlocking it.
+*/
+
+int
+dm_find_msg_and_lock(
+ dm_sessid_t sid,
+ dm_token_t token,
+ dm_tokevent_t **tevpp,
+ unsigned long *lcp) /* address of returned lock cookie */
+{
+ dm_session_t *s;
+ int error;
+
+ if ((error = dm_find_session_and_lock(sid, &s, lcp)) != 0)
+ return(error);
+
+ if ((error = dm_find_msg(s, token, tevpp)) != 0) {
+ mutex_spinunlock(&s->sn_qlock, *lcp);
+ return(error);
+ }
+ nested_spinlock(&(*tevpp)->te_lock);
+ nested_spinunlock(&s->sn_qlock);
+ return(0);
+}
+
+
+/* Create a new session, or resume an old session if one is given. */
+
+int
+dm_create_session(
+ dm_sessid_t old,
+ char __user *info,
+ dm_sessid_t __user *new)
+{
+ dm_session_t *s;
+ dm_sessid_t sid;
+ char sessinfo[DM_SESSION_INFO_LEN];
+ size_t len;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+	len = strnlen_user(info, DM_SESSION_INFO_LEN-1);
+	if (len == 0)		/* strnlen_user() faulted */
+		return(-EFAULT);
+	if (copy_from_user(sessinfo, info, len))
+		return(-EFAULT);
+ lc = mutex_spinlock(&dm_session_lock);
+ sid = dm_next_sessid++;
+ mutex_spinunlock(&dm_session_lock, lc);
+ if (copy_to_user(new, &sid, sizeof(sid)))
+ return(-EFAULT);
+
+ if (old == DM_NO_SESSION) {
+ s = kmem_cache_alloc(dm_session_cachep, GFP_KERNEL);
+ if (s == NULL) {
+ printk("%s/%d: kmem_cache_alloc(dm_session_cachep) returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+ memset(s, 0, sizeof(*s));
+
+ sv_init(&s->sn_readerq, SV_DEFAULT, "dmreadq");
+ sv_init(&s->sn_writerq, SV_DEFAULT, "dmwritq");
+ spinlock_init(&s->sn_qlock, "sn_qlock");
+ } else {
+ lc = mutex_spinlock(&dm_session_lock);
+ if ((error = dm_find_session(old, &s)) != 0) {
+ mutex_spinunlock(&dm_session_lock, lc);
+ return(error);
+ }
+ unlink_session(s);
- mutex_spinunlock(&dm_session_lock, lc);
++ mutex_spinunlock(&dm_session_lock, lc);
+#ifdef CONFIG_PROC_FS
+ {
+ char buf[100];
+ sprintf(buf, DMAPI_DBG_PROCFS "/sessions/0x%p", s);
+ remove_proc_entry(buf, NULL);
+ }
+#endif
+ }
+ memcpy(s->sn_info, sessinfo, len);
+	s->sn_info[len-1] = 0;	/* ensure NUL termination */
+ s->sn_sessid = sid;
- lc = mutex_spinlock(&dm_session_lock);
++ lc = mutex_spinlock(&dm_session_lock);
+ link_session(s);
+ mutex_spinunlock(&dm_session_lock, lc);
+#ifdef CONFIG_PROC_FS
+ {
+ char buf[100];
+ struct proc_dir_entry *entry;
+
+ sprintf(buf, DMAPI_DBG_PROCFS "/sessions/0x%p", s);
+		entry = create_proc_read_entry(buf, 0, NULL, sessions_read_pfs, s);
+		if (entry)
+			entry->owner = THIS_MODULE;
+ }
+#endif
+ return(0);
+}
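+
+/* A user-level sketch (error handling elided): create a fresh session,
+   or adopt one orphaned by a crashed daemon:
+
+	dm_sessid_t	newsid;
+
+	dm_create_session(DM_NO_SESSION, "hsm daemon", &newsid);
+	...
+	dm_create_session(oldsid, "hsm daemon restart", &newsid);
+*/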
+
+
+int
+dm_destroy_session(
+ dm_sessid_t sid)
+{
+ dm_session_t *s;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* The dm_session_lock must be held until the session is unlinked. */
+
+ lc = mutex_spinlock(&dm_session_lock);
+
+ if ((error = dm_find_session(sid, &s)) != 0) {
+ mutex_spinunlock(&dm_session_lock, lc);
+ return(error);
+ }
+ nested_spinlock(&s->sn_qlock);
+
+ /* The session exists. Check to see if it is still in use. If any
+ messages still exist on the sn_newq or sn_delq, or if any processes
+ are waiting for messages to arrive on the session, then the session
+ must not be destroyed.
+ */
+
+ if (s->sn_newq.eq_head || s->sn_readercnt || s->sn_delq.eq_head) {
+ nested_spinunlock(&s->sn_qlock);
+ mutex_spinunlock(&dm_session_lock, lc);
+ return(-EBUSY);
+ }
+
+ /* The session is not in use. Dequeue it from the session chain. */
+
+ unlink_session(s);
+ nested_spinunlock(&s->sn_qlock);
+ mutex_spinunlock(&dm_session_lock, lc);
+
+#ifdef CONFIG_PROC_FS
+ {
+ char buf[100];
+ sprintf(buf, DMAPI_DBG_PROCFS "/sessions/0x%p", s);
+ remove_proc_entry(buf, NULL);
+ }
+#endif
+
+ /* Now clear the sessions's disposition registration, and then destroy
+ the session structure.
+ */
+
+ dm_clear_fsreg(s);
+
+ spinlock_destroy(&s->sn_qlock);
+ sv_destroy(&s->sn_readerq);
+ sv_destroy(&s->sn_writerq);
+ if (s->sn_sesshash)
+ kfree(s->sn_sesshash);
+ kmem_cache_free(dm_session_cachep, s);
+ return(0);
+}
+
+
+/*
+ * Return a list of all active sessions.
+ */
+
+int
+dm_getall_sessions(
+ u_int nelem,
+ dm_sessid_t __user *sidp,
+ u_int __user *nelemp)
+{
+ dm_session_t *s;
+ u_int sesscnt;
+ dm_sessid_t *sesslist;
+ unsigned long lc; /* lock cookie */
+ int error;
+ int i;
+
+ /* Loop until we can get the right amount of temp space, being careful
+ not to hold a mutex during the allocation. Usually only one trip.
+ */
+
+ for (;;) {
+ if ((sesscnt = dm_sessions_active) == 0) {
+			if (put_user(0, nelemp))
+ return(-EFAULT);
+ return(0);
+ }
+ sesslist = kmalloc(sesscnt * sizeof(*sidp), GFP_KERNEL);
+ if (sesslist == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+
+ lc = mutex_spinlock(&dm_session_lock);
+ if (sesscnt == dm_sessions_active)
+ break;
+
+ mutex_spinunlock(&dm_session_lock, lc);
+ kfree(sesslist);
+ }
+
+ /* Make a temp copy of the data, then release the mutex. */
+
+ for (i = 0, s = dm_sessions; i < sesscnt; i++, s = s->sn_next)
+ sesslist[i] = s->sn_sessid;
+
+ mutex_spinunlock(&dm_session_lock, lc);
+
+ /* Now copy the data to the user. */
+
+ if(put_user(sesscnt, nelemp)) {
+ error = -EFAULT;
+ } else if (sesscnt > nelem) {
+ error = -E2BIG;
+ } else if (copy_to_user(sidp, sesslist, sesscnt * sizeof(*sidp))) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+ kfree(sesslist);
+ return(error);
+}
+
+
+/*
+ * Return the descriptive string associated with a session.
+ */
+
+int
+dm_query_session(
+ dm_sessid_t sid,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_session_t *s; /* pointer to session given by sid */
+ int len; /* length of session info string */
+ int error;
+ char sessinfo[DM_SESSION_INFO_LEN];
+ unsigned long lc; /* lock cookie */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc)) != 0)
+ return(error);
+
+ len = strlen(s->sn_info) + 1; /* NULL terminated when created */
+ memcpy(sessinfo, s->sn_info, len);
+
+ mutex_spinunlock(&s->sn_qlock, lc);
+
+ /* Now that the mutex is released, copy the sessinfo to the user. */
+
+ if (put_user(len, rlenp)) {
+ error = -EFAULT;
+ } else if (len > buflen) {
+ error = -E2BIG;
+ } else if (copy_to_user(bufp, sessinfo, len)) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+ return(error);
+}
+
+
+/*
+ * Return all of the previously delivered tokens (that is, their IDs)
+ * for the given session.
+ */
+
+int
+dm_getall_tokens(
+ dm_sessid_t sid, /* session obtaining tokens from */
+ u_int nelem, /* size of tokenbufp */
+ dm_token_t __user *tokenbufp,/* buffer to copy token IDs to */
+ u_int __user *nelemp) /* return number copied to tokenbufp */
+{
+ dm_session_t *s; /* pointer to session given by sid */
+ dm_tokevent_t *tevp; /* event message queue traversal */
+ unsigned long lc; /* lock cookie */
+ int tokcnt;
+ dm_token_t *toklist;
+ int error;
+ int i;
+
+ /* Loop until we can get the right amount of temp space, being careful
+ not to hold a mutex during the allocation. Usually only one trip.
+ */
+
+ for (;;) {
+ if ((error = dm_find_session_and_lock(sid, &s, &lc)) != 0)
+ return(error);
+ tokcnt = s->sn_delq.eq_count;
+ mutex_spinunlock(&s->sn_qlock, lc);
+
+ if (tokcnt == 0) {
+			if (put_user(0, nelemp))
+ return(-EFAULT);
+ return(0);
+ }
+ toklist = kmalloc(tokcnt * sizeof(*tokenbufp), GFP_KERNEL);
+ if (toklist == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __FUNCTION__, __LINE__);
+ return -ENOMEM;
+ }
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc)) != 0) {
+ kfree(toklist);
+ return(error);
+ }
+
+ if (tokcnt == s->sn_delq.eq_count)
+ break;
+
+ mutex_spinunlock(&s->sn_qlock, lc);
+ kfree(toklist);
+ }
+
+ /* Make a temp copy of the data, then release the mutex. */
+
+ tevp = s->sn_delq.eq_head;
+ for (i = 0; i < tokcnt; i++, tevp = tevp->te_next)
+ toklist[i] = tevp->te_msg.ev_token;
+
+ mutex_spinunlock(&s->sn_qlock, lc);
+
+ /* Now copy the data to the user. */
+
+ if (put_user(tokcnt, nelemp)) {
+ error = -EFAULT;
+ } else if (tokcnt > nelem) {
+ error = -E2BIG;
+ } else if (copy_to_user(tokenbufp,toklist,tokcnt*sizeof(*tokenbufp))) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+ kfree(toklist);
+ return(error);
+}
+
+
+/*
+ * Return the message identified by token.
+ */
+
+int
+dm_find_eventmsg(
+ dm_sessid_t sid,
+ dm_token_t token,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_tokevent_t *tevp; /* message identified by token */
+ int msgsize; /* size of message to copy out */
+ void *msg;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* Because some of the events (dm_data_event_t in particular) contain
+ __u64 fields, we need to make sure that the buffer provided by the
+ caller is aligned such that he can read those fields successfully.
+ */
+
+ if (((unsigned long)bufp & (sizeof(__u64) - 1)) != 0)
+ return(-EFAULT);
+
+ /* Allocate the right amount of temp space, being careful not to hold
+ a mutex during the allocation.
+ */
+
+ if ((error = dm_find_msg_and_lock(sid, token, &tevp, &lc)) != 0)
+ return(error);
+ msgsize = tevp->te_allocsize - offsetof(dm_tokevent_t, te_msg);
+ mutex_spinunlock(&tevp->te_lock, lc);
+
+ msg = kmalloc(msgsize, GFP_KERNEL);
+ if (msg == NULL) {
+ printk("%s/%d: kmalloc returned NULL\n", __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ if ((error = dm_find_msg_and_lock(sid, token, &tevp, &lc)) != 0) {
+ kfree(msg);
+ return(error);
+ }
+
+ /* Make a temp copy of the data, then release the mutex. */
+
+ memcpy(msg, &tevp->te_msg, msgsize);
+ mutex_spinunlock(&tevp->te_lock, lc);
+
+ /* Now copy the data to the user. */
+
+ if (put_user(msgsize, rlenp)) {
+ error = -EFAULT;
+ } else if (msgsize > buflen) { /* user buffer not big enough */
+ error = -E2BIG;
+ } else if (copy_to_user(bufp, msg, msgsize)) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+ kfree(msg);
+ return(error);
+}
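
The guard ((unsigned long)bufp & (sizeof(__u64) - 1)) used here and again in dm_get_events() is the usual power-of-two alignment test: a pointer is N-byte aligned exactly when its low log2(N) address bits are all zero. A tiny standalone illustration:

	#include <assert.h>
	#include <stdint.h>
	#include <stdlib.h>

	/* True iff p is align-byte aligned (align a power of two). */
	static int is_aligned(const void *p, uintptr_t align)
	{
		return ((uintptr_t)p & (align - 1)) == 0;
	}

	int main(void)
	{
		char *p = malloc(16);	/* aligned for any basic type */

		assert(is_aligned(p, 8));
		assert(!is_aligned(p + 1, 8));
		free(p);
		return 0;
	}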
+
+
+int
+dm_move_event(
+ dm_sessid_t srcsid,
+ dm_token_t token,
+ dm_sessid_t targetsid,
+ dm_token_t __user *rtokenp)
+{
+ dm_session_t *s1;
+ dm_session_t *s2;
+ dm_tokevent_t *tevp;
+ int error;
+ unsigned long lc; /* lock cookie */
+ int hash_it = 0;
+
+ lc = mutex_spinlock(&dm_session_lock);
+
+ if ((error = dm_find_session(srcsid, &s1)) != 0 ||
+ (error = dm_find_session(targetsid, &s2)) != 0 ||
+ (error = dm_find_msg(s1, token, &tevp)) != 0) {
+ mutex_spinunlock(&dm_session_lock, lc);
+ return(error);
+ }
+ dm_unlink_event(tevp, &s1->sn_delq);
+ if (tevp->te_flags & DM_TEF_HASHED) {
+ unhash_event(s1, tevp);
+ hash_it = 1;
+ }
+ dm_link_event(tevp, &s2->sn_delq);
+ if (hash_it)
+ hash_event(s2, tevp);
+ mutex_spinunlock(&dm_session_lock, lc);
+
+ if (copy_to_user(rtokenp, &token, sizeof(token)))
+ return(-EFAULT);
+ return(0);
+}
+
+
+/* ARGSUSED */
+int
+dm_pending(
+ dm_sessid_t sid,
+ dm_token_t token,
+ dm_timestruct_t __user *delay) /* unused */
+{
+ dm_tokevent_t *tevp;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ if ((error = dm_find_msg_and_lock(sid, token, &tevp, &lc)) != 0)
+ return(error);
+
+ tevp->te_flags |= DM_TEF_INTERMED;
+ if (tevp->te_evt_ref > 0) /* if event generation threads exist */
+ sv_broadcast(&tevp->te_evt_queue);
+
+ mutex_spinunlock(&tevp->te_lock, lc);
+ return(0);
+}
+
+
+int
+dm_get_events(
+ dm_sessid_t sid,
+ u_int maxmsgs,
+ u_int flags,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_session_t *s; /* pointer to session given by sid */
+ dm_tokevent_t *tevp; /* next event message on queue */
+ int error;
+ unsigned long lc1; /* first lock cookie */
+ unsigned long lc2 = 0; /* second lock cookie */
+ int totalsize;
+ int msgsize;
+ dm_eventmsg_t __user *prevmsg;
+ int prev_msgsize = 0;
+ u_int msgcnt;
+
+ /* Because some of the events (dm_data_event_t in particular) contain
+ __u64 fields, we need to make sure that the buffer provided by the
+ caller is aligned such that he can read those fields successfully.
+ */
+
+ if (((unsigned long)bufp & (sizeof(__u64) - 1)) != 0)
+ return(-EFAULT);
+
+ /* Find the indicated session and lock it. */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc1)) != 0)
+ return(error);
+
+ /* Check for messages on sn_newq. If there aren't any that haven't
+ already been grabbed by another process, and if we are supposed
+ to wait until one shows up, then go to sleep interruptibly on the
+ sn_readerq semaphore. The session can't disappear out from under
+ us as long as sn_readerq is non-zero.
+ */
+
+ for (;;) {
+ int rc;
+
+ for (tevp = s->sn_newq.eq_head; tevp; tevp = tevp->te_next) {
+ lc2 = mutex_spinlock(&tevp->te_lock);
+ if (!(tevp->te_flags & DM_TEF_LOCKED))
+ break;
+ mutex_spinunlock(&tevp->te_lock, lc2);
+ }
+ if (tevp)
+ break; /* got one! */
+
+ if (!(flags & DM_EV_WAIT)) {
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ return(-EAGAIN);
+ }
+ s->sn_readercnt++;
+
+ sv_wait_sig(&s->sn_readerq, 1, &s->sn_qlock, lc1);
+ rc = signal_pending(current);
+
+ lc1 = mutex_spinlock(&s->sn_qlock);
+ s->sn_readercnt--;
+ if (rc) { /* if signal was received */
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ return(-EINTR);
+ }
+ }
+
+ /* At least one message is available for delivery, and we have both the
+ session lock and event lock. Mark the event so that it is not
+ grabbed by other daemons, then drop both locks prior to copying the
+ data to the caller's buffer. Leaving the event on the queue in a
+ marked state prevents both the session and the event from
+ disappearing out from under us while we don't have the locks.
+ */
+
+ tevp->te_flags |= DM_TEF_LOCKED;
+ mutex_spinunlock(&tevp->te_lock, lc2); /* reverse cookie order */
+ mutex_spinunlock(&s->sn_qlock, lc1);
+
+ /* Continue to deliver messages until there are no more, the
+ user's buffer becomes full, or we hit his maxmsgs limit.
+ */
+
+ totalsize = 0; /* total bytes transferred to the user */
+ prevmsg = NULL;
+ msgcnt = 0;
+
+ while (tevp) {
+ /* Compute the number of bytes to be moved, rounding up to an
+ 8-byte boundary so that any subsequent messages will also be
+ aligned.
+ */
+
+ msgsize = tevp->te_allocsize - offsetof(dm_tokevent_t, te_msg);
+ msgsize = (msgsize + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1);
+ totalsize += msgsize;
+
+ /* If it fits, copy the message into the user's buffer and
+ update his 'rlenp'. Update the _link pointer for any
+ previous message.
+ */
+
+ if (totalsize > buflen) { /* no more room */
+ error = -E2BIG;
+ } else if (put_user(totalsize, rlenp)) {
+ error = -EFAULT;
+ } else if (copy_to_user(bufp, &tevp->te_msg, msgsize)) {
+ error = -EFAULT;
+ } else if (prevmsg && put_user(prev_msgsize, &prevmsg->_link)) {
+ error = -EFAULT;
+ } else {
+ error = 0;
+ }
+
+ /* If an error occurred, just unmark the event and leave it on
+ the queue for someone else. Note that other daemons may
+ have gone to sleep because this event was marked, so wake
+ them up. Also, if at least one message has already been
+ delivered, then an error here is not really an error.
+ */
+
+ lc1 = mutex_spinlock(&s->sn_qlock);
+ lc2 = mutex_spinlock(&tevp->te_lock);
+ tevp->te_flags &= ~DM_TEF_LOCKED; /* drop the mark */
+
+ if (error) {
+ if (s->sn_readercnt)
+ sv_signal(&s->sn_readerq);
+
+ mutex_spinunlock(&tevp->te_lock, lc2); /* rev. order */
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ if (prevmsg)
+ return(0);
+ if (error == -E2BIG && put_user(totalsize,rlenp))
+ error = -EFAULT;
+ return(error);
+ }
+
+ /* The message was successfully delivered. Unqueue it. */
+
+ dm_unlink_event(tevp, &s->sn_newq);
+
+ /* Wake up the first of any processes waiting for room on the
+ sn_newq.
+ */
+
+ if (s->sn_writercnt)
+ sv_signal(&s->sn_writerq);
+
+ /* If the message is synchronous, add it to the sn_delq while
+ still holding the lock. If it is asynchronous, free it.
+ */
+
+ if (tevp->te_msg.ev_token != DM_INVALID_TOKEN) { /* synch */
+ dm_link_event(tevp, &s->sn_delq);
+ mutex_spinunlock(&tevp->te_lock, lc2);
+ } else {
+ tevp->te_flags |= DM_TEF_FINAL;
+ if (tevp->te_flags & DM_TEF_HASHED)
+ unhash_event(s, tevp);
+ mutex_spinunlock(&tevp->te_lock, lc2);
+ dm_put_tevp(tevp, NULL);/* can't cause destroy events */
+ }
+
+ /* Update our notion of where we are in the user's buffer. If
+ he doesn't want any more messages, then stop.
+ */
+
+ prevmsg = (dm_eventmsg_t __user *)bufp;
+ prev_msgsize = msgsize;
+ bufp = (char __user *)bufp + msgsize;
+
+ msgcnt++;
+ if (maxmsgs && msgcnt >= maxmsgs) {
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ break;
+ }
+
+ /* While still holding the sn_qlock, see if any additional
+ messages are available for delivery.
+ */
+
+ for (tevp = s->sn_newq.eq_head; tevp; tevp = tevp->te_next) {
+ lc2 = mutex_spinlock(&tevp->te_lock);
+ if (!(tevp->te_flags & DM_TEF_LOCKED)) {
+ tevp->te_flags |= DM_TEF_LOCKED;
+ mutex_spinunlock(&tevp->te_lock, lc2);
+ break;
+ }
+ mutex_spinunlock(&tevp->te_lock, lc2);
+ }
+ mutex_spinunlock(&s->sn_qlock, lc1);
+ }
+ return(0);
+}
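
On the receiving side, the 8-byte rounding plus the _link offset patched into each previous message is what makes the returned buffer walkable. A hedged sketch of the consumer loop, assuming the XDSM dm_eventmsg layout in which _link holds the byte offset to the next message or 0 for the last (dmapi.h wraps the same step in a DM_STEP_TO_NEXT() macro):

	#include <dmapi.h>	/* assumed header location */

	void for_each_event(void *bufp, void (*handle)(dm_eventmsg_t *))
	{
		dm_eventmsg_t *msg = bufp;

		for (;;) {
			handle(msg);
			if (msg->_link == 0)
				break;	/* last message in the buffer */
			msg = (dm_eventmsg_t *)((char *)msg + msg->_link);
		}
	}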
+
+
+/*
+ * Remove an event message from the delivered queue, set the returned
+ * error where the event generator wants it, and wake up the generator.
+ * Also currently have the user side release any locks it holds...
+ */
+
+/* ARGSUSED */
+int
+dm_respond_event(
+ dm_sessid_t sid,
+ dm_token_t token,
+ dm_response_t response,
+ int reterror,
+ size_t buflen, /* unused */
+ void __user *respbufp) /* unused */
+{
+ dm_session_t *s; /* pointer to session given by sid */
+ dm_tokevent_t *tevp; /* event message queue traversal */
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* Sanity check the input parameters. */
+
+ switch (response) {
+ case DM_RESP_CONTINUE: /* continue must have reterror == 0 */
+ if (reterror != 0)
+ return(-EINVAL);
+ break;
+ case DM_RESP_ABORT: /* abort must have errno set */
+ if (reterror <= 0)
+ return(-EINVAL);
+ break;
+ case DM_RESP_DONTCARE:
+ reterror = -1; /* to distinguish DM_RESP_DONTCARE */
+ break;
+ default:
+ return(-EINVAL);
+ }
+
+ /* Hold session lock until the event is unqueued. */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc)) != 0)
+ return(error);
+
+ if ((error = dm_find_msg(s, token, &tevp)) != 0) {
+ mutex_spinunlock(&s->sn_qlock, lc);
+ return(error);
+ }
+ nested_spinlock(&tevp->te_lock);
+
+ if ((response == DM_RESP_DONTCARE) &&
+ (tevp->te_msg.ev_type != DM_EVENT_MOUNT)) {
+ error = -EINVAL;
+ nested_spinunlock(&tevp->te_lock);
+ mutex_spinunlock(&s->sn_qlock, lc);
+ } else {
+ dm_unlink_event(tevp, &s->sn_delq);
+ if (tevp->te_flags & DM_TEF_HASHED)
+ unhash_event(s, tevp);
+ tevp->te_reply = -reterror; /* linux wants negative errno */
+ tevp->te_flags |= DM_TEF_FINAL;
+ if (tevp->te_evt_ref)
+ sv_broadcast(&tevp->te_evt_queue);
+ nested_spinunlock(&tevp->te_lock);
+ mutex_spinunlock(&s->sn_qlock, lc);
+ error = 0;
+
+ /* Absolutely no locks can be held when calling dm_put_tevp! */
+
+ dm_put_tevp(tevp, NULL); /* this can generate destroy events */
+ }
+ return(error);
+}
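
Together with dm_get_events(), this completes the synchronous round trip: a daemon pulls a message, services it, and responds with the token so the kernel thread blocked in dm_enqueue() (below) can continue. A minimal sketch of such a loop under the same XDSM-userland assumptions as above, error handling elided:

	void event_loop(dm_sessid_t sid)
	{
		char buf[4096] __attribute__((aligned(8)));	/* __u64-aligned */
		size_t rlen;
		dm_eventmsg_t *msg;

		for (;;) {
			if (dm_get_events(sid, 1, DM_EV_WAIT, sizeof(buf),
					  buf, &rlen) < 0)
				continue;
			msg = (dm_eventmsg_t *)buf;
			if (msg->ev_token == DM_INVALID_TOKEN)
				continue;	/* async: no reply expected */
			/* ... service the event ... */
			dm_respond_event(sid, msg->ev_token,
					 DM_RESP_CONTINUE, 0, 0, NULL);
		}
	}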
+
+/* The caller must hold sn_qlock.
+ This will return the tokevent locked.
+ */
+static dm_tokevent_t *
+__find_match_event_no_waiters_locked(
+ dm_tokevent_t *tevp1,
+ dm_eventq_t *queue)
+{
+ dm_tokevent_t *tevp2, *next_tevp;
+ dm_tokdata_t *tdp1 = tevp1->te_tdp;
+ dm_tokdata_t *tdp2;
+ dm_data_event_t *d_event1;
+ dm_data_event_t *d_event2;
+
+ d_event1 = (dm_data_event_t *)((char *)&tevp1->te_msg + tevp1->te_msg.ev_data.vd_offset);
+
+ for(tevp2 = queue->eq_head; tevp2; tevp2 = next_tevp) {
+ nested_spinlock(&tevp2->te_lock);
+ next_tevp = tevp2->te_next;
+
+ /* Just compare the first tdp's in each--there should
+ be just one, if it's the match we want.
+ */
+ tdp2 = tevp2->te_tdp;
+ if ((tevp2->te_msg.ev_type == tevp1->te_msg.ev_type) &&
+ (tevp2->te_tdp->td_type == tevp1->te_tdp->td_type) &&
+ (tevp2->te_evt_ref == 0) && (tdp2->td_next == NULL) &&
+ (memcmp(&tdp1->td_handle, &tdp2->td_handle,
+ sizeof(dm_handle_t)) == 0)) {
+
+ d_event2 = (dm_data_event_t *)((char *)&tevp2->te_msg + tevp2->te_msg.ev_data.vd_offset);
+
+ if ((d_event2->de_offset == d_event1->de_offset) &&
+ (d_event2->de_length == d_event1->de_length)) {
+ /* Match -- return it locked */
+ return tevp2;
+ }
+ }
+ nested_spinunlock(&tevp2->te_lock);
+ }
+ return NULL;
+}
+
+/* The caller must hold the sn_qlock.
+ The returned tokevent will be locked with nested_spinlock.
+ */
+static dm_tokevent_t *
+find_match_event_no_waiters_locked(
+ dm_session_t *s,
+ dm_tokevent_t *tevp)
+{
+ dm_tokevent_t *tevp2;
+
+ if ((!s->sn_newq.eq_tail) && (!s->sn_delq.eq_tail))
+ return NULL;
+ if (!tevp->te_tdp)
+ return NULL;
+ if (tevp->te_tdp->td_next) {
+ /* If it has multiple tdp's then don't bother trying to
+ find a match.
+ */
+ return NULL;
+ }
+ tevp2 = __find_match_event_no_waiters_locked(tevp, &s->sn_newq);
+ if (tevp2 == NULL)
+ tevp2 = __find_match_event_no_waiters_locked(tevp, &s->sn_delq);
+ /* returns a locked tokevent */
+ return tevp2;
+}
+
+
+
+/* Queue the filled in event message pointed to by tevp on the session s, and
+ (if a synchronous event) wait for the reply from the DMAPI application.
+ The caller MUST be holding the session lock before calling this routine!
+ The session lock is always released upon exit.
+ Returns:
+ > 0 == don't care (dm_respond_event() stores the negated reterror,
+ so DM_RESP_DONTCARE's -1 comes back as +1)
+ 0 == success (or async event)
+ < 0 == negative errno describing reason for failure
+*/
+
+static int
+dm_enqueue(
+ dm_session_t *s,
+ unsigned long lc, /* input lock cookie */
+ dm_tokevent_t **tevpp, /* in/out parameter */
+ int sync,
+ int flags,
+ int interruptible)
+{
+ int is_unmount = 0;
+ int is_hashable = 0;
+ int reply;
+ dm_tokevent_t *tevp = *tevpp;
+
+ /* If the caller isn't planning to stick around for the result
+ and this request is identical to one that is already on the
+ queues then just give the caller an EAGAIN. Release the
+ session lock before returning.
+
+ We look only at NDELAY requests with an event type of READ,
+ WRITE, or TRUNCATE on objects that are regular files.
+ */
+
+ if ((flags & DM_FLAGS_NDELAY) && DM_EVENT_RDWRTRUNC(tevp) &&
+ (tevp->te_tdp->td_type == DM_TDT_REG)) {
+ if (repeated_event(s, tevp)) {
+ mutex_spinunlock(&s->sn_qlock, lc);
+ return -EAGAIN;
+ }
+ is_hashable = 1;
+ }
+
+ /* If the caller is a sync event then look for a matching sync
+ event. If there is a match and it doesn't currently have
+ event threads waiting on it, then we will drop our own
+ tokevent and jump on the matching event.
+ */
+ if (((flags & DM_FLAGS_NDELAY) == 0) && DM_EVENT_RDWRTRUNC(tevp) &&
+ (tevp->te_tdp->td_type == DM_TDT_REG)) {
+ dm_tokevent_t *tevp2;
+ if ((tevp2 = find_match_event_no_waiters_locked(s, tevp))) {
+ ASSERT(tevp2->te_evt_ref == 0);
+ tevp2->te_evt_ref++;
+ nested_spinunlock(&tevp2->te_lock);
+ nested_spinlock(&tevp->te_lock);
+ tevp->te_evt_ref--;
+ nested_spinunlock(&tevp->te_lock);
+ mutex_spinunlock(&s->sn_qlock, lc);
+ /* All locks have been released */
+ dm_evt_rele_tevp(tevp, 1);
+ *tevpp = tevp = tevp2;
+ goto wait_on_tevp;
+ }
+ }
+
+ if (tevp->te_msg.ev_type == DM_EVENT_UNMOUNT)
+ is_unmount = 1;
+
+ /* Check for room on sn_newq. If there is no room for new messages,
+ then go to sleep on the sn_writerq semaphore. The
+ session cannot disappear out from under us as long as sn_writercnt
+ is non-zero.
+ */
+
+ while (s->sn_newq.eq_count >= dm_max_queued_msgs) { /* no room */
+ s->sn_writercnt++;
+ dm_link_event(tevp, &s->sn_evt_writerq);
+ if (interruptible) {
+ sv_wait_sig(&s->sn_writerq, 1, &s->sn_qlock, lc);
+ if (signal_pending(current)) {
+ /* re-take the lock to fix up the queue before bailing */
+ lc = mutex_spinlock(&s->sn_qlock);
+ s->sn_writercnt--;
+ dm_unlink_event(tevp, &s->sn_evt_writerq);
+ mutex_spinunlock(&s->sn_qlock, lc);
+ return -EINTR;
+ }
+ } else {
+ sv_wait(&s->sn_writerq, 1, &s->sn_qlock, lc);
+ }
+ lc = mutex_spinlock(&s->sn_qlock);
+ s->sn_writercnt--;
+ dm_unlink_event(tevp, &s->sn_evt_writerq);
+#ifdef HAVE_DM_QUEUE_FLUSH
+ /* We hold the sn_qlock, from here to after we get into
+ * the sn_newq. Any thread going through
+ * dm_release_threads() looking for us is already past us
+ * and has set the DM_TEF_FLUSH flag for us or is blocked on
+ * sn_qlock and will find us in sn_newq after we release
+ * the sn_qlock.
+ * We check for dop->flushing anyway, in case the
+ * dm_release_threads() already completed before we
+ * could enter dmapi.
+ */
+ if (!sync) {
+ /* async events are forced into the newq */
+ break;
+ }
+ if (tevp->te_flags & DM_TEF_FLUSH) {
+ mutex_spinunlock(&s->sn_qlock, lc);
+ return tevp->te_reply;
+ } else {
+ struct filesystem_dmapi_operations *dops;
+ dm_tokdata_t *tdp;
+ int errno = 0;
+
+ nested_spinlock(&tevp->te_lock);
+ for (tdp = tevp->te_tdp; tdp; tdp = tdp->td_next) {
+ if (tdp->td_ip) {
+ dops = dm_fsys_ops(tdp->td_ip->i_sb);
+ ASSERT(dops);
+ if (dops->flushing)
+ errno = dops->flushing(tdp->td_ip);
+ if (errno) {
+ nested_spinunlock(&tevp->te_lock);
+ mutex_spinunlock(&s->sn_qlock, lc);
+ return errno;
+ }
+ }
+ }
+ nested_spinunlock(&tevp->te_lock);
+ }
+#endif /* HAVE_DM_QUEUE_FLUSH */
+ }
+
+ /* Assign a sequence number and token to the event and bump the
+ application reference count by one. We don't need 'te_lock' here
+ because this thread is still the only thread that can see the event.
+ */
+
+ nested_spinlock(&dm_token_lock);
+ tevp->te_msg.ev_sequence = dm_next_sequence++;
+ if (sync) {
+ tevp->te_msg.ev_token = dm_next_token++;
+ } else {
+ tevp->te_msg.ev_token = DM_INVALID_TOKEN;
+ }
+ nested_spinunlock(&dm_token_lock);
+
+ tevp->te_app_ref++;
+
+ /* Room exists on the sn_newq queue, so add this request. If the
+ queue was previously empty, wake up the first of any processes
+ that are waiting for an event.
+ */
+
+ dm_link_event(tevp, &s->sn_newq);
+ if (is_hashable)
+ hash_event(s, tevp);
+
+ if (s->sn_readercnt)
+ sv_signal(&s->sn_readerq);
+
+ mutex_spinunlock(&s->sn_qlock, lc);
+
+ /* Now that the message is queued, processes issuing asynchronous
+ events or DM_EVENT_UNMOUNT events are ready to continue.
+ */
+
+ if (!sync || is_unmount)
+ return 0;
+
+ /* Synchronous requests wait until a final reply is received. If the
+ caller supplied the DM_FLAGS_NDELAY flag, the process will return
+ EAGAIN if dm_pending() sets DM_TEF_INTERMED. We also let users
+ Ctrl-C out of read, write, and truncate requests.
+ */
+
+wait_on_tevp:
+ lc = mutex_spinlock(&tevp->te_lock);
+
+ while (!(tevp->te_flags & DM_TEF_FINAL)) {
+ if ((tevp->te_flags & DM_TEF_INTERMED) &&
+ (flags & DM_FLAGS_NDELAY)) {
+ mutex_spinunlock(&tevp->te_lock, lc);
+ return -EAGAIN;
+ }
+ if (tevp->te_msg.ev_type == DM_EVENT_READ ||
+ tevp->te_msg.ev_type == DM_EVENT_WRITE ||
+ tevp->te_msg.ev_type == DM_EVENT_TRUNCATE) {
+ sv_wait_sig(&tevp->te_evt_queue, 1, &tevp->te_lock, lc);
+ if (signal_pending(current)){
+ return -EINTR;
+ }
+ } else {
+ sv_wait(&tevp->te_evt_queue, 1, &tevp->te_lock, lc);
+ }
+ lc = mutex_spinlock(&tevp->te_lock);
+#ifdef HAVE_DM_QUEUE_FLUSH
+ /* Did we pop out because of queue flushing? */
+ if (tevp->te_flags & DM_TEF_FLUSH) {
+ mutex_spinunlock(&tevp->te_lock, lc);
+ return tevp->te_reply;
+ }
+#endif /* HAVE_DM_QUEUE_FLUSH */
+ }
+
+ /* Return both the tevp and the reply which was stored in the tevp by
+ dm_respond_event. The tevp structure has already been removed from
+ the reply queue by this point in dm_respond_event().
+ */
+
+ reply = tevp->te_reply;
+ mutex_spinunlock(&tevp->te_lock, lc);
+ return reply;
+}
+
+
+/* The filesystem is guaranteed to stay mounted while this event is
+ outstanding.
+*/
+
+int
+dm_enqueue_normal_event(
+ struct super_block *sb,
+ dm_tokevent_t **tevpp,
+ int flags)
+{
+ dm_session_t *s;
+ int error;
+ int sync;
+ unsigned long lc; /* lock cookie */
+
+ switch ((*tevpp)->te_msg.ev_type) {
+ case DM_EVENT_READ:
+ case DM_EVENT_WRITE:
+ case DM_EVENT_TRUNCATE:
+ case DM_EVENT_PREUNMOUNT:
+ case DM_EVENT_UNMOUNT:
+ case DM_EVENT_NOSPACE:
+ case DM_EVENT_CREATE:
+ case DM_EVENT_REMOVE:
+ case DM_EVENT_RENAME:
+ case DM_EVENT_SYMLINK:
+ case DM_EVENT_LINK:
+ case DM_EVENT_DEBUT: /* not currently supported */
+ sync = 1;
+ break;
+
+ case DM_EVENT_DESTROY:
+ case DM_EVENT_POSTCREATE:
+ case DM_EVENT_POSTREMOVE:
+ case DM_EVENT_POSTRENAME:
+ case DM_EVENT_POSTSYMLINK:
+ case DM_EVENT_POSTLINK:
+ case DM_EVENT_ATTRIBUTE:
+ case DM_EVENT_CLOSE: /* not currently supported */
+ case DM_EVENT_CANCEL: /* not currently supported */
+ sync = 0;
+ break;
+
+ default:
+ return(-EIO); /* garbage event number */
+ }
+
+ /* Wait until a session selects disposition for the event. The session
+ is locked upon return from dm_waitfor_disp_session().
+ */
+
+ if ((error = dm_waitfor_disp_session(sb, *tevpp, &s, &lc)) != 0)
+ return(error);
+
+ return(dm_enqueue(s, lc, tevpp, sync, flags, 0));
+}
+
+
+/* Traverse the session list checking for sessions with the WANTMOUNT flag
+ set. When one is found, send it the message. Possible responses to the
+ message are one of DONTCARE, CONTINUE, or ABORT. The action taken in each
+ case is:
+ DONTCARE (-1) - Send the event to the next session with WANTMOUNT set
+ CONTINUE ( 0) - Proceed with the mount, errno zero.
+ ABORT (>0) - Fail the mount, return the returned errno.
+
+ Note that dm_respond_event() negates the reply before storing it in
+ te_reply, so dm_enqueue() below actually returns +1 for DONTCARE and
+ a negative errno for ABORT; hence the loop continues while the
+ return value is positive.
+
+ The mount request is sent to sessions in ascending session ID order.
+ Since the session list can change dramatically while this process is
+ sleeping in dm_enqueue(), this routine must use session IDs rather than
+ session pointers when keeping track of where it is in the list. Since
+ new sessions are always added at the end of the queue, and have increasing
+ session ID values, we don't have to worry about missing any session.
+*/
+
+int
+dm_enqueue_mount_event(
+ struct super_block *sb,
+ dm_tokevent_t *tevp)
+{
+ dm_session_t *s;
+ dm_sessid_t sid;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* Make the mounting filesystem visible to other DMAPI calls. */
+
+ if ((error = dm_add_fsys_entry(sb, tevp)) != 0){
+ return(error);
+ }
+
+ /* Walk through the session list presenting the mount event to each
+ session that is interested until a session accepts or rejects it,
+ or until all sessions ignore it.
+ */
+
+ for (sid = DM_NO_SESSION, error = 1; error > 0; sid = s->sn_sessid) {
+
+ lc = mutex_spinlock(&dm_session_lock);
+ for (s = dm_sessions; s; s = s->sn_next) {
+ if (s->sn_sessid > sid && s->sn_flags & DM_SN_WANTMOUNT) {
+ nested_spinlock(&s->sn_qlock);
+ nested_spinunlock(&dm_session_lock);
+ break;
+ }
+ }
+ if (s == NULL) {
+ mutex_spinunlock(&dm_session_lock, lc);
+ break; /* no one wants it; proceed with mount */
+ }
+ error = dm_enqueue(s, lc, &tevp, 1, 0, 0);
+ }
+
+ /* If the mount will be allowed to complete, then update the fsrp entry
+ accordingly. If the mount is to be aborted, remove the fsrp entry.
+ */
+
+ if (error >= 0) {
+ dm_change_fsys_entry(sb, DM_STATE_MOUNTED);
+ error = 0;
+ } else {
+ dm_remove_fsys_entry(sb);
+ }
+ return(error);
+}
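
The sid-based restart in the loop above generalizes to any list that can change while the walker sleeps: remember a monotonically increasing key instead of a pointer, and rescan from the head for the first entry past that key. A standalone sketch with illustrative types:

	struct session {
		struct session *next;
		unsigned int id;	/* strictly increasing on insert */
	};

	/* First session with id > last_id, or NULL.  Safe to call again
	 * after sleeping, because ids never move and never repeat.
	 */
	struct session *next_session(struct session *head, unsigned int last_id)
	{
		struct session *s;

		for (s = head; s; s = s->next)
			if (s->id > last_id)
				return s;
		return NULL;
	}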
+
+int
+dm_enqueue_sendmsg_event(
+ dm_sessid_t targetsid,
+ dm_tokevent_t *tevp,
+ int sync)
+{
+ dm_session_t *s;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ if ((error = dm_find_session_and_lock(targetsid, &s, &lc)) != 0)
+ return(error);
+
+ return(dm_enqueue(s, lc, &tevp, sync, 0, 1));
+}
+
+
+dm_token_t
+dm_enqueue_user_event(
+ dm_sessid_t sid,
+ dm_tokevent_t *tevp,
+ dm_token_t *tokenp)
+{
+ dm_session_t *s;
+ int error;
+ unsigned long lc; /* lock cookie */
+
+ /* Atomically find and lock the session whose session id is 'sid'. */
+
+ if ((error = dm_find_session_and_lock(sid, &s, &lc)) != 0)
+ return(error);
+
+ /* Assign a sequence number and token to the event, bump the
+ application reference count by one, and decrement the event
+ count because the caller gives up all ownership of the event.
+ We don't need 'te_lock' here because this thread is still the
+ only thread that can see the event.
+ */
+
+ nested_spinlock(&dm_token_lock);
+ tevp->te_msg.ev_sequence = dm_next_sequence++;
+ *tokenp = tevp->te_msg.ev_token = dm_next_token++;
+ nested_spinunlock(&dm_token_lock);
+
+ tevp->te_flags &= ~(DM_TEF_INTERMED|DM_TEF_FINAL);
+ tevp->te_app_ref++;
+ tevp->te_evt_ref--;
+
+ /* Add the request to the tail of the sn_delq. Now it's visible. */
+
+ dm_link_event(tevp, &s->sn_delq);
+ mutex_spinunlock(&s->sn_qlock, lc);
+
+ return(0);
+}
+
+#ifdef HAVE_DM_QUEUE_FLUSH
+/* If inode is non-null, find any tdp referencing that inode and flush the
+ * thread waiting on that inode and set DM_TEF_FLUSH for that tokevent.
+ * Otherwise, if inode is null, find any tdp referencing the specified fsid
+ * and flush that thread and set DM_TEF_FLUSH for that tokevent.
+ */
+static int
+dm_flush_events(
+ dm_session_t *s,
+ dm_fsid_t *fsidp,
+ struct inode *inode, /* may be null */
+ dm_eventq_t *queue,
+ int is_writerq,
+ int errno)
+{
+ dm_tokevent_t *tevp, *next_tevp;
+ dm_tokdata_t *tdp;
+ int found_events = 0;
+
+ ASSERT(fsidp);
+ for (tevp = queue->eq_head; tevp; tevp = next_tevp) {
+ nested_spinlock(&tevp->te_lock);
+ next_tevp = tevp->te_next;
+
+ for (tdp = tevp->te_tdp; tdp; tdp = tdp->td_next) {
+ if (inode) {
+ if (tdp->td_ip == inode)
+ break;
+ } else if (memcmp(fsidp, &tdp->td_handle.ha_fsid,
+ sizeof(*fsidp)) == 0) {
+ break;
+ }
+ }
+
+ if (tdp != NULL) {
+ /* found a handle reference in this event */
+ ++found_events;
+ tevp->te_flags |= DM_TEF_FLUSH;
+
+ /* Set the reply value, unless dm_get_events is
+ already on this one.
+ */
+ if (! (tevp->te_flags & DM_TEF_LOCKED))
+ tevp->te_reply = errno;
+
+ /* If it is on the sn_evt_writerq or is being
+ used by dm_get_events then we're done with it.
+ */
+ if (is_writerq || (tevp->te_flags & DM_TEF_LOCKED)) {
+ nested_spinunlock(&tevp->te_lock);
+ continue;
+ }
+
+ /* If there is a thread waiting on a synchronous
+ event then be like dm_respond_event.
+ */
+
+ if ((tevp->te_evt_ref) &&
+ (tevp->te_msg.ev_token != DM_INVALID_TOKEN)) {
+
+ tevp->te_flags |= DM_TEF_FINAL;
+ dm_unlink_event(tevp, queue);
+ if (tevp->te_flags & DM_TEF_HASHED)
+ unhash_event(s, tevp);
+ sv_broadcast(&tevp->te_evt_queue);
+ nested_spinunlock(&tevp->te_lock);
+ dm_put_tevp(tevp, NULL);
+ continue;
+ }
+ }
+ nested_spinunlock(&tevp->te_lock);
+ }
+
+ return(found_events);
+}
+
+
+/* If inode is non-null then find any threads that have a reference to that
+ * inode and flush them with the specified errno.
+ * Otherwise,if inode is null, then find any threads that have a reference
+ * to that sb and flush them with the specified errno.
+ * We look for these threads in each session's sn_evt_writerq, sn_newq,
+ * and sn_delq.
+ */
+int
+dm_release_threads(
+ struct super_block *sb,
+ struct inode *inode, /* may be null */
+ int errno)
+{
+ dm_sessid_t sid;
+ dm_session_t *s;
+ unsigned long lc;
+ u_int sesscnt;
+ dm_sessid_t *sidlist;
+ int i;
+ int found_events = 0;
+ dm_fsid_t fsid;
+ struct filesystem_dmapi_operations *dops;
+
+ ASSERT(sb);
+ dops = dm_fsys_ops(sb);
+ ASSERT(dops);
+ dops->get_fsid(sb, &fsid);
+ dm_release_disp_threads(&fsid, inode, errno);
+
+ /* Loop until we can get the right amount of temp space, being careful
+ not to hold a mutex during the allocation. Usually only one trip.
+ */
+
+ for (;;) {
+ lc = mutex_spinlock(&dm_session_lock);
+ sesscnt = dm_sessions_active;
+ mutex_spinunlock(&dm_session_lock, lc);
+
+ if (sesscnt == 0)
+ return 0;
+
+ sidlist = kmalloc(sesscnt * sizeof(sid), GFP_KERNEL);
+ if (sidlist == NULL)
+ return -ENOMEM;
+
+ lc = mutex_spinlock(&dm_session_lock);
+ if (sesscnt == dm_sessions_active)
+ break;
+
+ mutex_spinunlock(&dm_session_lock, lc);
+ kfree(sidlist);
+ }
+
+ for (i = 0, s = dm_sessions; i < sesscnt; i++, s = s->sn_next)
+ sidlist[i] = s->sn_sessid;
+
+ mutex_spinunlock(&dm_session_lock, lc);
+
+
+ for (i = 0; i < sesscnt; i++) {
+ sid = sidlist[i];
+ if (dm_find_session_and_lock(sid, &s, &lc) == 0) {
+ found_events = dm_flush_events(s, &fsid, inode,
+ &s->sn_evt_writerq, 1,
+ errno);
+ if (found_events)
+ sv_broadcast(&s->sn_writerq);
+
+ dm_flush_events(s, &fsid, inode, &s->sn_newq, 0, errno);
+ dm_flush_events(s, &fsid, inode, &s->sn_delq, 0, errno);
+
+ mutex_spinunlock(&s->sn_qlock, lc);
+ }
+ }
+ kfree(sidlist);
+
+ return 0;
+}
+#endif /* HAVE_DM_QUEUE_FLUSH */
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
+ #include <linux/kmod.h>
+ #include <linux/fsnotify.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
if (IS_ERR(file))
return file;
+ fsnotify_open(file->f_path.dentry);
+
+ if (file->f_op && file->f_op->open_exec) {
+ err = file->f_op->open_exec(nd.path.dentry->d_inode);
+ if (err) {
+ fput(file);
+ goto out;
+ }
+ }
+
err = deny_write_access(file);
if (err) {
fput(file);
current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
- if (retval >= 0) {
- trace_fs_exec(filename);
- /* execve success */
- security_bprm_free(bprm);
- acct_update_integrals(current);
- free_bprm(bprm);
- if (displaced)
- put_files_struct(displaced);
- return retval;
- }
+ if (retval < 0)
+ goto out;
- out:
- if (bprm->security)
- security_bprm_free(bprm);
+ /* execve succeeded */
+ mutex_unlock(&current->cred_exec_mutex);
++ trace_fs_exec(filename);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+ if (displaced)
+ put_files_struct(displaced);
+ return retval;
- out_mm:
+ out:
if (bprm->mm)
mmput (bprm->mm);
if (helper_argv)
argv_free(helper_argv);
- current->fsuid = fsuid;
+ revert_creds(old_cred);
+ put_cred(cred);
coredump_finish(mm);
fail:
- return retval;
+ return;
}
++
++DEFINE_TRACE(fs_exec);
--- /dev/null
+ config EXT3_FS
+ tristate "Ext3 journalling file system support"
+ select JBD
+ help
+ This is the journalling version of the Second extended file system
+ (often called ext3), the de facto standard Linux file system
+ (method to organize files on a storage device) for hard disks.
+
+ The journalling code included in this driver means you do not have
+ to run e2fsck (file system checker) on your file systems after a
+ crash. The journal keeps track of any changes that were being made
+ at the time the system crashed, and can ensure that your file system
+ is consistent without the need for a lengthy check.
+
+ Other than adding the journal to the file system, the on-disk format
+ of ext3 is identical to ext2. It is possible to freely switch
+ between using the ext3 driver and the ext2 driver, as long as the
+ file system has been cleanly unmounted, or e2fsck is run on the file
+ system.
+
+ To add a journal on an existing ext2 file system or change the
+ behavior of ext3 file systems, you can use the tune2fs utility ("man
+ tune2fs"). To modify attributes of files and directories on ext3
+ file systems, use chattr ("man chattr"). You need to be using
+ e2fsprogs version 1.20 or later in order to create ext3 journals
+ (available at <http://sourceforge.net/projects/e2fsprogs/>).
+
+ To compile this file system support as a module, choose M here: the
+ module will be called ext3.
+
+ config EXT3_FS_XATTR
+ bool "Ext3 extended attributes"
+ depends on EXT3_FS
+ default y
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+ You need this for POSIX ACL support on ext3.
+
+ config EXT3_FS_POSIX_ACL
+ bool "Ext3 POSIX Access Control Lists"
+ depends on EXT3_FS_XATTR
+ select FS_POSIX_ACL
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the Posix ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
+
++config EXT3_FS_NFS4ACL
++ bool "Native NFSv4 ACLs (EXPERIMENTAL)"
++ depends on EXT3_FS_XATTR && EXPERIMENTAL
++ select FS_NFS4ACL
++ help
++ Allows the use of NFSv4 ACLs instead of POSIX ACLs.
++
+ config EXT3_FS_SECURITY
+ bool "Ext3 Security Labels"
+ depends on EXT3_FS_XATTR
+ help
+ Security labels support alternative access control models
+ implemented by security modules like SELinux. This option
+ enables an extended attribute handler for file security
+ labels in the ext3 filesystem.
+
+ If you are not using a security module that requires using
+ extended attributes for file security labels, say N.
.removexattr = generic_removexattr,
#endif
.permission = ext3_permission,
+ .fiemap = ext3_fiemap,
+ .may_create = ext3_may_create,
+ .may_delete = ext3_may_delete,
};
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/bio.h>
+ #include <linux/fiemap.h>
+ #include <linux/namei.h>
#include "xattr.h"
#include "acl.h"
+#include "nfs4acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
return ext3_force_commit(inode->i_sb);
}
+#ifdef CONFIG_EXT3_FS_NFS4ACL
+static int ext3_inode_change_ok(struct inode *inode, struct iattr *attr)
+{
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (!test_opt(inode->i_sb, NFS4ACL))
+ return inode_change_ok(inode, attr);
+
+ /* If force is set do it anyway. */
+ if (ia_valid & ATTR_FORCE)
+ return 0;
+
+ /* Make sure a caller can chown. */
+ if ((ia_valid & ATTR_UID) &&
- (current->fsuid != inode->i_uid ||
++ (current_fsuid() != inode->i_uid ||
+ attr->ia_uid != inode->i_uid) &&
- (current->fsuid != attr->ia_uid ||
++ (current_fsuid() != attr->ia_uid ||
+ ext3_nfs4acl_permission(inode, ACE4_WRITE_OWNER)) &&
+ !capable(CAP_CHOWN))
+ goto error;
+
+ /* Make sure caller can chgrp. */
+ if ((ia_valid & ATTR_GID)) {
+ int in_group = in_group_p(attr->ia_gid);
- if ((current->fsuid != inode->i_uid ||
++ if ((current_fsuid() != inode->i_uid ||
+ (!in_group && attr->ia_gid != inode->i_gid)) &&
+ (!in_group ||
+ ext3_nfs4acl_permission(inode, ACE4_WRITE_OWNER)) &&
+ !capable(CAP_CHOWN))
+ goto error;
+ }
+
+ /* Make sure a caller can chmod. */
+ if (ia_valid & ATTR_MODE) {
- if (current->fsuid != inode->i_uid &&
++ if (current_fsuid() != inode->i_uid &&
+ ext3_nfs4acl_permission(inode, ACE4_WRITE_ACL) &&
+ !capable(CAP_FOWNER))
+ goto error;
+ /* Also check the setgid bit! */
+ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
+ inode->i_gid) && !capable(CAP_FSETID))
+ attr->ia_mode &= ~S_ISGID;
+ }
+
+ /* Check for setting the inode time. */
+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
- if (current->fsuid != inode->i_uid &&
++ if (current_fsuid() != inode->i_uid &&
+ ext3_nfs4acl_permission(inode, ACE4_WRITE_ATTRIBUTES) &&
+ !capable(CAP_FOWNER))
+ goto error;
+ }
+ return 0;
+error:
+ return -EPERM;
+}
+#else
+# define ext3_inode_change_ok inode_change_ok
+#endif
+
/*
* ext3_setattr()
*
--- /dev/null
+/*
+ * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/nfs4acl_xattr.h>
+#include "namei.h"
+#include "xattr.h"
+#include "nfs4acl.h"
+
+static inline struct nfs4acl *
+ext3_iget_nfs4acl(struct inode *inode)
+{
+ struct nfs4acl *acl = EXT3_NFS4ACL_NOT_CACHED;
+ struct ext3_inode_info *ei = EXT3_I(inode);
+
+ spin_lock(&inode->i_lock);
+ if (ei->i_nfs4acl != EXT3_NFS4ACL_NOT_CACHED)
+ acl = nfs4acl_get(ei->i_nfs4acl);
+ spin_unlock(&inode->i_lock);
+
+ return acl;
+}
+
+static inline void
+ext3_iset_nfs4acl(struct inode *inode, struct nfs4acl *acl)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+
+ spin_lock(&inode->i_lock);
+ if (ei->i_nfs4acl != EXT3_NFS4ACL_NOT_CACHED)
+ nfs4acl_put(ei->i_nfs4acl);
+ ei->i_nfs4acl = nfs4acl_get(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+static struct nfs4acl *
+ext3_get_nfs4acl(struct inode *inode)
+{
+ const int name_index = EXT3_XATTR_INDEX_NFS4ACL;
+ void *value = NULL;
+ struct nfs4acl *acl;
+ int retval;
+
+ if (!test_opt(inode->i_sb, NFS4ACL))
+ return NULL;
+
+ acl = ext3_iget_nfs4acl(inode);
+ if (acl != EXT3_NFS4ACL_NOT_CACHED)
+ return acl;
+ retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
+ if (retval > 0) {
+ value = kmalloc(retval, GFP_KERNEL);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ retval = ext3_xattr_get(inode, name_index, "", value, retval);
+ }
+ if (retval > 0) {
+ acl = nfs4acl_from_xattr(value, retval);
+ if (acl == ERR_PTR(-EINVAL))
+ acl = ERR_PTR(-EIO);
+ } else if (retval == -ENODATA || retval == -ENOSYS)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
+ kfree(value);
+
+ if (!IS_ERR(acl))
+ ext3_iset_nfs4acl(inode, acl);
+
+ return acl;
+}
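
The size-then-fetch double call to ext3_xattr_get() is the same idiom userspace uses with getxattr(2): query with a zero-length buffer, allocate, fetch, and tolerate the attribute changing in between (the second return value is re-checked above for exactly that reason). The userspace equivalent, for comparison:

	#include <sys/xattr.h>
	#include <stdlib.h>

	/* Returns a malloc'd copy of the attribute (caller frees) and
	 * stores its length in *lenp, or returns NULL on error.
	 */
	void *read_xattr(const char *path, const char *name, ssize_t *lenp)
	{
		ssize_t len = getxattr(path, name, NULL, 0);	/* size probe */
		void *buf;

		if (len < 0)
			return NULL;
		buf = malloc(len ? len : 1);	/* keep 0-length attrs valid */
		if (!buf)
			return NULL;
		len = getxattr(path, name, buf, len);	/* may race with setxattr */
		if (len < 0) {
			free(buf);
			return NULL;
		}
		*lenp = len;
		return buf;
	}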
+
+static int
+ext3_set_nfs4acl(handle_t *handle, struct inode *inode, struct nfs4acl *acl)
+{
+ const int name_index = EXT3_XATTR_INDEX_NFS4ACL;
+ size_t size = 0;
+ void *value = NULL;
+ int retval;
+
+ if (acl) {
+ size = nfs4acl_xattr_size(acl);
+ value = kmalloc(size, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+ nfs4acl_to_xattr(acl, value);
+ }
+ if (handle)
+ retval = ext3_xattr_set_handle(handle, inode, name_index, "",
+ value, size, 0);
+ else
+ retval = ext3_xattr_set(inode, name_index, "", value, size, 0);
+ kfree(value);
+ if (!retval)
+ ext3_iset_nfs4acl(inode, acl);
+
+ return retval;
+}
+
+int
+ext3_nfs4acl_permission(struct inode *inode, unsigned int mask)
+{
+ struct nfs4acl *acl;
+ int retval;
+
+ BUG_ON(!test_opt(inode->i_sb, NFS4ACL));
+
+ acl = ext3_get_nfs4acl(inode);
+ if (!acl)
+ retval = nfs4acl_generic_permission(inode, mask);
+ else if (IS_ERR(acl))
+ retval = PTR_ERR(acl);
+ else {
+ retval = nfs4acl_permission(inode, acl, mask);
+ nfs4acl_put(acl);
+ }
+
+ return retval;
+}
+
+int ext3_may_create(struct inode *dir, int isdir)
+{
+ int error;
+
+ if (test_opt(dir->i_sb, NFS4ACL)) {
+ unsigned int mask = (isdir ? ACE4_ADD_SUBDIRECTORY : ACE4_ADD_FILE) |
+ ACE4_EXECUTE;
+
+ error = ext3_nfs4acl_permission(dir, mask);
+ } else
+ error = ext3_permission(dir, MAY_WRITE | MAY_EXEC);
+
+ return error;
+}
+
+static int check_sticky(struct inode *dir, struct inode *inode)
+{
+ if (!(dir->i_mode & S_ISVTX))
+ return 0;
- if (inode->i_uid == current->fsuid)
++ if (inode->i_uid == current_fsuid())
+ return 0;
- if (dir->i_uid == current->fsuid)
++ if (dir->i_uid == current_fsuid())
+ return 0;
+ return !capable(CAP_FOWNER);
+}
+
+int ext3_may_delete(struct inode *dir, struct inode *inode)
+{
+ int error;
+
+ if (test_opt(inode->i_sb, NFS4ACL)) {
+ error = ext3_nfs4acl_permission(dir, ACE4_DELETE_CHILD | ACE4_EXECUTE);
+ if (!error && check_sticky(dir, inode))
+ error = -EPERM;
+ if (error && !ext3_nfs4acl_permission(inode, ACE4_DELETE))
+ error = 0;
+ } else {
+ error = ext3_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (!error && check_sticky(dir, inode))
+ error = -EPERM;
+ }
+
+ return error;
+}
+
+int
+ext3_nfs4acl_init(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+ struct nfs4acl *dir_acl = NULL, *acl;
+ int retval;
+
+ if (!S_ISLNK(inode->i_mode))
+ dir_acl = ext3_get_nfs4acl(dir);
+ if (!dir_acl || IS_ERR(dir_acl)) {
+ inode->i_mode &= ~current->fs->umask;
+ return PTR_ERR(dir_acl);
+ }
+ acl = nfs4acl_inherit(dir_acl, inode->i_mode);
+ nfs4acl_put(dir_acl);
+
+ retval = PTR_ERR(acl);
+ if (acl && !IS_ERR(acl)) {
+ retval = ext3_set_nfs4acl(handle, inode, acl);
+ inode->i_mode = (inode->i_mode & ~S_IRWXUGO) |
+ nfs4acl_masks_to_mode(acl);
+ nfs4acl_put(acl);
+ }
+ return retval;
+}
+
+int
+ext3_nfs4acl_chmod(struct inode *inode)
+{
+ struct nfs4acl *acl;
+ int retval;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ acl = ext3_get_nfs4acl(inode);
+ if (!acl || IS_ERR(acl))
+ return PTR_ERR(acl);
+ acl = nfs4acl_chmod(acl, inode->i_mode);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ retval = ext3_set_nfs4acl(NULL, inode, acl);
+ nfs4acl_put(acl);
+
+ return retval;
+}
+
+static size_t
+ext3_xattr_list_nfs4acl(struct inode *inode, char *list, size_t list_len,
+ const char *name, size_t name_len)
+{
+ const size_t size = sizeof(NFS4ACL_XATTR);
+
+ if (!test_opt(inode->i_sb, NFS4ACL))
+ return 0;
+ if (list && size <= list_len)
+ memcpy(list, NFS4ACL_XATTR, size);
+ return size;
+}
+
+static int
+ext3_xattr_get_nfs4acl(struct inode *inode, const char *name, void *buffer,
+ size_t buffer_size)
+{
+ struct nfs4acl *acl;
+ size_t size;
+
+ if (!test_opt(inode->i_sb, NFS4ACL))
+ return -EOPNOTSUPP;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+
+ acl = ext3_get_nfs4acl(inode);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ size = nfs4acl_xattr_size(acl);
+ if (buffer) {
+ if (size > buffer_size)
+ return -ERANGE;
+ nfs4acl_to_xattr(acl, buffer);
+ }
+ nfs4acl_put(acl);
+
+ return size;
+}
+
+#ifdef NFS4ACL_DEBUG
+static size_t
+ext3_xattr_list_masked_nfs4acl(struct inode *inode, char *list, size_t list_len,
+ const char *name, size_t name_len)
+{
+ return 0;
+}
+
+static int
+ext3_xattr_get_masked_nfs4acl(struct inode *inode, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ const int name_index = EXT3_XATTR_INDEX_NFS4ACL;
+ struct nfs4acl *acl;
+ void *xattr;
+ size_t size;
+ int retval;
+
+ if (!test_opt(inode->i_sb, NFS4ACL))
+ return -EOPNOTSUPP;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
+ if (retval <= 0)
+ return retval;
+ xattr = kmalloc(retval, GFP_KERNEL);
+ if (!xattr)
+ return -ENOMEM;
+ retval = ext3_xattr_get(inode, name_index, "", xattr, retval);
+ if (retval <= 0)
+ return retval;
+ acl = nfs4acl_from_xattr(xattr, retval);
+ kfree(xattr);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ retval = nfs4acl_apply_masks(&acl);
+ if (retval) {
+ nfs4acl_put(acl);
+ return retval;
+ }
+ size = nfs4acl_xattr_size(acl);
+ if (buffer) {
+ if (size > buffer_size)
+ return -ERANGE;
+ nfs4acl_to_xattr(acl, buffer);
+ }
+ nfs4acl_put(acl);
+ return size;
+}
+#endif
+
+static int
+ext3_xattr_set_nfs4acl(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ handle_t *handle;
+ struct nfs4acl *acl = NULL;
+ int retval, retries = 0;
+
+ if (S_ISLNK(inode->i_mode) || !test_opt(inode->i_sb, NFS4ACL))
+ return -EOPNOTSUPP;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
- if (current->fsuid != inode->i_uid &&
++ if (current_fsuid() != inode->i_uid &&
+ ext3_nfs4acl_permission(inode, ACE4_WRITE_ACL) &&
+ !capable(CAP_FOWNER))
+ return -EPERM;
+ if (value) {
+ acl = nfs4acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+
+ inode->i_mode &= ~S_IRWXUGO;
+ inode->i_mode |= nfs4acl_masks_to_mode(acl);
+ }
+
+retry:
+ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ext3_mark_inode_dirty(handle, inode);
+ retval = ext3_set_nfs4acl(handle, inode, acl);
+ ext3_journal_stop(handle);
+ if (retval == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+ nfs4acl_put(acl);
+ return retval;
+}
+
+struct xattr_handler ext3_nfs4acl_xattr_handler = {
+ .prefix = NFS4ACL_XATTR,
+ .list = ext3_xattr_list_nfs4acl,
+ .get = ext3_xattr_get_nfs4acl,
+ .set = ext3_xattr_set_nfs4acl,
+};
+
+#ifdef NFS4ACL_DEBUG
+struct xattr_handler ext3_masked_nfs4acl_xattr_handler = {
+ .prefix = "system.masked-nfs4acl",
+ .list = ext3_xattr_list_masked_nfs4acl,
+ .get = ext3_xattr_get_masked_nfs4acl,
+ .set = ext3_xattr_set_nfs4acl,
+};
+#endif
es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
BUFFER_TRACE(sbh, "marking dirty");
+
+ /* We only read the superblock once. The in-memory version is
+ * always the most recent. If ext3_error is called after a
+ * superblock write failure, it will be !uptodate. This write
+ * will likely fail also, but it avoids the WARN_ON in
+ * mark_buffer_dirty. */
+ set_buffer_uptodate(sbh);
mark_buffer_dirty(sbh);
if (sync)
- sync_dirty_buffer(sbh);
+ error = sync_dirty_buffer(sbh);
+ return error;
}
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+ #include <linux/writeback.h>
+ #include <linux/buffer_head.h>
+#include <trace/fs.h>
#include <asm/ioctls.h>
out:
return error;
}
++
++DEFINE_TRACE(fs_ioctl);
return -EACCES;
}
- static int __inode_permission(struct inode *inode, int mask)
+ /**
+ * inode_permission - check for access rights to a given inode
+ * @inode: inode to check permission on
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Used to check for read/write/execute permissions on an inode.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things.
+ */
+ int inode_permission(struct inode *inode, int mask)
{
int retval;
+ int submask = mask;
if (mask & MAY_WRITE) {
umode_t mode = inode->i_mode;
return -EACCES;
}
+ if (!IS_WITHAPPEND(inode))
+ submask &= ~MAY_APPEND;
+
- /* Ordinary permission routines do not understand MAY_APPEND. */
- if (inode->i_op && inode->i_op->permission) {
+ if (inode->i_op->permission)
- retval = inode->i_op->permission(inode, mask);
+ retval = inode->i_op->permission(inode, submask);
- if (!retval) {
- /*
- * Exec permission on a regular file is denied if none
- * of the execute bits are set.
- *
- * This check should be done by the ->permission()
- * method.
- */
- if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
- !(inode->i_mode & S_IXUGO))
- return -EACCES;
- }
- } else {
+ else
retval = generic_permission(inode, mask, NULL);
- }
+
if (retval)
return retval;
}
EXPORT_SYMBOL_GPL(lookup_create);
- int vfs_mknod(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
- int mode, dev_t dev)
+ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
- int error = may_create(dir, dentry);
+ int error = may_create(dir, dentry, 0);
if (error)
return error;
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
- int vfs_mkdir(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
- int mode)
+ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
- int error = may_create(dir, dentry);
+ int error = may_create(dir, dentry, 1);
if (error)
return error;
return do_unlinkat(AT_FDCWD, pathname);
}
- int vfs_symlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
- const char *oldname)
+ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
{
- int error = may_create(dir, dentry);
+ int error = may_create(dir, dentry, 0);
if (error)
return error;
--- /dev/null
+ config NFS_FS
+ tristate "NFS client support"
+ depends on INET
+ select LOCKD
+ select SUNRPC
+ select NFS_ACL_SUPPORT if NFS_V3_ACL
+ help
+ Choose Y here if you want to access files residing on other
+ computers using Sun's Network File System protocol. To compile
+ this file system support as a module, choose M here: the module
+ will be called nfs.
+
+ To mount file systems exported by NFS servers, you also need to
+ install the user space mount.nfs command which can be found in
+ the Linux nfs-utils package, available from http://linux-nfs.org/.
+ Information about using the mount command is available in the
+ mount(8) man page. More detail about the Linux NFS client
+ implementation is available via the nfs(5) man page.
+
+ Below you can choose which versions of the NFS protocol are
+ available in the kernel to mount NFS servers. Support for NFS
+ version 2 (RFC 1094) is always available when NFS_FS is selected.
+
+ To configure a system which mounts its root file system via NFS
+ at boot time, say Y here, select "Kernel level IP
+ autoconfiguration" in the NETWORK menu, and select "Root file
+ system on NFS" below. You cannot compile this file system as a
+ module in this case.
+
+ If unsure, say N.
+
+ config NFS_V3
+ bool "NFS client support for NFS version 3"
+ depends on NFS_FS
+ help
+ This option enables support for version 3 of the NFS protocol
+ (RFC 1813) in the kernel's NFS client.
+
+ If unsure, say Y.
+
+ config NFS_V3_ACL
+ bool "NFS client support for the NFSv3 ACL protocol extension"
+ depends on NFS_V3
+ help
+ Some NFS servers support an auxiliary NFSv3 ACL protocol that
+ Sun added to Solaris but never became an official part of the
+ NFS version 3 protocol. This protocol extension allows
+ applications on NFS clients to manipulate POSIX Access Control
+ Lists on files residing on NFS servers. NFS servers enforce
+ ACLs on local files whether this protocol is available or not.
+
+ Choose Y here if your NFS server supports the Solaris NFSv3 ACL
+ protocol extension and you want your NFS client to allow
+ applications to access and modify ACLs on files on the server.
+
+ Most NFS servers don't support the Solaris NFSv3 ACL protocol
+ extension. You can choose N here or specify the "noacl" mount
+ option to prevent your NFS client from trying to use the NFSv3
+ ACL protocol.
+
+ If unsure, say N.
+
+ config NFS_V4
+ bool "NFS client support for NFS version 4 (EXPERIMENTAL)"
+ depends on NFS_FS && EXPERIMENTAL
+ select RPCSEC_GSS_KRB5
+ help
+ This option enables support for version 4 of the NFS protocol
+ (RFC 3530) in the kernel's NFS client.
+
+ To mount NFS servers using NFSv4, you also need to install user
+ space programs which can be found in the Linux nfs-utils package,
+ available from http://linux-nfs.org/.
+
+ If unsure, say N.
+
+ config ROOT_NFS
+ bool "Root file system on NFS"
+ depends on NFS_FS=y && IP_PNP
+ help
+ If you want your system to mount its root file system via NFS,
+ choose Y here. This is common practice for managing systems
+ without local permanent storage. For details, read
+ <file:Documentation/filesystems/nfsroot.txt>.
+
+ Most people say N here.
++
++config NFS_SWAP
++ bool "Provide swap over NFS support"
++ default n
++ depends on NFS_FS
++ select SUNRPC_SWAP
++ help
++ This option enables swapon to work on files located on NFS mounts.
++
++ For more details, see Documentation/network-swap.txt
++
struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
++ /*
++ * swapfiles are not supposed to be shared.
++ */
++ if (IS_SWAPFILE(inode))
++ goto out;
++
if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
--- /dev/null
+/*
+ * Copyright (C) 2006 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/nfs4acl.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * ACL entries that have ACE4_SPECIAL_WHO set in ace->e_flags use the
+ * pointer values of these constants in ace->u.e_who to avoid massive
+ * amounts of string comparisons.
+ */
+
+const char nfs4ace_owner_who[] = "OWNER@";
+const char nfs4ace_group_who[] = "GROUP@";
+const char nfs4ace_everyone_who[] = "EVERYONE@";
+
+EXPORT_SYMBOL_GPL(nfs4ace_owner_who);
+EXPORT_SYMBOL_GPL(nfs4ace_group_who);
+EXPORT_SYMBOL_GPL(nfs4ace_everyone_who);
+
+/**
+ * nfs4acl_alloc - allocate an acl
+ * @count: number of entries
+ */
+struct nfs4acl *
+nfs4acl_alloc(int count)
+{
+ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
+ struct nfs4acl *acl = kzalloc(size, GFP_KERNEL);
+
+ if (acl) {
+ atomic_set(&acl->a_refcount, 1);
+ acl->a_count = count;
+ }
+ return acl;
+}
+EXPORT_SYMBOL_GPL(nfs4acl_alloc);
+
+/**
+ * nfs4acl_clone - create a copy of an acl
+ */
+struct nfs4acl *
+nfs4acl_clone(const struct nfs4acl *acl)
+{
+ int count = acl->a_count;
+ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace);
+ struct nfs4acl *dup = kmalloc(size, GFP_KERNEL);
+
+ if (dup) {
+ memcpy(dup, acl, size);
+ atomic_set(&dup->a_refcount, 1);
+ }
+ return dup;
+}
+
+/*
+ * The POSIX permissions are supersets of the below mask flags.
+ *
+ * The ACE4_READ_ATTRIBUTES and ACE4_READ_ACL flags are always granted
+ * in POSIX. The ACE4_SYNCHRONIZE flag has no meaning under POSIX. We
+ * make sure that we do not mask them if they are set, so that users who
+ * rely on these flags won't get confused.
+ */
+#define ACE4_POSIX_MODE_READ ( \
+ ACE4_READ_DATA | ACE4_LIST_DIRECTORY )
+#define ACE4_POSIX_MODE_WRITE ( \
+ ACE4_WRITE_DATA | ACE4_ADD_FILE | \
+ ACE4_APPEND_DATA | ACE4_ADD_SUBDIRECTORY | \
+ ACE4_DELETE_CHILD )
+#define ACE4_POSIX_MODE_EXEC ( \
+ ACE4_EXECUTE)
+
+static int
+nfs4acl_mask_to_mode(unsigned int mask)
+{
+ int mode = 0;
+
+ if (mask & ACE4_POSIX_MODE_READ)
+ mode |= MAY_READ;
+ if (mask & ACE4_POSIX_MODE_WRITE)
+ mode |= MAY_WRITE;
+ if (mask & ACE4_POSIX_MODE_EXEC)
+ mode |= MAY_EXEC;
+
+ return mode;
+}
+
+/**
+ * nfs4acl_masks_to_mode - compute file mode permission bits from file masks
+ *
+ * Compute the file mode permission bits from the file masks in the acl.
+ */
+int
+nfs4acl_masks_to_mode(const struct nfs4acl *acl)
+{
+ return nfs4acl_mask_to_mode(acl->a_owner_mask) << 6 |
+ nfs4acl_mask_to_mode(acl->a_group_mask) << 3 |
+ nfs4acl_mask_to_mode(acl->a_other_mask);
+}
+EXPORT_SYMBOL_GPL(nfs4acl_masks_to_mode);
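
A quick sanity check of the shift arithmetic: with the RFC 3530 bit values (ACE4_READ_DATA 0x01, ACE4_WRITE_DATA 0x02, ACE4_APPEND_DATA 0x04, ACE4_EXECUTE 0x20), an acl whose owner mask allows read, write and append, whose group mask allows read, and whose other mask is empty yields mode 0640. A standalone re-derivation with the constants restated locally; the directory-specific bits (ACE4_ADD_FILE and friends) are left out for brevity:

	#include <stdio.h>

	#define ACE4_READ_DATA   0x01	/* RFC 3530 values, restated */
	#define ACE4_WRITE_DATA  0x02
	#define ACE4_APPEND_DATA 0x04
	#define ACE4_EXECUTE     0x20

	static int mask_to_rwx(unsigned int mask)
	{
		int mode = 0;

		if (mask & ACE4_READ_DATA)
			mode |= 4;	/* MAY_READ */
		if (mask & (ACE4_WRITE_DATA | ACE4_APPEND_DATA))
			mode |= 2;	/* MAY_WRITE */
		if (mask & ACE4_EXECUTE)
			mode |= 1;	/* MAY_EXEC */
		return mode;
	}

	int main(void)
	{
		unsigned int owner = ACE4_READ_DATA | ACE4_WRITE_DATA |
				     ACE4_APPEND_DATA;
		unsigned int group = ACE4_READ_DATA;

		/* prints 0640 */
		printf("0%o\n", mask_to_rwx(owner) << 6 | mask_to_rwx(group) << 3);
		return 0;
	}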
+
+static unsigned int
+nfs4acl_mode_to_mask(mode_t mode)
+{
+ unsigned int mask = ACE4_POSIX_ALWAYS_ALLOWED;
+
+ if (mode & MAY_READ)
+ mask |= ACE4_POSIX_MODE_READ;
+ if (mode & MAY_WRITE)
+ mask |= ACE4_POSIX_MODE_WRITE;
+ if (mode & MAY_EXEC)
+ mask |= ACE4_POSIX_MODE_EXEC;
+
+ return mask;
+}
+
+/**
+ * nfs4acl_chmod - update the file masks to reflect the new mode
+ * @mode: file mode permission bits to apply to the @acl
+ *
+ * Converts the mask flags corresponding to the owner, group, and other file
+ * permissions and computes the file masks. Returns @acl if it already has the
+ * appropriate file masks, or updates the flags in a copy of @acl. Takes over
+ * @acl.
+ */
+struct nfs4acl *
+nfs4acl_chmod(struct nfs4acl *acl, mode_t mode)
+{
+ unsigned int owner_mask, group_mask, other_mask;
+ struct nfs4acl *clone;
+
+ owner_mask = nfs4acl_mode_to_mask(mode >> 6);
+ group_mask = nfs4acl_mode_to_mask(mode >> 3);
+ other_mask = nfs4acl_mode_to_mask(mode);
+
+ if (acl->a_owner_mask == owner_mask &&
+ acl->a_group_mask == group_mask &&
+ acl->a_other_mask == other_mask &&
+ (!nfs4acl_is_auto_inherit(acl) || nfs4acl_is_protected(acl)))
+ return acl;
+
+ clone = nfs4acl_clone(acl);
+ nfs4acl_put(acl);
+ if (!clone)
+ return ERR_PTR(-ENOMEM);
+
+ clone->a_owner_mask = owner_mask;
+ clone->a_group_mask = group_mask;
+ clone->a_other_mask = other_mask;
+ if (nfs4acl_is_auto_inherit(clone))
+ clone->a_flags |= ACL4_PROTECTED;
+
+ if (nfs4acl_write_through(&clone)) {
+ nfs4acl_put(clone);
+ clone = ERR_PTR(-ENOMEM);
+ }
+ return clone;
+}
+EXPORT_SYMBOL_GPL(nfs4acl_chmod);
+
+/**
+ * nfs4acl_want_to_mask - convert permission want argument to a mask
+ * @want: @want argument of the permission inode operation
+ *
+ * When checking for append, @want is (MAY_WRITE | MAY_APPEND).
+ */
+unsigned int
+nfs4acl_want_to_mask(int want)
+{
+ unsigned int mask = 0;
+
+ if (want & MAY_READ)
+ mask |= ACE4_READ_DATA;
+ if (want & MAY_APPEND)
+ mask |= ACE4_APPEND_DATA;
+ else if (want & MAY_WRITE)
+ mask |= ACE4_WRITE_DATA;
+ if (want & MAY_EXEC)
+ mask |= ACE4_EXECUTE;
+
+ return mask;
+}
+EXPORT_SYMBOL_GPL(nfs4acl_want_to_mask);
+
+/**
+ * nfs4acl_capability_check - check for capabilities overriding read/write access
+ * @inode: inode to check
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * Capabilities other than CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH must be checked
+ * separately.
+ */
+static inline int nfs4acl_capability_check(struct inode *inode, unsigned int mask)
+{
+ /*
+ * Read/write DACs are always overridable.
+ * Executable DACs are overridable if at least one exec bit is set.
+ */
+ if (!(mask & (ACE4_WRITE_ACL | ACE4_WRITE_OWNER)) &&
+ (!(mask & ACE4_EXECUTE) ||
+ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)))
+ if (capable(CAP_DAC_OVERRIDE))
+ return 0;
+
+ /*
+ * Searching includes executable on directories, else just read.
+ */
+ if (!(mask & ~(ACE4_READ_DATA | ACE4_EXECUTE)) &&
+ (S_ISDIR(inode->i_mode) || !(mask & ACE4_EXECUTE)))
+ if (capable(CAP_DAC_READ_SEARCH))
+ return 0;
+
+ return -EACCES;
+}
+
+/**
+ * nfs4acl_permission - permission check algorithm with masking
+ * @inode: inode to check
+ * @acl: nfs4 acl of the inode
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * Checks if the current process is granted @mask flags in @acl. With
+ * write-through, the OWNER@ is always granted the owner file mask, the
+ * GROUP@ is always granted the group file mask, and EVERYONE@ is always
+ * granted the other file mask. Otherwise, processes are only granted
+ * @mask flags which they are granted in the @acl as well as in their
+ * file mask.
+ */
+int nfs4acl_permission(struct inode *inode, const struct nfs4acl *acl,
+ unsigned int mask)
+{
+ const struct nfs4ace *ace;
+ unsigned int file_mask, requested = mask, denied = 0;
+ int in_owning_group = in_group_p(inode->i_gid);
+ int owner_or_group_class = in_owning_group;
+
+ /*
+ * A process is in the
+ * - owner file class if it owns the file, in the
+ * - group file class if it is in the file's owning group or
+ * it matches any of the user or group entries, and in the
+ * - other file class otherwise.
+ */
+
+ nfs4acl_for_each_entry(ace, acl) {
+ unsigned int ace_mask = ace->e_mask;
+
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_owner(ace)) {
- if (current->fsuid != inode->i_uid)
++ if (current_fsuid() != inode->i_uid)
+ continue;
+ goto is_owner;
+ } else if (nfs4ace_is_group(ace)) {
+ if (!in_owning_group)
+ continue;
+ } else if (nfs4ace_is_unix_id(ace)) {
+ if (ace->e_flags & ACE4_IDENTIFIER_GROUP) {
+ if (!in_group_p(ace->u.e_id))
+ continue;
+ } else {
- if (current->fsuid != ace->u.e_id)
++ if (current_fsuid() != ace->u.e_id)
+ continue;
+ }
+ } else
+ goto is_everyone;
+
+ /*
+ * Apply the group file mask to entries other than OWNER@ and
+ * EVERYONE@. This is not required for correct access checking
+ * but ensures that we grant the same permissions as the acl
+ * computed by nfs4acl_apply_masks().
+ *
+ * For example, without this restriction, 'group@:rw::allow'
+ * with mode 0600 would grant rw access to owner processes
+ * which are also in the owning group. This cannot be expressed
+ * in an acl.
+ */
+ if (nfs4ace_is_allow(ace))
+ ace_mask &= acl->a_group_mask;
+
+ is_owner:
+ /* The process is in the owner or group file class. */
+ owner_or_group_class = 1;
+
+ is_everyone:
+ /* Check which mask flags the ACE allows or denies. */
+ if (nfs4ace_is_deny(ace))
+ denied |= ace_mask & mask;
+ mask &= ~ace_mask;
+
+ /* Keep going until we know which file class the process is in. */
+ if (!mask && owner_or_group_class)
+ break;
+ }
+ denied |= mask;
+
+	/*
+	 * Figure out which file mask applies: the owner mask for the file
+	 * owner, the group mask for processes in the file group class
+	 * (the owning group or a matching user or group entry), and the
+	 * other mask for everyone else.
+	 */
- if (current->fsuid == inode->i_uid)
++ if (current_fsuid() == inode->i_uid)
+ file_mask = acl->a_owner_mask;
+ else if (in_owning_group || owner_or_group_class)
+ file_mask = acl->a_group_mask;
+ else
+ file_mask = acl->a_other_mask;
+
+ denied |= requested & ~file_mask;
+ if (!denied)
+ return 0;
+ return nfs4acl_capability_check(inode, requested);
+}
+EXPORT_SYMBOL_GPL(nfs4acl_permission);
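+
+/*
+ * Worked example (illustrative, not from the original patch): with the
+ * acl "group@:rwx::allow" and a_group_mask reduced to read-only by
+ * chmod 0640, a process in the owning group requesting ACE4_WRITE_DATA
+ * has the allow entry's mask cut down by a_group_mask in the loop above,
+ * so the write flag survives in @mask, ends up in denied, and the
+ * request falls through to nfs4acl_capability_check().
+ */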
+
+/**
+ * nfs4acl_generic_permission - permission check algorithm without explicit acl
+ * @inode: inode to check permissions for
+ * @mask: requested access (ACE4_* bitmask)
+ *
+ * The file mode of a file without ACL corresponds to an ACL with a single
+ * "EVERYONE:~0::ALLOW" entry, with file masks that correspond to the file mode
+ * permissions. Instead of constructing a temporary ACL and applying
+ * nfs4acl_permission() to it, compute the identical result directly from the file
+ * mode.
+ */
+int nfs4acl_generic_permission(struct inode *inode, unsigned int mask)
+{
+ int mode = inode->i_mode;
+
- if (current->fsuid == inode->i_uid)
++ if (current_fsuid() == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+ if (!(mask & ~nfs4acl_mode_to_mask(mode)))
+ return 0;
+ return nfs4acl_capability_check(inode, mask);
+}
+EXPORT_SYMBOL_GPL(nfs4acl_generic_permission);
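+
+/*
+ * Illustrative example: for mode 0644 a process that is neither the owner
+ * nor in the owning group is left with the "other" bits r--, so a request
+ * for ACE4_WRITE_DATA fails the mask test and falls through to the
+ * capability check, exactly as the equivalent single-entry
+ * "EVERYONE@:~0::ALLOW" acl described above would behave.
+ */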
+
+/*
+ * nfs4ace_is_same_who - do both acl entries refer to the same identifier?
+ */
+int
+nfs4ace_is_same_who(const struct nfs4ace *a, const struct nfs4ace *b)
+{
+#define WHO_FLAGS (ACE4_SPECIAL_WHO | ACE4_IDENTIFIER_GROUP)
+ if ((a->e_flags & WHO_FLAGS) != (b->e_flags & WHO_FLAGS))
+ return 0;
+ if (a->e_flags & ACE4_SPECIAL_WHO)
+ return a->u.e_who == b->u.e_who;
+ else
+ return a->u.e_id == b->u.e_id;
+#undef WHO_FLAGS
+}
+
+/**
+ * nfs4ace_set_who - set a special who value
+ * @ace: acl entry to modify
+ * @who: who string; must match nfs4ace_owner_who, nfs4ace_group_who,
+ * or nfs4ace_everyone_who, otherwise -EINVAL is returned
+ */
+int
+nfs4ace_set_who(struct nfs4ace *ace, const char *who)
+{
+ if (!strcmp(who, nfs4ace_owner_who))
+ who = nfs4ace_owner_who;
+ else if (!strcmp(who, nfs4ace_group_who))
+ who = nfs4ace_group_who;
+ else if (!strcmp(who, nfs4ace_everyone_who))
+ who = nfs4ace_everyone_who;
+ else
+ return -EINVAL;
+
+ ace->u.e_who = who;
+ ace->e_flags |= ACE4_SPECIAL_WHO;
+ ace->e_flags &= ~ACE4_IDENTIFIER_GROUP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs4ace_set_who);
+
+/**
+ * nfs4acl_allowed_to_who - mask flags allowed to a specific who value
+ *
+ * Computes the mask values allowed to a specific who value, taking
+ * EVERYONE@ entries into account.
+ */
+static unsigned int
+nfs4acl_allowed_to_who(struct nfs4acl *acl, struct nfs4ace *who)
+{
+ struct nfs4ace *ace;
+ unsigned int allowed = 0;
+
+ nfs4acl_for_each_entry_reverse(ace, acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+ if (nfs4ace_is_same_who(ace, who) ||
+ nfs4ace_is_everyone(ace)) {
+ if (nfs4ace_is_allow(ace))
+ allowed |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace))
+ allowed &= ~ace->e_mask;
+ }
+ }
+ return allowed;
+}
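+
+/*
+ * Example (illustrative): for the acl "everyone@:w::deny, user:99:rw::allow"
+ * and @who naming user 99, the reverse walk first accumulates rw from the
+ * allow entry and then clears w for the earlier deny entry, yielding r,
+ * which matches what first-match-wins evaluation would grant.
+ */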
+
+/**
+ * nfs4acl_compute_max_masks - compute upper bound masks
+ *
+ * Computes upper bound owner, group, and other masks so that none of
+ * the mask flags allowed by the acl are disabled (for any choice of the
+ * file owner or group membership).
+ */
+static void
+nfs4acl_compute_max_masks(struct nfs4acl *acl)
+{
+ struct nfs4ace *ace;
+
+ acl->a_owner_mask = 0;
+ acl->a_group_mask = 0;
+ acl->a_other_mask = 0;
+
+ nfs4acl_for_each_entry_reverse(ace, acl) {
+ if (nfs4ace_is_inherit_only(ace))
+ continue;
+
+ if (nfs4ace_is_owner(ace)) {
+ if (nfs4ace_is_allow(ace))
+ acl->a_owner_mask |= ace->e_mask;
+ else if (nfs4ace_is_deny(ace))
+ acl->a_owner_mask &= ~ace->e_mask;
+ } else if (nfs4ace_is_everyone(ace)) {
+ if (nfs4ace_is_allow(ace)) {
+ struct nfs4ace who = {
+ .e_flags = ACE4_SPECIAL_WHO,
+ .u.e_who = nfs4ace_group_who,
+ };
+
+ acl->a_other_mask |= ace->e_mask;
+ acl->a_group_mask |=
+ nfs4acl_allowed_to_who(acl, &who);
+ acl->a_owner_mask |= ace->e_mask;
+ } else if (nfs4ace_is_deny(ace)) {
+ acl->a_other_mask &= ~ace->e_mask;
+ acl->a_group_mask &= ~ace->e_mask;
+ acl->a_owner_mask &= ~ace->e_mask;
+ }
+ } else {
+ if (nfs4ace_is_allow(ace)) {
+ unsigned int mask =
+ nfs4acl_allowed_to_who(acl, ace);
+
+ acl->a_group_mask |= mask;
+ acl->a_owner_mask |= mask;
+ }
+ }
+ }
+}
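+
+/*
+ * Example (illustrative): for the acl "owner@:rw::allow, everyone@:r::allow",
+ * the reverse walk first adds r to all three masks via the EVERYONE@ entry
+ * (consulting nfs4acl_allowed_to_who() for the group mask) and then adds rw
+ * to the owner mask, giving owner=rw, group=r, other=r; no flag allowed by
+ * the acl is disabled by the resulting masks.
+ */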
+
+/**
+ * nfs4acl_inherit - compute the acl a new file will inherit
+ * @dir_acl:	acl of the containing directory
+ * @mode: file type and create mode of the new file
+ *
+ * Given the containing directory's acl, this function will compute the
+ * acl that new files in that directory will inherit, or %NULL if
+ * @dir_acl does not contain acl entries inheritable by this file.
+ *
+ * Without write-through, the file masks in the returned acl are set to
+ * the intersection of the create mode and the maximum permissions
+ * allowed to each file class. With write-through, the file masks are
+ * set to the create mode.
+ */
+struct nfs4acl *
+nfs4acl_inherit(const struct nfs4acl *dir_acl, mode_t mode)
+{
+ const struct nfs4ace *dir_ace;
+ struct nfs4acl *acl;
+ struct nfs4ace *ace;
+ int count = 0;
+
+ if (S_ISDIR(mode)) {
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!nfs4ace_is_inheritable(dir_ace))
+ continue;
+ count++;
+ }
+ if (!count)
+ return NULL;
+ acl = nfs4acl_alloc(count);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ ace = acl->a_entries;
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!nfs4ace_is_inheritable(dir_ace))
+ continue;
+ memcpy(ace, dir_ace, sizeof(struct nfs4ace));
+ if (dir_ace->e_flags & ACE4_NO_PROPAGATE_INHERIT_ACE)
+ nfs4ace_clear_inheritance_flags(ace);
+ if ((dir_ace->e_flags & ACE4_FILE_INHERIT_ACE) &&
+ !(dir_ace->e_flags & ACE4_DIRECTORY_INHERIT_ACE))
+ ace->e_flags |= ACE4_INHERIT_ONLY_ACE;
+ ace++;
+ }
+ } else {
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
+ continue;
+ count++;
+ }
+ if (!count)
+ return NULL;
+ acl = nfs4acl_alloc(count);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ ace = acl->a_entries;
+ nfs4acl_for_each_entry(dir_ace, dir_acl) {
+ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE))
+ continue;
+ memcpy(ace, dir_ace, sizeof(struct nfs4ace));
+ nfs4ace_clear_inheritance_flags(ace);
+ ace++;
+ }
+ }
+
+	/* Compute the maximum mask flags that the owner, group, and other
+	   file classes are allowed. */
+ if (dir_acl->a_flags & ACL4_WRITE_THROUGH) {
+ acl->a_owner_mask = ACE4_VALID_MASK;
+ acl->a_group_mask = ACE4_VALID_MASK;
+ acl->a_other_mask = ACE4_VALID_MASK;
+
+ mode &= ~current->fs->umask;
+ } else
+ nfs4acl_compute_max_masks(acl);
+
+ /* Apply the create mode. */
+ acl->a_owner_mask &= nfs4acl_mode_to_mask(mode >> 6);
+ acl->a_group_mask &= nfs4acl_mode_to_mask(mode >> 3);
+ acl->a_other_mask &= nfs4acl_mode_to_mask(mode);
+
+ if (nfs4acl_write_through(&acl)) {
+ nfs4acl_put(acl);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ acl->a_flags = (dir_acl->a_flags & ~ACL4_PROTECTED);
+ if (nfs4acl_is_auto_inherit(acl)) {
+ nfs4acl_for_each_entry(ace, acl)
+ ace->e_flags |= ACE4_INHERITED_ACE;
+ acl->a_flags |= ACL4_PROTECTED;
+ }
+
+ return acl;
+}
+EXPORT_SYMBOL_GPL(nfs4acl_inherit);
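+
+/*
+ * Illustrative example (not part of the original patch): a directory entry
+ * with ACE4_FILE_INHERIT_ACE but not ACE4_DIRECTORY_INHERIT_ACE is copied
+ * to a new subdirectory with ACE4_INHERIT_ONLY_ACE set, so it only takes
+ * effect on files created below, while a new regular file receives the
+ * entry with all inheritance flags cleared.
+ */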
err = nfserr_notsync;
if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
fh_lock(fhp);
- host_err = notify_change(dentry, fhp->fh_export->ex_path.mnt, iap);
+ host_err = notify_change(dentry, iap);
+ /* to get NFSERR_JUKEBOX on the wire, need -ETIMEDOUT */
+ if (host_err == -EAGAIN)
+ host_err = -ETIMEDOUT;
err = nfserrno(host_err);
fh_unlock(fhp);
}
--- /dev/null
--- /dev/null
++config NOVFS
++ tristate "Novell Netware Filesystem support (novfs) (EXPERIMENTAL)"
++ depends on INET && EXPERIMENTAL
++ help
++	  If you say Y here, you will get an experimental Novell NetWare
++ filesystem driver.
++
++ If unsure, say N.
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * This file contains all the functions necessary for sending commands to our
+ * daemon module.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/poll.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/semaphore.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <linux/time.h>
+
+#include "vfs.h"
+#include "nwcapi.h"
+#include "commands.h"
+#include "nwerror.h"
+
+#define QUEUE_SENDING 0
+#define QUEUE_WAITING 1
+#define QUEUE_TIMEOUT 2
+#define QUEUE_ACKED 3
+#define QUEUE_DONE 4
+
+#define TIMEOUT_VALUE 10
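+
+/*
+ * Command lifecycle (summary added for clarity): a request is queued as
+ * QUEUE_SENDING, becomes QUEUE_WAITING once the daemon has read it,
+ * QUEUE_ACKED when the daemon echoes back the sequence number, and
+ * QUEUE_DONE when the reply has been copied in; QUEUE_TIMEOUT marks a
+ * command the timer expired on before an ack arrived.
+ */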
+
+#define DH_TYPE_UNDEFINED 0
+#define DH_TYPE_STREAM 1
+#define DH_TYPE_CONNECTION 2
+
+struct daemon_queue {
+ struct list_head list; /* Must be first entry */
+ spinlock_t lock; /* Used to control access to list */
+ struct semaphore semaphore; /* Used to signal when data is available */
+};
+
+struct daemon_cmd {
+ struct list_head list; /* Must be first entry */
+ atomic_t reference;
+ unsigned int status;
+ unsigned int flags;
+ struct semaphore semaphore;
+ unsigned long sequence;
+ struct timer_list timer;
+ void *request;
+ unsigned long reqlen;
+ void *data;
+ int datalen;
+ void *reply;
+ unsigned long replen;
+};
+
+struct daemon_handle {
+ struct list_head list;
+ rwlock_t lock;
+ struct novfs_schandle session;
+};
+
+struct daemon_resource {
+ struct list_head list;
+ int type;
+ void *connection;
+ unsigned char handle[6];
+ mode_t mode;
+ loff_t size;
+};
+
+struct drive_map {
+ struct list_head list; /* Must be first item */
+ struct novfs_schandle session;
+ unsigned long hash;
+ int namelen;
+ char name[1];
+};
+
+static void Queue_get(struct daemon_cmd * Que);
+static void Queue_put(struct daemon_cmd * Que);
+static void RemoveDriveMaps(void);
+static int NwdConvertLocalHandle(struct novfs_xplat *pdata, struct daemon_handle * DHandle);
+static int NwdConvertNetwareHandle(struct novfs_xplat *pdata, struct daemon_handle * DHandle);
+static int set_map_drive(struct novfs_xplat *pdata, struct novfs_schandle Session);
+static int unmap_drive(struct novfs_xplat *pdata, struct novfs_schandle Session);
+static int NwdGetMountPath(struct novfs_xplat *pdata);
+static long local_unlink(const char *pathname);
+
+
+/*===[ Global variables ]=================================================*/
+static struct daemon_queue Daemon_Queue;
+
+static DECLARE_WAIT_QUEUE_HEAD(Read_waitqueue);
+
+static atomic_t Sequence = ATOMIC_INIT(-1);
+static atomic_t Daemon_Open_Count = ATOMIC_INIT(0);
+
+static unsigned long Daemon_Command_Timeout = TIMEOUT_VALUE;
+
+static DECLARE_MUTEX(DriveMapLock);
+static LIST_HEAD(DriveMapList);
+
+int novfs_max_iosize = PAGE_SIZE;
+
+void novfs_daemon_queue_init(void)
+{
+ INIT_LIST_HEAD(&Daemon_Queue.list);
+ spin_lock_init(&Daemon_Queue.lock);
+ init_MUTEX_LOCKED(&Daemon_Queue.semaphore);
+}
+
+void novfs_daemon_queue_exit(void)
+{
+	/* Does nothing for now, but maybe we should clear the queue. */
+}
+
+/*++======================================================================*/
+static void novfs_daemon_timer(unsigned long data)
+{
+ struct daemon_cmd *que = (struct daemon_cmd *) data;
+
+ if (QUEUE_ACKED != que->status) {
+ que->status = QUEUE_TIMEOUT;
+ }
+ up(&que->semaphore);
+}
+
+/*++======================================================================*/
+int Queue_Daemon_Command(void *request,
+ unsigned long reqlen,
+ void *data,
+ int dlen,
+ void **reply, unsigned long * replen, int interruptible)
+{
+ struct daemon_cmd *que;
+ int retCode = 0;
+ uint64_t ts1, ts2;
+
+ ts1 = get_nanosecond_time();
+
- DbgPrint("Queue_Daemon_Command: 0x%p %d\n", request, reqlen);
++ DbgPrint("0x%p %d", request, reqlen);
+
+ if (atomic_read(&Daemon_Open_Count)) {
+
+ que = kmalloc(sizeof(*que), GFP_KERNEL);
+
- DbgPrint("Queue_Daemon_Command: que=0x%p\n", que);
++ DbgPrint("que=0x%p", que);
+ if (que) {
+ atomic_set(&que->reference, 0);
+ que->status = QUEUE_SENDING;
+ que->flags = 0;
+
+ init_MUTEX_LOCKED(&que->semaphore);
+
+ que->sequence = atomic_inc_return(&Sequence);
+
+ ((struct novfs_command_request_header *) request)->SequenceNumber =
+ que->sequence;
+
+ /*
+ * Setup and start que timer
+ */
+ init_timer(&que->timer);
+ que->timer.expires = jiffies + (HZ * Daemon_Command_Timeout);
+ que->timer.data = (unsigned long) que;
+ que->timer.function = novfs_daemon_timer;
+ add_timer(&que->timer);
+
+ /*
+ * Setup request
+ */
+ que->request = request;
+ que->reqlen = reqlen;
+ que->data = data;
+ que->datalen = dlen;
+ que->reply = NULL;
+ que->replen = 0;
+
+ /*
+ * Added entry to queue.
+ */
+ /*
+ * Check to see if interruptible and set flags.
+ */
+ if (interruptible) {
+ que->flags |= INTERRUPTIBLE;
+ }
+
+ Queue_get(que);
+
+ spin_lock(&Daemon_Queue.lock);
+ list_add_tail(&que->list, &Daemon_Queue.list);
+ spin_unlock(&Daemon_Queue.lock);
+
+ /*
+ * Signal that there is data to be read
+ */
+ up(&Daemon_Queue.semaphore);
+
+ /*
+		 * Give a chance to the other processes.
+ */
+ yield();
+
+ /*
+ * Block waiting for reply or timeout
+ */
+ down(&que->semaphore);
+
+ if (QUEUE_ACKED == que->status) {
+ que->status = QUEUE_WAITING;
+ mod_timer(&que->timer,
+ jiffies +
+ (HZ * 2 * Daemon_Command_Timeout));
+ if (interruptible) {
+ retCode =
+ down_interruptible(&que->semaphore);
+ } else {
+ down(&que->semaphore);
+ }
+ }
+
+ /*
+ * Delete timer
+ */
+ del_timer(&que->timer);
+
+ /*
+ * Check for timeout
+ */
+ if ((QUEUE_TIMEOUT == que->status)
+ && (NULL == que->reply)) {
- DbgPrint("Queue_Daemon_Command: Timeout\n");
++ DbgPrint("Timeout");
+ retCode = -ETIME;
+ }
+ *reply = que->reply;
+ *replen = que->replen;
+
+ /*
+ * Remove item from queue
+ */
+ Queue_put(que);
+
+ } else { /* Error case with no memory */
+
+ retCode = -ENOMEM;
+ *reply = NULL;
+ *replen = 0;
+ }
+ } else {
+ retCode = -EIO;
+ *reply = NULL;
+ *replen = 0;
+
+ }
+ ts2 = get_nanosecond_time();
+ ts2 = ts2 - ts1;
+
- DbgPrint("Queue_Daemon_Command: %llu retCode=%d \n", ts2, retCode);
++ DbgPrint("%llu retCode=%d", ts2, retCode);
+ return (retCode);
+}
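+
+/*
+ * Typical usage (sketch, mirroring the callers below): fill in a command
+ * that starts with a struct novfs_command_request_header, queue it, and
+ * free the kmalloc'd reply when done:
+ *
+ *	retCode = Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ *				       &replylen, INTERRUPTIBLE);
+ *	if (reply) {
+ *		if (reply->Reply.ErrorCode)
+ *			retCode = -EIO;
+ *		kfree(reply);
+ *	}
+ */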
+
+static void Queue_get(struct daemon_cmd * Que)
+{
- DbgPrint("Queue_get: que=0x%p %d\n", Que, atomic_read(&Que->reference));
++ DbgPrint("que=0x%p %d", Que, atomic_read(&Que->reference));
+ atomic_inc(&Que->reference);
+}
+
+static void Queue_put(struct daemon_cmd * Que)
+{
+
- DbgPrint("Queue_put: que=0x%p %d\n", Que, atomic_read(&Que->reference));
++ DbgPrint("que=0x%p %d", Que, atomic_read(&Que->reference));
+ spin_lock(&Daemon_Queue.lock);
+
+ if (atomic_dec_and_test(&Que->reference)) {
+ /*
+ * Remove item from queue
+ */
+ list_del(&Que->list);
+ spin_unlock(&Daemon_Queue.lock);
+
+ /*
+ * Free item memory
+ */
+ kfree(Que);
+ } else {
+ spin_unlock(&Daemon_Queue.lock);
+ }
+}
+
+struct daemon_cmd *get_next_queue(int Set_Queue_Waiting)
+{
+ struct daemon_cmd *que;
+
- DbgPrint("get_next_queue: que=0x%p\n", Daemon_Queue.list.next);
++ DbgPrint("que=0x%p", Daemon_Queue.list.next);
+
+ spin_lock(&Daemon_Queue.lock);
+ que = (struct daemon_cmd *) Daemon_Queue.list.next;
+
+ while (que && (que != (struct daemon_cmd *) & Daemon_Queue.list.next)
+ && (que->status != QUEUE_SENDING)) {
+ que = (struct daemon_cmd *) que->list.next;
+ }
+
+ if ((NULL == que) || (que == (struct daemon_cmd *) & Daemon_Queue.list)
+ || (que->status != QUEUE_SENDING)) {
+ que = NULL;
+ } else if (Set_Queue_Waiting) {
+ que->status = QUEUE_WAITING;
+ }
+
+ if (que) {
+ atomic_inc(&que->reference);
+ }
+
+ spin_unlock(&Daemon_Queue.lock);
+
- DbgPrint("get_next_queue: return=0x%p\n", que);
++ DbgPrint("return=0x%p", que);
+ return (que);
+}
+
+static struct daemon_cmd *find_queue(unsigned long sequence)
+{
+ struct daemon_cmd *que;
+
- DbgPrint("find_queue: 0x%x\n", sequence);
++ DbgPrint("0x%x", sequence);
+
+ spin_lock(&Daemon_Queue.lock);
+ que = (struct daemon_cmd *) Daemon_Queue.list.next;
+
+ while (que && (que != (struct daemon_cmd *) & Daemon_Queue.list.next)
+ && (que->sequence != sequence)) {
+ que = (struct daemon_cmd *) que->list.next;
+ }
+
+ if ((NULL == que)
+ || (que == (struct daemon_cmd *) & Daemon_Queue.list.next)
+ || (que->sequence != sequence)) {
+ que = NULL;
+ }
+
+ if (que) {
+ atomic_inc(&que->reference);
+ }
+
+ spin_unlock(&Daemon_Queue.lock);
+
- DbgPrint("find_queue: return 0x%p\n", que);
++ DbgPrint("return 0x%p", que);
+ return (que);
+}
+
+int novfs_daemon_open_control(struct inode *Inode, struct file *File)
+{
- DbgPrint("Daemon_Open_Control: pid=%d Count=%d\n", current->pid,
++ DbgPrint("pid=%d Count=%d", current->pid,
+ atomic_read(&Daemon_Open_Count));
+ atomic_inc(&Daemon_Open_Count);
+
+ return (0);
+}
+
+int novfs_daemon_close_control(struct inode *Inode, struct file *File)
+{
+ struct daemon_cmd *que;
+
- DbgPrint("Daemon_Close_Control: pid=%d Count=%d\n", current->pid,
++ DbgPrint("pid=%d Count=%d", current->pid,
+ atomic_read(&Daemon_Open_Count));
+
+ if (atomic_dec_and_test(&Daemon_Open_Count)) {
+ /*
+		 * Signal any pending queue items.
+ */
+
+ spin_lock(&Daemon_Queue.lock);
+ que = (struct daemon_cmd *) Daemon_Queue.list.next;
+
+ while (que
+ && (que != (struct daemon_cmd *) & Daemon_Queue.list.next)
+ && (que->status != QUEUE_DONE)) {
+ que->status = QUEUE_TIMEOUT;
+ up(&que->semaphore);
+
+ que = (struct daemon_cmd *) que->list.next;
+ }
+ spin_unlock(&Daemon_Queue.lock);
+
+ RemoveDriveMaps();
+
+ novfs_scope_cleanup();
+ }
+
+ return (0);
+}
+
+ssize_t novfs_daemon_cmd_send(struct file * file, char *buf, size_t len, loff_t * off)
+{
+ struct daemon_cmd *que;
+ size_t retValue = 0;
+ int Finished = 0;
+ struct novfs_data_list *dlist;
+ int i, dcnt, bcnt, ccnt, error;
+ char *vadr;
+ unsigned long cpylen;
+
- DbgPrint("Daemon_Send_Command: %u %lld\n", len, *off);
++ DbgPrint("%u %lld", len, *off);
+ if (len > novfs_max_iosize) {
+ novfs_max_iosize = len;
+ }
+
+ while (!Finished) {
+ que = get_next_queue(1);
- DbgPrint("Daemon_Send_Command: 0x%p\n", que);
++ DbgPrint("0x%p", que);
+ if (que) {
+ retValue = que->reqlen;
+ if (retValue > len) {
+ retValue = len;
+ }
+ if (retValue > 0x80)
+ novfs_dump(0x80, que->request);
+ else
+ novfs_dump(retValue, que->request);
+
+ cpylen = copy_to_user(buf, que->request, retValue);
+ if (que->datalen && (retValue < len)) {
+ buf += retValue;
+ dlist = que->data;
+ dcnt = que->datalen;
+ for (i = 0; i < dcnt; i++, dlist++) {
+ if (DLREAD == dlist->rwflag) {
+ bcnt = dlist->len;
- DbgPrint
- ("Daemon_Send_Command%d: page=0x%p offset=0x%p len=%d\n",
- i, dlist->page,
++ DbgPrint("page=0x%p "
++ "offset=0x%p len=%d",
++ i, dlist->page,
+ dlist->offset, dlist->len);
+ if ((bcnt + retValue) <= len) {
+ void *km_adr = NULL;
+
+ if (dlist->page) {
+ km_adr =
+ kmap(dlist->
+ page);
+ vadr = km_adr;
+ vadr +=
+ (unsigned long)
+ dlist->
+ offset;
+ } else {
+ vadr =
+ dlist->
+ offset;
+ }
+
+ ccnt =
+ copy_to_user(buf,
+ vadr,
+ bcnt);
+
- DbgPrint
- ("Daemon_Send_Command: Copy %d from 0x%p to 0x%p.\n",
++ DbgPrint("Copy %d from 0x%p to 0x%p.",
+ bcnt, vadr, buf);
+ if (bcnt > 0x80)
+ novfs_dump(0x80,
+ vadr);
+ else
+ novfs_dump(bcnt,
+ vadr);
+
+ if (km_adr) {
+ kunmap(dlist->
+ page);
+ }
+
+ retValue += bcnt;
+ buf += bcnt;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ Queue_put(que);
+ break;
+ }
+
+ if (O_NONBLOCK & file->f_flags) {
+ retValue = -EAGAIN;
+ break;
+ } else {
+ if ((error =
+ down_interruptible(&Daemon_Queue.semaphore))) {
- DbgPrint
- ("Daemon_Send_Command: after down_interruptible error...%d\n",
- error);
++ DbgPrint("after down_interruptible error...%d",
++ error);
+ retValue = -EINTR;
+ break;
+ }
- DbgPrint
- ("Daemon_Send_Command: after down_interruptible\n");
++ DbgPrint("after down_interruptible");
+ }
+ }
+
+ *off = *off;
+
- DbgPrint("Daemon_Send_Command: return 0x%x\n", retValue);
++ DbgPrint("return 0x%x", retValue);
+
+ return (retValue);
+}
+
+ssize_t novfs_daemon_recv_reply(struct file *file, const char *buf, size_t nbytes, loff_t * ppos)
+{
+ struct daemon_cmd *que;
+ size_t retValue = 0;
+ void *reply;
+ unsigned long sequence, cpylen;
+
+ struct novfs_data_list *dlist;
+ char *vadr;
+ int i;
+
- DbgPrint("Daemon_Receive_Reply: buf=0x%p nbytes=%d ppos=%llx\n", buf,
++ DbgPrint("buf=0x%p nbytes=%d ppos=%llx", buf,
+ nbytes, *ppos);
+
+ /*
+ * Get sequence number from reply buffer
+ */
+
+ cpylen = copy_from_user(&sequence, buf, sizeof(sequence));
+
+ /*
+ * Find item based on sequence number
+ */
+ que = find_queue(sequence);
+
- DbgPrint("Daemon_Receive_Reply: 0x%x 0x%p %d\n", sequence, que, nbytes);
++ DbgPrint("0x%x 0x%p %d", sequence, que, nbytes);
+ if (que) {
+ do {
+ retValue = nbytes;
+ /*
+ * Ack packet from novfsd. Remove timer and
+ * return
+ */
+ if (nbytes == sizeof(sequence)) {
+ que->status = QUEUE_ACKED;
+ break;
+ }
+
+ if (NULL != (dlist = que->data)) {
+ int thiscopy, left = nbytes;
+ retValue = 0;
+
- DbgPrint
- ("Daemon_Receive_Reply: dlist=0x%p count=%d\n",
++ DbgPrint("dlist=0x%p count=%d",
+ dlist, que->datalen);
+ for (i = 0;
+ (i < que->datalen) && (retValue < nbytes);
+ i++, dlist++) {
- DbgPrint("Daemon_Receive_Reply:\n"
++ __DbgPrint("\n"
+ " dlist[%d].page: 0x%p\n"
+ " dlist[%d].offset: 0x%p\n"
+ " dlist[%d].len: 0x%x\n"
+ " dlist[%d].rwflag: 0x%x\n",
+ i, dlist->page, i,
+ dlist->offset, i, dlist->len,
+ i, dlist->rwflag);
+
+ if (DLWRITE == dlist->rwflag) {
+ void *km_adr = NULL;
+
+ if (dlist->page) {
+ km_adr =
+ kmap(dlist->page);
+ vadr = km_adr;
+ vadr +=
+ (unsigned long) dlist->
+ offset;
+ } else {
+ vadr = dlist->offset;
+ }
+
+ thiscopy = dlist->len;
+ if (thiscopy > left) {
+ thiscopy = left;
+ dlist->len = left;
+ }
+ cpylen =
+ copy_from_user(vadr, buf,
+ thiscopy);
+
+ if (thiscopy > 0x80)
+ novfs_dump(0x80, vadr);
+ else
+ novfs_dump(thiscopy, vadr);
+
+ if (km_adr) {
+ kunmap(dlist->page);
+ }
+
+ left -= thiscopy;
+ retValue += thiscopy;
+ buf += thiscopy;
+ }
+ }
+ que->replen = retValue;
+ } else {
+ reply = kmalloc(nbytes, GFP_KERNEL);
- DbgPrint("Daemon_Receive_Reply: reply=0x%p\n",
- reply);
++ DbgPrint("reply=0x%p", reply);
+ if (reply) {
+ retValue = nbytes;
+ que->reply = reply;
+ que->replen = nbytes;
+
+ retValue -=
+ copy_from_user(reply, buf,
+ retValue);
+ if (retValue > 0x80)
+ novfs_dump(0x80, reply);
+ else
+ novfs_dump(retValue, reply);
+
+ } else {
+ retValue = -ENOMEM;
+ }
+ }
+
+ /*
+ * Set status that packet is done.
+ */
+ que->status = QUEUE_DONE;
+
+ } while (0);
+ up(&que->semaphore);
+ Queue_put(que);
+ }
+
- DbgPrint("Daemon_Receive_Reply: return 0x%x\n", retValue);
++ DbgPrint("return 0x%x", retValue);
+
+ return (retValue);
+}
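+
+/*
+ * Reply protocol summary (added for clarity): the daemon first writes back
+ * only the sequence number to ack a command (nbytes == sizeof(sequence),
+ * QUEUE_ACKED above), then writes the full reply, which is either scattered
+ * into the command's DLWRITE data-list entries or kmalloc'd and attached to
+ * the queue entry before its status moves to QUEUE_DONE.
+ */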
+
+int novfs_do_login(struct ncl_string *Server, struct ncl_string *Username,
+struct ncl_string *Password, void **lgnId, struct novfs_schandle *Session)
+{
+ struct novfs_login_user_request *cmd;
+ struct novfs_login_user_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen, datalen;
+ unsigned char *data;
+
+ datalen = Server->len + Username->len + Password->len;
+ cmdlen = sizeof(*cmd) + datalen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ data = (unsigned char *) cmd + sizeof(*cmd);
+ cmd->Command.CommandType = VFS_COMMAND_LOGIN_USER;
+ cmd->Command.SequenceNumber = 0;
+ memcpy(&cmd->Command.SessionId, Session, sizeof(*Session));
+
+ cmd->srvNameType = Server->type;
+ cmd->serverLength = Server->len;
+ cmd->serverOffset = (unsigned long) (data - (unsigned char *) cmd);
+ memcpy(data, Server->buffer, Server->len);
+ data += Server->len;
+
+ cmd->usrNameType = Username->type;
+ cmd->userNameLength = Username->len;
+ cmd->userNameOffset = (unsigned long) (data - (unsigned char *) cmd);
+ memcpy(data, Username->buffer, Username->len);
+ data += Username->len;
+
+ cmd->pwdNameType = Password->type;
+ cmd->passwordLength = Password->len;
+ cmd->passwordOffset = (unsigned long) (data - (unsigned char *) cmd);
+ memcpy(data, Password->buffer, Password->len);
+ data += Password->len;
+
+ retCode = Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retCode = reply->Reply.ErrorCode;
+ } else {
+ retCode = 0;
+ if (lgnId) {
+ *lgnId = reply->loginIdentity;
+ }
+ }
+ kfree(reply);
+ }
+ memset(cmd, 0, cmdlen);
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_daemon_logout(struct qstr *Server, struct novfs_schandle *Session)
+{
+ struct novfs_logout_request *cmd;
+ struct novfs_logout_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen;
+
+ cmdlen = offsetof(struct novfs_logout_request, Name) + Server->len;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_LOGOUT_USER;
+ cmd->Command.SequenceNumber = 0;
+ memcpy(&cmd->Command.SessionId, Session, sizeof(*Session));
+ cmd->length = Server->len;
+ memcpy(cmd->Name, Server->name, Server->len);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply, &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_daemon_getpwuid(uid_t uid, int unamelen, char *uname)
+{
+ struct novfs_getpwuid_request cmd;
+ struct novfs_getpwuid_reply *reply;
+ unsigned long replylen = 0;
+ int retCode;
+
+ cmd.Command.CommandType = VFS_COMMAND_GETPWUD;
+ cmd.Command.SequenceNumber = 0;
+ SC_INITIALIZE(cmd.Command.SessionId);
+ cmd.uid = uid;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ } else {
+ retCode = 0;
+ memset(uname, 0, unamelen);
+ replylen =
+ replylen - offsetof(struct
+ novfs_getpwuid_reply, UserName);
+ if (replylen) {
+ if (replylen > unamelen) {
+ retCode = -EINVAL;
+ replylen = unamelen - 1;
+ }
+ memcpy(uname, reply->UserName, replylen);
+ }
+ }
+ kfree(reply);
+ }
+ return (retCode);
+
+}
+
+int novfs_daemon_getversion(char *Buf, int length)
+{
+ struct novfs_get_version_request cmd;
+ struct novfs_get_version_reply *reply;
+ unsigned long replylen = 0;
+ int retVal = 0;
+
+ cmd.Command.CommandType = VFS_COMMAND_GET_VERSION;
+ cmd.Command.SequenceNumber = 0;
+ SC_INITIALIZE(cmd.Command.SessionId);
+
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retVal = -EIO;
+ } else {
+ retVal =
+ replylen - offsetof(struct
+ novfs_get_version_reply, Version);
+ if (retVal < length) {
+ memcpy(Buf, reply->Version, retVal);
+ Buf[retVal] = '\0';
+ }
+ }
+ kfree(reply);
+ }
+ return (retVal);
+
+}
+
+static int daemon_login(struct novfs_login *Login, struct novfs_schandle *Session)
+{
+ int retCode = -ENOMEM;
+ struct novfs_login lLogin;
+ struct ncl_string server;
+ struct ncl_string username;
+ struct ncl_string password;
+
+ if (!copy_from_user(&lLogin, Login, sizeof(lLogin))) {
+ server.buffer = kmalloc(lLogin.Server.length, GFP_KERNEL);
+ if (server.buffer) {
+ server.len = lLogin.Server.length;
+ server.type = NWC_STRING_TYPE_ASCII;
+ if (!copy_from_user((void *)server.buffer, lLogin.Server.data, server.len)) {
+ username.buffer = kmalloc(lLogin.UserName.length, GFP_KERNEL);
+ if (username.buffer) {
+ username.len = lLogin.UserName.length;
+ username.type = NWC_STRING_TYPE_ASCII;
+ if (!copy_from_user((void *)username.buffer, lLogin.UserName.data, username.len)) {
+ password.buffer = kmalloc(lLogin.Password.length, GFP_KERNEL);
+ if (password.buffer)
+ {
+ password.len = lLogin.Password.length;
+ password.type = NWC_STRING_TYPE_ASCII;
+ if (!copy_from_user((void *)password.buffer, lLogin.Password.data, password.len)) {
+ retCode = novfs_do_login (&server, &username, &password, NULL, Session);
+ if (!retCode) {
+ char *username;
+ username = novfs_scope_get_username();
+ if (username) {
+ novfs_add_to_root(username);
+ }
+ }
+ }
+ kfree(password.buffer);
+ }
+ }
+ kfree(username.buffer);
+ }
+ }
+ kfree(server.buffer);
+ }
+ }
+
+ return (retCode);
+}
+
+static int daemon_logout(struct novfs_logout *Logout, struct novfs_schandle *Session)
+{
+ struct novfs_logout lLogout;
+ struct qstr server;
+ int retCode = 0;
+
+ if (copy_from_user(&lLogout, Logout, sizeof(lLogout)))
+ return -EFAULT;
+ server.name = kmalloc(lLogout.Server.length, GFP_KERNEL);
+ if (!server.name)
+ return -ENOMEM;
+ server.len = lLogout.Server.length;
+ if (copy_from_user((void *)server.name, lLogout.Server.data, server.len))
+ goto exit;
+ retCode = novfs_daemon_logout(&server, Session);
+exit:
+ kfree(server.name);
+ return (retCode);
+}
+
+int novfs_daemon_create_sessionId(struct novfs_schandle * SessionId)
+{
+ struct novfs_create_context_request cmd;
+ struct novfs_create_context_reply *reply;
+ unsigned long replylen = 0;
+ int retCode = 0;
+
- DbgPrint("Daemon_CreateSessionId: %d\n", current->pid);
++ DbgPrint("%d", current->pid);
+
+ cmd.Command.CommandType = VFS_COMMAND_CREATE_CONTEXT;
+ cmd.Command.SequenceNumber = 0;
+ SC_INITIALIZE(cmd.Command.SessionId);
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (!reply->Reply.ErrorCode
+ && replylen > sizeof(struct novfs_command_reply_header)) {
+ *SessionId = reply->SessionId;
+ retCode = 0;
+ } else {
+ SessionId->hTypeId = 0;
+ SessionId->hId = 0;
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
- DbgPrint("Daemon_CreateSessionId: SessionId=0x%llx\n", *SessionId);
++ DbgPrint("SessionId=0x%llx", *SessionId);
+ return (retCode);
+}
+
+int novfs_daemon_destroy_sessionId(struct novfs_schandle SessionId)
+{
+ struct novfs_destroy_context_request cmd;
+ struct novfs_destroy_context_reply *reply;
+ unsigned long replylen = 0;
+ int retCode = 0;
+
- DbgPrint("Daemon_DestroySessionId: 0x%p:%p\n", SessionId.hTypeId,
++ DbgPrint("0x%p:%p", SessionId.hTypeId,
+ SessionId.hId);
+
+ cmd.Command.CommandType = VFS_COMMAND_DESTROY_CONTEXT;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (!reply->Reply.ErrorCode) {
+ struct drive_map *dm;
+ struct list_head *list;
+
+ retCode = 0;
+
+ /*
+ * When destroying the session check to see if there are any
+ * mapped drives. If there are then remove them.
+ */
+ down(&DriveMapLock);
+ list_for_each(list, &DriveMapList) {
+ dm = list_entry(list, struct drive_map, list);
+ if (SC_EQUAL(SessionId, dm->session)) {
+ local_unlink(dm->name);
+ list = list->prev;
+ list_del(&dm->list);
+ kfree(dm);
+ }
+
+ }
+ up(&DriveMapLock);
+
+ } else {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+int novfs_daemon_get_userspace(struct novfs_schandle SessionId, uint64_t * TotalSize,
+ uint64_t * Free, uint64_t * TotalEnties,
+ uint64_t * FreeEnties)
+{
+ struct novfs_get_user_space cmd;
+ struct novfs_get_user_space_reply *reply;
+ unsigned long replylen = 0;
+ int retCode = 0;
+
- DbgPrint("Daemon_Get_UserSpace: 0x%p:%p\n", SessionId.hTypeId,
++ DbgPrint("0x%p:%p", SessionId.hTypeId,
+ SessionId.hId);
+
+ cmd.Command.CommandType = VFS_COMMAND_GET_USER_SPACE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (!reply->Reply.ErrorCode) {
+
- DbgPrint("TotalSpace: %llu\n", reply->TotalSpace);
- DbgPrint("FreeSpace: %llu\n", reply->FreeSpace);
- DbgPrint("TotalEnties: %llu\n", reply->TotalEnties);
- DbgPrint("FreeEnties: %llu\n", reply->FreeEnties);
++ __DbgPrint("TotalSpace: %llu\n", reply->TotalSpace);
++ __DbgPrint("FreeSpace: %llu\n", reply->FreeSpace);
++ __DbgPrint("TotalEnties: %llu\n", reply->TotalEnties);
++ __DbgPrint("FreeEnties: %llu\n", reply->FreeEnties);
+
+ if (TotalSize)
+ *TotalSize = reply->TotalSpace;
+ if (Free)
+ *Free = reply->FreeSpace;
+ if (TotalEnties)
+ *TotalEnties = reply->TotalEnties;
+ if (FreeEnties)
+ *FreeEnties = reply->FreeEnties;
+ retCode = 0;
+ } else {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+int novfs_daemon_set_mnt_point(char *Path)
+{
+ struct novfs_set_mount_path *cmd;
+ struct novfs_set_mount_path_reply *reply;
+ unsigned long replylen, cmdlen;
+ int retCode = -ENOMEM;
+
- DbgPrint("Daemon_SetMountPoint: %s\n", Path);
++ DbgPrint("%s", Path);
+
+ replylen = strlen(Path);
+
+ cmdlen = sizeof(struct novfs_set_mount_path) + replylen;
+
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_SET_MOUNT_PATH;
+ cmd->Command.SequenceNumber = 0;
+ SC_INITIALIZE(cmd->Command.SessionId);
+ cmd->PathLength = replylen;
+
+ strcpy(cmd->Path, Path);
+
+ replylen = 0;
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (!reply->Reply.ErrorCode) {
+ retCode = 0;
+ } else {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ return retCode;
+}
+
+int novfs_daemon_debug_cmd_send(char *Command)
+{
+ struct novfs_debug_request cmd;
+ struct novfs_debug_reply *reply;
+ struct novfs_debug_reply lreply;
+ unsigned long replylen, cmdlen;
+ struct novfs_data_list dlist[2];
+
+ int retCode = -ENOMEM;
+
- DbgPrint("Daemon_SendDebugCmd: %s\n", Command);
++ DbgPrint("%s", Command);
+
+ dlist[0].page = NULL;
+ dlist[0].offset = (char *)Command;
+ dlist[0].len = strlen(Command);
+ dlist[0].rwflag = DLREAD;
+
+ dlist[1].page = NULL;
+ dlist[1].offset = (char *)&lreply;
+ dlist[1].len = sizeof(lreply);
+ dlist[1].rwflag = DLWRITE;
+
+ cmdlen = offsetof(struct novfs_debug_request, dbgcmd);
+
+ cmd.Command.CommandType = VFS_COMMAND_DBG;
+ cmd.Command.SequenceNumber = 0;
+ SC_INITIALIZE(cmd.Command.SessionId);
+ cmd.cmdlen = strlen(Command);
+
+ replylen = 0;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, dlist, 2, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ kfree(reply);
+ }
+ if (0 == retCode) {
+ retCode = lreply.Reply.ErrorCode;
+ }
+
+ return (retCode);
+}
+
+int novfs_daemon_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int retCode = -ENOSYS;
+ unsigned long cpylen;
+ struct novfs_schandle session_id;
+ session_id = novfs_scope_get_sessionId(NULL);
+
+ switch (cmd) {
+ case IOC_LOGIN:
+ retCode = daemon_login((struct novfs_login *) arg, &session_id);
+ break;
+
+ case IOC_LOGOUT:
+ retCode = daemon_logout((struct novfs_logout *)arg, &session_id);
+ break;
+ case IOC_DEBUGPRINT:
+ {
+ struct Ioctl_Debug {
+ int length;
+ char *data;
+ } io;
+ char *buf;
+ io.length = 0;
+ cpylen = copy_from_user(&io, (char *)arg, sizeof(io));
+ if (io.length) {
+ buf = kmalloc(io.length + 1, GFP_KERNEL);
+ if (buf) {
+ buf[0] = 0;
+ cpylen =
+ copy_from_user(buf, io.data,
+ io.length);
+ buf[io.length] = '\0';
+ DbgPrint("%s", buf);
+ kfree(buf);
+ retCode = 0;
+ }
+ }
+ break;
+ }
+
+ case IOC_XPLAT:
+ {
+ struct novfs_xplat data;
+
+ cpylen =
+ copy_from_user(&data, (void *)arg, sizeof(data));
+ retCode = ((data.xfunction & 0x0000FFFF) | 0xCC000000);
+
+ switch (data.xfunction) {
+ case NWC_GET_MOUNT_PATH:
- DbgPrint
- ("[Daemon_ioctl] Call NwdGetMountPath\n");
++ DbgPrint("Call NwdGetMountPath");
+ retCode = NwdGetMountPath(&data);
+ break;
+ }
+
+ DbgPrint("[NOVFS XPLAT] status Code = %X\n", retCode);
+ break;
+ }
+
+ }
+ return (retCode);
+}
+
+static int daemon_added_resource(struct daemon_handle * DHandle, int Type, void *CHandle,
+ unsigned char * FHandle, unsigned long Mode, u_long Size)
+{
+ struct daemon_resource *resource;
+
+ if (FHandle)
- DbgPrint
- ("Daemon_Added_Resource: DHandle=0x%p Type=%d CHandle=0x%p FHandle=0x%x Mode=0x%x Size=%d\n",
- DHandle, Type, CHandle, *(u32 *) & FHandle[2], Mode, Size);
++ DbgPrint("DHandle=0x%p Type=%d CHandle=0x%p FHandle=0x%x "
++ "Mode=0x%x Size=%d", DHandle, Type, CHandle,
++ *(u32 *) & FHandle[2], Mode, Size);
+ else
- DbgPrint
- ("Daemon_Added_Resource: DHandle=0x%p Type=%d CHandle=0x%p\n",
- DHandle, Type, CHandle);
++ DbgPrint("DHandle=0x%p Type=%d CHandle=0x%p\n",
++ DHandle, Type, CHandle);
+
+ resource = kmalloc(sizeof(struct daemon_resource), GFP_KERNEL);
+ if (!resource)
+ return -ENOMEM;
+
+ resource->type = Type;
+ resource->connection = CHandle;
+ if (FHandle)
+ memcpy(resource->handle, FHandle,
+ sizeof(resource->handle));
+ else
+ memset(resource->handle, 0, sizeof(resource->handle));
+ resource->mode = Mode;
+ resource->size = Size;
+ write_lock(&DHandle->lock);
+ list_add(&resource->list, &DHandle->list);
+ write_unlock(&DHandle->lock);
- DbgPrint("Daemon_Added_Resource: Adding resource=0x%p\n",
- resource);
++ DbgPrint("Adding resource=0x%p", resource);
+ return 0;
+}
+
+static int daemon_remove_resource(struct daemon_handle * DHandle, int Type, void *CHandle,
+ unsigned long FHandle)
+{
+ struct daemon_resource *resource;
+ struct list_head *l;
+ int retVal = -ENOMEM;
+
- DbgPrint
- ("Daemon_Remove_Resource: DHandle=0x%p Type=%d CHandle=0x%p FHandle=0x%x\n",
- DHandle, Type, CHandle, FHandle);
++ DbgPrint("DHandle=0x%p Type=%d CHandle=0x%p FHandle=0x%x",
++ DHandle, Type, CHandle, FHandle);
+
+ write_lock(&DHandle->lock);
+
+ list_for_each(l, &DHandle->list) {
+ resource = list_entry(l, struct daemon_resource, list);
+
+ if ((Type == resource->type) &&
+ (resource->connection == CHandle)) {
- DbgPrint
- ("Daemon_Remove_Resource: Found resource=0x%p\n",
- resource);
++ DbgPrint("Found resource=0x%p", resource);
+ l = l->prev;
+ list_del(&resource->list);
+ kfree(resource);
+ break;
+ }
+ }
+
+ write_unlock(&DHandle->lock);
+
+ return (retVal);
+}
+
+int novfs_daemon_lib_open(struct inode *inode, struct file *file)
+{
+ struct daemon_handle *dh;
+
- DbgPrint("Daemon_Library_open: inode=0x%p file=0x%p\n", inode, file);
++ DbgPrint("inode=0x%p file=0x%p", inode, file);
+ dh = kmalloc(sizeof(struct daemon_handle), GFP_KERNEL);
+ if (!dh)
+ return -ENOMEM;
+ file->private_data = dh;
+ INIT_LIST_HEAD(&dh->list);
+ rwlock_init(&dh->lock);
+ dh->session = novfs_scope_get_sessionId(NULL);
+ return 0;
+}
+
+int novfs_daemon_lib_close(struct inode *inode, struct file *file)
+{
+ struct daemon_handle *dh;
+ struct daemon_resource *resource;
+ struct list_head *l;
+
+ char commanddata[sizeof(struct novfs_xplat_call_request) + sizeof(struct nwd_close_conn)];
+ struct novfs_xplat_call_request *cmd;
+ struct xplat_call_reply *reply;
+ struct nwd_close_conn *nwdClose;
+ unsigned long cmdlen, replylen;
+
- DbgPrint("Daemon_Library_close: inode=0x%p file=0x%p\n", inode, file);
++ DbgPrint("inode=0x%p file=0x%p", inode, file);
+ if (file->private_data) {
+ dh = (struct daemon_handle *) file->private_data;
+
+ list_for_each(l, &dh->list) {
+ resource = list_entry(l, struct daemon_resource, list);
+
+ if (DH_TYPE_STREAM == resource->type) {
+ novfs_close_stream(resource->connection,
+ resource->handle,
+ dh->session);
+ } else if (DH_TYPE_CONNECTION == resource->type) {
+ cmd = (struct novfs_xplat_call_request *) commanddata;
+ cmdlen =
+ offsetof(struct novfs_xplat_call_request,
+ data) + sizeof(struct nwd_close_conn);
+ cmd->Command.CommandType =
+ VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = dh->session;
+ cmd->NwcCommand = NWC_CLOSE_CONN;
+
+ cmd->dataLen = sizeof(struct nwd_close_conn);
+ nwdClose = (struct nwd_close_conn *) cmd->data;
+ nwdClose->ConnHandle =
+ (void *) resource->connection;
+
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL,
+ 0, (void **)&reply,
+ &replylen, 0);
+ if (reply)
+ kfree(reply);
+ }
+ l = l->prev;
+ list_del(&resource->list);
+ kfree(resource);
+ }
+ kfree(dh);
+ file->private_data = NULL;
+ }
+
+ return (0);
+}
+
+ssize_t novfs_daemon_lib_read(struct file * file, char *buf, size_t len,
+ loff_t * off)
+{
+ struct daemon_handle *dh;
+ struct daemon_resource *resource;
+
+ size_t thisread, totalread = 0;
+ loff_t offset = *off;
+
- DbgPrint("Daemon_Library_read: file=0x%p len=%d off=%lld\n", file, len,
- *off);
++ DbgPrint("file=0x%p len=%d off=%lld", file, len, *off);
+
+ if (file->private_data) {
+ dh = file->private_data;
+ read_lock(&dh->lock);
+ if (&dh->list != dh->list.next) {
+ resource =
+ list_entry(dh->list.next, struct daemon_resource, list);
+
+ if (DH_TYPE_STREAM == resource->type) {
+ while (len > 0 && (offset < resource->size)) {
+ thisread = len;
+ if (novfs_read_stream
+ (resource->connection,
+ resource->handle, buf, &thisread,
+ &offset, 1, dh->session)
+ || !thisread) {
+ break;
+ }
+ len -= thisread;
+ buf += thisread;
+ offset += thisread;
+ totalread += thisread;
+ }
+ }
+ }
+ read_unlock(&dh->lock);
+ }
+ *off = offset;
- DbgPrint("Daemon_Library_read return = 0x%x\n", totalread);
++ DbgPrint("return = 0x%x", totalread);
+ return (totalread);
+}
+
+ssize_t novfs_daemon_lib_write(struct file * file, const char *buf, size_t len,
+ loff_t * off)
+{
+ struct daemon_handle *dh;
+ struct daemon_resource *resource;
+
+ size_t thiswrite, totalwrite = -EINVAL;
+ loff_t offset = *off;
+ int status;
+
- DbgPrint("Daemon_Library_write: file=0x%p len=%d off=%lld\n", file, len,
- *off);
++ DbgPrint("file=0x%p len=%d off=%lld", file, len, *off);
+
+ if (file->private_data) {
+ dh = file->private_data;
+ write_lock(&dh->lock);
+ if (&dh->list != dh->list.next) {
+ resource =
+ list_entry(dh->list.next, struct daemon_resource, list);
+
+ if ((DH_TYPE_STREAM == resource->type) && (len >= 0)) {
+ totalwrite = 0;
+ do {
+ thiswrite = len;
+ status =
+ novfs_write_stream(resource->
+ connection,
+ resource->handle,
+ (void *)buf,
+ &thiswrite,
+ &offset,
+ dh->session);
+ if (status || !thiswrite) {
+ /*
+ * If len is zero then the file will have just been
+ * truncated to offset. Update size.
+ */
+ if (!status && !len) {
+ resource->size = offset;
+ }
+ totalwrite = status;
+ break;
+ }
+ len -= thiswrite;
+ buf += thiswrite;
+ offset += thiswrite;
+ totalwrite += thiswrite;
+ if (offset > resource->size) {
+ resource->size = offset;
+ }
+ } while (len > 0);
+ }
+ }
+ write_unlock(&dh->lock);
+ }
+ *off = offset;
- DbgPrint("Daemon_Library_write return = 0x%x\n", totalwrite);
++ DbgPrint("return = 0x%x", totalwrite);
+
+ return (totalwrite);
+}
+
+loff_t novfs_daemon_lib_llseek(struct file * file, loff_t offset, int origin)
+{
+ struct daemon_handle *dh;
+ struct daemon_resource *resource;
+
+ loff_t retVal = -EINVAL;
+
- DbgPrint("Daemon_Library_llseek: file=0x%p offset=%lld origin=%d\n",
- file, offset, origin);
++ DbgPrint("file=0x%p offset=%lld origin=%d", file, offset, origin);
+
+ if (file->private_data) {
+ dh = file->private_data;
+ read_lock(&dh->lock);
+ if (&dh->list != dh->list.next) {
+ resource =
+ list_entry(dh->list.next, struct daemon_resource, list);
+
+ if (DH_TYPE_STREAM == resource->type) {
+ switch (origin) {
+ case 2:
+ offset += resource->size;
+ break;
+ case 1:
+ offset += file->f_pos;
+ }
+ if (offset >= 0) {
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ retVal = offset;
+ }
+ }
+ }
+ read_unlock(&dh->lock);
+ }
+
- DbgPrint("Daemon_Library_llseek: ret %lld\n", retVal);
++ DbgPrint("ret %lld", retVal);
+
+ return retVal;
+}
+
++#define DbgIocCall(str) __DbgPrint("[VFS XPLAT] Call " str "\n")
++
+int novfs_daemon_lib_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int retCode = -ENOSYS;
+ struct daemon_handle *dh;
+ void *handle = NULL;
+ unsigned long cpylen;
+
+ dh = file->private_data;
+
- DbgPrint("Daemon_Library_ioctl: file=0x%p 0x%x 0x%p dh=0x%p\n", file,
- cmd, arg, dh);
++ DbgPrint("file=0x%p 0x%x 0x%p dh=0x%p", file, cmd, arg, dh);
+
+ if (dh) {
+
+ switch (cmd) {
+ case IOC_LOGIN:
+ retCode = daemon_login((struct novfs_login *)arg, &dh->session);
+ break;
+
+ case IOC_LOGOUT:
+ retCode = daemon_logout((struct novfs_logout *)arg, &dh->session);
+ break;
+
+ case IOC_DEBUGPRINT:
+ {
+ struct Ioctl_Debug {
+ int length;
+ char *data;
+ } io;
+ char *buf;
+ io.length = 0;
+ cpylen =
+ copy_from_user(&io, (void *)arg,
+ sizeof(io));
+ if (io.length) {
+ buf =
+ kmalloc(io.length + 1,
+ GFP_KERNEL);
+ if (buf) {
+ buf[0] = 0;
+ cpylen =
+ copy_from_user(buf, io.data,
+ io.length);
+ buf[io.length] = '\0';
- DbgPrint("%s", buf);
++ __DbgPrint("%s", buf);
+ kfree(buf);
+ retCode = 0;
+ }
+ }
+ break;
+ }
+
+ case IOC_XPLAT:
+ {
+ struct novfs_xplat data;
+
+ cpylen =
+ copy_from_user(&data, (void *)arg,
+ sizeof(data));
+ retCode =
+ ((data.
+ xfunction & 0x0000FFFF) | 0xCC000000);
+
+ switch (data.xfunction) {
+ case NWC_OPEN_CONN_BY_NAME:
- DbgPrint
- ("[VFS XPLAT] Call NwOpenConnByName\n");
++ DbgIocCall("NwOpenConnByName");
+ retCode =
+ novfs_open_conn_by_name(&data,
+ &handle, dh->session);
+ if (!retCode)
+ daemon_added_resource(dh,
+ DH_TYPE_CONNECTION,handle, 0, 0, 0);
+ break;
+
+ case NWC_OPEN_CONN_BY_ADDRESS:
- DbgPrint
- ("[VFS XPLAT] Call NwOpenConnByAddress\n");
++ DbgIocCall("NwOpenConnByAddress");
+ retCode =
+ novfs_open_conn_by_addr(&data, &handle,
+ dh->session);
+ if (!retCode)
+ daemon_added_resource(dh,
+ DH_TYPE_CONNECTION,
+ handle, 0,
+ 0, 0);
+ break;
+
+ case NWC_OPEN_CONN_BY_REFERENCE:
+
- DbgPrint
- ("[VFS XPLAT] Call NwOpenConnByReference\n");
++ DbgIocCall("NwOpenConnByReference");
+ retCode =
+ novfs_open_conn_by_ref(&data, &handle,
+ dh->session);
+ if (!retCode)
+ daemon_added_resource(dh,
+ DH_TYPE_CONNECTION,
+ handle, 0,
+ 0, 0);
+ break;
+
+ case NWC_SYS_CLOSE_CONN:
- DbgPrint("[VFS XPLAT] Call NwSysCloseConn\n");
++ DbgIocCall("NwSysCloseConn");
+ retCode =
+ novfs_sys_conn_close(&data, (unsigned long *)&handle, dh->session);
+ daemon_remove_resource(dh, DH_TYPE_CONNECTION, handle, 0);
+ break;
+
+ case NWC_CLOSE_CONN:
- DbgPrint
- ("[VFS XPLAT] Call NwCloseConn\n");
++ DbgIocCall("NwCloseConn");
+ retCode =
+ novfs_conn_close(&data, &handle,
+ dh->session);
+ daemon_remove_resource(dh,
+ DH_TYPE_CONNECTION,
+ handle, 0);
+ break;
+
+ case NWC_LOGIN_IDENTITY:
- DbgPrint
- ("[VFS XPLAT] Call NwLoginIdentity\n");
++ DbgIocCall(""
++ "NwLoginIdentity");
+ retCode =
+ novfs_login_id(&data, dh->session);
+ break;
+
+ case NWC_RAW_NCP_REQUEST:
- DbgPrint
- ("[VFS XPLAT] Send Raw NCP Request\n");
++ DbgIocCall("[VFS XPLAT] Send Raw "
++ "NCP Request");
+ retCode = novfs_raw_send(&data, dh->session);
+ break;
+
+ case NWC_AUTHENTICATE_CONN_WITH_ID:
- DbgPrint
- ("[VFS XPLAT] Authenticate Conn With ID\n");
++ DbgIocCall("[VFS XPLAT] Authenticate "
++ "Conn With ID");
+ retCode =
+ novfs_auth_conn(&data,
+ dh->session);
+ break;
+
+ case NWC_UNAUTHENTICATE_CONN:
- DbgPrint
- ("[VFS XPLAT] UnAuthenticate Conn With ID\n");
++ DbgIocCall("[VFS XPLAT] UnAuthenticate "
++ "Conn With ID");
+ retCode =
+ novfs_unauthenticate(&data,
+ dh->session);
+ break;
+
+ case NWC_LICENSE_CONN:
- DbgPrint("Call NwLicenseConn\n");
++ DbgIocCall("Call NwLicenseConn");
+ retCode =
+ novfs_license_conn(&data, dh->session);
+ break;
+
+ case NWC_LOGOUT_IDENTITY:
- DbgPrint
- ("[VFS XPLAT] Call NwLogoutIdentity\n");
++ DbgIocCall("NwLogoutIdentity");
+ retCode =
+ novfs_logout_id(&data,
+ dh->session);
+ break;
+
+ case NWC_UNLICENSE_CONN:
- DbgPrint
- ("[VFS XPLAT] Call NwUnlicense\n");
++ DbgIocCall("NwUnlicense");
+ retCode =
+ novfs_unlicense_conn(&data, dh->session);
+ break;
+
+ case NWC_GET_CONN_INFO:
- DbgPrint
- ("[VFS XPLAT] Call NwGetConnInfo\n");
++ DbgIocCall("NwGetConnInfo");
+ retCode =
+ novfs_get_conn_info(&data, dh->session);
+ break;
+
+ case NWC_SET_CONN_INFO:
- DbgPrint
- ("[VFS XPLAT] Call NwSetConnInfo\n");
++ DbgIocCall("NwGetConnInfo");
+ retCode =
+ novfs_set_conn_info(&data, dh->session);
+ break;
+
+ case NWC_SCAN_CONN_INFO:
- DbgPrint
- ("[VFS XPLAT] Call NwScanConnInfo\n");
++ DbgIocCall("NwScanConnInfo");
+ retCode =
+ novfs_scan_conn_info(&data, dh->session);
+ break;
+
+ case NWC_GET_IDENTITY_INFO:
- DbgPrint
- ("[VFS XPLAT] Call NwGetIdentityInfo\n");
++ DbgIocCall("NwGetIdentityInfo");
+ retCode =
+ novfs_get_id_info(&data,
+ dh->session);
+ break;
+
+ case NWC_GET_REQUESTER_VERSION:
- DbgPrint
- ("[VFS XPLAT] Call NwGetDaemonVersion\n");
++ DbgIocCall("NwGetDaemonVersion");
+ retCode =
+ novfs_get_daemon_ver(&data,
+ dh->session);
+ break;
+
+ case NWC_GET_PREFERRED_DS_TREE:
- DbgPrint
- ("[VFS XPLAT] Call NwcGetPreferredDsTree\n");
++ DbgIocCall("NwcGetPreferredDsTree");
+ retCode =
+ novfs_get_preferred_DS_tree(&data,
+ dh->session);
+ break;
+
+ case NWC_SET_PREFERRED_DS_TREE:
- DbgPrint
- ("[VFS XPLAT] Call NwcSetPreferredDsTree\n");
++ DbgIocCall("NwcSetPreferredDsTree");
+ retCode =
+ novfs_set_preferred_DS_tree(&data,
+ dh->session);
+ break;
+
+ case NWC_GET_DEFAULT_NAME_CONTEXT:
- DbgPrint
- ("[VFS XPLAT] Call NwcGetDefaultNameContext\n");
++ DbgIocCall("NwcGetDefaultNameContext");
+ retCode =
+ novfs_get_default_ctx(&data,
+ dh->session);
+ break;
+
+ case NWC_SET_DEFAULT_NAME_CONTEXT:
- DbgPrint
- ("[VFS XPLAT] Call NwcSetDefaultNameContext\n");
++ DbgIocCall("NwcSetDefaultNameContext");
+ retCode =
+ novfs_set_default_ctx(&data,
+ dh->session);
+ break;
+
+ case NWC_QUERY_FEATURE:
- DbgPrint
- ("[VFS XPLAT] Call NwQueryFeature\n");
++ DbgIocCall("NwQueryFeature");
+ retCode =
+ novfs_query_feature(&data, dh->session);
+ break;
+
+ case NWC_GET_TREE_MONITORED_CONN_REF:
- DbgPrint
- ("[VFS XPLAT] Call NwcGetTreeMonitoredConn\n");
++ DbgIocCall("NwcGetTreeMonitoredConn");
+ retCode =
+ novfs_get_tree_monitored_conn(&data,
+ dh->
+ session);
+ break;
+
+ case NWC_ENUMERATE_IDENTITIES:
- DbgPrint
- ("[VFS XPLAT] Call NwcEnumerateIdentities\n");
++ DbgIocCall("NwcEnumerateIdentities");
+ retCode =
+ novfs_enum_ids(&data,
+ dh->session);
+ break;
+
+ case NWC_CHANGE_KEY:
- DbgPrint
- ("[VFS XPLAT] Call NwcChangeAuthKey\n");
++ DbgIocCall("NwcChangeAuthKey");
+ retCode =
+ novfs_change_auth_key(&data,
+ dh->session);
+ break;
+
+ case NWC_CONVERT_LOCAL_HANDLE:
- DbgPrint
- ("[VFS XPLAT] Call NwdConvertLocalHandle\n");
++ DbgIocCall("NwdConvertLocalHandle");
+ retCode =
+ NwdConvertLocalHandle(&data, dh);
+ break;
+
+ case NWC_CONVERT_NETWARE_HANDLE:
- DbgPrint
- ("[VFS XPLAT] Call NwdConvertNetwareHandle\n");
++ DbgIocCall("NwdConvertNetwareHandle");
+ retCode =
+ NwdConvertNetwareHandle(&data, dh);
+ break;
+
+ case NWC_SET_PRIMARY_CONN:
- DbgPrint
- ("[VFS XPLAT] Call NwcSetPrimaryConn\n");
++ DbgIocCall("NwcSetPrimaryConn");
+ retCode =
+ novfs_set_pri_conn(&data,
+ dh->session);
+ break;
+
+ case NWC_GET_PRIMARY_CONN:
- DbgPrint
- ("[VFS XPLAT] Call NwcGetPrimaryConn\n");
++ DbgIocCall("NwcGetPrimaryConn");
+ retCode =
+ novfs_get_pri_conn(&data,
+ dh->session);
+ break;
+
+ case NWC_MAP_DRIVE:
- DbgPrint
- ("[VFS XPLAT] Call NwcMapDrive\n");
++ DbgIocCall("NwcMapDrive");
+ retCode =
+ set_map_drive(&data, dh->session);
+ break;
+
+ case NWC_UNMAP_DRIVE:
- DbgPrint
- ("[VFS XPLAT] Call NwcUnMapDrive\n");
++ DbgIocCall("NwcUnMapDrive");
+ retCode =
+ unmap_drive(&data, dh->session);
+ break;
+
+ case NWC_ENUMERATE_DRIVES:
- DbgPrint
- ("[VFS XPLAT] Call NwcEnumerateDrives\n");
++ DbgIocCall("NwcEnumerateDrives");
+ retCode =
+ novfs_enum_drives(&data,
+ dh->session);
+ break;
+
+ case NWC_GET_MOUNT_PATH:
- DbgPrint
- ("[VFS XPLAT] Call NwdGetMountPath\n");
++ DbgIocCall("NwdGetMountPath");
+ retCode = NwdGetMountPath(&data);
+ break;
+
+ case NWC_GET_BROADCAST_MESSAGE:
- DbgPrint
- ("[VSF XPLAT Call NwdGetBroadcastMessage\n");
++ DbgIocCall("NwdGetBroadcastMessage");
+ retCode =
+ novfs_get_bcast_msg(&data,
+ dh->session);
+ break;
+
+ case NWC_SET_KEY:
- DbgPrint("[VSF XPLAT Call NwdSetKey\n");
++ DbgIocCall("NwdSetKey");
+ retCode =
+ novfs_set_key_value(&data, dh->session);
+ break;
+
+ case NWC_VERIFY_KEY:
- DbgPrint
- ("[VSF XPLAT Call NwdVerifyKey\n");
++ DbgIocCall("NwdVerifyKey");
+ retCode =
+ novfs_verify_key_value(&data,
+ dh->session);
+ break;
+
+ case NWC_RAW_NCP_REQUEST_ALL:
+ case NWC_NDS_RESOLVE_NAME_TO_ID:
+ case NWC_FRAGMENT_REQUEST:
+ case NWC_GET_CONFIGURED_NSPS:
+ default:
+ break;
+
+ }
+
+ DbgPrint("[NOVFS XPLAT] status Code = %X\n",
+ retCode);
+ break;
+ }
+ }
+ }
+
+ return (retCode);
+}
+
+unsigned int novfs_daemon_poll(struct file *file,
+ struct poll_table_struct *poll_table)
+{
+ struct daemon_cmd *que;
+ unsigned int mask = POLLOUT | POLLWRNORM;
+
+ que = get_next_queue(0);
+ if (que)
+ mask |= (POLLIN | POLLRDNORM);
+ return mask;
+}
+
+static int NwdConvertNetwareHandle(struct novfs_xplat *pdata, struct daemon_handle * DHandle)
+{
+ int retVal;
+ struct nwc_convert_netware_handle nh;
+ unsigned long cpylen;
+
- DbgPrint("NwdConvertNetwareHandle: DHandle=0x%p\n", DHandle);
++ DbgPrint("DHandle=0x%p", DHandle);
+
+ cpylen =
+ copy_from_user(&nh, pdata->reqData,
+ sizeof(struct nwc_convert_netware_handle));
+
+ retVal =
+ daemon_added_resource(DHandle, DH_TYPE_STREAM,
+ Uint32toHandle(nh.ConnHandle),
+ nh.NetWareHandle, nh.uAccessMode,
+ nh.uFileSize);
+
+ return (retVal);
+}
+
+static int NwdConvertLocalHandle(struct novfs_xplat *pdata, struct daemon_handle * DHandle)
+{
+ int retVal = NWE_REQUESTER_FAILURE;
+ struct daemon_resource *resource;
+ struct nwc_convert_local_handle lh;
+ struct list_head *l;
+ unsigned long cpylen;
+
- DbgPrint("NwdConvertLocalHandle: DHandle=0x%p\n", DHandle);
++ DbgPrint("DHandle=0x%p", DHandle);
+
+ read_lock(&DHandle->lock);
+
+ list_for_each(l, &DHandle->list) {
+ resource = list_entry(l, struct daemon_resource, list);
+
+ if (DH_TYPE_STREAM == resource->type) {
+ lh.uConnReference =
+ HandletoUint32(resource->connection);
+
+//sgled memcpy(lh.NwWareHandle, resource->handle, sizeof(resource->handle));
+ memcpy(lh.NetWareHandle, resource->handle, sizeof(resource->handle)); //sgled
+ if (pdata->repLen >= sizeof(struct nwc_convert_local_handle)) {
+ cpylen =
+ copy_to_user(pdata->repData, &lh,
+ sizeof(struct nwc_convert_local_handle));
+ retVal = 0;
+ } else {
+ retVal = NWE_BUFFER_OVERFLOW;
+ }
+ break;
+ }
+ }
+
+ read_unlock(&DHandle->lock);
+
+ return (retVal);
+}
+
+static int NwdGetMountPath(struct novfs_xplat *pdata)
+{
+ int retVal = NWE_REQUESTER_FAILURE;
+ int len;
+ unsigned long cpylen;
+ struct nwc_get_mount_path mp;
+
+ cpylen = copy_from_user(&mp, pdata->reqData, pdata->reqLen);
+
+ if (novfs_current_mnt) {
+
+ len = strlen(novfs_current_mnt) + 1;
+ if ((len > mp.MountPathLen) && mp.pMountPath) {
+ retVal = NWE_BUFFER_OVERFLOW;
+ } else {
+ if (mp.pMountPath) {
+ cpylen =
+ copy_to_user(mp.pMountPath,
+ novfs_current_mnt, len);
+ }
+ retVal = 0;
+ }
+
+ mp.MountPathLen = len;
+
+ if (pdata->repData && (pdata->repLen >= sizeof(mp))) {
+ cpylen = copy_to_user(pdata->repData, &mp, sizeof(mp));
+ }
+ }
+
+ return (retVal);
+}
+
+static int set_map_drive(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ int retVal;
+ unsigned long cpylen;
+ struct nwc_map_drive_ex symInfo;
+ char *path;
+ struct drive_map *drivemap, *dm;
+ struct list_head *list;
+
+ retVal = novfs_set_map_drive(pdata, Session);
+ if (retVal)
+ return retVal;
+ if (copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo)))
+ return -EFAULT;
+ drivemap =
+ kmalloc(sizeof(struct drive_map) + symInfo.linkOffsetLength,
+ GFP_KERNEL);
+ if (!drivemap)
+ return -ENOMEM;
+
+ path = (char *)pdata->reqData;
+ path += symInfo.linkOffset;
+ cpylen =
+ copy_from_user(drivemap->name, path,
+ symInfo.linkOffsetLength);
+
+ drivemap->session = Session;
+ drivemap->hash =
+ full_name_hash(drivemap->name,
+ symInfo.linkOffsetLength - 1);
+ drivemap->namelen = symInfo.linkOffsetLength - 1;
- DbgPrint("set_map_drive: hash=0x%lx path=%s\n",
- drivemap->hash, drivemap->name);
++ DbgPrint("hash=0x%lx path=%s", drivemap->hash, drivemap->name);
+
+ dm = (struct drive_map *) & DriveMapList.next;
+
+ down(&DriveMapLock);
+
+ list_for_each(list, &DriveMapList) {
+ dm = list_entry(list, struct drive_map, list);
- DbgPrint("set_map_drive: dm=0x%p\n"
++ __DbgPrint("%s: dm=0x%p\n"
+ " hash: 0x%lx\n"
+ " namelen: %d\n"
- " name: %s\n",
++ " name: %s\n", __func__,
+ dm, dm->hash, dm->namelen, dm->name);
+
+ if (drivemap->hash == dm->hash) {
+ if (0 ==
+ strcmp(dm->name, drivemap->name)) {
+ dm = NULL;
+ break;
+ }
+ } else if (drivemap->hash < dm->hash) {
+ break;
+ }
+ }
+
+ if (dm) {
+ if ((dm == (struct drive_map *) & DriveMapList) ||
+ (dm->hash < drivemap->hash)) {
+ list_add(&drivemap->list, &dm->list);
+ } else {
+ list_add_tail(&drivemap->list,
+ &dm->list);
+ }
+ }
+ else
+ kfree(drivemap);
+ up(&DriveMapLock);
+ return (retVal);
+}
+
+static int unmap_drive(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ int retVal = NWE_REQUESTER_FAILURE;
+ struct nwc_unmap_drive_ex symInfo;
+ char *path;
+ struct drive_map *dm;
+ struct list_head *list;
+ unsigned long hash;
+
+
+ retVal = novfs_unmap_drive(pdata, Session);
+ if (retVal)
+ return retVal;
+ if (copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo)))
+ return -EFAULT;
+
+ path = kmalloc(symInfo.linkLen, GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+ if (copy_from_user(path,((struct nwc_unmap_drive_ex *) pdata->reqData)->linkData, symInfo.linkLen)) {
+ kfree(path);
+ return -EFAULT;
+ }
+
+ hash = full_name_hash(path, symInfo.linkLen - 1);
- DbgPrint("NwdUnMapDrive: hash=0x%x path=%s\n", hash,
- path);
++ DbgPrint("hash=0x%x path=%s", hash, path);
+
+ dm = NULL;
+
+ down(&DriveMapLock);
+
+ list_for_each(list, &DriveMapList) {
+ dm = list_entry(list, struct drive_map, list);
- DbgPrint("NwdUnMapDrive: dm=0x%p %s\n"
++ __DbgPrint("%s: dm=0x%p %s\n"
+ " hash: 0x%x\n"
- " namelen: %d\n",
++ " namelen: %d\n", __func__,
+ dm, dm->name, dm->hash, dm->namelen);
+
+ if (hash == dm->hash) {
+ if (0 == strcmp(dm->name, path)) {
+ break;
+ }
+ } else if (hash < dm->hash) {
+ dm = NULL;
+ break;
+ }
+ }
+
+ if (dm) {
- DbgPrint("NwdUnMapDrive: Remove dm=0x%p %s\n"
++ __DbgPrint("%s: Remove dm=0x%p %s\n"
+ " hash: 0x%x\n"
- " namelen: %d\n",
++ " namelen: %d\n", __func__,
+ dm, dm->name, dm->hash, dm->namelen);
+ list_del(&dm->list);
+ kfree(dm);
+ }
+
+ up(&DriveMapLock);
+ return (retVal);
+}
+
+static void RemoveDriveMaps(void)
+{
+ struct drive_map *dm;
+ struct list_head *list;
+
+ down(&DriveMapLock);
+ list_for_each(list, &DriveMapList) {
+ dm = list_entry(list, struct drive_map, list);
+
- DbgPrint("RemoveDriveMap: dm=0x%p\n"
++ __DbgPrint("%s: dm=0x%p\n"
+ " hash: 0x%x\n"
+ " namelen: %d\n"
- " name: %s\n",
++ " name: %s\n", __func__,
+ dm, dm->hash, dm->namelen, dm->name);
+ local_unlink(dm->name);
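+		/* Step the cursor back before deleting so that
+		 * list_for_each() continues safely from the predecessor
+		 * of the removed node. */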
+ list = list->prev;
+ list_del(&dm->list);
+ kfree(dm);
+ }
+ up(&DriveMapLock);
+}
+
+/* As picked from do_unlinkat() */
+
+static long local_unlink(const char *pathname)
+{
+ int error;
+ struct dentry *dentry;
+ char *name, *c;
+ struct nameidata nd;
+ struct inode *inode = NULL;
+
+ error = path_lookup(pathname, LOOKUP_PARENT, &nd);
- DbgPrint("local_unlink: path_lookup %s error: %d\n", pathname, error);
++ DbgPrint("path_lookup %s error: %d\n", pathname, error);
+ if (error)
+ return error;
+
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto exit1;
+ mutex_lock(&nd.path.dentry->d_inode->i_mutex);
+ /* Get the filename of pathname */
+ name=c=(char *)pathname;
+ while (*c!='\0') {
+ if (*c=='/')
+ name=++c;
+ c++;
+ }
+ dentry = lookup_one_len(name, nd.path.dentry, strlen(name));
+ error = PTR_ERR(dentry);
- DbgPrint("local_unlink: dentry %p\n", dentry);
++ DbgPrint("dentry %p", dentry);
+	if (!IS_ERR(dentry) &&
+	    (!dentry->d_inode || !S_ISLNK(dentry->d_inode->i_mode))) {
-		DbgPrint("local_unlink: %s not a link", name);
++		DbgPrint("%s not a link", name);
+		error = -ENOENT;
+		/* don't leak the dentry or leave i_mutex held on this path */
+		dput(dentry);
+		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+		goto exit1;
+	}
+
+ if (!IS_ERR(dentry)) {
+ /* Why not before? Because we want correct error value */
+ if (nd.last.name[nd.last.len])
+ goto slashes;
+ inode = dentry->d_inode;
+ if (inode)
+ atomic_inc(&inode->i_count);
+ error = mnt_want_write(nd.path.mnt);
- DbgPrint("local_unlink: inode %p mnt_want_write error %d\n", inode, error);
++ DbgPrint("inode %p mnt_want_write error %d", inode, error);
+ if (error)
+ goto exit2;
- error = vfs_unlink(nd.path.dentry->d_inode, dentry, nd.path.mnt);
++ error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ mnt_drop_write(nd.path.mnt);
+ exit2:
+ dput(dentry);
+ }
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ if (inode)
+ iput(inode); /* truncate the inode here */
+exit1:
+ path_put(&nd.path);
- DbgPrint("local_unlink: returning error %d\n", error);
++ DbgPrint("returning error %d", error);
+ return error;
+
+slashes:
+ error = !dentry->d_inode ? -ENOENT :
+ S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+ goto exit2;
+}
+
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * This file contains functions for accessing files through the daemon.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/dcache.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+
+#include "vfs.h"
+#include "commands.h"
+#include "nwerror.h"
+
+static ssize_t novfs_tree_read(struct file * file, char *buf, size_t len, loff_t * off);
+extern struct dentry_operations novfs_dentry_operations;
+
+static struct file_operations novfs_tree_operations = {
+	.read = novfs_tree_read,
+};
+
+/*
+ * StripTrailingDots was added because some apps will
+ * try and create a file name with a trailing dot. NetWare
+ * doesn't like this and will return an error.
+ */
+static int StripTrailingDots = 1;
+
+int novfs_get_alltrees(struct dentry *parent)
+{
+ unsigned char *p;
+ struct novfs_command_reply_header * reply = NULL;
+ unsigned long replylen = 0;
+ struct novfs_command_request_header cmd;
+ int retCode;
+ struct dentry *entry;
+ struct qstr name;
+ struct inode *inode;
+
+ cmd.CommandType = 0;
+ cmd.SequenceNumber = 0;
+ SC_INITIALIZE(cmd.SessionId);
+
- DbgPrint("novfs_get_alltrees:\n");
++ DbgPrint("");
+
+ retCode = Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply, &replylen, INTERRUPTIBLE);
- DbgPrint("novfs_get_alltrees: relpy=0x%p replylen=%d\n", reply,
- replylen);
++ DbgPrint("reply=0x%p replylen=%d", reply, replylen);
+ if (reply) {
+ novfs_dump(replylen, reply);
+ if (!reply->ErrorCode
+ && (replylen > sizeof(struct novfs_command_reply_header))) {
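+			/* Skip the reply header (assumed 8 bytes here); the
+			 * rest of the reply is a NUL-separated list of tree
+			 * names. */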
+ p = (char *)reply + 8;
+ while (*p) {
- DbgPrint("novfs_get_alltrees: %s\n", p);
++ DbgPrint("%s", p);
+ name.len = strlen(p);
+ name.name = p;
+ name.hash = full_name_hash(name.name, name.len);
+ entry = d_lookup(parent, &name);
+ if (NULL == entry) {
- DbgPrint("novfs_get_alltrees: adding %s\n", p);
++ DbgPrint("adding %s", p);
+ entry = d_alloc(parent, &name);
+ if (entry) {
+ entry->d_op = &novfs_dentry_operations;
+ inode = novfs_get_inode(parent->d_sb, S_IFREG | 0400, 0, 0, 0, &name);
+ if (inode) {
+ inode->i_fop = &novfs_tree_operations;
+ d_add(entry, inode);
+ }
+ }
+ }
+ p += (name.len + 1);
+ }
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+static ssize_t novfs_tree_read(struct file * file, char *buf, size_t len, loff_t * off)
+{
+	if (*off != 0) {
+		return (0);
+	}
+	if (copy_to_user(buf, "Tree\n", 5)) {
+		return (-EFAULT);
+	}
+	*off += 5;
+	return (5);
+}
+
+int novfs_get_servers(unsigned char ** ServerList, struct novfs_schandle SessionId)
+{
+ struct novfs_get_connected_server_list req;
+ struct novfs_get_connected_server_list_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0;
+
+ *ServerList = NULL;
+
+ req.Command.CommandType = VFS_COMMAND_GET_CONNECTED_SERVER_LIST;
+ req.Command.SessionId = SessionId;
+
+ retCode =
+ Queue_Daemon_Command(&req, sizeof(req), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
- DbgPrint("novfs_Get_Connected_Server_List: reply\n");
++ DbgPrint("reply");
+ replylen -= sizeof(struct novfs_command_reply_header);
+ if (!reply->Reply.ErrorCode && replylen) {
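+			/* Slide the server list down over the reply header;
+			 * source and destination overlap, hence memmove()
+			 * below. */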
+			memmove(reply, reply->List, replylen);
+ *ServerList = (unsigned char *) reply;
+ retCode = 0;
+ } else {
+ kfree(reply);
+ retCode = -ENOENT;
+ }
+ }
+ return (retCode);
+}
+
+int novfs_get_vols(struct qstr *Server, unsigned char ** VolumeList,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_get_server_volume_list *req;
+ struct novfs_get_server_volume_list_reply *reply = NULL;
+ unsigned long replylen = 0, reqlen;
+ int retCode;
+
+ *VolumeList = NULL;
+ reqlen = sizeof(struct novfs_get_server_volume_list) + Server->len;
+ req = kmalloc(reqlen, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+ req->Command.CommandType = VFS_COMMAND_GET_SERVER_VOLUME_LIST;
+ req->Length = Server->len;
+ memcpy(req->Name, Server->name, Server->len);
+ req->Command.SessionId = SessionId;
+
+ retCode =
+ Queue_Daemon_Command(req, reqlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
- DbgPrint("novfs_Get_Server_Volume_List: reply\n");
++ DbgPrint("reply");
+ novfs_dump(replylen, reply);
+ replylen -= sizeof(struct novfs_command_reply_header);
+
+ if (!reply->Reply.ErrorCode && replylen) {
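+			/* As in novfs_get_servers(): the regions overlap, so
+			 * the list is slid down with memmove(). */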
+			memmove(reply, reply->List, replylen);
+ *VolumeList = (unsigned char *) reply;
+ retCode = 0;
+ } else {
+ kfree(reply);
+ retCode = -ENOENT;
+ }
+ }
+ kfree(req);
+ return (retCode);
+}
+
+int novfs_get_file_info(unsigned char * Path, struct novfs_entry_info * Info, struct novfs_schandle SessionId)
+{
+ struct novfs_verify_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ struct novfs_verify_file_request * cmd;
+ int cmdlen;
+ int retCode = -ENOENT;
+ int pathlen;
+
- DbgPrint("novfs_Get_File_Info: Path = %s\n", Path);
++ DbgPrint("Path = %s", Path);
+
+ Info->mode = S_IFDIR | 0700;
- Info->uid = current->uid;
- Info->gid = current->gid;
++ Info->uid = current_uid();
++ Info->gid = current_gid();
+ Info->size = 0;
+ Info->atime = Info->mtime = Info->ctime = CURRENT_TIME;
+
+ if (Path && *Path) {
+ pathlen = strlen(Path);
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+ cmdlen = offsetof(struct novfs_verify_file_request,path) + pathlen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_VERIFY_FILE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+ cmd->pathLen = pathlen;
+ memcpy(cmd->path, Path, cmd->pathLen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0,
+ (void *)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+
+ if (reply->Reply.ErrorCode) {
+ retCode = -ENOENT;
+ } else {
+ Info->type = 3;
+ Info->mode = S_IRWXU;
+
+				if (reply->fileMode & NW_ATTRIBUTE_DIRECTORY) {
+					Info->mode |= S_IFDIR;
+				} else {
+					Info->mode |= S_IFREG;
+				}
+
+				if (reply->fileMode & NW_ATTRIBUTE_READ_ONLY) {
+					Info->mode &= ~(S_IWUSR);
+				}
+
- Info->uid = current->euid;
- Info->gid = current->egid;
++ Info->uid = current_euid();
++ Info->gid = current_egid();
+ Info->size = reply->fileSize;
+ Info->atime.tv_sec =
+ reply->lastAccessTime;
+ Info->atime.tv_nsec = 0;
+ Info->mtime.tv_sec = reply->modifyTime;
+ Info->mtime.tv_nsec = 0;
+ Info->ctime.tv_sec = reply->createTime;
+ Info->ctime.tv_nsec = 0;
- DbgPrint
- ("novfs_Get_File_Info: replylen=%d sizeof(VERIFY_FILE_REPLY)=%d\n",
++ DbgPrint("replylen=%d sizeof(VERIFY_FILE_REPLY)=%d",
+ replylen,
+ sizeof(struct novfs_verify_file_reply));
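+				/* Some daemons append one extra word after the
+				 * fixed reply; it apparently carries the modify
+				 * time in nanoseconds. */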
+ if (replylen >
+ sizeof(struct novfs_verify_file_reply)) {
+ unsigned int *lp =
+ &reply->fileMode;
+ lp++;
- DbgPrint
- ("novfs_Get_File_Info: extra data 0x%x\n",
- *lp);
++ DbgPrint("extra data 0x%x",
++ *lp);
+ Info->mtime.tv_nsec = *lp;
+ }
+ retCode = 0;
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ }
+ }
+
- DbgPrint("novfs_Get_File_Info: return 0x%x\n", retCode);
++ DbgPrint("return 0x%x", retCode);
+ return (retCode);
+}
+
+int novfs_getx_file_info(char *Path, const char *Name, char *buffer,
+ ssize_t buffer_size, ssize_t * dataLen,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_xa_get_reply *reply = NULL;
+ unsigned long replylen = 0;
+ struct novfs_xa_get_request *cmd;
+ int cmdlen;
+ int retCode = -ENOENT;
+
+ int namelen = strlen(Name);
+ int pathlen = strlen(Path);
+
- DbgPrint
- ("novfs_GetX_File_Info xattr: Path = %s, pathlen = %i, Name = %s, namelen = %i\n",
- Path, pathlen, Name, namelen);
++ DbgPrint("xattr: Path = %s, pathlen = %i, Name = %s, namelen = %i",
++ Path, pathlen, Name, namelen);
+
+ if (namelen > MAX_XATTR_NAME_LEN) {
+		return -ENOATTR;
+ }
+
+ cmdlen = offsetof(struct novfs_xa_get_request, data) + pathlen + 1 + namelen + 1; // two '\0'
+ cmd = (struct novfs_xa_get_request *) kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_GET_EXTENDED_ATTRIBUTE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->pathLen = pathlen;
+ memcpy(cmd->data, Path, cmd->pathLen + 1); //+ '\0'
+
+ cmd->nameLen = namelen;
+ memcpy(cmd->data + cmd->pathLen + 1, Name, cmd->nameLen + 1);
+
- DbgPrint("novfs_GetX_File_Info xattr: PXA_GET_REQUEST BEGIN\n");
- DbgPrint
- ("novfs_GetX_File_Info xattr: Queue_Daemon_Command %d\n",
- cmd->Command.CommandType);
- DbgPrint("novfs_GetX_File_Info xattr: Command.SessionId = %d\n",
++ DbgPrint("xattr: PXA_GET_REQUEST BEGIN");
++ DbgPrint("xattr: Queue_Daemon_Command %d",
++ cmd->Command.CommandType);
++ DbgPrint("xattr: Command.SessionId = %d",
+ cmd->Command.SessionId);
- DbgPrint("novfs_GetX_File_Info xattr: pathLen = %d\n",
- cmd->pathLen);
- DbgPrint("novfs_GetX_File_Info xattr: Path = %s\n", cmd->data);
- DbgPrint("novfs_GetX_File_Info xattr: nameLen = %d\n",
- cmd->nameLen);
- DbgPrint("novfs_GetX_File_Info xattr: name = %s\n",
- (cmd->data + cmd->pathLen + 1));
- DbgPrint("novfs_GetX_File_Info xattr: PXA_GET_REQUEST END\n");
++ DbgPrint("xattr: pathLen = %d", cmd->pathLen);
++ DbgPrint("xattr: Path = %s", cmd->data);
++ DbgPrint("xattr: nameLen = %d", cmd->nameLen);
++ DbgPrint("xattr: name = %s", (cmd->data + cmd->pathLen + 1));
++ DbgPrint("xattr: PXA_GET_REQUEST END");
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
+ if (reply) {
+
+ if (reply->Reply.ErrorCode) {
- DbgPrint
- ("novfs_GetX_File_Info xattr: reply->Reply.ErrorCode=%d, %X\n",
- reply->Reply.ErrorCode,
- reply->Reply.ErrorCode);
- DbgPrint
- ("novfs_GetX_File_Info xattr: replylen=%d\n",
- replylen);
++ DbgPrint("xattr: reply->Reply.ErrorCode=%d, %X",
++ reply->Reply.ErrorCode,
++ reply->Reply.ErrorCode);
++ DbgPrint("xattr: replylen=%d", replylen);
+
+ //0xC9 = EA not found (C9), 0xD1 = EA access denied
+ if ((reply->Reply.ErrorCode == 0xC9)
+ || (reply->Reply.ErrorCode == 0xD1)) {
+ retCode = -ENOATTR;
+ } else {
+ retCode = -ENOENT;
+ }
+ } else {
+
+ *dataLen =
+ replylen - sizeof(struct novfs_command_reply_header);
- DbgPrint
- ("novfs_GetX_File_Info xattr: replylen=%u, dataLen=%u\n",
- replylen, *dataLen);
++ DbgPrint("xattr: replylen=%u, dataLen=%u",
++ replylen, *dataLen);
+
+ if (buffer_size >= *dataLen) {
- DbgPrint
- ("novfs_GetX_File_Info xattr: copying to buffer from &reply->pData\n");
++ DbgPrint("xattr: copying to buffer from &reply->pData");
+ memcpy(buffer, &reply->pData, *dataLen);
+
+ retCode = 0;
+ } else {
- DbgPrint
- ("novfs_GetX_File_Info xattr: (!!!) buffer is smaller then reply\n");
++ DbgPrint("xattr: (!!!) buffer is smaller then reply");
+ retCode = -ERANGE;
+ }
- DbgPrint
- ("novfs_GetX_File_Info xattr: /dumping buffer\n");
++ DbgPrint("xattr: /dumping buffer");
+ novfs_dump(*dataLen, buffer);
- DbgPrint
- ("novfs_GetX_File_Info xattr: \\after dumping buffer\n");
++ DbgPrint("xattr: \\after dumping buffer");
+ }
+
+ kfree(reply);
+ } else {
- DbgPrint("novfs_GetX_File_Info xattr: reply = NULL\n");
++ DbgPrint("xattr: reply = NULL");
+ }
+ kfree(cmd);
+
+ }
+
+ return retCode;
+}
+
+int novfs_setx_file_info(char *Path, const char *Name, const void *Value,
+ unsigned long valueLen, unsigned long *bytesWritten,
+ int flags, struct novfs_schandle SessionId)
+{
+ struct novfs_xa_set_reply *reply = NULL;
+ unsigned long replylen = 0;
+ struct novfs_xa_set_request *cmd;
+ int cmdlen;
+ int retCode = -ENOENT;
+
+ int namelen = strlen(Name);
+ int pathlen = strlen(Path);
+
- DbgPrint
- ("novfs_SetX_File_Info xattr: Path = %s, pathlen = %i, Name = %s, namelen = %i, value len = %u\n",
- Path, pathlen, Name, namelen, valueLen);
++ DbgPrint("xattr: Path = %s, pathlen = %i, Name = %s, namelen = %i, "
++ "value len = %u", Path, pathlen, Name, namelen, valueLen);
+
+ if (namelen > MAX_XATTR_NAME_LEN) {
+		return -ENOATTR;
+ }
+
+ cmdlen = offsetof(struct novfs_xa_set_request, data) + pathlen + 1 + namelen + 1 + valueLen;
+ cmd = (struct novfs_xa_set_request *) kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_SET_EXTENDED_ATTRIBUTE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->flags = flags;
+ cmd->pathLen = pathlen;
+		memcpy(cmd->data, Path, cmd->pathLen + 1);	//+ '\0'
+
+ cmd->nameLen = namelen;
+ memcpy(cmd->data + cmd->pathLen + 1, Name, cmd->nameLen + 1);
+
+ cmd->valueLen = valueLen;
+ memcpy(cmd->data + cmd->pathLen + 1 + cmd->nameLen + 1, Value,
+ valueLen);
+
- DbgPrint("novfs_SetX_File_Info xattr: PXA_SET_REQUEST BEGIN\n");
- DbgPrint
- ("novfs_SetX_File_Info xattr: Queue_Daemon_Command %d\n",
- cmd->Command.CommandType);
- DbgPrint("novfs_SetX_File_Info xattr: Command.SessionId = %d\n",
++ DbgPrint("xattr: PXA_SET_REQUEST BEGIN");
++ DbgPrint("attr: Queue_Daemon_Command %d",
++ cmd->Command.CommandType);
++ DbgPrint("xattr: Command.SessionId = %d",
+ cmd->Command.SessionId);
- DbgPrint("novfs_SetX_File_Info xattr: pathLen = %d\n",
- cmd->pathLen);
- DbgPrint("novfs_SetX_File_Info xattr: Path = %s\n", cmd->data);
- DbgPrint("novfs_SetX_File_Info xattr: nameLen = %d\n",
- cmd->nameLen);
- DbgPrint("novfs_SetX_File_Info xattr: name = %s\n",
- (cmd->data + cmd->pathLen + 1));
++ DbgPrint("xattr: pathLen = %d", cmd->pathLen);
++ DbgPrint("xattr: Path = %s", cmd->data);
++ DbgPrint("xattr: nameLen = %d", cmd->nameLen);
++ DbgPrint("xattr: name = %s", (cmd->data + cmd->pathLen + 1));
+ novfs_dump(valueLen < 16 ? valueLen : 16, (char *)Value);
+
- DbgPrint("novfs_SetX_File_Info xattr: PXA_SET_REQUEST END\n");
++ DbgPrint("xattr: PXA_SET_REQUEST END");
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
+ if (reply) {
+
+ if (reply->Reply.ErrorCode) {
- DbgPrint
- ("novfs_SetX_File_Info xattr: reply->Reply.ErrorCode=%d, %X\n",
- reply->Reply.ErrorCode,
- reply->Reply.ErrorCode);
- DbgPrint
- ("novfs_SetX_File_Info xattr: replylen=%d\n",
- replylen);
++ DbgPrint("xattr: reply->Reply.ErrorCode=%d, %X",
++ reply->Reply.ErrorCode,
++ reply->Reply.ErrorCode);
++ DbgPrint("xattr: replylen=%d", replylen);
+
+ retCode = -reply->Reply.ErrorCode; //-ENOENT;
+ } else {
+
- DbgPrint
- ("novfs_SetX_File_Info xattr: replylen=%u, real len = %u\n",
- replylen,
- replylen - sizeof(struct novfs_command_reply_header));
++ DbgPrint("xattr: replylen=%u, real len = %u",
++ replylen,
++ replylen - sizeof(struct novfs_command_reply_header));
+ memcpy(bytesWritten, &reply->pData,
+ replylen - sizeof(struct novfs_command_reply_header));
+
+ retCode = 0;
+ }
+
+ kfree(reply);
+ } else {
- DbgPrint("novfs_SetX_File_Info xattr: reply = NULL\n");
++ DbgPrint("xattr: reply = NULL");
+ }
+ kfree(cmd);
+
+ }
+
+ return retCode;
+}
+
+int novfs_listx_file_info(char *Path, char *buffer, ssize_t buffer_size,
+ ssize_t * dataLen, struct novfs_schandle SessionId)
+{
+ struct novfs_xa_list_reply *reply = NULL;
+ unsigned long replylen = 0;
+ struct novfs_verify_file_request *cmd;
+ int cmdlen;
+ int retCode = -ENOENT;
+
+ int pathlen = strlen(Path);
- DbgPrint("novfs_ListX_File_Info xattr: Path = %s, pathlen = %i\n", Path,
- pathlen);
++ DbgPrint("xattr: Path = %s, pathlen = %i", Path, pathlen);
+
+ *dataLen = 0;
+ cmdlen = offsetof(struct novfs_verify_file_request, path) + pathlen;
+ cmd = (struct novfs_verify_file_request *) kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_LIST_EXTENDED_ATTRIBUTES;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+ cmd->pathLen = pathlen;
+ memcpy(cmd->path, Path, cmd->pathLen + 1); //+ '\0'
- DbgPrint
- ("novfs_ListX_File_Info xattr: PVERIFY_FILE_REQUEST BEGIN\n");
- DbgPrint
- ("novfs_ListX_File_Info xattr: Queue_Daemon_Command %d\n",
++ DbgPrint("xattr: PVERIFY_FILE_REQUEST BEGIN");
++ DbgPrint("xattr: Queue_Daemon_Command %d",
+ cmd->Command.CommandType);
- DbgPrint
- ("novfs_ListX_File_Info xattr: Command.SessionId = %d\n",
++ DbgPrint("xattr: Command.SessionId = %d",
+ cmd->Command.SessionId);
- DbgPrint("novfs_ListX_File_Info xattr: pathLen = %d\n",
- cmd->pathLen);
- DbgPrint("novfs_ListX_File_Info xattr: Path = %s\n", cmd->path);
- DbgPrint
- ("novfs_ListX_File_Info xattr: PVERIFY_FILE_REQUEST END\n");
++ DbgPrint("xattr: pathLen = %d", cmd->pathLen);
++ DbgPrint("xattr: Path = %s", cmd->path);
++ DbgPrint("xattr: PVERIFY_FILE_REQUEST END");
+
- retCode =
- Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
- &replylen, INTERRUPTIBLE);
++ retCode = Queue_Daemon_Command(cmd, cmdlen, NULL, 0,
++ (void *)&reply, &replylen,
++ INTERRUPTIBLE);
+
+ if (reply) {
+
+ if (reply->Reply.ErrorCode) {
- DbgPrint
- ("novfs_ListX_File_Info xattr: reply->Reply.ErrorCode=%d, %X\n",
- reply->Reply.ErrorCode,
- reply->Reply.ErrorCode);
- DbgPrint
- ("novfs_ListX_File_Info xattr: replylen=%d\n",
- replylen);
++ DbgPrint("xattr: reply->Reply.ErrorCode=%d, %X",
++ reply->Reply.ErrorCode,
++ reply->Reply.ErrorCode);
++ DbgPrint("xattr: replylen=%d", replylen);
+
+ retCode = -ENOENT;
+ } else {
+ *dataLen =
+ replylen - sizeof(struct novfs_command_reply_header);
- DbgPrint
- ("novfs_ListX_File_Info xattr: replylen=%u, dataLen=%u\n",
- replylen, *dataLen);
++ DbgPrint("xattr: replylen=%u, dataLen=%u",
++ replylen, *dataLen);
+
+ if (buffer_size >= *dataLen) {
- DbgPrint
- ("novfs_ListX_File_Info xattr: copying to buffer from &reply->pData\n");
++ DbgPrint("xattr: copying to buffer "
++ "from &reply->pData");
+ memcpy(buffer, &reply->pData, *dataLen);
+ } else {
- DbgPrint
- ("novfs_ListX_File_Info xattr: (!!!) buffer is smaller then reply\n");
++ DbgPrint("xattr: (!!!) buffer is "
++ "smaller then reply\n");
+ retCode = -ERANGE;
+ }
- DbgPrint
- ("novfs_ListX_File_Info xattr: /dumping buffer\n");
++ DbgPrint("xattr: /dumping buffer");
+ novfs_dump(*dataLen, buffer);
- DbgPrint
- ("novfs_ListX_File_Info xattr: \\after dumping buffer\n");
++ DbgPrint("xattr: \\after dumping buffer");
+
+ retCode = 0;
+ }
+
+ kfree(reply);
+ } else {
- DbgPrint("novfs_ListX_File_Info xattr: reply = NULL\n");
++ DbgPrint("xattr: reply = NULL");
+ }
+ kfree(cmd);
+
+ }
+
+ return retCode;
+}
+
+static int begin_directory_enumerate(unsigned char * Path, int PathLen, void ** EnumHandle,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_begin_enumerate_directory_request *cmd;
+ struct novfs_begin_enumerate_directory_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode, cmdlen;
+
+ *EnumHandle = 0;
+
+	cmdlen = offsetof(struct novfs_begin_enumerate_directory_request,
+			  path) + PathLen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_START_ENUMERATE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->pathLen = PathLen;
+ memcpy(cmd->path, Path, PathLen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ } else {
+ *EnumHandle = reply->enumerateHandle;
+ retCode = 0;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ } else {
+ retCode = -ENOMEM;
+ }
+ return (retCode);
+}
+
+int novfs_end_directory_enumerate(void *EnumHandle, struct novfs_schandle SessionId)
+{
+ struct novfs_end_enumerate_directory_request cmd;
+ struct novfs_end_enumerate_directory_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode;
+
+ cmd.Command.CommandType = VFS_COMMAND_END_ENUMERATE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.enumerateHandle = EnumHandle;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, 0);
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+
+ return (retCode);
+}
+
+static int directory_enumerate_ex(void ** EnumHandle, struct novfs_schandle SessionId, int *Count,
+ struct novfs_entry_info **PInfo, int Interrupt)
+{
+ struct novfs_enumerate_directory_ex_request cmd;
+ struct novfs_enumerate_directory_ex_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0;
+ struct novfs_entry_info * info;
+ struct novfs_enumerate_directory_ex_data *data;
+ int isize;
+
+ if (PInfo)
+ *PInfo = NULL;
+ *Count = 0;
+
+ cmd.Command.CommandType = VFS_COMMAND_ENUMERATE_DIRECTORY_EX;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.enumerateHandle = *EnumHandle;
+ cmd.pathLen = 0;
+ cmd.path[0] = '\0';
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, Interrupt);
+
+ if (reply) {
+ retCode = 0;
+ /*
+ * The VFS_COMMAND_ENUMERATE_DIRECTORY call can return an
+ * error but there could still be valid data.
+ */
+
+ if (!reply->Reply.ErrorCode ||
+ ((replylen > sizeof(struct novfs_command_reply_header)) &&
+ (reply->enumCount > 0))) {
- DbgPrint("directory_enumerate_ex: isize=%d\n",
- replylen);
++ DbgPrint("isize=%d", replylen);
+			data = (struct novfs_enumerate_directory_ex_data *)
+			    ((char *)reply +
+			     sizeof(struct novfs_enumerate_directory_ex_reply));
+			isize = replylen -
+			    sizeof(struct novfs_enumerate_directory_ex_reply) -
+			    reply->enumCount *
+			    offsetof(struct novfs_enumerate_directory_ex_data, name);
+			isize += reply->enumCount *
+			    offsetof(struct novfs_entry_info, name);
+
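+			/* Repack the daemon's variable-length enumerate
+			 * records into an array of novfs_entry_info,
+			 * translating NetWare attribute bits into POSIX mode
+			 * bits along the way. */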
+ if (PInfo) {
+ *PInfo = info = kmalloc(isize, GFP_KERNEL);
+ if (*PInfo) {
- DbgPrint
- ("directory_enumerate_ex1: data=0x%p info=0x%p\n",
- data, info);
++ DbgPrint("data=0x%p info=0x%p",
++ data, info);
+ *Count = reply->enumCount;
+ do {
- DbgPrint
- ("directory_enumerate_ex2: data=0x%p length=%d\n",
- data);
++ DbgPrint("data=0x%p length=%d",
++ data);
+
+ info->type = 3;
+ info->mode = S_IRWXU;
+
+					if (data->mode & NW_ATTRIBUTE_DIRECTORY) {
+						info->mode |= S_IFDIR;
+						info->mode |= S_IXUSR;
+					} else {
+						info->mode |= S_IFREG;
+					}
+
+					if (data->mode & NW_ATTRIBUTE_READ_ONLY) {
+						info->mode &= ~(S_IWUSR);
+					}
+
+					if (data->mode & NW_ATTRIBUTE_EXECUTE) {
+						info->mode |= S_IXUSR;
+					}
+
- info->uid = current->euid;
- info->gid = current->egid;
++ info->uid = current_euid();
++ info->gid = current_egid();
+ info->size = data->size;
+ info->atime.tv_sec =
+ data->lastAccessTime;
+ info->atime.tv_nsec = 0;
+ info->mtime.tv_sec =
+ data->modifyTime;
+ info->mtime.tv_nsec = 0;
+ info->ctime.tv_sec =
+ data->createTime;
+ info->ctime.tv_nsec = 0;
+ info->namelength =
+ data->nameLen;
+ memcpy(info->name, data->name,
+ data->nameLen);
+					data = (struct novfs_enumerate_directory_ex_data *)
+					    &data->name[data->nameLen];
+					replylen = (int)((char *)&info->name[info->namelength] -
+							 (char *)info);
- DbgPrint
- ("directory_enumerate_ex3: info=0x%p\n",
- info);
++ DbgPrint("info=0x%p", info);
+ novfs_dump(replylen, info);
+
+					info = (struct novfs_entry_info *)
+					    &info->name[info->namelength];
+
+ } while (--reply->enumCount);
+ }
+ }
+
+ if (reply->Reply.ErrorCode) {
+				retCode = -1;	/* end of data */
+ }
+ *EnumHandle = reply->enumerateHandle;
+ } else {
+ retCode = -ENODATA;
+ }
+ kfree(reply);
+ }
+
+ return (retCode);
+}
+
+int novfs_get_dir_listex(unsigned char * Path, void ** EnumHandle, int *Count,
+ struct novfs_entry_info **Info,
+ struct novfs_schandle SessionId)
+{
+ int retCode = -ENOENT;
+
+ if (Count)
+ *Count = 0;
+ if (Info)
+ *Info = NULL;
+
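+	/* *EnumHandle doubles as the iteration state: 0 means not started,
+	 * (void *)-1 means exhausted, anything else is a live daemon
+	 * handle. */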
+	if ((void *)-1 == *EnumHandle) {
+ return (-ENODATA);
+ }
+
+ if (0 == *EnumHandle) {
+ retCode =
+ begin_directory_enumerate(Path, strlen(Path), EnumHandle,
+ SessionId);
+ }
+
+ if (*EnumHandle) {
+ retCode =
+ directory_enumerate_ex(EnumHandle, SessionId, Count, Info,
+ INTERRUPTIBLE);
+ if (retCode) {
+ novfs_end_directory_enumerate(*EnumHandle, SessionId);
+ retCode = 0;
+ *EnumHandle = Uint32toHandle(-1);
+ }
+ }
+ return (retCode);
+}
+
+int novfs_open_file(unsigned char * Path, int Flags, struct novfs_entry_info * Info,
+ void ** Handle,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_open_file_request *cmd;
+ struct novfs_open_file_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen, pathlen;
+
+ pathlen = strlen(Path);
+
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+
+ *Handle = 0;
+
+ cmdlen = offsetof(struct novfs_open_file_request, path) + pathlen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_OPEN_FILE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->access = 0;
+
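+		/* Map the POSIX access mode onto the daemon's access bits:
+		 * O_RDONLY and O_RDWR request read, O_WRONLY and O_RDWR
+		 * request write. */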
+ if (!(Flags & O_WRONLY) || (Flags & O_RDWR)) {
+ cmd->access |= NWD_ACCESS_READ;
+ }
+
+ if ((Flags & O_WRONLY) || (Flags & O_RDWR)) {
+ cmd->access |= NWD_ACCESS_WRITE;
+ }
+
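+		/* Translate the O_CREAT/O_EXCL/O_TRUNC combinations into the
+		 * daemon's create dispositions, which apparently mirror the
+		 * Win32 CreateFile() semantics. */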
+ switch (Flags & (O_CREAT | O_EXCL | O_TRUNC)) {
+ case O_CREAT:
+ cmd->disp = NWD_DISP_OPEN_ALWAYS;
+ break;
+
+ case O_CREAT | O_EXCL:
+ cmd->disp = NWD_DISP_CREATE_NEW;
+ break;
+
+ case O_TRUNC:
+ cmd->disp = NWD_DISP_CREATE_ALWAYS;
+ break;
+
+ case O_CREAT | O_TRUNC:
+ cmd->disp = NWD_DISP_CREATE_ALWAYS;
+ break;
+
+ case O_CREAT | O_EXCL | O_TRUNC:
+ cmd->disp = NWD_DISP_CREATE_NEW;
+ break;
+
+ default:
+ cmd->disp = NWD_DISP_OPEN_EXISTING;
+ break;
+ }
+
+ cmd->mode = NWD_SHARE_READ | NWD_SHARE_WRITE | NWD_SHARE_DELETE;
+
+ cmd->pathLen = pathlen;
+ memcpy(cmd->path, Path, pathlen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ if (NWE_OBJECT_EXISTS == reply->Reply.ErrorCode) {
+ retCode = -EEXIST;
+ } else if (NWE_ACCESS_DENIED ==
+ reply->Reply.ErrorCode) {
+ retCode = -EACCES;
+ } else if (NWE_FILE_IN_USE ==
+ reply->Reply.ErrorCode) {
+ retCode = -EBUSY;
+ } else {
+ retCode = -ENOENT;
+ }
+ } else {
+ *Handle = reply->handle;
+ retCode = 0;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ } else {
+ retCode = -ENOMEM;
+ }
+ return (retCode);
+}
+
+int novfs_create(unsigned char * Path, int DirectoryFlag, struct novfs_schandle SessionId)
+{
+ struct novfs_create_file_request *cmd;
+ struct novfs_create_file_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen, pathlen;
+
+ pathlen = strlen(Path);
+
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+
+ cmdlen = offsetof(struct novfs_create_file_request, path) + pathlen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_CREATE_FILE;
+ if (DirectoryFlag) {
+ cmd->Command.CommandType = VFS_COMMAND_CREATE_DIRECOTRY;
+ }
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->pathlength = pathlen;
+ memcpy(cmd->path, Path, pathlen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ if (reply->Reply.ErrorCode == NWE_ACCESS_DENIED)
+ retCode = -EACCES;
+
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+int novfs_close_file(void *Handle, struct novfs_schandle SessionId)
+{
+ struct novfs_close_file_request cmd;
+ struct novfs_close_file_reply *reply;
+ unsigned long replylen = 0;
+ int retCode;
+
+ cmd.Command.CommandType = VFS_COMMAND_CLOSE_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.handle = Handle;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, 0);
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+int novfs_read_file(void *Handle, unsigned char * Buffer, size_t * Bytes,
+ loff_t * Offset, struct novfs_schandle SessionId)
+{
+ struct novfs_read_file_request cmd;
+ struct novfs_read_file_reply * reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0;
+ size_t len;
+
+ len = *Bytes;
+ *Bytes = 0;
+
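+	/* Clamp the request so that the reply header plus data still fits
+	 * in the daemon's maximum I/O size, rounded down to whole pages. */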
+ if (offsetof(struct novfs_read_file_reply, data) + len
+ > novfs_max_iosize) {
+		len = novfs_max_iosize -
+		    offsetof(struct novfs_read_file_reply, data);
+ len = (len / PAGE_SIZE) * PAGE_SIZE;
+ }
+
+ cmd.Command.CommandType = VFS_COMMAND_READ_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.handle = Handle;
+ cmd.len = len;
+ cmd.offset = *Offset;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
- DbgPrint("novfs_Read_File: Queue_Daemon_Command 0x%x replylen=%d\n",
- retCode, replylen);
++ DbgPrint("Queue_Daemon_Command 0x%x replylen=%d", retCode, replylen);
+
+ if (!retCode) {
+ if (reply->Reply.ErrorCode) {
+ if (NWE_FILE_IO_LOCKED == reply->Reply.ErrorCode) {
+ retCode = -EBUSY;
+ } else {
+ retCode = -EIO;
+ }
+ } else {
+			replylen -= offsetof(struct novfs_read_file_reply, data);
+
+ if (replylen > 0) {
+ replylen -=
+ copy_to_user(Buffer, reply->data, replylen);
+ *Bytes = replylen;
+ }
+ }
+ }
+
+ if (reply) {
+ kfree(reply);
+ }
+
- DbgPrint("novfs_Read_File *Bytes=0x%x retCode=0x%x\n", *Bytes, retCode);
++ DbgPrint("*Bytes=0x%x retCode=0x%x", *Bytes, retCode);
+
+ return (retCode);
+}
+
+int novfs_read_pages(void *Handle, struct novfs_data_list *DList,
+ int DList_Cnt, size_t * Bytes, loff_t * Offset,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_read_file_request cmd;
+ struct novfs_read_file_reply * reply = NULL;
+ struct novfs_read_file_reply lreply;
+ unsigned long replylen = 0;
+ int retCode = 0;
+ size_t len;
+
+ len = *Bytes;
+ *Bytes = 0;
+
- DbgPrint
- ("novfs_Read_Pages: Handle=0x%p Dlst=0x%p Dlcnt=%d Bytes=%d Offset=%lld SessionId=0x%p:%p\n",
- Handle, DList, DList_Cnt, len, *Offset, SessionId.hTypeId,
- SessionId.hId);
++ DbgPrint("Handle=0x%p Dlst=0x%p Dlcnt=%d Bytes=%d Offset=%lld "
++ "SessionId=0x%p:%p", Handle, DList, DList_Cnt, len, *Offset,
++ SessionId.hTypeId, SessionId.hId);
+
+ cmd.Command.CommandType = VFS_COMMAND_READ_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.handle = Handle;
+ cmd.len = len;
+ cmd.offset = *Offset;
+
+ /*
+ * Dlst first entry is reserved for reply header.
+ */
+ DList[0].page = NULL;
+ DList[0].offset = &lreply;
+ DList[0].len = offsetof(struct novfs_read_file_reply, data);
+ DList[0].rwflag = DLWRITE;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), DList, DList_Cnt,
+ (void *)&reply, &replylen, INTERRUPTIBLE);
+
- DbgPrint("novfs_Read_Pages: Queue_Daemon_Command 0x%x\n", retCode);
++ DbgPrint("Queue_Daemon_Command 0x%x", retCode);
+
+ if (!retCode) {
+ if (reply) {
+ memcpy(&lreply, reply, sizeof(lreply));
+ }
+
+ if (lreply.Reply.ErrorCode) {
+ if (NWE_FILE_IO_LOCKED == lreply.Reply.ErrorCode) {
+ retCode = -EBUSY;
+ } else {
+ retCode = -EIO;
+ }
+ }
+		*Bytes = replylen -
+		    offsetof(struct novfs_read_file_reply, data);
+ }
+
+ if (reply) {
+ kfree(reply);
+ }
+
- DbgPrint("novfs_Read_Pages: retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+
+ return (retCode);
+}
+
+int novfs_write_file(void *Handle, unsigned char * Buffer, size_t * Bytes,
+ loff_t * Offset, struct novfs_schandle SessionId)
+{
+ struct novfs_write_file_request cmd;
+ struct novfs_write_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0, cmdlen;
+ size_t len;
+
+ unsigned long boff;
+ struct page **pages;
+ struct novfs_data_list *dlist;
+ int res = 0, npage, i;
+ struct novfs_write_file_reply lreply;
+
+ len = *Bytes;
+ cmdlen = offsetof(struct novfs_write_file_request, data);
+
+ *Bytes = 0;
+
+ memset(&lreply, 0, sizeof(lreply));
+
- DbgPrint("novfs_Write_File cmdlen=%ld len=%ld\n", cmdlen, len);
++ DbgPrint("cmdlen=%ld len=%ld", cmdlen, len);
+
+ if ((cmdlen + len) > novfs_max_iosize) {
+ len = novfs_max_iosize - cmdlen;
+ len = (len / PAGE_SIZE) * PAGE_SIZE;
+ }
+ cmd.Command.CommandType = VFS_COMMAND_WRITE_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+ cmd.handle = Handle;
+ cmd.len = len;
+ cmd.offset = *Offset;
+
- DbgPrint("novfs_Write_File cmdlen=%ld len=%ld\n", cmdlen, len);
++ DbgPrint("cmdlen=%ld len=%ld", cmdlen, len);
+
+ npage =
+ (((unsigned long)Buffer & ~PAGE_MASK) + len +
+ (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+ dlist = kmalloc(sizeof(struct novfs_data_list) * (npage + 1), GFP_KERNEL);
+ if (NULL == dlist) {
+ return (-ENOMEM);
+ }
+
+ pages = kmalloc(sizeof(struct page *) * npage, GFP_KERNEL);
+
+ if (NULL == pages) {
+ kfree(dlist);
+ return (-ENOMEM);
+ }
+
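+	/* Try to pin the user buffer pages so the daemon can read them in
+	 * place; if that fails, fall back to a kernel bounce buffer below. */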
+	down_read(&current->mm->mmap_sem);
+
+ res = get_user_pages(current, current->mm, (unsigned long)Buffer, npage, 0, /* read type */
+ 0, /* don't force */
+ pages, NULL);
+
+	up_read(&current->mm->mmap_sem);
+
- DbgPrint("novfs_Write_File res=%d\n", res);
++ DbgPrint("res=%d", res);
+
+ if (res > 0) {
+ boff = (unsigned long)Buffer & ~PAGE_MASK;
+
+ flush_dcache_page(pages[0]);
+ dlist[0].page = pages[0];
+ dlist[0].offset = (char *)boff;
+ dlist[0].len = PAGE_SIZE - boff;
+ dlist[0].rwflag = DLREAD;
+
+ if (dlist[0].len > len) {
+ dlist[0].len = len;
+ }
+
- DbgPrint("novfs_Write_File0: page=0x%p offset=0x%p len=%d\n",
++ DbgPrint("page=0x%p offset=0x%p len=%d",
+ dlist[0].page, dlist[0].offset, dlist[0].len);
+
+ boff = dlist[0].len;
+
- DbgPrint("novfs_Write_File len=%d boff=%d\n", len, boff);
++ DbgPrint("len=%d boff=%d", len, boff);
+
+ for (i = 1; (i < res) && (boff < len); i++) {
+ flush_dcache_page(pages[i]);
+
+ dlist[i].page = pages[i];
+ dlist[i].offset = NULL;
+ dlist[i].len = len - boff;
+ if (dlist[i].len > PAGE_SIZE) {
+ dlist[i].len = PAGE_SIZE;
+ }
+ dlist[i].rwflag = DLREAD;
+
+ boff += dlist[i].len;
- DbgPrint
- ("novfs_Write_File%d: page=0x%p offset=0x%p len=%d\n",
- i, dlist[i].page, dlist[i].offset, dlist[i].len);
++ DbgPrint("%d: page=0x%p offset=0x%p len=%d", i,
++ dlist[i].page, dlist[i].offset, dlist[i].len);
+ }
+
+ dlist[i].page = NULL;
+ dlist[i].offset = &lreply;
+ dlist[i].len = sizeof(lreply);
+ dlist[i].rwflag = DLWRITE;
+ res++;
+
- DbgPrint("novfs_Write_File Buffer=0x%p boff=0x%x len=%d\n",
- Buffer, boff, len);
++ DbgPrint("Buffer=0x%p boff=0x%x len=%d", Buffer, boff, len);
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, dlist, res,
+ (void *)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ } else {
+ char *kdata;
+
+ res = 0;
+
+ kdata = kmalloc(len, GFP_KERNEL);
+ if (kdata) {
+ len -= copy_from_user(kdata, Buffer, len);
+ dlist[0].page = NULL;
+ dlist[0].offset = kdata;
+ dlist[0].len = len;
+ dlist[0].rwflag = DLREAD;
+
+ dlist[1].page = NULL;
+ dlist[1].offset = &lreply;
+ dlist[1].len = sizeof(lreply);
+ dlist[1].rwflag = DLWRITE;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, dlist, 2,
+ (void *)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ kfree(kdata);
+ }
+ }
+
- DbgPrint("novfs_Write_File retCode=0x%x reply=0x%p\n", retCode, reply);
++ DbgPrint("retCode=0x%x reply=0x%p", retCode, reply);
+
+ if (!retCode) {
+ switch (lreply.Reply.ErrorCode) {
+ case 0:
+ *Bytes = (size_t) lreply.bytesWritten;
+ retCode = 0;
+ break;
+
+ case NWE_INSUFFICIENT_SPACE:
+ retCode = -ENOSPC;
+ break;
+
+ case NWE_ACCESS_DENIED:
+ retCode = -EACCES;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
+ }
+
+ if (res) {
+ for (i = 0; i < res; i++) {
+ if (dlist[i].page) {
+ page_cache_release(dlist[i].page);
+ }
+ }
+ }
+
+ kfree(pages);
+ kfree(dlist);
+
- DbgPrint("novfs_Write_File *Bytes=0x%x retCode=0x%x\n", *Bytes,
++ DbgPrint("*Bytes=0x%x retCode=0x%x", *Bytes,
+ retCode);
+
+ return (retCode);
+}
+
+/*
+ * Arguments: HANDLE Handle - novfsd file handle
+ * struct page *Page - Page to be written out
+ * struct novfs_schandle SessionId - novfsd session handle
+ *
+ * Returns: 0 - Success
+ * -ENOSPC - Out of space on server
+ * -EACCES - Access denied
+ * -EIO - Any other error
+ *
+ * Abstract: Write page to file.
+ */
+int novfs_write_page(void *Handle, struct page *Page, struct novfs_schandle SessionId)
+{
+ struct novfs_write_file_request cmd;
+ struct novfs_write_file_reply lreply;
+ struct novfs_write_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0, cmdlen;
+ struct novfs_data_list dlst[2];
+
- DbgPrint
- ("novfs_Write_Page: Handle=0x%p Page=0x%p Index=%lu SessionId=0x%llx\n",
- Handle, Page, Page->index, SessionId);
++ DbgPrint("Handle=0x%p Page=0x%p Index=%lu SessionId=0x%llx",
++ Handle, Page, Page->index, SessionId);
+
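+	/* Two-element data list: slot 0 receives the reply header, slot 1
+	 * supplies the page being written out. */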
+ dlst[0].page = NULL;
+ dlst[0].offset = &lreply;
+ dlst[0].len = sizeof(lreply);
+ dlst[0].rwflag = DLWRITE;
+
+ dlst[1].page = Page;
+ dlst[1].offset = 0;
+ dlst[1].len = PAGE_CACHE_SIZE;
+ dlst[1].rwflag = DLREAD;
+
+ cmdlen = offsetof(struct novfs_write_file_request, data);
+
+ cmd.Command.CommandType = VFS_COMMAND_WRITE_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.handle = Handle;
+ cmd.len = PAGE_CACHE_SIZE;
+	cmd.offset = (loff_t) Page->index << PAGE_CACHE_SHIFT;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, &dlst, 2, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (!retCode) {
+ if (reply) {
+ memcpy(&lreply, reply, sizeof(lreply));
+ }
+ switch (lreply.Reply.ErrorCode) {
+ case 0:
+ retCode = 0;
+ break;
+
+ case NWE_INSUFFICIENT_SPACE:
+ retCode = -ENOSPC;
+ break;
+
+ case NWE_ACCESS_DENIED:
+ retCode = -EACCES;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
+ }
+
+ if (reply) {
+ kfree(reply);
+ }
+
- DbgPrint("novfs_Write_Page retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+
+ return (retCode);
+}
+
+int novfs_write_pages(void *Handle, struct novfs_data_list *DList, int DList_Cnt,
+ size_t Bytes, loff_t Offset, struct novfs_schandle SessionId)
+{
+ struct novfs_write_file_request cmd;
+ struct novfs_write_file_reply lreply;
+ struct novfs_write_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0, cmdlen;
+ size_t len;
+
- DbgPrint
- ("novfs_Write_Pages: Handle=0x%p Dlst=0x%p Dlcnt=%d Bytes=%d Offset=%lld SessionId=0x%llx\n",
- Handle, DList, DList_Cnt, Bytes, Offset, SessionId);
++ DbgPrint("Handle=0x%p Dlst=0x%p Dlcnt=%d Bytes=%d Offset=%lld "
++ "SessionId=0x%llx\n", Handle, DList, DList_Cnt, Bytes,
++ Offset, SessionId);
+
+ DList[0].page = NULL;
+ DList[0].offset = &lreply;
+ DList[0].len = sizeof(lreply);
+ DList[0].rwflag = DLWRITE;
+
+ len = Bytes;
+ cmdlen = offsetof(struct novfs_write_file_request, data);
+
+ if (len) {
+ cmd.Command.CommandType = VFS_COMMAND_WRITE_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.handle = Handle;
+ cmd.len = len;
+ cmd.offset = Offset;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, DList, DList_Cnt,
+ (void *)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (!retCode) {
+ if (reply) {
+ memcpy(&lreply, reply, sizeof(lreply));
+ }
+ switch (lreply.Reply.ErrorCode) {
+ case 0:
+ retCode = 0;
+ break;
+
+ case NWE_INSUFFICIENT_SPACE:
+ retCode = -ENOSPC;
+ break;
+
+ case NWE_ACCESS_DENIED:
+ retCode = -EACCES;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
+ }
+ if (reply) {
+ kfree(reply);
+ }
+ }
- DbgPrint("novfs_Write_Pages retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+
+ return (retCode);
+}
+
+int novfs_read_stream(void *ConnHandle, unsigned char * Handle, u_char * Buffer,
+ size_t * Bytes, loff_t * Offset, int User,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_read_stream_request cmd;
+ struct novfs_read_stream_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0;
+ size_t len;
+
+ len = *Bytes;
+ *Bytes = 0;
+
+ if (offsetof(struct novfs_read_file_reply, data) + len
+ > novfs_max_iosize) {
+		len = novfs_max_iosize -
+		    offsetof(struct novfs_read_file_reply, data);
+ len = (len / PAGE_SIZE) * PAGE_SIZE;
+ }
+
+ cmd.Command.CommandType = VFS_COMMAND_READ_STREAM;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.connection = ConnHandle;
+ memcpy(cmd.handle, Handle, sizeof(cmd.handle));
+ cmd.len = len;
+ cmd.offset = *Offset;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
- DbgPrint("novfs_Read_Stream: Queue_Daemon_Command 0x%x replylen=%d\n",
- retCode, replylen);
++ DbgPrint("Queue_Daemon_Command 0x%x replylen=%d", retCode, replylen);
+
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ } else {
+			replylen -= offsetof(struct novfs_read_stream_reply, data);
+ if (replylen > 0) {
+ if (User) {
+ replylen -=
+ copy_to_user(Buffer, reply->data,
+ replylen);
+ } else {
+ memcpy(Buffer, reply->data, replylen);
+ }
+
+ *Bytes = replylen;
+ }
+ }
+ kfree(reply);
+ }
+
- DbgPrint("novfs_Read_Stream *Bytes=0x%x retCode=0x%x\n", *Bytes,
- retCode);
++ DbgPrint("*Bytes=0x%x retCode=0x%x", *Bytes, retCode);
+
+ return (retCode);
+}
+
+int novfs_write_stream(void *ConnHandle, unsigned char * Handle, u_char * Buffer,
+ size_t * Bytes, loff_t * Offset, struct novfs_schandle SessionId)
+{
+ struct novfs_write_stream_request * cmd;
+ struct novfs_write_stream_reply * reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0, cmdlen;
+ size_t len;
+
+ len = *Bytes;
+ cmdlen = len + offsetof(struct novfs_write_stream_request, data);
+ *Bytes = 0;
+
+ if (cmdlen > novfs_max_iosize) {
+ cmdlen = novfs_max_iosize;
+		len = cmdlen -
+		    offsetof(struct novfs_write_stream_request, data);
+ }
+
- DbgPrint("novfs_Write_Stream cmdlen=%d len=%d\n", cmdlen, len);
++ DbgPrint("cmdlen=%d len=%d", cmdlen, len);
+
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (cmd) {
+ if (Buffer && len) {
+ len -= copy_from_user(cmd->data, Buffer, len);
+ }
+
- DbgPrint("novfs_Write_Stream len=%d\n", len);
++ DbgPrint("len=%d", len);
+
+ cmd->Command.CommandType = VFS_COMMAND_WRITE_STREAM;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->connection = ConnHandle;
+ memcpy(cmd->handle, Handle, sizeof(cmd->handle));
+ cmd->len = len;
+ cmd->offset = *Offset;
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ switch (reply->Reply.ErrorCode) {
+ case 0:
+ retCode = 0;
+ break;
+
+ case NWE_INSUFFICIENT_SPACE:
+ retCode = -ENOSPC;
+ break;
+
+ case NWE_ACCESS_DENIED:
+ retCode = -EACCES;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
- DbgPrint
- ("novfs_Write_Stream reply->bytesWritten=0x%lx\n",
++ DbgPrint("reply->bytesWritten=0x%lx",
+ reply->bytesWritten);
+ *Bytes = reply->bytesWritten;
+ kfree(reply);
+ }
+ kfree(cmd);
+ }
- DbgPrint("novfs_Write_Stream *Bytes=0x%x retCode=0x%x\n", *Bytes,
- retCode);
++ DbgPrint("*Bytes=0x%x retCode=0x%x", *Bytes, retCode);
+
+ return (retCode);
+}
+
+int novfs_close_stream(void *ConnHandle, unsigned char * Handle, struct novfs_schandle SessionId)
+{
+ struct novfs_close_stream_request cmd;
+ struct novfs_close_stream_reply *reply;
+ unsigned long replylen = 0;
+ int retCode;
+
+ cmd.Command.CommandType = VFS_COMMAND_CLOSE_STREAM;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.connection = ConnHandle;
+ memcpy(cmd.handle, Handle, sizeof(cmd.handle));
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, 0);
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+int novfs_delete(unsigned char * Path, int DirectoryFlag, struct novfs_schandle SessionId)
+{
+ struct novfs_delete_file_request *cmd;
+ struct novfs_delete_file_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen, pathlen;
+
+ pathlen = strlen(Path);
+
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+
+ cmdlen = offsetof(struct novfs_delete_file_request, path) + pathlen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_DELETE_FILE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->isDirectory = DirectoryFlag;
+ cmd->pathlength = pathlen;
+ memcpy(cmd->path, Path, pathlen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ if ((reply->Reply.ErrorCode & 0xFFFF) == 0x0006) { /* Access Denied Error */
+ retCode = -EACCES;
+ } else {
+ retCode = -EIO;
+ }
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ } else {
+ retCode = -ENOMEM;
+ }
+ return (retCode);
+}
+
+int novfs_trunc(unsigned char * Path, int PathLen,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_truncate_file_request *cmd;
+ struct novfs_truncate_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode, cmdlen;
+
+ if (StripTrailingDots) {
+ if ('.' == Path[PathLen - 1])
+ PathLen--;
+ }
+	cmdlen = offsetof(struct novfs_truncate_file_request, path) + PathLen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_TRUNCATE_FILE;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->pathLen = PathLen;
+ memcpy(cmd->path, Path, PathLen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ if (reply->Reply.ErrorCode) {
+ retCode = -EIO;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ } else {
+ retCode = -ENOMEM;
+ }
+ return (retCode);
+}
+
+int novfs_trunc_ex(void *Handle, loff_t Offset,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_write_file_request cmd;
+ struct novfs_write_file_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode = 0, cmdlen;
+
- DbgPrint("novfs_Truncate_File_Ex Handle=0x%p Offset=%lld\n", Handle,
- Offset);
++ DbgPrint("Handle=0x%p Offset=%lld", Handle, Offset);
+
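+	/* The daemon protocol expresses truncation as a zero-length write
+	 * at the target offset. */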
+ cmdlen = offsetof(struct novfs_write_file_request, data);
+
+ cmd.Command.CommandType = VFS_COMMAND_WRITE_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+ cmd.handle = Handle;
+ cmd.len = 0;
+ cmd.offset = Offset;
+
+ retCode =
+ Queue_Daemon_Command(&cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+
- DbgPrint("novfs_Truncate_File_Ex retCode=0x%x reply=0x%p\n", retCode,
- reply);
++ DbgPrint("retCode=0x%x reply=0x%p", retCode, reply);
+
+ if (!retCode) {
+ switch (reply->Reply.ErrorCode) {
+ case 0:
+ retCode = 0;
+ break;
+
+ case NWE_INSUFFICIENT_SPACE:
+ retCode = -ENOSPC;
+ break;
+
+ case NWE_ACCESS_DENIED:
+ retCode = -EACCES;
+ break;
+
+ case NWE_FILE_IO_LOCKED:
+ retCode = -EBUSY;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
+ }
+
+ if (reply) {
+ kfree(reply);
+ }
+
- DbgPrint("novfs_Truncate_File_Ex retCode=%d\n", retCode);
++ DbgPrint("retCode=%d", retCode);
+
+ return (retCode);
+}
+
+int novfs_rename_file(int DirectoryFlag, unsigned char * OldName, int OldLen,
+ unsigned char * NewName, int NewLen,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_rename_file_request cmd;
+ struct novfs_rename_file_reply *reply;
+ unsigned long replylen = 0;
+ int retCode;
+
- DbgPrint("novfs_Rename_File:\n"
++ __DbgPrint("%s:\n"
+ " DirectoryFlag: %d\n"
+ " OldName: %.*s\n"
+ " NewName: %.*s\n"
- " SessionId: 0x%llx\n",
++ " SessionId: 0x%llx\n", __func__,
+ DirectoryFlag, OldLen, OldName, NewLen, NewName, SessionId);
+
+ cmd.Command.CommandType = VFS_COMMAND_RENAME_FILE;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = SessionId;
+
+ cmd.directoryFlag = DirectoryFlag;
+
+ if (StripTrailingDots) {
+ if ('.' == OldName[OldLen - 1])
+ OldLen--;
+ if ('.' == NewName[NewLen - 1])
+ NewLen--;
+ }
+
+ cmd.newnameLen = NewLen;
+ memcpy(cmd.newname, NewName, NewLen);
+
+ cmd.oldnameLen = OldLen;
+ memcpy(cmd.oldname, OldName, OldLen);
+
+ retCode =
+ Queue_Daemon_Command(&cmd, sizeof(cmd), NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ retCode = 0;
+ if (reply->Reply.ErrorCode) {
+ retCode = -ENOENT;
+ }
+ kfree(reply);
+ }
+ return (retCode);
+}
+
+int novfs_set_attr(unsigned char * Path, struct iattr *Attr,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_set_file_info_request *cmd;
+ struct novfs_set_file_info_reply *reply;
+ unsigned long replylen = 0;
+ int retCode, cmdlen, pathlen;
+
+ pathlen = strlen(Path);
+
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+
+ cmdlen = offsetof(struct novfs_set_file_info_request,path) + pathlen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_SET_FILE_INFO;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+ cmd->fileInfo.ia_valid = Attr->ia_valid;
+ cmd->fileInfo.ia_mode = Attr->ia_mode;
+ cmd->fileInfo.ia_uid = Attr->ia_uid;
+		cmd->fileInfo.ia_gid = Attr->ia_gid;
+ cmd->fileInfo.ia_size = Attr->ia_size;
+ cmd->fileInfo.ia_atime = Attr->ia_atime.tv_sec;
+		cmd->fileInfo.ia_mtime = Attr->ia_mtime.tv_sec;
+		cmd->fileInfo.ia_ctime = Attr->ia_ctime.tv_sec;
+		/* struct iattr no longer carries attribute flags */
+		cmd->fileInfo.ia_attr_flags = 0;
+
+ cmd->pathlength = pathlen;
+ memcpy(cmd->path, Path, pathlen);
+
+ retCode =
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply,
+ &replylen, INTERRUPTIBLE);
+ if (reply) {
+ switch (reply->Reply.ErrorCode) {
+ case 0:
+ retCode = 0;
+ break;
+
+ case NWE_PARAM_INVALID:
+ retCode = -EINVAL;
+ break;
+
+ case NWE_FILE_IO_LOCKED:
+ retCode = -EBUSY;
+ break;
+
+ default:
+ retCode = -EIO;
+ break;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ } else {
+ retCode = -ENOMEM;
+ }
+ return (retCode);
+}
+
+int novfs_get_file_cache_flag(unsigned char * Path,
+ struct novfs_schandle SessionId)
+{
+ struct novfs_get_cache_flag *cmd;
+ struct novfs_get_cache_flag_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int cmdlen;
+ int retCode = 0;
+ int pathlen;
+
- DbgPrint("novfs_Get_File_Cache_Flag: Path = %s\n", Path);
++ DbgPrint("Path = %s", Path);
+
+ if (Path && *Path) {
+ pathlen = strlen(Path);
+ if (StripTrailingDots) {
+ if ('.' == Path[pathlen - 1])
+ pathlen--;
+ }
+ cmdlen = offsetof(struct novfs_get_cache_flag, path) +
+ pathlen;
+ cmd = (struct novfs_get_cache_flag *)
+ kmalloc(cmdlen, GFP_KERNEL);
+ if (cmd) {
+ cmd->Command.CommandType = VFS_COMMAND_GET_CACHE_FLAG;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+ cmd->pathLen = pathlen;
+ memcpy(cmd->path, Path, cmd->pathLen);
+
+ Queue_Daemon_Command(cmd, cmdlen, NULL, 0,
+ (void *)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+
+ if (!reply->Reply.ErrorCode) {
+ retCode = reply->CacheFlag;
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ }
+ }
+
- DbgPrint("novfs_Get_File_Cache_Flag: return %d\n", retCode);
++ DbgPrint("return %d", retCode);
+ return (retCode);
+}
+
+/*
+ * Arguments:
+ * SessionId, file handle, type of lock (read/write or unlock),
+ * start of lock area, length of lock area
+ *
+ * Notes: lock type - fcntl
+ */
+int novfs_set_file_lock(struct novfs_schandle SessionId, void *Handle,
+ unsigned char fl_type, loff_t fl_start, loff_t fl_len)
+{
+ struct novfs_set_file_lock_request *cmd;
+ struct novfs_set_file_lock_reply *reply = NULL;
+ unsigned long replylen = 0;
+ int retCode;
+
+ retCode = -1;
+
- DbgPrint("novfs_Set_File_Lock:\n"
- " SessionId: 0x%llx\n", SessionId);
++ DbgPrint("SessionId: 0x%llx\n", SessionId);
+
+ cmd =
+ (struct novfs_set_file_lock_request *) kmalloc(sizeof(struct novfs_set_file_lock_request), GFP_KERNEL);
+
+ if (cmd) {
- DbgPrint("novfs_Set_File_Lock 2\n");
++ DbgPrint("2");
+
+ cmd->Command.CommandType = VFS_COMMAND_SET_FILE_LOCK;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = SessionId;
+
+ cmd->handle = Handle;
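+		/* Note the mapping below: F_RDLCK is sent as the daemon's
+		 * exclusive lock (1) and F_WRLCK as the shared one (0), the
+		 * reverse of the usual POSIX reader/writer convention. */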
+ if (F_RDLCK == fl_type) {
+ fl_type = 1; // LockRegionExclusive
+ } else if (F_WRLCK == fl_type) {
+ fl_type = 0; // LockRegionShared
+ }
+
+ cmd->fl_type = fl_type;
+ cmd->fl_start = fl_start;
+ cmd->fl_len = fl_len;
+
- DbgPrint("novfs_Set_File_Lock 3\n");
++ DbgPrint("3");
+
- DbgPrint("novfs_Set_File_Lock: BEGIN dump arguments\n");
- DbgPrint("novfs_Set_File_Lock: Queue_Daemon_Command %d\n",
++ DbgPrint("BEGIN dump arguments");
++ DbgPrint("Queue_Daemon_Command %d",
+ cmd->Command.CommandType);
- DbgPrint("novfs_Set_File_Lock: cmd->handle = 0x%p\n",
- cmd->handle);
- DbgPrint("novfs_Set_File_Lock: cmd->fl_type = %u\n",
- cmd->fl_type);
- DbgPrint("novfs_Set_File_Lock: cmd->fl_start = 0x%X\n",
- cmd->fl_start);
- DbgPrint("novfs_Set_File_Lock: cmd->fl_len = 0x%X\n",
- cmd->fl_len);
- DbgPrint
- ("novfs_Set_File_Lock: sizeof(SET_FILE_LOCK_REQUEST) = %u\n",
++ DbgPrint("cmd->handle = 0x%p", cmd->handle);
++ DbgPrint("cmd->fl_type = %u", cmd->fl_type);
++ DbgPrint("cmd->fl_start = 0x%X", cmd->fl_start);
++ DbgPrint("cmd->fl_len = 0x%X", cmd->fl_len);
++ DbgPrint("sizeof(SET_FILE_LOCK_REQUEST) = %u",
+ sizeof(struct novfs_set_file_lock_request));
- DbgPrint("novfs_Set_File_Lock: END dump arguments\n");
++ DbgPrint("END dump arguments");
+
+ retCode =
+ Queue_Daemon_Command(cmd, sizeof(struct novfs_set_file_lock_request),
+ NULL, 0, (void *)&reply, &replylen,
+ INTERRUPTIBLE);
- DbgPrint("novfs_Set_File_Lock 4\n");
++ DbgPrint("4");
+
+ if (reply) {
- DbgPrint("novfs_Set_File_Lock 5, ErrorCode = %X\n",
- reply->Reply.ErrorCode);
++ DbgPrint("5, ErrorCode = %X", reply->Reply.ErrorCode);
+
+ if (reply->Reply.ErrorCode) {
+ retCode = reply->Reply.ErrorCode;
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ }
+
- DbgPrint("novfs_Set_File_Lock 6\n");
++ DbgPrint("6");
+
+ return (retCode);
+}
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * This file contains functions used to control access to the Linux file
+ * system.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/autoconf.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/unistd.h>
+#include <asm/statfs.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/statfs.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+
+/*===[ Include files specific to this module ]============================*/
+#include "vfs.h"
+
+
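+/*
+ * Per-inode private data: the scope this inode was resolved in, its link
+ * on the global InodeList, the cached directory entries (DirCache, guarded
+ * by DirCacheLock) and the daemon file handle.  Name is a flexible trailing
+ * array and must remain the last member.
+ */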
+struct inode_data {
+ void *Scope;
+ unsigned long Flags;
+ struct list_head IList;
+ struct inode *Inode;
+ unsigned long cntDC;
+ struct list_head DirCache;
+ struct semaphore DirCacheLock;
+ void * FileHandle;
+ int CacheFlag;
+ char Name[1]; /* Needs to be last entry */
+};
+
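+/* Seconds a cached entry is trusted before revalidation (scaled by HZ when
+ * compared against jiffies via novfs_update_timeout). */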
+#define FILE_UPDATE_TIMEOUT 2
+
+/*===[ Function prototypes ]=============================================*/
+
+static unsigned long novfs_internal_hash(struct qstr *name);
+static int novfs_d_add(struct dentry *p, struct dentry *d, struct inode *i, int add);
+
+static int novfs_get_sb(struct file_system_type *Fstype, int Flags,
+ const char *Dev_name, void *Data, struct vfsmount *Mnt);
+
+static void novfs_kill_sb(struct super_block *SB);
+
+
+/*
+ * Declared dentry_operations
+ */
+int novfs_d_revalidate(struct dentry *, struct nameidata *);
+int novfs_d_hash(struct dentry *, struct qstr *);
+int novfs_d_compare(struct dentry *, struct qstr *, struct qstr *);
+int novfs_d_delete(struct dentry *dentry);
+void novfs_d_release(struct dentry *dentry);
+void novfs_d_iput(struct dentry *dentry, struct inode *inode);
+
+/*
+ * Declared directory operations
+ */
+int novfs_dir_open(struct inode *inode, struct file *file);
+int novfs_dir_release(struct inode *inode, struct file *file);
+loff_t novfs_dir_lseek(struct file *file, loff_t offset, int origin);
+ssize_t novfs_dir_read(struct file *file, char *buf, size_t len, loff_t * off);
+void addtodentry(struct dentry *Parent, unsigned char *List, int Level);
+int novfs_filldir(void *data, const char *name, int namelen, loff_t off,
+ ino_t ino, unsigned ftype);
+int novfs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir);
+int novfs_dir_fsync(struct file *file, struct dentry *dentry, int datasync);
+
+/*
+ * Declared address space operations
+ */
+int novfs_a_writepage(struct page *page, struct writeback_control *wbc);
+int novfs_a_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
- int novfs_a_prepare_write(struct file *file, struct page *page, unsigned from,
- unsigned to);
- int novfs_a_commit_write(struct file *file, struct page *page, unsigned offset,
- unsigned to);
++int novfs_a_write_begin(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata);
++int novfs_a_write_end(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *pagep, void *fsdata);
+int novfs_a_readpage(struct file *file, struct page *page);
+int novfs_a_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *page_lst, unsigned nr_pages);
+ssize_t novfs_a_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs);
+
+/*
+ * Declared file_operations
+ */
+ssize_t novfs_f_read(struct file *, char *, size_t, loff_t *);
+ssize_t novfs_f_write(struct file *, const char *, size_t, loff_t *);
+int novfs_f_readdir(struct file *, void *, filldir_t);
+int novfs_f_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+int novfs_f_mmap(struct file *file, struct vm_area_struct *vma);
+int novfs_f_open(struct inode *, struct file *);
+int novfs_f_flush(struct file *, fl_owner_t);
+int novfs_f_release(struct inode *, struct file *);
+int novfs_f_fsync(struct file *, struct dentry *, int datasync);
+int novfs_f_lock(struct file *, int, struct file_lock *);
+
+/*
+ * Declared inode_operations
+ */
+int novfs_i_create(struct inode *, struct dentry *, int, struct nameidata *);
+struct dentry *novfs_i_lookup(struct inode *, struct dentry *,
+ struct nameidata *);
+int novfs_i_mkdir(struct inode *, struct dentry *, int);
+int novfs_i_unlink(struct inode *dir, struct dentry *dentry);
+int novfs_i_rmdir(struct inode *, struct dentry *);
+int novfs_i_mknod(struct inode *, struct dentry *, int, dev_t);
+int novfs_i_rename(struct inode *, struct dentry *, struct inode *,
+ struct dentry *);
+int novfs_i_setattr(struct dentry *, struct iattr *);
+int novfs_i_getattr(struct vfsmount *mnt, struct dentry *, struct kstat *);
+int novfs_i_revalidate(struct dentry *dentry);
+
+/*
+ * Extended attributes operations
+ */
+
+ssize_t novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer,
+ size_t size);
+int novfs_i_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t value_size, int flags);
+ssize_t novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
+
+void update_inode(struct inode *Inode, struct novfs_entry_info *Info);
+
+/*
+ * Declared super_operations
+ */
+void novfs_read_inode(struct inode *inode);
+void novfs_write_inode(struct inode *inode);
+int novfs_notify_change(struct dentry *dentry, struct iattr *attr);
+void novfs_clear_inode(struct inode *inode);
+int novfs_show_options(struct seq_file *s, struct vfsmount *m);
+
+int novfs_statfs(struct dentry *de, struct kstatfs *buf);
+
+/*
+ * Declared control interface functions
+ */
+ssize_t
+novfs_control_Read(struct file *file, char *buf, size_t nbytes, loff_t * ppos);
+
+ssize_t
+novfs_control_write(struct file *file, const char *buf, size_t nbytes,
+ loff_t * ppos);
+
+int novfs_control_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+
+int __init init_novfs(void);
+void __exit exit_novfs(void);
+
+int novfs_lock_inode_cache(struct inode *i);
+void novfs_unlock_inode_cache(struct inode *i);
+int novfs_enumerate_inode_cache(struct inode *i, struct list_head **iteration,
+ ino_t * ino, struct novfs_entry_info *info);
+int novfs_get_entry(struct inode *i, struct qstr *name, ino_t * ino,
+ struct novfs_entry_info *info);
+int novfs_get_entry_by_pos(struct inode *i, loff_t pos, ino_t * ino,
+ struct novfs_entry_info *info);
+int novfs_get_entry_time(struct inode *i, struct qstr *name, ino_t * ino,
+ struct novfs_entry_info *info, u64 * EntryTime);
+int novfs_get_remove_entry(struct inode *i, ino_t * ino, struct novfs_entry_info *info);
+void novfs_invalidate_inode_cache(struct inode *i);
+struct novfs_dir_cache *novfs_lookup_inode_cache(struct inode *i, struct qstr *name,
+ ino_t ino);
+int novfs_lookup_validate(struct inode *i, struct qstr *name, ino_t ino);
+int novfs_add_inode_entry(struct inode *i, struct qstr *name, ino_t ino,
+ struct novfs_entry_info *info);
+int novfs_update_entry(struct inode *i, struct qstr *name, ino_t ino,
+ struct novfs_entry_info *info);
+void novfs_remove_inode_entry(struct inode *i, struct qstr *name, ino_t ino);
+void novfs_free_invalid_entries(struct inode *i);
+void novfs_free_inode_cache(struct inode *i);
+
+/*===[ Global variables ]=================================================*/
+struct dentry_operations novfs_dentry_operations = {
+ .d_revalidate = novfs_d_revalidate,
+ .d_hash = novfs_d_hash,
+ .d_compare = novfs_d_compare,
+ //.d_delete = novfs_d_delete,
+ .d_release = novfs_d_release,
+ .d_iput = novfs_d_iput,
+};
+
+struct file_operations novfs_dir_operations = {
+ .owner = THIS_MODULE,
+ .open = novfs_dir_open,
+ .release = novfs_dir_release,
+ .llseek = novfs_dir_lseek,
+ .read = novfs_dir_read,
+ .readdir = novfs_dir_readdir,
+ .fsync = novfs_dir_fsync,
+};
+
+static struct file_operations novfs_file_operations = {
+ .owner = THIS_MODULE,
+ .read = novfs_f_read,
+ .write = novfs_f_write,
+ .readdir = novfs_f_readdir,
+ .ioctl = novfs_f_ioctl,
+ .mmap = novfs_f_mmap,
+ .open = novfs_f_open,
+ .flush = novfs_f_flush,
+ .release = novfs_f_release,
+ .fsync = novfs_f_fsync,
+ .llseek = generic_file_llseek,
+ .lock = novfs_f_lock,
+};
+
+static struct address_space_operations novfs_nocache_aops = {
+ .readpage = novfs_a_readpage,
+};
+
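+/* Pages are written back explicitly through the daemon, so kernel-driven
+ * writeback is disabled for this backing device. */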
+struct backing_dev_info novfs_backing_dev_info = {
+ .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
+ .state = 0,
+ .capabilities = BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY,
+ .unplug_io_fn = default_unplug_io_fn,
+};
+
+static struct address_space_operations novfs_aops = {
+ .readpage = novfs_a_readpage,
+ .readpages = novfs_a_readpages,
+ .writepage = novfs_a_writepage,
+ .writepages = novfs_a_writepages,
- .prepare_write = novfs_a_prepare_write,
- .commit_write = novfs_a_commit_write,
++ .write_begin = novfs_a_write_begin,
++ .write_end = novfs_a_write_end,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .direct_IO = novfs_a_direct_IO,
+};
+
+static struct inode_operations novfs_inode_operations = {
+ .create = novfs_i_create,
+ .lookup = novfs_i_lookup,
+ .unlink = novfs_i_unlink,
+ .mkdir = novfs_i_mkdir,
+ .rmdir = novfs_i_rmdir,
+ .mknod = novfs_i_mknod,
+ .rename = novfs_i_rename,
+ .setattr = novfs_i_setattr,
+ .getattr = novfs_i_getattr,
+ .getxattr = novfs_i_getxattr,
+ .setxattr = novfs_i_setxattr,
+ .listxattr = novfs_i_listxattr,
+};
+
+static struct inode_operations novfs_file_inode_operations = {
+ .setattr = novfs_i_setattr,
+ .getattr = novfs_i_getattr,
+ .getxattr = novfs_i_getxattr,
+ .setxattr = novfs_i_setxattr,
+ .listxattr = novfs_i_listxattr,
+};
+
+static struct super_operations novfs_ops = {
+ .statfs = novfs_statfs,
+ .clear_inode = novfs_clear_inode,
+ .drop_inode = generic_delete_inode,
+ .show_options = novfs_show_options,
+
+};
+
+/* Not currently used
+static struct file_operations novfs_Control_operations = {
+ .read = novfs_Control_read,
+ .write = novfs_Control_write,
+ .ioctl = novfs_Control_ioctl,
+};
+*/
+
+static atomic_t novfs_Inode_Number = ATOMIC_INIT(0);
+
+
+struct dentry *novfs_root = NULL;
+char *novfs_current_mnt = NULL;
+
+DECLARE_MUTEX(InodeList_lock);
+
+LIST_HEAD(InodeList);
+
+DECLARE_MUTEX(TimeDir_Lock);
+uint64_t lastTime;
+char lastDir[PATH_MAX];
+
+uint64_t inHAXTime;
+int inHAX;
+
+unsigned long InodeCount = 0, DCCount = 0;
+unsigned long novfs_update_timeout = FILE_UPDATE_TIMEOUT;
+int novfs_page_cache = 0;
+
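+/* Per-open directory state: listedall is set once every entry has been
+ * returned; enumHandle is the daemon-side enumeration handle. */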
+struct file_private {
+ int listedall;
+ void *enumHandle;
+};
+
+static void PRINT_DENTRY(const char *s, struct dentry *d)
+{
- DbgPrint("%s: 0x%p\n", s, d);
- DbgPrint(" d_count: 0x%x\n", d->d_count);
- DbgPrint(" d_lock: 0x%x\n", d->d_lock);
- DbgPrint(" d_inode: 0x%x\n", d->d_inode);
- DbgPrint(" d_lru: 0x%p\n"
- " next: 0x%p\n"
- " prev: 0x%p\n", &d->d_lru, d->d_lru.next,
- d->d_lru.prev);
- DbgPrint(" d_child: 0x%p\n" " next: 0x%p\n"
- " prev: 0x%p\n", &d->d_u.d_child,
- d->d_u.d_child.next, d->d_u.d_child.prev);
- DbgPrint(" d_subdirs: 0x%p\n" " next: 0x%p\n"
- " prev: 0x%p\n", &d->d_subdirs, d->d_subdirs.next,
- d->d_subdirs.prev);
- DbgPrint(" d_alias: 0x%p\n" " next: 0x%p\n"
- " prev: 0x%p\n", &d->d_alias, d->d_alias.next,
- d->d_alias.prev);
- DbgPrint(" d_time: 0x%x\n", d->d_time);
- DbgPrint(" d_op: 0x%p\n", d->d_op);
- DbgPrint(" d_sb: 0x%p\n", d->d_sb);
- DbgPrint(" d_flags: 0x%x\n", d->d_flags);
- DbgPrint(" d_mounted: 0x%x\n", d->d_mounted);
- DbgPrint(" d_fsdata: 0x%p\n", d->d_fsdata);
++ __DbgPrint("%s: 0x%p\n", s, d);
++ __DbgPrint(" d_count: 0x%x\n", d->d_count);
++ __DbgPrint(" d_lock: 0x%x\n", d->d_lock);
++ __DbgPrint(" d_inode: 0x%x\n", d->d_inode);
++ __DbgPrint(" d_lru: 0x%p\n"
++ " next: 0x%p\n"
++ " prev: 0x%p\n", &d->d_lru, d->d_lru.next,
++ d->d_lru.prev);
++ __DbgPrint(" d_child: 0x%p\n" " next: 0x%p\n"
++ " prev: 0x%p\n", &d->d_u.d_child,
++ d->d_u.d_child.next, d->d_u.d_child.prev);
++ __DbgPrint(" d_subdirs: 0x%p\n" " next: 0x%p\n"
++ " prev: 0x%p\n", &d->d_subdirs, d->d_subdirs.next,
++ d->d_subdirs.prev);
++ __DbgPrint(" d_alias: 0x%p\n" " next: 0x%p\n"
++ " prev: 0x%p\n", &d->d_alias, d->d_alias.next,
++ d->d_alias.prev);
++ __DbgPrint(" d_time: 0x%x\n", d->d_time);
++ __DbgPrint(" d_op: 0x%p\n", d->d_op);
++ __DbgPrint(" d_sb: 0x%p\n", d->d_sb);
++ __DbgPrint(" d_flags: 0x%x\n", d->d_flags);
++ __DbgPrint(" d_mounted: 0x%x\n", d->d_mounted);
++ __DbgPrint(" d_fsdata: 0x%p\n", d->d_fsdata);
+/* DbgPrint(" d_cookie: 0x%x\n", d->d_cookie); */
- DbgPrint(" d_parent: 0x%p\n", d->d_parent);
- DbgPrint(" d_name: 0x%p %.*s\n", &d->d_name, d->d_name.len,
- d->d_name.name);
- DbgPrint(" name: 0x%p\n" " len: %d\n"
- " hash: 0x%x\n", d->d_name.name, d->d_name.len,
- d->d_name.hash);
- DbgPrint(" d_hash: 0x%x\n" " next: 0x%x\n"
- " pprev: 0x%x\n", d->d_hash, d->d_hash.next,
- d->d_hash.pprev);
++ __DbgPrint(" d_parent: 0x%p\n", d->d_parent);
++ __DbgPrint(" d_name: 0x%p %.*s\n", &d->d_name, d->d_name.len,
++ d->d_name.name);
++ __DbgPrint(" name: 0x%p\n" " len: %d\n"
++ " hash: 0x%x\n", d->d_name.name, d->d_name.len,
++ d->d_name.hash);
++ __DbgPrint(" d_hash: 0x%x\n" " next: 0x%x\n"
++ " pprev: 0x%x\n", d->d_hash, d->d_hash.next,
++ d->d_hash.pprev);
+}
+
+/*++======================================================================*/
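+/* Detach RemoveName's scope and drop its entry from the root directory cache. */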
+int novfs_remove_from_root(char *RemoveName)
+{
+ struct qstr name;
+ struct dentry *dentry;
+ struct inode *dir;
+
- DbgPrint("novfs_Remove_from_Root: %s\n", RemoveName);
++ DbgPrint("%s", RemoveName);
+ name.len = strlen(RemoveName);
+ name.name = RemoveName;
+ novfs_d_hash(novfs_root, &name);
+
+ dentry = d_lookup(novfs_root, &name);
+ if (dentry) {
+ if (dentry->d_inode && dentry->d_inode->i_private) {
+ struct inode_data *n_inode =
+ dentry->d_inode->i_private;
+ n_inode->Scope = NULL;
+ }
+ dput(dentry);
+ }
+
+ dir = novfs_root->d_inode;
+
+ novfs_lock_inode_cache(dir);
+ novfs_remove_inode_entry(dir, &name, 0);
+ novfs_unlock_inode_cache(dir);
+
+ return (0);
+}
+
+/*++======================================================================*/
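+/* Add AddName to the root directory cache as a synthetic directory (mode
+ * 0700) if it is not already present. */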
+int novfs_add_to_root(char *AddName)
+{
+ struct qstr name;
+ struct inode *dir;
+ struct novfs_entry_info info;
+ ino_t ino;
+
- DbgPrint("novfs_Add_to_Root: %s\n", AddName);
++ DbgPrint("%s", AddName);
+ name.len = strlen(AddName);
+ name.name = AddName;
+ novfs_d_hash(novfs_root, &name);
+
+ dir = novfs_root->d_inode;
+
+ novfs_lock_inode_cache(dir);
+
+ ino = 0;
+
+ if (!novfs_lookup_inode_cache(dir, &name, 0)) {
+ info.mode = S_IFDIR | 0700;
+ info.size = 0;
+ info.atime = info.ctime = info.mtime = CURRENT_TIME;
+
+ ino = (ino_t)atomic_inc_return(&novfs_Inode_Number);
+ novfs_add_inode_entry(dir, &name, ino, &info);
+ }
+
+ novfs_unlock_inode_cache(dir);
+
+ return (0);
+}
+
+/*++======================================================================*/
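+/* Like novfs_add_to_root(), but also allocates the dentry and inode up
+ * front and hashes them in. */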
+int novfs_Add_to_Root2(char *AddName)
+{
+ struct dentry *entry;
+ struct qstr name;
+ struct inode *inode;
+ void *scope;
+
- DbgPrint("novfs_Add_to_Root: %s\n", AddName);
++ DbgPrint("%s", AddName);
+ name.len = strlen(AddName);
+ name.name = AddName;
+
+ novfs_d_hash(novfs_root, &name);
+
+ entry = d_lookup(novfs_root, &name);
- DbgPrint("novfs_Add_to_Root: novfs_d_lookup 0x%p\n", entry);
++ DbgPrint("novfs_d_lookup 0x%p", entry);
+ if (NULL == entry) {
+ scope = novfs_scope_lookup();
+
+ entry = d_alloc(novfs_root, &name);
- DbgPrint("novfs_Add_to_Root: d_alloc 0x%p\n", entry);
++ DbgPrint("d_alloc 0x%p", entry);
+ if (entry) {
+ entry->d_op = &novfs_dentry_operations;
+ entry->d_time = jiffies + (novfs_update_timeout * HZ);
+ /*
+ * done in novfs_d_add now... entry->d_fsdata = (void *)novfs_internal_hash( &name );
+ */
+ inode =
+ novfs_get_inode(novfs_root->d_sb, S_IFDIR | 0700, 0, novfs_scope_get_uid(scope), 0, &name);
- DbgPrint("novfs_Add_to_Root: Inode=0x%p\n", inode);
++ DbgPrint("Inode=0x%p", inode);
+ if (inode) {
+ inode->i_atime =
+ inode->i_ctime =
+ inode->i_mtime = CURRENT_TIME;
+ if (!novfs_d_add(novfs_root, entry, inode, 1)) {
+ if (inode->i_private) {
+ struct inode_data *n_inode = inode->i_private;
+ n_inode->Flags = USER_INODE;
+ }
+ PRINT_DENTRY("After novfs_d_add",
+ entry);
+ } else {
+ dput(entry);
+ iput(inode);
+ }
+ }
+ }
+ } else {
+ dput(entry);
+ PRINT_DENTRY("novfs_Add_to_Root: After dput Dentry", entry);
+ }
+ return (0);
+}
+
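+/*
+ * Build the NetWare-style path ('\'-separated components) for a dentry by
+ * walking d_parent and writing backwards from the end of Buf; returns a
+ * pointer into Buf, or NULL if Buflen is too small.
+ */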
+char *novfs_dget_path(struct dentry *Dentry, char *Buf, unsigned int Buflen)
+{
+ char *retval = &Buf[Buflen];
+ struct dentry *p = Dentry;
+
+ *(--retval) = '\0';
+ Buflen--;
+
+ if (!IS_ROOT(p) && !IS_ROOT(p->d_parent)) {
+ while (Buflen && !IS_ROOT(p) && !IS_ROOT(p->d_parent)) {
+ if (Buflen > p->d_name.len) {
+ retval -= p->d_name.len;
+ Buflen -= p->d_name.len;
+ memcpy(retval, p->d_name.name, p->d_name.len);
+ *(--retval) = '\\';
+ Buflen--;
+ p = p->d_parent;
+ } else {
+ retval = NULL;
+ break;
+ }
+ }
+ } else {
+ *(--retval) = '\\';
+ }
+
+ if (retval)
- DbgPrint("novfs_dget_path: %s\n", retval);
++ DbgPrint("%s", retval);
+ return (retval);
+}
+
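+/*
+ * Revalidate a dentry against the daemon.  A cached entry younger than
+ * novfs_update_timeout (or one Flags says to trust) is kept as-is;
+ * otherwise it is refreshed from novfs_get_file_info(), and a file that
+ * has disappeared is marked S_DEAD and dropped from the dentry cache.
+ */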
+int verify_dentry(struct dentry *dentry, int Flags)
+{
+ int retVal = -ENOENT;
+ struct inode *dir;
+ struct novfs_entry_info *info = NULL;
+ struct inode_data *id;
+ struct novfs_schandle session;
+ char *path, *list = NULL, *cp;
+ ino_t ino = 0;
+ struct qstr name;
+ int iLock = 0;
+ struct dentry *parent = NULL;
+ u64 ctime;
+ struct inode *inode;
+
+ if (IS_ROOT(dentry)) {
- DbgPrint("verify_dentry: Root entry\n");
++ DbgPrint("Root entry");
+ return (0);
+ }
+
+ if (dentry && dentry->d_parent &&
+ (dir = dentry->d_parent->d_inode) && (id = dir->i_private)) {
+ parent = dget_parent(dentry);
+
+ info = kmalloc(sizeof(struct novfs_entry_info) + PATH_LENGTH_BUFFER, GFP_KERNEL);
+
+ if (info) {
+ if (novfs_lock_inode_cache(dir)) {
+ name.len = dentry->d_name.len;
+ name.name = dentry->d_name.name;
+ name.hash = novfs_internal_hash(&name);
+ if (!novfs_get_entry_time(dir, &name, &ino, info, &ctime)) {
+ inode = dentry->d_inode;
+ if (inode && inode->i_private &&
+ ((inode->i_size != info->size) ||
+ (inode->i_mtime.tv_sec !=
+ info->mtime.tv_sec)
+ || (inode->i_mtime.tv_nsec !=
+ info->mtime.tv_nsec))) {
+ /*
+ * Values don't match so update.
+ */
+ struct inode_data *n_inode = inode->i_private;
+ n_inode->Flags |= UPDATE_INODE;
+ }
+
+ ctime = get_jiffies_64() - ctime;
+ if (Flags || ctime < (u64) (novfs_update_timeout * HZ)) {
+ retVal = 0;
+ novfs_unlock_inode_cache(dir);
+ dput(parent);
+ kfree(info);
+ return (0);
+ }
+ }
+ novfs_unlock_inode_cache(dir);
+ }
+
+ if (IS_ROOT(dentry->d_parent)) {
+ session = novfs_scope_get_sessionId(
+ novfs_get_scope_from_name(&dentry->d_name));
+ } else
+ session = novfs_scope_get_sessionId(id->Scope);
+
+ if (!SC_PRESENT(session)) {
+ id->Scope = novfs_get_scope(dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ ino = 0;
+ retVal = 0;
+
+ if (IS_ROOT(dentry->d_parent)) {
- DbgPrint("verify_dentry: parent is Root directory\n");
++ DbgPrint("parent is Root directory");
+ list = novfs_get_scopeusers();
+
+ iLock = novfs_lock_inode_cache(dir);
+ novfs_invalidate_inode_cache(dir);
+
+ if (list) {
+ cp = list;
+ while (*cp) {
+ name.name = cp;
+ name.len = strlen(cp);
+ name.hash = novfs_internal_hash(&name);
+ cp += (name.len + 1);
+ ino = 0;
+ if (novfs_get_entry(dir, &name, &ino, info)) {
+ info->mode = S_IFDIR | 0700;
+ info->size = 0;
+ info->atime = info->ctime = info->mtime = CURRENT_TIME;
+ ino = (ino_t)atomic_inc_return(&novfs_Inode_Number);
+ novfs_add_inode_entry(dir, &name, ino, info);
+ }
+ }
+ }
+ novfs_free_invalid_entries(dir);
+ } else {
+
+			path = novfs_dget_path(dentry, info->name,
+					       PATH_LENGTH_BUFFER);
+			if (path) {
+				if (dentry->d_name.len <= NW_MAX_PATH_LENGTH) {
+					name.hash = novfs_internal_hash(&dentry->d_name);
+					name.len = dentry->d_name.len;
+					name.name = dentry->d_name.name;
+
+					retVal = novfs_get_file_info(path, info,
+								     session);
+					if (0 == retVal) {
+						dentry->d_time = jiffies +
+						    (novfs_update_timeout * HZ);
+						iLock = novfs_lock_inode_cache(dir);
+						if (novfs_update_entry(dir, &name,
+								       0, info)) {
+							if (dentry->d_inode)
+								ino = dentry->d_inode->i_ino;
+							else
+								ino = (ino_t)atomic_inc_return(&novfs_Inode_Number);
+							novfs_add_inode_entry(dir, &name,
+									      ino, info);
+						}
+						if (dentry->d_inode) {
+							update_inode(dentry->d_inode,
+								     info);
+							id->Flags &= ~UPDATE_INODE;
+
+							dentry->d_inode->i_flags &= ~S_DEAD;
+							if (dentry->d_inode->i_private)
+								((struct inode_data *)dentry->d_inode->i_private)->Scope = id->Scope;
+ }
+ } else if (-EINTR != retVal) {
+ retVal = 0;
+ iLock = novfs_lock_inode_cache(dir);
+ novfs_remove_inode_entry(dir, &name, 0);
+ if (dentry->d_inode
+ && !(dentry->d_inode->i_flags & S_DEAD)) {
+ dentry->d_inode->i_flags |= S_DEAD;
+							dentry->d_inode->i_size = 0;
+ dentry->d_inode->i_atime.tv_sec =
+ dentry->d_inode->i_atime.tv_nsec =
+ dentry->d_inode->i_ctime.tv_sec =
+ dentry->d_inode->i_ctime.tv_nsec =
+ dentry->d_inode->i_mtime.tv_sec =
+ dentry->d_inode->i_mtime.tv_nsec = 0;
+ dentry->d_inode->i_blocks = 0;
+ d_delete(dentry); /* Remove from cache */
+ }
+ }
+ } else {
+ retVal = -ENAMETOOLONG;
+ }
+ }
+ }
+ } else {
+ retVal = -ENOMEM;
+ }
+ if (iLock) {
+ novfs_unlock_inode_cache(dir);
+ }
+ dput(parent);
+ }
+
+ if (list)
+ kfree(list);
+ if (info)
+ kfree(info);
+
- DbgPrint("verify_dentry: return=0x%x\n", retVal);
++ DbgPrint("return=0x%x", retVal);
+
+ return (retVal);
+}
+
+
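+/*
+ * Bind an inode to a dentry: inherit the parent's scope (or look one up),
+ * stamp the dentry revalidation time, then hash the dentry in with d_add()
+ * when "a" is set, d_instantiate() otherwise.
+ */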
+static int novfs_d_add(struct dentry *Parent, struct dentry *d, struct inode *i, int a)
+{
+ void *scope;
+ struct inode_data *id = NULL;
+
+ char *path, *buf;
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(d, buf, PATH_LENGTH_BUFFER);
+ if (path) {
- DbgPrint("novfs_d_add: inode=0x%p ino=%d path %s\n", i,
++ DbgPrint("inode=0x%p ino=%d path %s", i,
+ i->i_ino, path);
+ }
+ kfree(buf);
+ }
+
+ if (Parent && Parent->d_inode && Parent->d_inode->i_private) {
+ id = (struct inode_data *) Parent->d_inode->i_private;
+ }
+
+ if (id && id->Scope) {
+ scope = id->Scope;
+ } else {
+ scope = novfs_get_scope(d);
+ }
+
+ ((struct inode_data *) i->i_private)->Scope = scope;
+
+ d->d_time = jiffies + (novfs_update_timeout * HZ);
+ if (a) {
+ d_add(d, i);
+ } else {
+ d_instantiate(d, i);
+ }
+
+ return (0);
+}
+
+int novfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ int retCode = 0;
+ struct inode *dir;
+ struct inode_data *id;
+ struct qstr name;
+
- DbgPrint("novfs_d_revalidate: 0x%p %.*s\n"
- " d_count: %d\n"
- " d_inode: 0x%p\n",
++ __DbgPrint("%s: 0x%p %.*s\n"
++ " d_count: %d\n"
++ " d_inode: 0x%p\n", __func__,
+ dentry, dentry->d_name.len, dentry->d_name.name,
+ dentry->d_count, dentry->d_inode);
+
+ if (IS_ROOT(dentry)) {
+ retCode = 1;
+ } else {
+ if (dentry->d_inode &&
+ dentry->d_parent &&
+ (dir = dentry->d_parent->d_inode) &&
+ (id = dir->i_private)) {
+ /*
+ * Check timer to see if in valid time limit
+ */
+ if (jiffies > dentry->d_time) {
+ /*
+ * Revalidate entry
+ */
+ name.len = dentry->d_name.len;
+ name.name = dentry->d_name.name;
+ name.hash =
+ novfs_internal_hash(&dentry->d_name);
+ dentry->d_time = 0;
+
+ if (0 == verify_dentry(dentry, 0)) {
+ if (novfs_lock_inode_cache(dir)) {
+					if (novfs_lookup_inode_cache(dir, &name, 0)) {
+						dentry->d_time = jiffies +
+						    (novfs_update_timeout * HZ);
+ retCode = 1;
+ }
+ novfs_unlock_inode_cache(dir);
+ }
+ }
+ } else {
+ retCode = 1;
+ }
+ }
+ }
+
+ if ((0 == retCode) && dentry->d_inode) {
+ /*
+ * Entry has become invalid
+ */
+/* dput(dentry);
+*/
+ }
+
- DbgPrint("novfs_d_revalidate: return 0x%x %.*s\n", retCode,
++ DbgPrint("return 0x%x %.*s", retCode,
+ dentry->d_name.len, dentry->d_name.name);
+
+ return (retCode);
+}
+
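+/*
+ * Case-insensitive name hash; NetWare names compare case-insensitively
+ * (see novfs_d_strcmp), so the hash is computed over lower-cased bytes.
+ */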
+static unsigned long novfs_internal_hash(struct qstr *name)
+{
+ unsigned long hash = 0;
+ unsigned int len = name->len;
+ unsigned char *c = (unsigned char *)name->name;
+
+ while (len--) {
+ /*
+ * Lower case values for the hash.
+ */
+ hash = partial_name_hash(tolower(*c++), hash);
+ }
+
+ return (hash);
+}
+
+int novfs_d_hash(struct dentry *dentry, struct qstr *name)
+{
- DbgPrint("novfs_d_hash: %.*s\n", name->len, name->name);
++ DbgPrint("%.*s", name->len, name->name);
+
+ name->hash = novfs_internal_hash(name);
+
+ return (0);
+}
+
+int novfs_d_strcmp(struct qstr *s1, struct qstr *s2)
+{
+ int retCode = 1;
+ unsigned char *str1, *str2;
+ unsigned int len;
+
- DbgPrint("novfs_d_strcmp: s1=%.*s s2=%.*s\n", s1->len, s1->name,
++ DbgPrint("s1=%.*s s2=%.*s", s1->len, s1->name,
+ s2->len, s2->name);
+
+ if (s1->len && (s1->len == s2->len) && (s1->hash == s2->hash)) {
+ len = s1->len;
+ str1 = (unsigned char *)s1->name;
+ str2 = (unsigned char *)s2->name;
+ for (retCode = 0; len--; str1++, str2++) {
+ if (*str1 != *str2) {
+ if (tolower(*str1) != tolower(*str2)) {
+ retCode = 1;
+ break;
+ }
+ }
+ }
+ }
+
- DbgPrint("novfs_d_strcmp: retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+ return (retCode);
+}
+
+int novfs_d_compare(struct dentry *parent, struct qstr *s1, struct qstr *s2)
+{
+ int retCode;
+
+ retCode = novfs_d_strcmp(s1, s2);
+
- DbgPrint("novfs_d_compare: retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+ return (retCode);
+}
+
+int novfs_d_delete(struct dentry *dentry)
+{
+ int retVal = 0;
+
- DbgPrint("novfs_d_delete: 0x%p %.*s\n"
- " d_count: %d\n"
- " d_inode: 0x%p\n",
++ DbgPrint("0x%p %.*s; d_count: %d; d_inode: 0x%p",
+ dentry, dentry->d_name.len, dentry->d_name.name,
+ dentry->d_count, dentry->d_inode);
+
+ if (dentry->d_inode && (dentry->d_inode->i_flags & S_DEAD)) {
+ retVal = 1;
+ }
+
+ dentry->d_time = 0;
+
+ return (retVal);
+}
+
+void novfs_d_release(struct dentry *dentry)
+{
- DbgPrint("novfs_d_release: 0x%p %.*s\n", dentry, dentry->d_name.len,
++ DbgPrint("0x%p %.*s", dentry, dentry->d_name.len,
+ dentry->d_name.name);
+}
+
+void novfs_d_iput(struct dentry *dentry, struct inode *inode)
+{
- DbgPrint
- ("novfs_d_iput: Inode=0x%p Ino=%d Dentry=0x%p i_state=%d Name=%.*s\n",
++ DbgPrint("Inode=0x%p Ino=%d Dentry=0x%p i_state=%d Name=%.*s",
+ inode, inode->i_ino, dentry, inode->i_state, dentry->d_name.len,
+ dentry->d_name.name);
+
+ iput(inode);
+
+}
+
+int novfs_dir_open(struct inode *dir, struct file *file)
+{
+ char *path, *buf;
+ struct file_private *file_private = NULL;
+
- DbgPrint("novfs_dir_open: Inode 0x%p %d Name %.*s\n", dir, dir->i_ino,
++ DbgPrint("Inode 0x%p %d Name %.*s", dir, dir->i_ino,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(file->f_dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
- DbgPrint("novfs_dir_open: path %s\n", path);
++ DbgPrint("path %s", path);
+ }
+ kfree(buf);
+ }
+
+	file_private = kmalloc(sizeof(struct file_private), GFP_KERNEL);
+	if (!file_private)
+		return -ENOMEM;
+	file_private->listedall = 0;
+	file_private->enumHandle = NULL;
+
+ file->private_data = file_private;
+
+ return (0);
+}
+
+int novfs_dir_release(struct inode *dir, struct file *file)
+{
+ struct file_private *file_private = file->private_data;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct novfs_schandle sessionId;
+
- DbgPrint("novfs_dir_release: Inode 0x%p %d Name %.*s\n", dir,
- dir->i_ino, file->f_dentry->d_name.len,
- file->f_dentry->d_name.name);
++ DbgPrint("Inode 0x%p %d Name %.*s", dir, dir->i_ino,
++ file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+
+ if (file_private) {
+ if (file_private->enumHandle && (file_private->enumHandle != ((void *)-1))) {
+ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope);
+ if (SC_PRESENT(sessionId) == 0) {
+ ((struct inode_data *)inode->i_private)->Scope = novfs_get_scope(file->f_dentry);
+ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope);
+ }
+ novfs_end_directory_enumerate(file_private->enumHandle, sessionId);
+ }
+ kfree(file_private);
+ file->private_data = NULL;
+ }
+
+ return (0);
+}
+
+loff_t novfs_dir_lseek(struct file * file, loff_t offset, int origin)
+{
+ struct file_private *file_private = NULL;
+
- DbgPrint("novfs_dir_lseek: offset %lld %d Name %.*s\n", offset, origin,
++ DbgPrint("offset %lld %d Name %.*s", offset, origin,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+ //printk("<1> seekdir file = %.*s offset = %i\n", file->f_dentry->d_name.len, file->f_dentry->d_name.name, (int)offset);
+
+ if (0 != offset) {
+ return -ESPIPE;
+ }
+
+ file->f_pos = 0;
+
+ file_private = (struct file_private *) file->private_data;
+ file_private->listedall = 0;
+ if (file_private->enumHandle && (file_private->enumHandle != ((void *)-1))) {
+ struct novfs_schandle sessionId;
+ struct inode *inode = file->f_dentry->d_inode;
+ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope);
+ if (SC_PRESENT(sessionId) == 0) {
+ ((struct inode_data *)inode->i_private)->Scope = novfs_get_scope(file->f_dentry);
+ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope);
+ }
+ novfs_end_directory_enumerate(file_private->enumHandle, sessionId);
+ }
+ file_private->enumHandle = NULL;
+
+ return 0;
+ //return(default_llseek(file, offset, origin));
+}
+
+ssize_t novfs_dir_read(struct file * file, char *buf, size_t len, loff_t * off)
+{
+/*
+ int rlen = 0;
+
- DbgPrint("novfs_dir_readdir: dentry path %.*s buf=0x%p len=%d off=%lld\n", file->f_dentry->d_name.len, file->f_dentry->d_name.name, buf, len, *off);
++ DbgPrint("dentry path %.*s buf=0x%p len=%d off=%lld", file->f_dentry->d_name.len, file->f_dentry->d_name.name, buf, len, *off);
+
+ if (0 == *off)
+ {
+ rlen = 8;
+ rlen -= copy_to_user(buf, "Testing\n", 8);
+ *off += rlen;
+ }
+ return(rlen);
+*/
- DbgPrint("novfs_dir_read: %lld %d Name %.*s\n", *off, len,
++ DbgPrint("%lld %d Name %.*s", *off, len,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+ return (generic_read_dir(file, buf, len, off));
+}
+
+static void novfs_Dump_Info(struct novfs_entry_info *info)
+{
+ char atime_buf[32], mtime_buf[32], ctime_buf[32];
+ char namebuf[512];
+ int len = 0;
+
+ if (info == NULL) {
- DbgPrint("novfs_dir_readdir : Dump_Info info == NULL\n");
++ DbgPrint("Dump_Info info == NULL");
+ return;
+ }
+
+ if (info->namelength >= 512) {
+ len = 511;
+ } else {
+ len = info->namelength;
+ }
+
+ memcpy(namebuf, info->name, len);
+ namebuf[len] = '\0';
+
+ ctime_r(&info->atime.tv_sec, atime_buf);
+ ctime_r(&info->mtime.tv_sec, mtime_buf);
+ ctime_r(&info->ctime.tv_sec, ctime_buf);
- DbgPrint("novfs_dir_readdir : type = %i\n", info->type);
- DbgPrint("novfs_dir_readdir : mode = %x\n", info->mode);
- DbgPrint("novfs_dir_readdir : uid = %d\n", info->uid);
- DbgPrint("novfs_dir_readdir : gid = %d\n", info->gid);
- DbgPrint("novfs_dir_readdir : size = %i\n", info->size);
- DbgPrint("novfs_dir_readdir : atime = %s\n", atime_buf);
- DbgPrint("novfs_dir_readdir : mtime = %s\n", mtime_buf);
- DbgPrint("novfs_dir_readdir : ctime = %s\n", ctime_buf);
- DbgPrint("novfs_dir_readdir : namelength = %i\n", info->namelength);
- DbgPrint("novfs_dir_readdir : name = %s\n", namebuf);
++ DbgPrint("type = %i", info->type);
++ DbgPrint("mode = %x", info->mode);
++ DbgPrint("uid = %d", info->uid);
++ DbgPrint("gid = %d", info->gid);
++ DbgPrint("size = %i", info->size);
++ DbgPrint("atime = %s", atime_buf);
++ DbgPrint("mtime = %s", mtime_buf);
++ DbgPrint("ctime = %s", ctime_buf);
++ DbgPrint("namelength = %i", info->namelength);
++ DbgPrint("name = %s", namebuf);
+}
+
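+/*
+ * Emit a NUL-separated list of names (users, servers or volumes) as
+ * synthetic directory entries, each reported as a mode-0700 directory.
+ */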
+void processList(struct file *file, void *dirent, filldir_t filldir, char *list,
+ int type, struct novfs_schandle SessionId)
+{
+ unsigned char *path, *buf = NULL, *cp;
+ struct qstr name;
+ struct novfs_entry_info *pinfo = NULL;
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ path = buf;
+ if (buf) {
+ path = novfs_dget_path(file->f_dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ strcpy(buf, path);
+ }
+ path = buf + strlen(buf);
+ *path++ = '\\';
+ }
+
+ if (list) {
+ cp = list;
+ while (*cp) {
+ name.name = cp;
- DbgPrint("novfs_dir_readdir : name.name = %s\n",
- name.name);
++ DbgPrint("name.name = %s", name.name);
+ name.len = strlen(cp);
+ name.hash = novfs_internal_hash(&name);
+ cp += (name.len + 1);
+
+			pinfo = kmalloc(sizeof(struct novfs_entry_info) +
+					PATH_LENGTH_BUFFER, GFP_KERNEL);
+			if (!pinfo)
+				break;
+			pinfo->mode = S_IFDIR | 0700;
+ pinfo->size = 0;
+ pinfo->atime = pinfo->ctime = pinfo->mtime =
+ CURRENT_TIME;
+ strcpy(pinfo->name, name.name);
+ pinfo->namelength = name.len;
+
+ novfs_Dump_Info(pinfo);
+
+ filldir(dirent, pinfo->name, pinfo->namelength,
+ file->f_pos, file->f_pos, pinfo->mode >> 12);
+ file->f_pos += 1;
+
+ kfree(pinfo);
+ }
+ }
+
+ if (buf) {
+ kfree(buf);
+ }
+}
+
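+/*
+ * Fetch the next batch of real directory entries from the daemon via
+ * novfs_get_dir_listex() and feed them to filldir; returns non-zero once
+ * the enumeration is exhausted or fails.
+ */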
+int processEntries(struct file *file, void *dirent, filldir_t filldir,
+ void ** enumHandle, struct novfs_schandle sessionId)
+{
+ unsigned char *path = NULL, *buf = NULL;
+ int count = 0, status = 0;
+ struct novfs_entry_info *pinfo = NULL;
+ struct novfs_entry_info *pInfoMem = NULL;
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (!buf) {
+ return -ENOMEM;
+ }
+
+ path = novfs_dget_path(file->f_dentry, buf, PATH_LENGTH_BUFFER);
+ if (!path) {
+ kfree(buf);
+ return -ENOMEM;
+ }
+ //NWSearchfiles
+ count = 0;
+ status =
+ novfs_get_dir_listex(path, enumHandle, &count, &pinfo,
+ sessionId);
+ pInfoMem = pinfo;
+
+ if ((count == -1) || (count == 0) || (status != 0)) {
+ kfree(pInfoMem);
+ kfree(buf);
+ return -1;
+ }
+ // parse resultset
+ while (pinfo && count--) {
+ filldir(dirent, pinfo->name, pinfo->namelength, file->f_pos,
+ file->f_pos, pinfo->mode >> 12);
+ file->f_pos += 1;
+
+ pinfo = (struct novfs_entry_info *) (pinfo->name + pinfo->namelength);
+ }
+
+ kfree(pInfoMem);
+ kfree(buf);
+ return 0;
+}
+
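+/*
+ * readdir: "." and ".." are synthesized first.  The top three levels of
+ * the tree (root, user, server) are built from scope lists through
+ * processList(); deeper directories are enumerated from the daemon in
+ * batches through processEntries().
+ */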
+int novfs_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+ unsigned char *list = NULL;
+ int status = 0; //-ENOMEM;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct novfs_schandle sessionId;
+ uid_t uid;
+ int type = 0;
+ struct file_private *file_private = NULL;
+ int lComm;
+
+ file_private = (struct file_private *) file->private_data;
- DbgPrint("novfs_dir_readdir: Name %.*s\n", file->f_dentry->d_name.len,
++ DbgPrint("Name %.*s", file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+
+ //printk("<1> file = %.*s\n", file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+
+// Use this hack by default
+#ifndef SKIP_CROSSOVER_HACK
+ // Hack for crossover - begin
+ down(&TimeDir_Lock);
+ if ((file->f_dentry->d_name.len == 7) &&
+ ((0 == strncmp(file->f_dentry->d_name.name, " !xover", 7)) ||
+ (0 == strncmp(file->f_dentry->d_name.name, "z!xover", 7)))) {
+ //printk("<1> xoverhack: we are in xoverHack\n");
+
+ inHAX = 1;
+ inHAXTime = get_nanosecond_time();
+ //up( &TimeDir_Lock );
+ //return 0;
+ file_private->listedall = 1;
+ } else {
+ if (inHAX) {
+ if (get_nanosecond_time() - inHAXTime >
+ 100 * 1000 * 1000) {
+ //printk("<1> xoverhack: it was long, long, long ago...\n");
+ inHAX = 0;
+ } else {
+ //printk("<1> xoverhack: word gotcha in xoverHack...\n");
+ inHAXTime = get_nanosecond_time();
+ //up( &TimeDir_Lock );
+ //return 0;
+ file_private->listedall = 1;
+ }
+ }
+ }
+
+ up(&TimeDir_Lock);
+ // Hack for crossover - end
+#endif
+
+ if (file->f_pos == 0) {
+ if (filldir(dirent, ".", 1, file->f_pos, inode->i_ino, DT_DIR) <
+ 0)
+ return 1;
+ file->f_pos++;
+ return 1;
+ }
+
+ if (file->f_pos == 1) {
+		if (filldir(dirent, "..", 2, file->f_pos,
+			    file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0)
+ return 1;
+ file->f_pos++;
+ return 1;
+ }
+
+ if (file_private->listedall != 0) {
+ return 0;
+ }
+
+ inode = file->f_dentry->d_inode;
+ if (inode && inode->i_private) {
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->
+ Scope);
+ if (0 == SC_PRESENT(sessionId)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(file->f_dentry);
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ }
+ uid = novfs_scope_get_uid(((struct inode_data *) inode->i_private)->Scope);
+ } else {
+ SC_INITIALIZE(sessionId);
- uid = current->euid;
++ uid = current_euid();
+ }
+
+ if (IS_ROOT(file->f_dentry) || // Root
+ IS_ROOT(file->f_dentry->d_parent) || // User
+ IS_ROOT(file->f_dentry->d_parent->d_parent)) // Server
+ {
+ if (IS_ROOT(file->f_dentry)) {
- DbgPrint("novfs_dir_readdir: Root directory\n");
++ DbgPrint("Root directory");
+ list = novfs_get_scopeusers();
+ type = USER_LIST;
+ } else if (IS_ROOT(file->f_dentry->d_parent)) {
- DbgPrint
- ("novfs_dir_readdir: Parent is Root directory\n");
++ DbgPrint("Parent is Root directory");
+ novfs_get_servers(&list, sessionId);
+ type = SERVER_LIST;
+ } else {
- DbgPrint
- ("novfs_dir_readdir: Parent-Parent is Root directory\n");
++ DbgPrint("Parent-Parent is Root directory");
+ novfs_get_vols(&file->f_dentry->d_name,
+ &list, sessionId);
+ type = VOLUME_LIST;
+ }
+
+ processList(file, dirent, filldir, list, type, sessionId);
+ file_private->listedall = 1;
+ } else {
+ status =
+ processEntries(file, dirent, filldir,
+ &file_private->enumHandle, sessionId);
+
+ if (status != 0) {
+ file_private->listedall = 1;
+#ifndef SKIP_CROSSOVER_HACK
+ // Hack for crossover part 2 - begin
+ lComm = strlen(current->comm);
+ if ((lComm > 4)
+ && (0 ==
+ strcmp(current->comm + lComm - 4, ".EXE"))) {
+				if (filldir(dirent, " !xover", 7, file->f_pos,
+					    inode->i_ino, DT_DIR) < 0)
+					return 1;
+				if (filldir(dirent, "z!xover", 7, file->f_pos,
+					    inode->i_ino, DT_DIR) < 0)
+ return 1;
+ file->f_pos += 2;
+ }
+ // Hack for crossover part2 - end
+#endif
+ }
+ }
+
+ file->private_data = file_private;
+ return 1;
+}
+
+int novfs_dir_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
- DbgPrint("novfs_dir_fsync: Name %.*s\n", file->f_dentry->d_name.len,
++ DbgPrint("Name %.*s", file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+ return (simple_sync_file(file, dentry, datasync));
+}
+
+ssize_t novfs_f_read(struct file * file, char *buf, size_t len, loff_t * off)
+{
+ size_t thisread, totalread = 0;
+ loff_t offset = *off;
+ struct inode *inode;
+ struct novfs_schandle session;
+ struct inode_data *id;
+
+ if (file->f_dentry &&
+ (inode = file->f_dentry->d_inode) &&
+ (id = (struct inode_data *) inode->i_private)) {
+
- DbgPrint("novfs_f_read(0x%p 0x%p %d %lld %.*s)\n",
++ DbgPrint("(0x%p 0x%p %d %lld %.*s)",
+ file->private_data,
+ buf, len, offset,
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+
+ if (novfs_page_cache && !(file->f_flags & O_DIRECT) && id->CacheFlag) {
+ totalread = do_sync_read(file, buf, len, off);
+ } else {
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope =
+ novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ while (len > 0 && (offset < i_size_read(inode))) {
+ int retval;
+ thisread = len;
+ retval =
+ novfs_read_file(file->private_data, buf,
+ &thisread, &offset,
+ session);
+ if (retval || !thisread) {
+ if (retval) {
+ totalread = retval;
+ }
+ break;
+ }
- DbgPrint("novfs_f_read thisread = 0x%x\n",
- thisread);
++ DbgPrint("thisread = 0x%x", thisread);
+ len -= thisread;
+ buf += thisread;
+ offset += thisread;
+ totalread += thisread;
+ }
+ *off = offset;
+ }
+ }
- DbgPrint("novfs_f_read return = %d\n", totalread);
++ DbgPrint("return = %d", totalread);
+
+ return (totalread);
+}
+
+ssize_t novfs_f_write(struct file * file, const char *buf, size_t len,
+ loff_t * off)
+{
+ ssize_t thiswrite, totalwrite = 0;
+ loff_t offset = *off;
+ struct novfs_schandle session;
+ struct inode *inode;
+ int status;
+ struct inode_data *id;
+
+ if (file->f_dentry &&
+ (inode = file->f_dentry->d_inode) &&
+ (id = file->f_dentry->d_inode->i_private)) {
- DbgPrint("novfs_f_write(0x%p 0x%p 0x%p %d %lld %.*s)\n",
++ DbgPrint("(0x%p 0x%p 0x%p %d %lld %.*s)",
+ file->private_data, inode, id->FileHandle, len, offset,
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+
+ if (novfs_page_cache &&
+ !(file->f_flags & O_DIRECT) &&
+ id->CacheFlag && !(file->f_flags & O_WRONLY)) {
+ totalwrite = do_sync_write(file, buf, len, off);
+ } else {
+ if (file->f_flags & O_APPEND) {
+ offset = i_size_read(inode);
- DbgPrint
- ("novfs_f_write appending to end %lld %.*s\n",
++ DbgPrint("appending to end %lld %.*s",
+ offset, file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+ }
+
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope =
+ novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ while (len > 0) {
+ thiswrite = len;
+ if ((status =
+ novfs_write_file(file->private_data,
+ (unsigned char *)buf,
+ &thiswrite, &offset,
+ session)) || !thiswrite) {
+ totalwrite = status;
+ break;
+ }
- DbgPrint("novfs_f_write thiswrite = 0x%x\n",
++ DbgPrint("thiswrite = 0x%x",
+ thiswrite);
+ len -= thiswrite;
+ buf += thiswrite;
+ offset += thiswrite;
+ totalwrite += thiswrite;
+ if (offset > i_size_read(inode)) {
+ i_size_write(inode, offset);
+ inode->i_blocks =
+ (offset + inode->i_sb->s_blocksize -
+ 1) >> inode->i_blkbits;
+ }
+ inode->i_mtime = inode->i_atime = CURRENT_TIME;
+ id->Flags |= UPDATE_INODE;
+
+ }
+ *off = offset;
+ }
+ }
- DbgPrint("novfs_f_write return = 0x%x\n", totalwrite);
++ DbgPrint("return = 0x%x", totalwrite);
+
+ return (totalwrite);
+}
+
+int novfs_f_readdir(struct file *file, void *data, filldir_t fill)
+{
+ return -EISDIR;
+}
+
+int novfs_f_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
- DbgPrint("novfs_f_ioctl: file=0x%p cmd=0x%x arg=0x%p\n", file, cmd,
- arg);
++ DbgPrint("file=0x%p cmd=0x%x arg=0x%p", file, cmd, arg);
+
+ return -ENOSYS;
+}
+
+int novfs_f_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int retCode = -EINVAL;
+
- DbgPrint("novfs_f_mmap: file=0x%p %.*s\n", file,
- file->f_dentry->d_name.len, file->f_dentry->d_name.name);
++ DbgPrint("file=0x%p %.*s", file, file->f_dentry->d_name.len,
++ file->f_dentry->d_name.name);
+
+ retCode = generic_file_mmap(file, vma);
+
- DbgPrint("novfs_f_mmap: retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+ return (retCode);
+}
+
+int novfs_f_open(struct inode *inode, struct file *file)
+{
+ struct novfs_entry_info *info = NULL;
+ int retCode = -ENOENT;
+ struct novfs_schandle session;
+ char *path;
+ struct dentry *parent;
+ ino_t ino;
+ struct inode_data *id;
+ int errInfo;
+
- DbgPrint
- ("novfs_f_open: inode=0x%p file=0x%p dentry=0x%p dentry->d_inode=0x%p %.*s\n",
- inode, file, file->f_dentry, file->f_dentry->d_inode,
++ DbgPrint("inode=0x%p file=0x%p dentry=0x%p dentry->d_inode=0x%p %.*s",
++ inode, file, file->f_dentry, file->f_dentry->d_inode,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+ if (file->f_dentry) {
- DbgPrint
- ("novfs_f_open: %.*s f_flags=0%o f_mode=0%o i_mode=0%o\n",
- file->f_dentry->d_name.len, file->f_dentry->d_name.name,
- file->f_flags, file->f_mode, inode->i_mode);
++ DbgPrint("%.*s f_flags=0%o f_mode=0%o i_mode=0%o",
++ file->f_dentry->d_name.len,
++ file->f_dentry->d_name.name,
++ file->f_flags, file->f_mode, inode->i_mode);
+ }
+
+ if (inode && inode->i_private) {
+ id = (struct inode_data *) file->f_dentry->d_inode->i_private;
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope = novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ info = kmalloc(sizeof(struct novfs_entry_info) +
+ PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (info) {
+ path =
+ novfs_dget_path(file->f_dentry, info->name,
+ PATH_LENGTH_BUFFER);
+ if (path) {
+ if (file->f_flags & O_TRUNC) {
+ errInfo =
+ novfs_get_file_info(path, info,
+ session);
+
+ if (errInfo || info->size == 0) {
+ // clear O_TRUNC flag, bug #275366
+ file->f_flags =
+ file->f_flags & (~O_TRUNC);
+ }
+ }
+
- DbgPrint("novfs_f_open: %s\n", path);
++ DbgPrint("%s", path);
+				retCode = novfs_open_file(path,
+							  file->f_flags & ~O_EXCL,
+							  info, &file->private_data,
+							  session);
+
- DbgPrint("novfs_f_open: 0x%x 0x%p\n", retCode,
++ DbgPrint("0x%x 0x%p", retCode,
+ file->private_data);
+ if (!retCode) {
+ /*
+ *update_inode(inode, &info);
+ */
+ //id->FileHandle = file->private_data;
+ id->CacheFlag =
+ novfs_get_file_cache_flag(path,
+ session);
+
+				if (!novfs_get_file_info(path, info,
+							 session)) {
+					update_inode(inode, info);
+				}
+
+ parent = dget_parent(file->f_dentry);
+
+ if (parent && parent->d_inode) {
+ struct inode *dir =
+ parent->d_inode;
+ novfs_lock_inode_cache(dir);
+ ino = 0;
+					if (novfs_get_entry(dir,
+							    &file->f_dentry->d_name,
+							    &ino, info)) {
+						((struct inode_data *)inode->i_private)->Flags |= UPDATE_INODE;
+					}
+
+ novfs_unlock_inode_cache(dir);
+ }
+ dput(parent);
+ }
+ }
+ kfree(info);
+ }
+ }
- DbgPrint("novfs_f_open: retCode=0x%x\n", retCode);
++ DbgPrint("retCode=0x%x", retCode);
+ return (retCode);
+}
+
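+/*
+ * Flush dirty pages for mappings without a usable ->writepage: walk the
+ * dirty tag with pagevec_lookup_tag(), write each page to the daemon with
+ * novfs_write_page() and clear its dirty tag on success.
+ */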
+int novfs_flush_mapping(void *Handle, struct address_space *mapping,
+ struct novfs_schandle Session)
+{
+ struct pagevec pagevec;
+ unsigned nrpages;
+ pgoff_t index = 0;
+ int done, rc = 0;
+
+ pagevec_init(&pagevec, 0);
+
+ do {
+ done = 1;
+ nrpages = pagevec_lookup_tag(&pagevec,
+ mapping,
+ &index,
+ PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE);
+
+ if (nrpages) {
+ struct page *page;
+ int i;
+
- DbgPrint("novfs_flush_mapping: %u\n", nrpages);
++ DbgPrint("%u", nrpages);
+
+ done = 0;
+ for (i = 0; !rc && (i < nrpages); i++) {
+ page = pagevec.pages[i];
+
- DbgPrint("novfs_flush_mapping: page 0x%p %lu\n",
- page, page->index);
++ DbgPrint("page 0x%p %lu", page, page->index);
+
+ lock_page(page);
+ page_cache_get(page);
+ if (page->mapping == mapping) {
+ if (clear_page_dirty_for_io(page)) {
+ rc = novfs_write_page(Handle,
+ page,
+ Session);
+ if (!rc) {
+ //ClearPageDirty(page);
+						radix_tree_tag_clear(&mapping->page_tree,
+								     page_index(page),
+								     PAGECACHE_TAG_DIRTY);
+ }
+ }
+ }
+
+ page_cache_release(page);
+ unlock_page(page);
+ }
+ pagevec_release(&pagevec);
+ }
+ } while (!rc && !done);
+
- DbgPrint("novfs_flush_mapping: return %d\n", rc);
++ DbgPrint("return %d", rc);
+
+ return (rc);
+}
+
+int novfs_f_flush(struct file *file, fl_owner_t ownid)
+{
+
+ int rc = 0;
+#ifdef FLUSH
+ struct inode *inode;
+ struct novfs_schandle session;
+ struct inode_data *id;
+
- DbgPrint("novfs_f_flush: Called from 0x%p\n",
- __builtin_return_address(0));
++ DbgPrint("Called from 0x%p", __builtin_return_address(0));
+ if (file->f_dentry && (inode = file->f_dentry->d_inode)
+ && (id = file->f_dentry->d_inode->i_private)) {
+
+ if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
+ inode = file->f_dentry->d_inode;
- DbgPrint
- ("novfs_f_flush: %.*s f_flags=0%o f_mode=0%o i_mode=0%o\n",
- file->f_dentry->d_name.len,
- file->f_dentry->d_name.name, file->f_flags,
- file->f_mode, inode->i_mode);
++ DbgPrint("%.*s f_flags=0%o f_mode=0%o i_mode=0%o",
++ file->f_dentry->d_name.len,
++ file->f_dentry->d_name.name, file->f_flags,
++ file->f_mode, inode->i_mode);
+
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope =
+ novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ if (inode &&
+ inode->i_mapping && inode->i_mapping->nrpages) {
+
- DbgPrint("novfs_f_flush: %.*s pages=%lu\n",
++ DbgPrint("%.*s pages=%lu",
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name,
+ inode->i_mapping->nrpages);
+
+ if (file->f_dentry &&
+ file->f_dentry->d_inode &&
+ file->f_dentry->d_inode->i_mapping &&
+ file->f_dentry->d_inode->i_mapping->a_ops &&
+				    file->f_dentry->d_inode->i_mapping->a_ops->writepage) {
+					rc = filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
+				} else {
+					rc = novfs_flush_mapping(file->private_data,
+								 file->f_dentry->d_inode->i_mapping,
+								 session);
+ }
+ }
+ }
+ }
+#endif
+ return (rc);
+}
+
+int novfs_f_release(struct inode *inode, struct file *file)
+{
+ int retCode = -EACCES;
+ struct novfs_schandle session;
+ struct inode_data *id;
+
- DbgPrint("novfs_f_release: path=%.*s handle=%p\n",
++ DbgPrint("path=%.*s handle=%p",
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name, file->private_data);
+
+ if (inode && (id = inode->i_private)) {
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope = novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
- DbgPrint
- ("novfs_f_release: %.*s f_flags=0%o f_mode=0%o i_mode=0%o\n",
++ DbgPrint("%.*s f_flags=0%o f_mode=0%o i_mode=0%o",
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name, file->f_flags,
+ file->f_mode, inode->i_mode);
+
+ if (inode->i_mapping && inode->i_mapping->nrpages) {
+
- DbgPrint("novfs_f_release: %.*s pages=%lu\n",
++ DbgPrint("%.*s pages=%lu",
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name,
+ inode->i_mapping->nrpages);
+
+ if (inode->i_mapping->a_ops &&
+ inode->i_mapping->a_ops->writepage) {
+ filemap_fdatawrite(file->f_dentry->
+ d_inode->i_mapping);
+ } else {
+ novfs_flush_mapping(file->private_data,
+ file->f_dentry->
+ d_inode->i_mapping,
+ session);
+ }
+ }
+ }
+
+ if (file->f_dentry && file->f_dentry->d_inode) {
+ invalidate_remote_inode(file->f_dentry->d_inode);
+ }
+
+ retCode = novfs_close_file(file->private_data, session);
+ //id->FileHandle = 0;
+ }
+ return (retCode);
+}
+
+int novfs_f_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ return 0;
+}
+
+int novfs_f_llseek(struct file *file, loff_t offset, int origin)
+{
- DbgPrint("novfs_f_llseek: File=0x%p Name=%.*s offset=%lld origin=%d\n",
++ DbgPrint("File=0x%p Name=%.*s offset=%lld origin=%d",
+ file, file->f_dentry->d_name.len, file->f_dentry->d_name.name,
+ offset, origin);
+ return (generic_file_llseek(file, offset, origin));
+}
+
+/*++======================================================================*/
+int novfs_f_lock(struct file *file, int cmd, struct file_lock *lock)
+/*
+ * Arguments:
+ * "file" - pointer to file structure - contains file handle in "file->private_data"
+ *
+ * "cmd" could be F_SETLK, F_SETLKW, F_GETLK
+ *	F_SETLK/F_SETLKW are for setting/unsetting a file lock
+ *	F_GETLK is for getting information about a region - whether it is locked or not
+ *
+ * "lock" structure - contains "start" and "end" of locking region
+ *
+ * Returns:
+ * 0 on success
+ * -ENOSYS on F_GETLK cmd. It's not implemented.
+ * -EINVAL if (lock->fl_start > lock->fl_end)
+ * -EAGAIN on all other errors
+ * Abstract:
+ *
+ * Notes:
+ * "lock->fl_start" and "lock->fl_end" are of type "long long",
+ *	but the xtier functions in novfsd, "NCFsdLockFile" and "NCFsdUnlockFile",
+ *	take their arguments as u64.
+ *
+ *
+ *========================================================================*/
+{
+ int err_code;
+
+ struct inode *inode;
+ struct novfs_schandle session;
+ struct inode_data *id;
+ loff_t len;
+
- DbgPrint("novfs_f_lock(0x%p): begin in novfs_f_lock 0x%p\n",
++ DbgPrint("(0x%p): begin in novfs_f_lock 0x%p",
+ __builtin_return_address(0), file->private_data);
- DbgPrint
- ("novfs_f_lock: cmd = %d, F_GETLK = %d, F_SETLK = %d, F_SETLKW = %d\n",
- cmd, F_GETLK, F_SETLK, F_SETLKW);
- DbgPrint
- ("novfs_f_lock: lock->fl_start = 0x%llX, lock->fl_end = 0x%llX\n",
- lock->fl_start, lock->fl_end);
++ DbgPrint("cmd = %d, F_GETLK = %d, F_SETLK = %d, F_SETLKW = %d",
++ cmd, F_GETLK, F_SETLK, F_SETLKW);
++ DbgPrint("lock->fl_start = 0x%llX, lock->fl_end = 0x%llX",
++ lock->fl_start, lock->fl_end);
+
+ err_code = -1;
+ if (lock->fl_start <= lock->fl_end) {
+ /* Get len from "start" and "end" */
+ len = lock->fl_end - lock->fl_start + 1;
+ if ((0 == lock->fl_start) && (OFFSET_MAX == lock->fl_end)) {
+ len = 0;
+ }
+
+ if (file->f_dentry &&
+ (inode = file->f_dentry->d_inode) &&
+ (id = (struct inode_data *) inode->i_private)) {
- DbgPrint("novfs_f_lock: (0x%p 0x%p %.*s)\n",
++ DbgPrint("(0x%p 0x%p %.*s)",
+ file->private_data, inode,
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
+
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope =
+ novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ /* fl_type = F_RDLCK, F_WRLCK, F_UNLCK */
+ switch (cmd) {
+ case F_SETLK:
+#ifdef F_GETLK64
+ case F_SETLK64:
+#endif
+
+ err_code =
+ novfs_set_file_lock(session,
+ file->private_data,
+ lock->fl_type,
+ lock->fl_start, len);
+ break;
+
+ case F_SETLKW:
+#ifdef F_GETLK64
+ case F_SETLKW64:
+#endif
+ err_code =
+ novfs_set_file_lock(session,
+ file->private_data,
+ lock->fl_type,
+ lock->fl_start, len);
+ break;
+
+ case F_GETLK:
+#ifdef F_GETLK64
+ case F_GETLK64:
+#endif
+ err_code = -ENOSYS;
+ /*
+				 * Not implemented. We don't have an appropriate
+				 * xtier function.
+				 */
+ break;
+
+ default:
+ printk
+ ("<1> novfs in novfs_f_lock, not implemented cmd = %d\n",
+ cmd);
- DbgPrint
- ("novfs_f_lock: novfs in novfs_f_lock, not implemented cmd = %d\n",
++ DbgPrint("novfs in novfs_f_lock, not implemented cmd = %d",
+ cmd);
+ break;
+ }
+ }
+
- DbgPrint("novfs_f_lock: lock->fl_type = %u, err_code 0x%X\n",
++ DbgPrint("lock->fl_type = %u, err_code 0x%X",
+ lock->fl_type, err_code);
+
+ if ((err_code != 0) && (err_code != -1)
+ && (err_code != -ENOSYS)) {
+ err_code = -EAGAIN;
+ }
+ } else {
+ err_code = -EINVAL;
+ }
+
+ return (err_code);
+}
+
+/*++======================================================================*/
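+/*
+ * Copy a daemon read reply into the page cache: each page taken from the
+ * readahead list is filled from "data" (the tail of a partial page is
+ * zeroed), marked uptodate, unlocked and batched onto the LRU pagevec.
+ */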
+static void novfs_copy_cache_pages(struct address_space *mapping,
+ struct list_head *pages, int bytes_read,
+ char *data, struct pagevec *plru_pvec)
+{
+ struct page *page;
+ char *target;
+
+ while (bytes_read > 0) {
+ if (list_empty(pages))
+ break;
+
+ page = list_entry(pages->prev, struct page, lru);
+ list_del(&page->lru);
+
+ if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+ page_cache_release(page);
+ data += PAGE_CACHE_SIZE;
+ bytes_read -= PAGE_CACHE_SIZE;
+ continue;
+ }
+
+ target = kmap_atomic(page, KM_USER0);
+
+ if (PAGE_CACHE_SIZE > bytes_read) {
+ memcpy(target, data, bytes_read);
+ /* zero the tail end of this partial page */
+ memset(target + bytes_read, 0,
+ PAGE_CACHE_SIZE - bytes_read);
+ bytes_read = 0;
+ } else {
+ memcpy(target, data, PAGE_CACHE_SIZE);
+ bytes_read -= PAGE_CACHE_SIZE;
+ }
+ kunmap_atomic(target, KM_USER0);
+
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ unlock_page(page);
+ if (!pagevec_add(plru_pvec, page))
- __pagevec_lru_add(plru_pvec);
++ __pagevec_lru_add_file(plru_pvec);
+ data += PAGE_CACHE_SIZE;
+ }
+ return;
+}
+
+int novfs_a_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int retCode = -EFAULT;
+ struct inode *inode = page->mapping->host;
+ struct inode_data *id = inode->i_private;
+ loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT);
+ struct novfs_schandle session;
+ struct novfs_data_list dlst[2];
+ size_t len = PAGE_CACHE_SIZE;
+
+ session = novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->Scope);
+
+ page_cache_get(page);
+
+ pos = ((loff_t) page->index << PAGE_CACHE_SHIFT);
+
+ /*
+ * Leave first dlst entry for reply header.
+ */
+ dlst[1].page = page;
+ dlst[1].offset = NULL;
+ dlst[1].len = len;
+ dlst[1].rwflag = DLREAD;
+
+ /*
+	 * Check size so we don't write past the end of file.
+ */
+ if ((pos + (loff_t) len) > i_size_read(inode)) {
+ len = (size_t) (i_size_read(inode) - pos);
+ }
+
+ retCode = novfs_write_pages(id->FileHandle, dlst, 2, len, pos, session);
+ if (!retCode) {
+ SetPageUptodate(page);
+ }
+
+ unlock_page(page);
+ page_cache_release(page);
+
+ return (retCode);
+}
+
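+/*
+ * Write back batches of dirty pages. Dirty pages are found with a tagged
+ * radix-tree lookup, runs of contiguous indexes are pushed to the server
+ * in a single novfs_write_pages() request, and each page is unlocked and
+ * released as the request completes.
+ */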
+int novfs_a_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ int retCode = 0;
+ struct inode *inode = mapping->host;
+ struct novfs_schandle session;
+ void *fh = NULL;
+ struct inode_data *id = NULL;
+
+ int max_page_lookup = novfs_max_iosize / PAGE_CACHE_SIZE;
+
+ struct novfs_data_list *dlist, *dlptr;
+ struct page **pages;
+
+ int dlist_idx, i = 0;
+ pgoff_t index, next_index = 0;
+ loff_t pos = 0;
+ size_t tsize;
+
+ SC_INITIALIZE(session);
- DbgPrint
- ("novfs_a_writepages: inode=0x%p mapping=0x%p wbc=0x%p nr_to_write=%d\n",
++ DbgPrint("inode=0x%p mapping=0x%p wbc=0x%p nr_to_write=%d",
+ inode, mapping, wbc, wbc->nr_to_write);
+
+ if (inode) {
- DbgPrint(" Inode=0x%p Ino=%d Id=0x%p\n", inode, inode->i_ino,
++ DbgPrint("Inode=0x%p Ino=%d Id=0x%p", inode, inode->i_ino,
+ inode->i_private);
+
+ if (NULL != (id = inode->i_private)) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ fh = ((struct inode_data *) inode->i_private)->FileHandle;
+ }
+ }
+
+ dlist = kmalloc(sizeof(struct novfs_data_list) * max_page_lookup, GFP_KERNEL);
+ pages =
+ kmalloc(sizeof(struct page *) * max_page_lookup, GFP_KERNEL);
+
+ if (id)
- DbgPrint
- ("novfs_a_writepages: inode=0x%p fh=0x%p dlist=0x%p pages=0x%p %s\n",
++ DbgPrint("inode=0x%p fh=0x%p dlist=0x%p pages=0x%p %s",
+ inode, fh, dlist, pages, id->Name);
+ else
- DbgPrint
- ("novfs_a_writepages: inode=0x%p fh=0x%p dlist=0x%p pages=0x%p\n",
++ DbgPrint("inode=0x%p fh=0x%p dlist=0x%p pages=0x%p",
+ inode, fh, dlist, pages);
+
+ if (dlist && pages) {
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ int done = 0;
+ int nr_pages = 0;
+ int scanned = 0;
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ kfree(dlist);
+ kfree(pages);
+ return 0;
+ }
+
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ } else {
+ index = 0; /* whole-file sweep */
+ scanned = 1;
+ }
+
+ next_index = index;
+
+ while (!done && (wbc->nr_to_write > 0)) {
+ dlist_idx = 0;
+ dlptr = &dlist[1];
+
- DbgPrint("novfs_a_writepages1: nr_pages=%d\n",
- nr_pages);
++ DbgPrint("nr_pages=%d", nr_pages);
+ if (!nr_pages) {
+ memset(pages, 0,
+ sizeof(struct page *) * max_page_lookup);
+
+ spin_lock_irq(&mapping->tree_lock);
+
+ /*
+ * Need to ask for one less than max_page_lookup or we
+ * will overflow the request buffer. This also leaves
+ * the first entry free for the reply buffer.
+ */
+ nr_pages =
+ radix_tree_gang_lookup_tag(&mapping->
+ page_tree,
+ (void **)pages,
+ index,
+ max_page_lookup -
+ 1,
+ PAGECACHE_TAG_DIRTY);
+
- DbgPrint("novfs_a_writepages2: nr_pages=%d\n",
- nr_pages);
++ DbgPrint("2; nr_pages=%d\n", nr_pages);
+ /*
+ * Check to see if there are dirty pages and there is a valid
+ * file handle.
+ */
+ if (nr_pages && !fh) {
+ set_bit(AS_EIO, &mapping->flags);
+ done = 1;
- DbgPrint
- ("novfs_a_writepage: set_bit AS_EIO\n");
++ DbgPrint("set_bit AS_EIO");
+ break;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page_cache_get(pages[i]);
+ }
+
+ spin_unlock_irq(&mapping->tree_lock);
+
+ if (nr_pages) {
+ index = pages[nr_pages - 1]->index + 1;
+ pos =
+ (loff_t) pages[0]->
+ index << PAGE_CACHE_SHIFT;
+ }
+
+ if (!nr_pages) {
+ if (scanned) {
+ index = 0;
+ scanned = 0;
+ continue;
+ }
+ done = 1;
+ } else {
+ next_index = pages[0]->index;
+ i = 0;
+ }
+ } else {
+ if (pages[i]) {
+ pos =
+ (loff_t) pages[i]->
+ index << PAGE_CACHE_SHIFT;
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ struct page *page = pages[i];
+
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+
- DbgPrint
- ("novfs_a_writepages: pos=0x%llx index=%d page->index=%d next_index=%d\n",
++ DbgPrint("pos=0x%llx index=%d page->index=%d next_index=%d",
+ pos, index, page->index, next_index);
+
+ if (page->index != next_index) {
+ next_index = page->index;
+ break;
+ }
+ next_index = page->index + 1;
+
+ lock_page(page);
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (page->mapping != mapping
+ || PageWriteback(page)
+ || !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ dlptr[dlist_idx].page = page;
+ dlptr[dlist_idx].offset = NULL;
+ dlptr[dlist_idx].len = PAGE_CACHE_SIZE;
+ dlptr[dlist_idx].rwflag = DLREAD;
+ dlist_idx++;
- DbgPrint
- ("novfs_a_writepages: Add page=0x%p index=0x%lx\n",
++ DbgPrint("Add page=0x%p index=0x%lx",
+ page, page->index);
+ }
+
- DbgPrint("novfs_a_writepages: dlist_idx=%d\n",
- dlist_idx);
++ DbgPrint("dlist_idx=%d", dlist_idx);
+ if (dlist_idx) {
+ tsize = dlist_idx * PAGE_CACHE_SIZE;
+ /*
+ * Check size so we don't write past the end of the file.
+ */
+ if ((pos + tsize) > i_size_read(inode)) {
+ tsize =
+ (size_t) (i_size_read(inode) - pos);
+ }
+
+ retCode =
+ novfs_write_pages(fh, dlist, dlist_idx + 1,
+ tsize, pos, session);
+ switch (retCode) {
+ case 0:
+ wbc->nr_to_write -= dlist_idx;
+ break;
+
+ case -ENOSPC:
+ set_bit(AS_ENOSPC, &mapping->flags);
+ done = 1;
+ break;
+
+ default:
+ set_bit(AS_EIO, &mapping->flags);
+ done = 1;
+ break;
+ }
+
+ do {
+ unlock_page((struct page *)
+ dlptr[dlist_idx - 1].page);
+ page_cache_release((struct page *)
+ dlptr[dlist_idx -
+ 1].page);
- DbgPrint
- ("novfs_a_writepages: release page=0x%p index=0x%lx\n",
++ DbgPrint("release page=0x%p index=0x%lx",
+ dlptr[dlist_idx - 1].page,
+ ((struct page *)
+ dlptr[dlist_idx -
+ 1].page)->index);
+ if (!retCode) {
+ wbc->nr_to_write--;
+ }
+ } while (--dlist_idx);
+ }
+
+ if (i >= nr_pages) {
+ nr_pages = 0;
+ }
+ }
+
+ mapping->writeback_index = index;
+
+ } else {
- DbgPrint("novfs_a_writepage: set_bit AS_EIO\n");
++ DbgPrint("set_bit AS_EIO");
+ set_bit(AS_EIO, &mapping->flags);
+ }
+ if (dlist)
+ kfree(dlist);
+ if (pages)
+ kfree(pages);
+
- DbgPrint("novfs_a_writepage: retCode=%d\n", retCode);
++ DbgPrint("retCode=%d", retCode);
+ return (0);
+
+}
+
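+/*
+ * Read a single page from the server. A short read zero-fills the rest
+ * of the page before it is marked up to date.
+ */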
+int novfs_a_readpage(struct file *file, struct page *page)
+{
+ int retCode = 0;
+ void *pbuf;
+ struct inode *inode = NULL;
+ struct dentry *dentry = NULL;
+ loff_t offset;
+ size_t len;
+ struct novfs_schandle session;
+
+ SC_INITIALIZE(session);
- DbgPrint("novfs_a_readpage: File=0x%p Name=%.*s Page=0x%p", file,
++ DbgPrint("File=0x%p Name=%.*s Page=0x%p", file,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name, page);
+
+ dentry = file->f_dentry;
+
+ if (dentry) {
- DbgPrint(" Dentry=0x%p Name=%.*s", dentry, dentry->d_name.len,
++ DbgPrint("Dentry=0x%p Name=%.*s", dentry, dentry->d_name.len,
+ dentry->d_name.name);
+ if (dentry->d_inode) {
+ inode = dentry->d_inode;
+ }
+ }
+
+ if (inode) {
- DbgPrint(" Inode=0x%p Ino=%d", inode, inode->i_ino);
++ DbgPrint("Inode=0x%p Ino=%d", inode, inode->i_ino);
+
+ if (inode->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(file->f_dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ }
+ }
+ }
+
- DbgPrint("\n");
-
+ if (!PageUptodate(page)) {
+ struct novfs_data_list dlst[2];
+
+ offset = (loff_t) page->index << PAGE_CACHE_SHIFT;
+ len = PAGE_CACHE_SIZE;
+
+ /*
+ * Save the first entry for the reply header.
+ */
+ dlst[1].page = page;
+ dlst[1].offset = NULL;
+ dlst[1].len = PAGE_CACHE_SIZE;
+ dlst[1].rwflag = DLWRITE;
+
- DbgPrint("novfs_a_readpage: calling= novfs_Read_Pages %lld\n",
++ DbgPrint("calling= novfs_Read_Pages %lld",
+ offset);
+ retCode =
+ novfs_read_pages(file->private_data, dlst, 2, &len, &offset,
+ session);
+ if (len && (len < PAGE_CACHE_SIZE)) {
+ pbuf = kmap_atomic(page, KM_USER0);
+ memset(&((char *)pbuf)[len], 0, PAGE_CACHE_SIZE - len);
+ kunmap_atomic(pbuf, KM_USER0);
+ }
+
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+
- DbgPrint("novfs_a_readpage: retCode=%d\n", retCode);
++ DbgPrint("retCode=%d", retCode);
+ return (retCode);
+
+}
+
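+/*
+ * Readahead: coalesce contiguous pages from the readahead list into one
+ * server read of at most novfs_max_iosize bytes, then distribute the data
+ * to the page cache with novfs_copy_cache_pages().
+ */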
+int novfs_a_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *page_lst, unsigned nr_pages)
+{
+ int retCode = 0;
+ struct inode *inode = NULL;
+ struct dentry *dentry = NULL;
+ struct novfs_schandle session;
+ loff_t offset;
+ size_t len;
+
+ unsigned page_idx;
+ struct pagevec lru_pvec;
+ pgoff_t next_index;
+
+ char *rbuf, done = 0;
+ SC_INITIALIZE(session);
+
- DbgPrint("novfs_a_readpages: File=0x%p Name=%.*s Pages=%d\n", file,
++ DbgPrint("File=0x%p Name=%.*s Pages=%d", file,
+ file->f_dentry->d_name.len, file->f_dentry->d_name.name,
+ nr_pages);
+
+ dentry = file->f_dentry;
+
+ if (dentry) {
- DbgPrint(" Dentry=0x%p Name=%.*s\n", dentry, dentry->d_name.len,
++ DbgPrint("Dentry=0x%p Name=%.*s", dentry, dentry->d_name.len,
+ dentry->d_name.name);
+ if (dentry->d_inode) {
+ inode = dentry->d_inode;
+ }
+ }
+
+ if (inode) {
- DbgPrint(" Inode=0x%p Ino=%d\n", inode, inode->i_ino);
++ DbgPrint("Inode=0x%p Ino=%d", inode, inode->i_ino);
+
+ if (inode->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(file->f_dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ }
+ }
+ }
+
+ rbuf = kmalloc(novfs_max_iosize, GFP_KERNEL);
+ if (rbuf) {
+ pagevec_init(&lru_pvec, 0);
+ for (page_idx = 0; page_idx < nr_pages && !done;) {
+ struct page *page, *tpage;
+
+ if (list_empty(page_lst))
+ break;
+
+ page = list_entry(page_lst->prev, struct page, lru);
+
+ next_index = page->index;
+ offset = (loff_t) page->index << PAGE_CACHE_SHIFT;
+ len = 0;
+
+ /*
+ * Count number of contiguous pages.
+ */
+ list_for_each_entry_reverse(tpage, page_lst, lru) {
+ if ((next_index != tpage->index) ||
+ (len >= novfs_max_iosize - PAGE_SIZE)) {
+ break;
+ }
+ len += PAGE_SIZE;
+ next_index++;
+ }
+
+ if (len && !done) {
+ struct novfs_data_list dllst[2];
+
+ dllst[1].page = NULL;
+ dllst[1].offset = rbuf;
+ dllst[1].len = len;
+ dllst[1].rwflag = DLWRITE;
+
- DbgPrint
- ("novfs_a_readpages: calling novfs_Read_Pages %lld\n",
++ DbgPrint("calling novfs_Read_Pages %lld",
+ offset);
+ if (!novfs_read_pages
+ (file->private_data, dllst, 2, &len,
+ &offset, session)) {
+ novfs_copy_cache_pages(mapping,
+ page_lst, len,
+ rbuf, &lru_pvec);
+ page_idx += len >> PAGE_CACHE_SHIFT;
+ if ((int)(len & PAGE_CACHE_MASK) != len) {
+ page_idx++;
+ }
+ if (len == 0) {
+ done = 1;
+ }
+ } else {
+ done = 1;
+ }
+ }
+ }
+
+ /*
+ * Free any remaining pages.
+ */
+ while (!list_empty(page_lst)) {
+ struct page *page =
+ list_entry(page_lst->prev, struct page, lru);
+
+ list_del(&page->lru);
+ page_cache_release(page);
+ }
+
- pagevec_lru_add(&lru_pvec);
++ pagevec_lru_add_file(&lru_pvec);
+ kfree(rbuf);
+ } else {
+ retCode = -ENOMEM;
+ }
+
- DbgPrint("novfs_a_readpages: retCode=%d\n", retCode);
++ DbgPrint("retCode=%d", retCode);
+ return (retCode);
+
+}
+
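+/*
+ * write_begin: lock and return the page that will receive the data. If
+ * the write does not cover the whole page, the rest of the page is read
+ * from the server (or zeroed) so it is up to date before the copy.
+ */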
- int novfs_a_prepare_write(struct file *file, struct page *page, unsigned from,
- unsigned to)
++int novfs_a_write_begin(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata)
+{
+ int retVal = 0;
- loff_t offset = (loff_t) page->index << PAGE_CACHE_SHIFT;
- size_t len = PAGE_CACHE_SIZE;
++ loff_t offset = pos;
+ struct novfs_schandle session;
+ struct novfs_data_list dllst[2];
+ struct inode *inode = file->f_dentry->d_inode;
++ struct page *page;
++ pgoff_t index;
++ unsigned from, to;
+ SC_INITIALIZE(session);
+
- DbgPrint
- ("novfs_a_prepare_write: File=0x%p Page=0x%p offset=0x%llx From=%u To=%u filesize=%lld\n",
- file, page, offset, from, to,
- i_size_read(file->f_dentry->d_inode));
++ index = pos >> PAGE_CACHE_SHIFT;
++ from = pos & (PAGE_CACHE_SIZE - 1);
++ to = from + len;
++
++ page = grab_cache_page_write_begin(mapping, index, flags);
++ if (!page)
++ return -ENOMEM;
++
++ *pagep = page;
++
++ DbgPrint("File=0x%p Page=0x%p offset=0x%llx From=%u To=%u "
++ "filesize=%lld\n", file, page, offset, from, to,
++ i_size_read(file->f_dentry->d_inode));
+ if (!PageUptodate(page)) {
+ /*
+ * Check to see if this is a whole-page write.
+ */
+ if ((to == PAGE_CACHE_SIZE) && (from == 0)) {
+ SetPageUptodate(page);
+ }
+
+ /*
+ * Check to see if we can read the page.
+ */
+ else if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+ /*
+ * Get session.
+ */
+ if (file->f_dentry && file->f_dentry->d_inode) {
+ if (file->f_dentry->d_inode->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *)
+ inode->
+ i_private)->
+ Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) inode->
+ i_private)->Scope =
+ novfs_get_scope(file->f_dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->Scope);
+ }
+ }
+ }
+
+ page_cache_get(page);
+
+ len = i_size_read(inode) - offset;
+ if (len > PAGE_CACHE_SIZE) {
+ len = PAGE_CACHE_SIZE;
+ }
+
+ if (len) {
+ /*
+ * Read page from server.
+ */
+
+ dllst[1].page = page;
+ dllst[1].offset = 0;
+ dllst[1].len = len;
+ dllst[1].rwflag = DLWRITE;
+
- DbgPrint
- ("novfs_a_prepare_write: calling novfs_Read_Pages %lld\n",
++ DbgPrint("calling novfs_Read_Pages %lld",
+ offset);
+ novfs_read_pages(file->private_data, dllst, 2,
+ &len, &offset, session);
+
+ /*
+ * Zero the unused part of the page.
+ */
+ }
+
+ if (len < PAGE_CACHE_SIZE) {
+ char *adr = kmap_atomic(page, KM_USER0);
+ memset(adr + len, 0, PAGE_CACHE_SIZE - len);
+ kunmap_atomic(adr, KM_USER0);
+ }
+ } else {
+ /*
+ * Zero the sections of the page that are not going
+ * to be used.
+ */
+ char *adr = kmap_atomic(page, KM_USER0);
+ memset(adr, 0, from);
+ memset(adr + to, 0, PAGE_CACHE_SIZE - to);
+ kunmap_atomic(adr, KM_USER0);
+
- DbgPrint("novfs_a_prepare_write: memset 0x%p\n", adr);
++ DbgPrint("memset 0x%p", adr);
+ }
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ }
- // DbgPrint("novfs_a_prepare_write: return %d\n", retVal);
++// DbgPrint("return %d", retVal);
+ return (retVal);
+}
+
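+/*
+ * write_end: if the page is not up to date, send the copied bytes
+ * straight to the server; otherwise just mark the page dirty. The page
+ * locked by write_begin is unlocked and released here.
+ */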
- int novfs_a_commit_write(struct file *file, struct page *page, unsigned offset,
- unsigned to)
++int novfs_a_write_end(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata)
+{
+ int retCode = 0;
+ struct inode *inode = page->mapping->host;
- loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
++ loff_t offset = pos;
+ struct novfs_schandle session;
+ struct inode_data *id;
+ struct novfs_data_list dlst[1];
- size_t len = to - offset;
-
++ pgoff_t index;
++ unsigned from, to;
+ SC_INITIALIZE(session);
+
- DbgPrint
- ("novfs_a_commit_write: File=0x%p Page=0x%p offset=0x%x To=%u filesize=%lld\n",
- file, page, offset, to, i_size_read(file->f_dentry->d_inode));
++ index = pos >> PAGE_CACHE_SHIFT;
++ from = pos & (PAGE_CACHE_SIZE - 1);
++ to = from + len;
++
++
++ DbgPrint("File=0x%p Page=0x%p offset=0x%x To=%u filesize=%lld",
++ file, page, offset, to, i_size_read(file->f_dentry->d_inode));
+ if (file->f_dentry->d_inode
+ && (id = file->f_dentry->d_inode->i_private)) {
+ session = novfs_scope_get_sessionId(id->Scope);
+ if (0 == SC_PRESENT(session)) {
+ id->Scope = novfs_get_scope(file->f_dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ /*
+ * Setup file handle
+ */
+ id->FileHandle = file->private_data;
+
+ if (pos + copied > inode->i_size) {
+ i_size_write(inode, pos + copied);
+ }
+
+ if (!PageUptodate(page)) {
+ pos =
+ ((loff_t) page->index << PAGE_CACHE_SHIFT) + from;
+
+ dlst[0].page = page;
+ dlst[0].offset = (void *)(unsigned long) from;
+ dlst[0].len = copied;
+ dlst[0].rwflag = DLREAD;
+
+ retCode =
+ novfs_write_pages(id->FileHandle, dlst, 1, copied,
+ pos, session);
+
+ } else {
+ set_page_dirty(page);
+ }
+ }
+
+ unlock_page(page);
+ page_cache_release(page);
+
+ return ((retCode < 0) ? retCode : copied);
+}
+
+/*++======================================================================*/
+ssize_t novfs_a_direct_IO(int rw, struct kiocb * kiocb,
+ const struct iovec * iov,
+ loff_t offset, unsigned long nr_segs)
+/*
+ *
+ * Notes: This is a dummy function so that we can allow a file
+ * to get the direct IO flag set. novfs_f_read and
+ * novfs_f_write will do the work. Maybe not the best
+ * way to do it, but it was the easiest to implement.
+ *
+ *========================================================================*/
+{
+ return (-EIO);
+}
+
+/*++======================================================================*/
+int novfs_i_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ char *path, *buf;
+ struct novfs_entry_info info;
+ void *handle;
+ struct novfs_schandle session;
+ int retCode = -EACCES;
+
- DbgPrint("novfs_i_create: mode=0%o flags=0%o %.*s\n", mode,
++ DbgPrint("mode=0%o flags=0%o %.*s", mode,
+ nd->NDOPENFLAGS, dentry->d_name.len, dentry->d_name.name);
+
+ if (IS_ROOT(dentry) || /* Root */
+ IS_ROOT(dentry->d_parent) || /* User */
+ IS_ROOT(dentry->d_parent->d_parent) || /* Server */
+ IS_ROOT(dentry->d_parent->d_parent->d_parent)) { /* Volume */
+ return (-EACCES);
+ }
+
+ if (mode | S_IFREG) {
+ if (dir->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) dir->i_private)->
+ Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) dir->i_private)->Scope =
+ novfs_get_scope(dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) dir->
+ i_private)->Scope);
+ }
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path =
+ novfs_dget_path(dentry, buf,
+ PATH_LENGTH_BUFFER);
+ if (path) {
+ retCode =
+ novfs_open_file(path,
+ nd->
+ NDOPENFLAGS |
+ O_RDWR, &info,
+ &handle, session);
+ if (!retCode && handle) {
+ novfs_close_file(handle,
+ session);
+ if (!novfs_i_mknod
+ (dir, dentry,
+ mode | S_IFREG, 0)) {
+ if (dentry->d_inode) {
+ ((struct inode_data *)
+ dentry->
+ d_inode->
+ i_private)->
+ Flags |= UPDATE_INODE;
+ }
+ }
+ }
+ }
+ kfree(buf);
+ }
+ }
+ }
+ return (retCode);
+}
+
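+/*
+ * Refresh the in-core inode from the server-side entry info, invalidating
+ * the page cache when the size or mtime changed under us.
+ */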
+void update_inode(struct inode *Inode, struct novfs_entry_info *Info)
+{
+ static char dbuf[128];
+
- DbgPrint("update_inode: Inode=0x%p I_ino=%d\n", Inode, Inode->i_ino);
++ DbgPrint("Inode=0x%p I_ino=%d", Inode, Inode->i_ino);
+
- DbgPrint("update_inode: atime=%s\n",
- ctime_r(&Info->atime.tv_sec, dbuf));
- DbgPrint("update_inode: ctime=%s\n",
- ctime_r(&Info->ctime.tv_sec, dbuf));
- DbgPrint("update_inode: mtime=%s %d\n",
- ctime_r(&Info->mtime.tv_sec, dbuf), Info->mtime.tv_nsec);
- DbgPrint("update_inode: size=%lld\n", Info->size);
- DbgPrint("update_inode: mode=0%o\n", Info->mode);
++ DbgPrint("atime=%s", ctime_r(&Info->atime.tv_sec, dbuf));
++ DbgPrint("ctime=%s", ctime_r(&Info->ctime.tv_sec, dbuf));
++ DbgPrint("mtime=%s %d", ctime_r(&Info->mtime.tv_sec, dbuf),
++ Info->mtime.tv_nsec);
++ DbgPrint("size=%lld", Info->size);
++ DbgPrint("mode=0%o", Info->mode);
+
+ if (Inode &&
+ ((Inode->i_size != Info->size) ||
+ (Inode->i_mtime.tv_sec != Info->mtime.tv_sec) ||
+ (Inode->i_mtime.tv_nsec != Info->mtime.tv_nsec))) {
- DbgPrint
- ("update_inode: calling invalidate_remote_inode sz %d %d\n",
++ DbgPrint ("calling invalidate_remote_inode sz %d %d",
+ Inode->i_size, Info->size);
- DbgPrint
- ("update_inode: calling invalidate_remote_inode sec %d %d\n",
++ DbgPrint ("calling invalidate_remote_inode sec %d %d",
+ Inode->i_mtime.tv_sec, Info->mtime.tv_sec);
- DbgPrint
- ("update_inode: calling invalidate_remote_inode ns %d %d\n",
++ DbgPrint ("calling invalidate_remote_inode ns %d %d",
+ Inode->i_mtime.tv_nsec, Info->mtime.tv_nsec);
+
+ if (Inode && Inode->i_mapping) {
+ invalidate_remote_inode(Inode);
+ }
+ }
+
+ Inode->i_mode = Info->mode;
+ Inode->i_size = Info->size;
+ Inode->i_atime = Info->atime;
+ Inode->i_ctime = Info->ctime;
+ Inode->i_mtime = Info->mtime;
+
+ if (Inode->i_size && Inode->i_sb->s_blocksize) {
+ Inode->i_blocks =
+ (unsigned long) (Info->size >> (loff_t) Inode->i_blkbits);
+ Inode->i_bytes = Info->size & (Inode->i_sb->s_blocksize - 1);
+
- DbgPrint("update_inode: i_sb->s_blocksize=%d\n",
- Inode->i_sb->s_blocksize);
- DbgPrint("update_inode: i_blkbits=%d\n", Inode->i_blkbits);
- DbgPrint("update_inode: i_blocks=%d\n", Inode->i_blocks);
- DbgPrint("update_inode: i_bytes=%d\n", Inode->i_bytes);
++ DbgPrint("i_sb->s_blocksize=%d", Inode->i_sb->s_blocksize);
++ DbgPrint("i_blkbits=%d", Inode->i_blkbits);
++ DbgPrint("i_blocks=%d", Inode->i_blocks);
++ DbgPrint("i_bytes=%d", Inode->i_bytes);
+ }
+}
+
+struct dentry *novfs_i_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct dentry *retVal = ERR_PTR(-ENOENT);
+ struct dentry *parent;
+ struct novfs_entry_info *info = NULL;
+ struct inode_data *id;
+ struct inode *inode = NULL;
- uid_t uid = current->euid;
++ uid_t uid = current_euid();
+ ino_t ino = 0;
+ struct qstr name;
+ char *buf;
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ char *path;
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
- DbgPrint
- ("novfs_i_lookup: dir 0x%p %d hash %d inode 0x%0p %s\n",
++ DbgPrint("dir 0x%p %d hash %d inode 0x%0p %s",
+ dir, dir->i_ino, dentry->d_name.hash,
+ dentry->d_inode, path);
+ }
+ kfree(buf);
+ } else {
- DbgPrint
- ("novfs_i_lookup: dir 0x%p %d name %.*s hash %d inode 0x%0p\n",
++ DbgPrint("dir 0x%p %d name %.*s hash %d inode 0x%0p",
+ dir, dir->i_ino, dentry->d_name.len, dentry->d_name.name,
+ dentry->d_name.hash, dentry->d_inode);
+ }
+
+ if ((dentry->d_name.len == 7)
+ && (0 == strncmp(dentry->d_name.name, " !xover", 7))) {
+ dentry->d_op = &novfs_dentry_operations;
+ igrab(dir);
+ d_add(dentry, dir);
+ return NULL;
+ }
+ if ((dentry->d_name.len == 7)
+ && (0 == strncmp(dentry->d_name.name, "z!xover", 7))) {
+ dentry->d_op = &novfs_dentry_operations;
+ igrab(dir);
+ d_add(dentry, dir);
+ return NULL;
+ }
+
+ if (dir && (id = dir->i_private)) {
+ retVal = 0;
+ if (IS_ROOT(dentry)) {
- DbgPrint("novfs_i_lookup: Root entry=0x%p\n",
- novfs_root);
++ DbgPrint("Root entry=0x%p", novfs_root);
+ inode = novfs_root->d_inode;
+ return (0);
+ } else {
+ info =
+ kmalloc(sizeof(struct novfs_entry_info) +
+ PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (info) {
+ if (NULL ==
+ (retVal =
+ ERR_PTR(verify_dentry(dentry, 1)))) {
+ name.name = dentry->d_name.name;
+ name.len = dentry->d_name.len;
+ name.hash = novfs_internal_hash(&name);
+
+ if (novfs_lock_inode_cache(dir)) {
+ if (!novfs_get_entry
+ (dir, &name, &ino, info)) {
+ inode =
+ ilookup(dentry->
+ d_sb, ino);
+ if (inode) {
+ update_inode
+ (inode,
+ info);
+ }
+ }
+ novfs_unlock_inode_cache(dir);
+ }
+
+ if (!inode && ino) {
+ uid = novfs_scope_get_uid(id->Scope);
+ if (novfs_lock_inode_cache(dir)) {
+ inode = novfs_get_inode (dentry->d_sb, info->mode, 0, uid, ino, &name);
+ if (inode) {
+ if (!novfs_get_entry(dir, &dentry->d_name, &ino, info)) {
+ update_inode
+ (inode,
+ info);
+ }
+ }
+ novfs_unlock_inode_cache(dir);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (!retVal) {
+ dentry->d_op = &novfs_dentry_operations;
+ if (inode) {
+ parent = dget_parent(dentry);
+ novfs_d_add(dentry->d_parent, dentry, inode, 1);
+ dput(parent);
+ } else {
+ d_add(dentry, inode);
+ }
+ }
+
+ if (info)
+ kfree(info);
+
- DbgPrint
- ("novfs_i_lookup: inode=0x%p dentry->d_inode=0x%p return=0x%p\n",
++ DbgPrint("inode=0x%p dentry->d_inode=0x%p return=0x%p",
+ dir, dentry->d_inode, retVal);
+
+ return (retVal);
+}
+
+int novfs_i_unlink(struct inode *dir, struct dentry *dentry)
+{
+ int retCode = -ENOENT;
+ struct inode *inode;
+ struct novfs_schandle session;
+ char *path, *buf;
+ uint64_t t64;
+
- DbgPrint("novfs_i_unlink: dir=0x%p dir->i_ino=%d %.*s\n", dir,
++ DbgPrint("dir=0x%p dir->i_ino=%d %.*s", dir,
+ dir->i_ino, dentry->d_name.len, dentry->d_name.name);
- DbgPrint("novfs_i_unlink: IS_ROOT(dentry)=%d\n", IS_ROOT(dentry));
- DbgPrint("novfs_i_unlink: IS_ROOT(dentry->d_parent)=%d\n",
++ DbgPrint("IS_ROOT(dentry)=%d", IS_ROOT(dentry));
++ DbgPrint("IS_ROOT(dentry->d_parent)=%d",
+ IS_ROOT(dentry->d_parent));
- DbgPrint("novfs_i_unlink: IS_ROOT(dentry->d_parent->d_parent)=%d\n",
++ DbgPrint("IS_ROOT(dentry->d_parent->d_parent)=%d",
+ IS_ROOT(dentry->d_parent->d_parent));
- DbgPrint
- ("novfs_i_unlink: IS_ROOT(dentry->d_parent->d_parent->d_parent)=%d\n",
++ DbgPrint("IS_ROOT(dentry->d_parent->d_parent->d_parent)=%d",
+ IS_ROOT(dentry->d_parent->d_parent->d_parent));
+
+ if (IS_ROOT(dentry) || /* Root */
+ IS_ROOT(dentry->d_parent) || /* User */
+ (!IS_ROOT(dentry->d_parent->d_parent) && /* Server */
+ IS_ROOT(dentry->d_parent->d_parent->d_parent))) { /* Volume */
+ return (-EACCES);
+ }
+
+ inode = dentry->d_inode;
+ if (inode) {
- DbgPrint
- ("novfs_i_unlink: dir=0x%p dir->i_ino=%d inode=0x%p ino=%d\n",
++ DbgPrint("dir=0x%p dir->i_ino=%d inode=0x%p ino=%d",
+ dir, dir->i_ino, inode, inode->i_ino);
+ if (inode->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ }
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path =
+ novfs_dget_path(dentry, buf,
+ PATH_LENGTH_BUFFER);
+ if (path) {
- DbgPrint
- ("novfs_i_unlink: path %s mode 0%o\n",
- path, inode->i_mode);
++ DbgPrint("path %s mode 0%o",
++ path, inode->i_mode);
+ if (IS_ROOT(dentry->d_parent->d_parent)) {
+ retCode = novfs_daemon_logout(&dentry->d_name, &session);
+ } else {
+ retCode =
+ novfs_delete(path,
+ S_ISDIR(inode->i_mode), session);
+ if (retCode) {
+ struct iattr ia;
+ memset(&ia, 0, sizeof(ia));
+ ia.ia_valid = ATTR_MODE;
+ ia.ia_mode = S_IRWXU;
+ novfs_set_attr(path, &ia, session);
+ retCode = novfs_delete(path, S_ISDIR(inode->i_mode), session);
+ }
+ }
+ if (!retCode || IS_DEADDIR(inode)) {
+ novfs_remove_inode_entry(dir,
+ &dentry->
+ d_name,
+ 0);
+ dentry->d_time = 0;
+ t64 = 0;
+ novfs_scope_set_userspace(&t64, &t64,
+ &t64, &t64);
+ retCode = 0;
+ }
+ }
+ kfree(buf);
+ }
+ }
+ }
+
- DbgPrint("novfs_i_unlink: retCode 0x%x\n", retCode);
++ DbgPrint("retCode 0x%x", retCode);
+ return (retCode);
+}
+
+int novfs_i_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ char *path, *buf;
+ struct novfs_schandle session;
+ int retCode = 0;
+ struct inode *inode;
+ struct novfs_entry_info info;
+ uid_t uid;
+
- DbgPrint("novfs_i_mkdir: dir=0x%p ino=%d dentry=0x%p %.*s mode=0%lo\n",
++ DbgPrint("dir=0x%p ino=%d dentry=0x%p %.*s mode=0%lo",
+ dir, dir->i_ino, dentry, dentry->d_name.len,
+ dentry->d_name.name, mode);
+
+ if (IS_ROOT(dentry) || /* Root */
+ IS_ROOT(dentry->d_parent) || /* User */
+ IS_ROOT(dentry->d_parent->d_parent) || /* Server */
+ IS_ROOT(dentry->d_parent->d_parent->d_parent)) { /* Volume */
+ return (-EACCES);
+ }
+
+ mode |= S_IFDIR;
+ mode &= (S_IFMT | S_IRWXU);
+ if (dir->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) dir->i_private)->Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) dir->i_private)->Scope =
+ novfs_get_scope(dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) dir->i_private)->
+ Scope);
+ }
+
+ uid = novfs_scope_get_uid(((struct inode_data *) dir->i_private)->Scope);
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
- DbgPrint("novfs_i_mkdir: path %s\n", path);
++ DbgPrint("path %s", path);
+ retCode =
+ novfs_create(path, S_ISDIR(mode), session);
+ if (!retCode) {
+ retCode =
+ novfs_get_file_info(path, &info,
+ session);
+ if (!retCode) {
+ retCode =
+ novfs_i_mknod(dir, dentry,
+ mode, 0);
+ inode = dentry->d_inode;
+ if (inode) {
+ update_inode(inode,
+ &info);
+ ((struct inode_data *) inode->
+ i_private)->Flags &=
+ ~UPDATE_INODE;
+
+ dentry->d_time =
+ jiffies +
+ (novfs_update_timeout
+ * HZ);
+
+ novfs_lock_inode_cache(dir);
+ if (novfs_update_entry
+ (dir,
+ &dentry->d_name, 0,
+ &info)) {
+ novfs_add_inode_entry
+ (dir,
+ &dentry->
+ d_name,
+ inode->
+ i_ino,
+ &info);
+ }
+ novfs_unlock_inode_cache(dir);
+ }
+
+ }
+ }
+ }
+ kfree(buf);
+ }
+ }
+
+ return (retCode);
+}
+
+int novfs_i_rmdir(struct inode *inode, struct dentry *dentry)
+{
+ return (novfs_i_unlink(inode, dentry));
+}
+
+int novfs_i_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ struct inode *inode = NULL;
+ int retCode = -EACCES;
+ uid_t uid;
+ struct dentry *parent;
+
+ if (IS_ROOT(dentry) || /* Root */
+ IS_ROOT(dentry->d_parent) || /* User */
+ IS_ROOT(dentry->d_parent->d_parent) || /* Server */
+ IS_ROOT(dentry->d_parent->d_parent->d_parent)) { /* Volume */
+ return (-EACCES);
+ }
+
+ if (((struct inode_data *) dir->i_private)) {
+ uid = novfs_scope_get_uid(((struct inode_data *) dir->i_private)->Scope);
+ if (mode & (S_IFREG | S_IFDIR)) {
+ inode =
+ novfs_get_inode(dir->i_sb, mode, dev, uid, 0, &dentry->d_name);
+ }
+ }
+ if (inode) {
+ struct novfs_entry_info info;
+
+ dentry->d_op = &novfs_dentry_operations;
+ parent = dget_parent(dentry);
+ novfs_d_add(parent, dentry, inode, 0);
+ memset(&info, 0, sizeof(info));
+ info.mode = inode->i_mode;
+ novfs_lock_inode_cache(dir);
+ novfs_add_inode_entry(dir, &dentry->d_name, inode->i_ino,
+ &info);
+ novfs_unlock_inode_cache(dir);
+
+ dput(parent);
+
+ retCode = 0;
+ }
- DbgPrint("novfs_i_mknod: return 0x%x\n", retCode);
++ DbgPrint("return 0x%x", retCode);
+ return retCode;
+}
+
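+/*
+ * Rename is only supported within one server and volume; crossing either
+ * boundary fails with -EXDEV. An existing target is deleted first,
+ * retrying once with S_IRWXU set in case the target was read-only.
+ */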
+int novfs_i_rename(struct inode *odir, struct dentry *od, struct inode *ndir,
+ struct dentry *nd)
+{
+ int retCode = -ENOTEMPTY;
+ char *newpath, *newbuf, *newcon;
+ char *oldpath, *oldbuf, *oldcon;
+ struct qstr newname, oldname;
+ struct novfs_entry_info *info = NULL;
+ int oldlen, newlen;
+ struct novfs_schandle session;
+ ino_t ino;
+
+ if (IS_ROOT(od) || /* Root */
+ IS_ROOT(od->d_parent) || /* User */
+ IS_ROOT(od->d_parent->d_parent) || /* Server */
+ IS_ROOT(od->d_parent->d_parent->d_parent)) { /* Volume */
+ return (-EACCES);
+ }
+
- DbgPrint("novfs_i_rename: odir=0x%p ino=%d ndir=0x%p ino=%d\n", odir,
++ DbgPrint("odir=0x%p ino=%d ndir=0x%p ino=%d", odir,
+ odir->i_ino, ndir, ndir->i_ino);
+
+ oldbuf = kmalloc(PATH_LENGTH_BUFFER * 2, GFP_KERNEL);
+ newbuf = oldbuf + PATH_LENGTH_BUFFER;
+ if (oldbuf && newbuf) {
+ oldpath = novfs_dget_path(od, oldbuf, PATH_LENGTH_BUFFER);
+ newpath = novfs_dget_path(nd, newbuf, PATH_LENGTH_BUFFER);
+ if (oldpath && newpath) {
+ oldlen = PATH_LENGTH_BUFFER - (int)(oldpath - oldbuf);
+ newlen = PATH_LENGTH_BUFFER - (int)(newpath - newbuf);
+
- DbgPrint
- ("novfs_i_rename: od=0x%p od->inode=0x%p od->inode->i_ino=%d %s\n",
++ DbgPrint("od=0x%p od->inode=0x%p od->inode->i_ino=%d %s",
+ od, od->d_inode, od->d_inode->i_ino, oldpath);
+ if (nd->d_inode) {
- DbgPrint
- ("novfs_i_rename: nd=0x%p nd->inode=0x%p nd->inode->i_ino=%d %s\n",
++ DbgPrint("nd=0x%p nd->inode=0x%p nd->inode->i_ino=%d %s",
+ nd, nd->d_inode, nd->d_inode->i_ino,
+ newpath);
+ } else {
- DbgPrint
- ("novfs_i_rename: nd=0x%p nd->inode=0x%p %s\n",
++ DbgPrint("nd=0x%p nd->inode=0x%p %s",
+ nd, nd->d_inode, newpath);
+ }
+
+ /*
+ * Check to see if the source and target are on different
+ * servers or volumes.
+ */
+ newcon = strchr(newpath + 1, '\\');
+ oldcon = strchr(oldpath + 1, '\\');
- DbgPrint("novfs_i_rename: newcon=0x%p newpath=0x%p\n",
- newcon, newpath);
- DbgPrint("novfs_i_rename: oldcon=0x%p oldpath=0x%p\n",
- oldcon, oldpath);
++ DbgPrint("newcon=0x%p newpath=0x%p", newcon, newpath);
++ DbgPrint("oldcon=0x%p oldpath=0x%p", oldcon, oldpath);
+ retCode = -EXDEV;
+ if (newcon && oldcon
+ && ((int)(newcon - newpath) ==
+ (int)(oldcon - oldpath))) {
+ newcon = strchr(newcon + 1, '\\');
+ oldcon = strchr(oldcon + 1, '\\');
- DbgPrint("novfs_i_rename2: newcon=0x%p newpath=0x%p\n", newcon, newpath);
- DbgPrint("novfs_i_rename2: oldcon=0x%p oldpath=0x%p\n", oldcon, oldpath);
++ DbgPrint("2; newcon=0x%p newpath=0x%p",
++ newcon, newpath);
++ DbgPrint("2; oldcon=0x%p oldpath=0x%p",
++ oldcon, oldpath);
+ if (newcon && oldcon &&
+ ((int)(newcon - newpath) == (int)(oldcon - oldpath))) {
+ newname.name = newpath;
+ newname.len = (int)(newcon - newpath);
+ newname.hash = 0;
+
+ oldname.name = oldpath;
+ oldname.len = (int)(oldcon - oldpath);
+ oldname.hash = 0;
+ if (!novfs_d_strcmp(&newname, &oldname)) {
+
+ if (od->d_inode
+ && od->d_inode->i_private) {
+
+ if (nd->d_inode
+ && nd->d_inode->
+ i_private) {
+ session =
+ novfs_scope_get_sessionId
+ (((struct inode_data *) ndir->i_private)->Scope);
+ if (0 ==
+ SC_PRESENT
+ (session)) {
+ ((struct inode_data *) ndir->i_private)->Scope = novfs_get_scope(nd);
+ session
+ =
+ novfs_scope_get_sessionId
+ (((struct inode_data *) ndir->i_private)->Scope);
+ }
+
+ retCode =
+ novfs_delete(newpath, S_ISDIR(nd->d_inode->i_mode), session);
+ if (retCode) {
+ struct iattr ia;
+ memset(&ia, 0, sizeof(ia));
+ ia.ia_valid = ATTR_MODE;
+ ia.ia_mode = S_IRWXU;
+ novfs_set_attr(newpath, &ia, session);
+ retCode = novfs_delete(newpath, S_ISDIR(nd->d_inode->i_mode), session);
+ }
+
+ }
+
+ session = novfs_scope_get_sessionId(((struct inode_data *) ndir->i_private)->Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *)ndir->i_private)->Scope = novfs_get_scope(nd);
+ session = novfs_scope_get_sessionId(((struct inode_data *) ndir->i_private)->Scope);
+ }
+ retCode = novfs_rename_file(S_ISDIR(od->d_inode->i_mode), oldpath, oldlen - 1, newpath, newlen - 1, session);
+
+ if (!retCode) {
+ info = (struct novfs_entry_info *) oldbuf;
+ od->d_time = 0;
+ novfs_remove_inode_entry(odir, &od->d_name, 0);
+ novfs_remove_inode_entry(ndir, &nd->d_name, 0);
+ novfs_get_file_info(newpath, info, session);
+ nd->d_time = jiffies + (novfs_update_timeout * HZ);
+
+ if (od->d_inode && od->d_inode->i_ino) {
+ ino = od->d_inode-> i_ino;
+ } else {
+ ino = (ino_t)atomic_inc_return(&novfs_Inode_Number);
+ }
+ novfs_add_inode_entry(ndir, &nd->d_name, ino, info);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (oldbuf)
+ kfree(oldbuf);
+
- DbgPrint("novfs_i_rename: return %d\n", retCode);
++ DbgPrint("return %d", retCode);
+ return (retCode);
+}
+
+
+int novfs_i_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ char *path, *buf;
+ struct inode *inode = dentry->d_inode;
+ char atime_buf[32];
+ char mtime_buf[32];
+ char ctime_buf[32];
+ unsigned int ia_valid = attr->ia_valid;
+ struct novfs_schandle session;
+ int retVal = 0;
+ struct iattr mattr;
+
+ if (IS_ROOT(dentry) || /* Root */
+ IS_ROOT(dentry->d_parent) || /* User */
+ IS_ROOT(dentry->d_parent->d_parent) || /* Server */
+ IS_ROOT(dentry->d_parent->d_parent->d_parent)) { /* Volume */
+ return (-EACCES);
+ }
+
+ if (inode && inode->i_private) {
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->
+ Scope);
+ if (0 == SC_PRESENT(session)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(dentry);
+ session =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
+ }
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ strcpy(atime_buf, "Unspecified");
+ strcpy(mtime_buf, "Unspecified");
+ strcpy(ctime_buf, "Unspecified");
+ if (attr->ia_valid & ATTR_ATIME) {
+ ctime_r(&attr->ia_atime.tv_sec,
+ atime_buf);
+ }
+ if (attr->ia_valid & ATTR_MTIME) {
+ ctime_r(&attr->ia_mtime.tv_sec,
+ mtime_buf);
+ }
+ if (attr->ia_valid & ATTR_CTIME) {
+ ctime_r(&attr->ia_ctime.tv_sec,
+ ctime_buf);
+ }
+ /* Removed for Bug 132374. jlt */
- DbgPrint("novfs_i_setattr: %s\n"
++ __DbgPrint("%s: %s\n"
+ " ia_valid: 0x%x\n"
+ " ia_mode: 0%o\n"
+ " ia_uid: %d\n"
+ " ia_gid: %d\n"
+ " ia_size: %lld\n"
+ " ia_atime: %s\n"
+ " ia_mtime: %s\n"
- " ia_ctime: %s\n",
++ " ia_ctime: %s\n", __func__,
+ path,
+ attr->ia_valid,
+ attr->ia_mode,
+ attr->ia_uid,
+ attr->ia_gid,
+ attr->ia_size,
+ atime_buf, mtime_buf, ctime_buf);
+
+ if ((attr->ia_valid & ATTR_FILE)
+ && (attr->ia_valid & ATTR_SIZE)) {
+ memcpy(&mattr, attr, sizeof(mattr));
+ mattr.ia_valid &=
+ ~(ATTR_FILE | ATTR_SIZE);
+ attr = &mattr;
+ ia_valid = attr->ia_valid;
+#if 0 // thanks to vfs changes in our tree...
+ retVal =
+ novfs_trunc_ex(attr->
+ ia_file->
+ private_data,
+ attr->
+ ia_size,
+ session);
+ if (!retVal) {
+ inode->i_size = attr->ia_size;
+ ((struct inode_data *) inode->
+ i_private)->Flags |=
+ UPDATE_INODE;
+ }
+#endif
+ }
+
+ if (ia_valid
+ && !(retVal =
+ novfs_set_attr(path, attr, session))) {
+ ((struct inode_data *) inode->i_private)->
+ Flags |= UPDATE_INODE;
+
+ if (ia_valid & ATTR_ATIME)
+ inode->i_atime = attr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ inode->i_mtime = attr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ inode->i_ctime = attr->ia_ctime;
+ if (ia_valid & ATTR_MODE) {
+ inode->i_mode =
+ attr->
+ ia_mode & (S_IFMT |
+ S_IRWXU);
+ }
+ }
+ }
+ }
+ kfree(buf);
+ }
- DbgPrint("novfs_i_setattr: return 0x%x\n", retVal);
++ DbgPrint("return 0x%x", retVal);
+
+ return (retVal);
+}
+
+int novfs_i_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *kstat)
+{
+ int retCode = 0;
+ char atime_buf[32];
+ char mtime_buf[32];
+ char ctime_buf[32];
+ struct inode *inode = dentry->d_inode;
+
+ struct novfs_entry_info info;
+ char *path, *buf;
+ struct novfs_schandle session;
+ struct inode_data *id;
+
+ if (!IS_ROOT(dentry) && !IS_ROOT(dentry->d_parent)) {
+ SC_INITIALIZE(session);
+ id = dentry->d_inode->i_private;
+
+ if (id && (id->Flags & UPDATE_INODE)) {
+ session = novfs_scope_get_sessionId(id->Scope);
+
+ if (0 == SC_PRESENT(session)) {
+ id->Scope = novfs_get_scope(dentry);
+ session = novfs_scope_get_sessionId(id->Scope);
+ }
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path =
+ novfs_dget_path(dentry, buf,
+ PATH_LENGTH_BUFFER);
+ if (path) {
+ retCode =
+ novfs_get_file_info(path, &info,
+ session);
+ if (!retCode) {
+ update_inode(inode, &info);
+ id->Flags &= ~UPDATE_INODE;
+ }
+ }
+ kfree(buf);
+ }
+ }
+ }
+
+ kstat->ino = inode->i_ino;
+ kstat->dev = inode->i_sb->s_dev;
+ kstat->mode = inode->i_mode;
+ kstat->nlink = inode->i_nlink;
+ kstat->uid = inode->i_uid;
+ kstat->gid = inode->i_gid;
+ kstat->rdev = inode->i_rdev;
+ kstat->size = i_size_read(inode);
+ kstat->atime = inode->i_atime;
+ kstat->mtime = inode->i_mtime;
+ kstat->ctime = inode->i_ctime;
+ kstat->blksize = inode->i_sb->s_blocksize;
+ kstat->blocks = inode->i_blocks;
+ if (inode->i_bytes) {
+ kstat->blocks++;
+ }
+ ctime_r(&kstat->atime.tv_sec, atime_buf);
+ ctime_r(&kstat->mtime.tv_sec, mtime_buf);
+ ctime_r(&kstat->ctime.tv_sec, ctime_buf);
+
- DbgPrint("novfs_i_getattr: 0x%x 0x%p <%.*s>\n"
++ __DbgPrint("%s: 0x%x 0x%p <%.*s>\n"
+ " ino: %d\n"
+ " dev: 0x%x\n"
+ " mode: 0%o\n"
+ " nlink: 0x%x\n"
+ " uid: 0x%x\n"
+ " gid: 0x%x\n"
+ " rdev: 0x%x\n"
+ " size: 0x%llx\n"
+ " atime: %s\n"
+ " mtime: %s\n"
+ " ctime: %s\n"
+ " blksize: 0x%x\n"
- " blocks: 0x%x\n",
++ " blocks: 0x%x\n", __func__,
+ retCode, dentry, dentry->d_name.len, dentry->d_name.name,
+ kstat->ino,
+ kstat->dev,
+ kstat->mode,
+ kstat->nlink,
+ kstat->uid,
+ kstat->gid,
+ kstat->rdev,
+ kstat->size,
+ atime_buf,
+ mtime_buf, ctime_buf, kstat->blksize, kstat->blocks);
+ return (retCode);
+}
+
+ssize_t novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer,
+ size_t buffer_size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct novfs_schandle sessionId;
+ char *path, *buf, *bufRead;
+ ssize_t dataLen;
+
+ int retxcode = 0;
+
+ SC_INITIALIZE(sessionId);
+
- DbgPrint("novfs_i_getxattr: Ian\n"); /*%.*s\n", dentry->d_name.len, dentry->d_name.name); */
- DbgPrint
- ("novfs_i_getxattr: dentry->d_name.len %u, dentry->d_name.name %s\n",
++ DbgPrint("Ian"); /*%.*s\n", dentry->d_name.len, dentry->d_name.name); */
++ DbgPrint("dentry->d_name.len %u, dentry->d_name.name %s",
+ dentry->d_name.len, dentry->d_name.name);
- DbgPrint("novfs_i_getxattr: name %s\n", name);
- DbgPrint("novfs_i_getxattr: size %u\n", buffer_size);
++ DbgPrint("name %s", name);
++ DbgPrint("size %u", buffer_size);
+
+ if (inode && inode->i_private) {
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->
+ Scope);
- DbgPrint("novfs_i_getxattr: SessionId = %u\n", sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ //if (0 == sessionId)
+ if (0 == SC_PRESENT(sessionId)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(dentry);
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
- DbgPrint("novfs_i_getxattr: SessionId = %u\n",
- sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ }
+ }
+
+ dataLen = 0;
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ bufRead = kmalloc(XA_BUFFER, GFP_KERNEL);
+ if (bufRead) {
+ retxcode =
+ novfs_getx_file_info(path, name, bufRead,
+ XA_BUFFER, &dataLen,
+ sessionId);
- DbgPrint
- ("novfs_i_getxattr: after novfs_GetX_File_Info retxcode = %d\n",
++ DbgPrint("after novfs_GetX_File_Info retxcode = %d",
+ retxcode);
+ if (!retxcode) {
+ novfs_dump(64, bufRead);
+ if (buffer_size != 0) {
+ if (buffer_size >= dataLen) {
+ memcpy(buffer, bufRead,
+ dataLen);
+ } else {
- DbgPrint
- ("novfs_i_getxattr: (!!!) not enough buffer_size. buffer_size = %d, dataLen = %d\n",
++ DbgPrint("(!!!) not enough buffer_size. buffer_size = %d, dataLen = %d",
+ buffer_size,
+ dataLen);
+ retxcode = -ERANGE;
+ }
+ }
+ }
+
+ kfree(bufRead);
+ }
+ }
+ kfree(buf);
+ }
+
+ if (retxcode) {
+ dataLen = retxcode;
+ } else {
+ if ((buffer_size > 0) && (buffer_size < dataLen)) {
+ dataLen = -ERANGE;
+ }
+ }
+
+ return (dataLen);
+}
+
+int novfs_i_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t value_size, int flags)
+{
+
+ struct inode *inode = dentry->d_inode;
+ struct novfs_schandle sessionId;
+ char *path, *buf;
+ unsigned long bytesWritten = 0;
+ int retError = 0;
+ int retxcode = 0;
+
+ SC_INITIALIZE(sessionId);
+
- DbgPrint("novfs_i_setxattr: Ian\n"); /*%.*s\n", dentry->d_name.len, dentry->d_name.name); */
- DbgPrint
- ("novfs_i_setxattr: dentry->d_name.len %u, dentry->d_name.name %s\n",
- dentry->d_name.len, dentry->d_name.name);
- DbgPrint("novfs_i_setxattr: name %s\n", name);
- DbgPrint("novfs_i_setxattr: value_size %u\n", value_size);
- DbgPrint("novfs_i_setxattr: flags %d\n", flags);
++ DbgPrint("Ian"); /*%.*s\n", dentry->d_name.len, dentry->d_name.name); */
++ DbgPrint("dentry->d_name.len %u, dentry->d_name.name %s",
++ dentry->d_name.len, dentry->d_name.name);
++ DbgPrint("name %s", name);
++ DbgPrint("value_size %u", value_size);
++ DbgPrint("flags %d", flags);
+
+ if (inode && inode->i_private) {
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->
+ Scope);
- DbgPrint("novfs_i_setxattr: SessionId = %u\n", sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ //if (0 == sessionId)
+ if (0 == SC_PRESENT(sessionId)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(dentry);
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
- DbgPrint("novfs_i_setxattr: SessionId = %u\n",
- sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ }
+ }
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ retxcode =
+ novfs_setx_file_info(path, name, value, value_size,
+ &bytesWritten, flags,
+ sessionId);
+ if (!retxcode) {
- DbgPrint
- ("novfs_i_setxattr: bytesWritten = %u\n",
- bytesWritten);
++ DbgPrint("bytesWritten = %u", bytesWritten);
+ }
+ }
+ kfree(buf);
+ }
+
+ if (retxcode) {
+ retError = retxcode;
+ }
+
+ if (bytesWritten < value_size) {
+ retError = retxcode;
+ }
+ return (retError);
+}
+
+ssize_t novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct novfs_schandle sessionId;
+ char *path, *buf, *bufList;
+ ssize_t dataLen;
+ int retxcode = 0;
+
+ SC_INITIALIZE(sessionId);
+
- DbgPrint("novfs_i_listxattr: Ian\n"); //%.*s\n", dentry->d_name.len, dentry->d_name.name);
- DbgPrint
- ("novfs_i_listxattr: dentry->d_name.len %u, dentry->d_name.name %s\n",
++ DbgPrint("Ian"); //%.*s\n", dentry->d_name.len, dentry->d_name.name);
++ DbgPrint("dentry->d_name.len %u, dentry->d_name.name %s",
+ dentry->d_name.len, dentry->d_name.name);
- DbgPrint("novfs_i_listxattr: size %u\n", buffer_size);
++ DbgPrint("size %u", buffer_size);
+
+ if (inode && inode->i_private) {
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->i_private)->
+ Scope);
- DbgPrint("novfs_i_listxattr: SessionId = %u\n", sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ //if (0 == sessionId)
+ if (0 == SC_PRESENT(sessionId)) {
+ ((struct inode_data *) inode->i_private)->Scope =
+ novfs_get_scope(dentry);
+ sessionId =
+ novfs_scope_get_sessionId(((struct inode_data *) inode->
+ i_private)->Scope);
- DbgPrint("novfs_i_listxattr: SessionId = %u\n",
- sessionId);
++ DbgPrint("SessionId = %u", sessionId);
+ }
+ }
+
+ dataLen = 0;
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ path = novfs_dget_path(dentry, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ bufList = kmalloc(XA_BUFFER, GFP_KERNEL);
+ if (bufList) {
+ retxcode =
+ novfs_listx_file_info(path, bufList,
+ XA_BUFFER, &dataLen,
+ sessionId);
+
+ novfs_dump(64, bufList);
+ if (buffer_size != 0) {
+ if (buffer_size >= dataLen) {
+ memcpy(buffer, bufList,
+ dataLen);
+ } else {
- DbgPrint
- ("novfs_i_listxattr: (!!!) not enough buffer_size. buffer_size = %d, dataLen = %d\n",
++ DbgPrint("(!!!) not enough buffer_size. buffer_size = %d, dataLen = %d",
+ buffer_size, dataLen);
+ retxcode = -1;
+ }
+ }
+
+ kfree(bufList);
+ }
+
+ }
+ kfree(buf);
+ }
+
+ if (retxcode) {
+ dataLen = -1;
+ } else {
+
+ if ((buffer_size > 0) && (buffer_size < dataLen)) {
+ dataLen = -ERANGE;
+ }
+ }
+ return (dataLen);
+}
+
+int novfs_i_revalidate(struct dentry *dentry)
+{
+
- DbgPrint("novfs_i_revalidate: name %.*s\n", dentry->d_name.len,
- dentry->d_name.name);
++ DbgPrint("name %.*s", dentry->d_name.len, dentry->d_name.name);
+
+ return (0);
+}
+
+void novfs_read_inode(struct inode *inode)
+{
- DbgPrint("novfs_read_inode: 0x%p %d\n", inode, inode->i_ino);
++ DbgPrint("0x%p %d", inode, inode->i_ino);
+}
+
+void novfs_write_inode(struct inode *inode)
+{
- DbgPrint("novfs_write_inode: Inode=0x%p Ino=%d\n", inode, inode->i_ino);
++ DbgPrint("Inode=0x%p Ino=%d", inode, inode->i_ino);
+}
+
+int novfs_notify_change(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+
- DbgPrint
- ("novfs_notify_change: Dentry=0x%p Name=%.*s Inode=0x%p Ino=%d ia_valid=0x%x\n",
++ DbgPrint("Dentry=0x%p Name=%.*s Inode=0x%p Ino=%d ia_valid=0x%x",
+ dentry, dentry->d_name.len, dentry->d_name.name, inode,
+ inode->i_ino, attr->ia_valid);
+ return (0);
+}
+
+void novfs_clear_inode(struct inode *inode)
+{
+ InodeCount--;
+
+ if (inode->i_private) {
+ struct inode_data *id = inode->i_private;
+
- DbgPrint
- ("novfs_clear_inode: inode=0x%p ino=%d Scope=0x%p Name=%s\n",
++ DbgPrint("inode=0x%p ino=%d Scope=0x%p Name=%s",
+ inode, inode->i_ino, id->Scope, id->Name);
+
+ novfs_free_inode_cache(inode);
+
+ down(&InodeList_lock);
+ list_del(&id->IList);
+ up(&InodeList_lock);
+
+ kfree(inode->i_private);
+ inode->i_private = NULL;
+
+ remove_inode_hash(inode);
+
+ } else {
- DbgPrint("novfs_clear_inode: inode=0x%p ino=%d\n", inode,
- inode->i_ino);
++ DbgPrint("inode=0x%p ino=%d", inode, inode->i_ino);
+ }
+}
+
+/* Called when /proc/mounts is read */
+int novfs_show_options(struct seq_file *s, struct vfsmount *m)
+{
+ char *buf, *path, *tmp;
+
+ buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+ if (buf) {
+ struct path my_path;
+ my_path.mnt = m;
+ my_path.dentry = m->mnt_root;
+ path = d_path(&my_path, buf, PATH_LENGTH_BUFFER);
+ if (path) {
+ if (!novfs_current_mnt
+ || (novfs_current_mnt
+ && strcmp(novfs_current_mnt, path))) {
- DbgPrint("novfs_show_options: %.*s %.*s %s\n",
++ DbgPrint("%.*s %.*s %s",
+ m->mnt_root->d_name.len,
+ m->mnt_root->d_name.name,
+ m->mnt_mountpoint->d_name.len,
+ m->mnt_mountpoint->d_name.name, path);
+ tmp = kmalloc(PATH_LENGTH_BUFFER -
+ (int)(path - buf),
+ GFP_KERNEL);
+ if (tmp) {
+ strcpy(tmp, path);
+ path = novfs_current_mnt;
+ novfs_current_mnt = tmp;
+ novfs_daemon_set_mnt_point(novfs_current_mnt);
+
+ if (path) {
+ kfree(path);
+ }
+ }
+ }
+ }
+ kfree(buf);
+ }
+ return (0);
+}
+
+/* Called when the statfs(2) system call is made. */
+int novfs_statfs(struct dentry *de, struct kstatfs *buf)
+{
+ uint64_t td, fd, te, fe;
+ struct super_block *sb = de->d_sb;
+
- DbgPrint("novfs_statfs:\n");
++ DbgPrint("");
+
+ td = fd = te = fe = 0;
+
+ novfs_scope_get_userspace(&td, &fd, &te, &fe);
+
- DbgPrint("td=%llu\n", td);
- DbgPrint("fd=%llu\n", fd);
- DbgPrint("te=%llu\n", te);
- DbgPrint("fe=%llu\n", fd);
++ DbgPrint("td=%llu", td);
++ DbgPrint("fd=%llu", fd);
++ DbgPrint("te=%llu", te);
++ DbgPrint("fe=%llu", fd);
+ /* fix for Nautilus */
+ if (sb->s_blocksize == 0)
+ sb->s_blocksize = 4096;
+
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_namelen = NW_MAX_PATH_LENGTH;
+ buf->f_blocks =
+ (sector_t) (td +
+ (uint64_t) (sb->s_blocksize -
+ 1)) >> (uint64_t) sb->s_blocksize_bits;
+ buf->f_bfree = (sector_t) fd >> (uint64_t) sb->s_blocksize_bits;
+ buf->f_bavail = (sector_t) buf->f_bfree;
+ buf->f_files = (sector_t) te;
+ buf->f_ffree = (sector_t) fe;
+ buf->f_frsize = sb->s_blocksize;
+ if (te > 0xffffffff)
+ buf->f_files = 0xffffffff;
+
+ if (fe > 0xffffffff)
+ buf->f_ffree = 0xffffffff;
+
- DbgPrint("f_type: 0x%x\n", buf->f_type);
- DbgPrint("f_bsize: %u\n", buf->f_bsize);
- DbgPrint("f_namelen: %d\n", buf->f_namelen);
- DbgPrint("f_blocks: %llu\n", buf->f_blocks);
- DbgPrint("f_bfree: %llu\n", buf->f_bfree);
- DbgPrint("f_bavail: %llu\n", buf->f_bavail);
- DbgPrint("f_files: %llu\n", buf->f_files);
- DbgPrint("f_ffree: %llu\n", buf->f_ffree);
- DbgPrint("f_frsize: %u\n", buf->f_frsize);
++ DbgPrint("f_type: 0x%x", buf->f_type);
++ DbgPrint("f_bsize: %u", buf->f_bsize);
++ DbgPrint("f_namelen: %d", buf->f_namelen);
++ DbgPrint("f_blocks: %llu", buf->f_blocks);
++ DbgPrint("f_bfree: %llu", buf->f_bfree);
++ DbgPrint("f_bavail: %llu", buf->f_bavail);
++ DbgPrint("f_files: %llu", buf->f_files);
++ DbgPrint("f_ffree: %llu", buf->f_ffree);
++ DbgPrint("f_frsize: %u", buf->f_frsize);
+
+ return 0;
+}
+
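+/*
+ * Allocate and initialize a novfs inode. An ino of 0 means "assign the
+ * next synthetic inode number". The private inode_data carries the scope,
+ * file handle and directory-entry cache for this inode.
+ */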
+struct inode *novfs_get_inode(struct super_block *sb, int mode, int dev,
+ uid_t Uid, ino_t ino, struct qstr *name)
+{
+ struct inode *inode = new_inode(sb);
+
+ if (inode) {
+ InodeCount++;
+ inode->i_mode = mode;
+ inode->i_uid = Uid;
+ inode->i_gid = 0;
+ inode->i_blkbits = sb->s_blocksize_bits;
+ inode->i_blocks = 0;
+ inode->i_rdev = 0;
+ inode->i_ino = (ino) ? ino : (ino_t)atomic_inc_return(&novfs_Inode_Number);
+ if (novfs_page_cache) {
+ inode->i_mapping->a_ops = &novfs_aops;
+ } else {
+ inode->i_mapping->a_ops = &novfs_nocache_aops;
+ }
+ inode->i_mapping->backing_dev_info = &novfs_backing_dev_info;
+ inode->i_atime.tv_sec = 0;
+ inode->i_atime.tv_nsec = 0;
+ inode->i_mtime = inode->i_ctime = inode->i_atime;
+
- DbgPrint("novfs_get_inode: Inode=0x%p I_ino=%d len=%d\n", inode,
- inode->i_ino, name->len);
++ DbgPrint("Inode=0x%p I_ino=%d len=%d",
++ inode, inode->i_ino, name->len);
+
+ if (NULL !=
+ (inode->i_private =
+ kmalloc(sizeof(struct inode_data) + name->len,
+ GFP_KERNEL))) {
+ struct inode_data *id;
+ id = inode->i_private;
+
- DbgPrint("novfs_get_inode: i_private 0x%p\n", id);
++ DbgPrint("i_private 0x%p", id);
+
+ id->Scope = NULL;
+ id->Flags = 0;
+ id->Inode = inode;
+
+ id->cntDC = 1;
+
+ INIT_LIST_HEAD(&id->DirCache);
+ init_MUTEX(&id->DirCacheLock);
+
+ id->FileHandle = 0;
+ id->CacheFlag = 0;
+
+ down(&InodeList_lock);
+
+ list_add_tail(&id->IList, &InodeList);
+ up(&InodeList_lock);
+
+ id->Name[0] = '\0';
+
+ memcpy(id->Name, name->name, name->len);
+ id->Name[name->len] = '\0';
+
- DbgPrint("novfs_get_inode: name %s\n", id->Name);
++ DbgPrint("name %s", id->Name);
+ }
+
+ insert_inode_hash(inode);
+
+ switch (mode & S_IFMT) {
+
+ case S_IFREG:
+ inode->i_op = &novfs_file_inode_operations;
+ inode->i_fop = &novfs_file_operations;
+ break;
+
+ case S_IFDIR:
+ inode->i_op = &novfs_inode_operations;
+ inode->i_fop = &novfs_dir_operations;
+ inode->i_blkbits = 0;
+ break;
+
+ default:
+ init_special_inode(inode, mode, dev);
+ break;
+ }
+
- DbgPrint("novfs_get_inode: size=%lld\n", inode->i_size);
- DbgPrint("novfs_get_inode: mode=0%o\n", inode->i_mode);
- DbgPrint("novfs_get_inode: i_sb->s_blocksize=%d\n",
- inode->i_sb->s_blocksize);
- DbgPrint("novfs_get_inode: i_blkbits=%d\n", inode->i_blkbits);
- DbgPrint("novfs_get_inode: i_blocks=%d\n", inode->i_blocks);
- DbgPrint("novfs_get_inode: i_bytes=%d\n", inode->i_bytes);
++ DbgPrint("size=%lld", inode->i_size);
++ DbgPrint("mode=0%o", inode->i_mode);
++ DbgPrint("i_sb->s_blocksize=%d", inode->i_sb->s_blocksize);
++ DbgPrint("i_blkbits=%d", inode->i_blkbits);
++ DbgPrint("i_blocks=%d", inode->i_blocks);
++ DbgPrint("i_bytes=%d", inode->i_bytes);
+ }
+
- DbgPrint("novfs_get_inode: 0x%p %d\n", inode, inode->i_ino);
++ DbgPrint("0x%p %d", inode, inode->i_ino);
+ return (inode);
+}
+
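+/*
+ * Build the superblock: create the root inode plus the static server and
+ * tree top-level directories that anchor the novfs namespace.
+ */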
+int novfs_fill_super(struct super_block *SB, void *Data, int Silent)
+{
+ struct inode *inode;
+ struct dentry *server, *tree;
+ struct qstr name;
+ struct novfs_entry_info info;
+
+ SB->s_blocksize = PAGE_CACHE_SIZE;
+ SB->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ SB->s_maxbytes = MAX_LFS_FILESIZE; /* Max file size */
+ SB->s_op = &novfs_ops;
+ SB->s_flags |= (MS_NODIRATIME | MS_NODEV | MS_POSIXACL);
+ SB->s_magic = NOVFS_MAGIC;
+
+ name.len = 1;
+ name.name = "/";
+
+ inode = novfs_get_inode(SB, S_IFDIR | 0777, 0, 0, 0, &name);
+ if (!inode) {
+ return (-ENOMEM);
+ }
+
+ novfs_root = d_alloc_root(inode);
+
+ if (!novfs_root) {
+ iput(inode);
+ return (-ENOMEM);
+ }
+ novfs_root->d_time = jiffies + (novfs_update_timeout * HZ);
+
+ inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+ SB->s_root = novfs_root;
+
- DbgPrint("novfs_fill_super: root 0x%p\n", novfs_root);
++ DbgPrint("root 0x%p", novfs_root);
+
+ if (novfs_root) {
+ novfs_root->d_op = &novfs_dentry_operations;
+
+ name.name = SERVER_DIRECTORY_NAME;
+ name.len = strlen(SERVER_DIRECTORY_NAME);
+ name.hash = novfs_internal_hash(&name);
+
+ inode = novfs_get_inode(SB, S_IFDIR | 0777, 0, 0, 0, &name);
+ if (inode) {
+ info.mode = inode->i_mode;
+ info.namelength = 0;
+ inode->i_size = info.size = 0;
+ inode->i_uid = info.uid = 0;
+ inode->i_gid = info.gid = 0;
+ inode->i_atime = info.atime =
+ inode->i_ctime = info.ctime =
+ inode->i_mtime = info.mtime = CURRENT_TIME;
+
+ server = d_alloc(novfs_root, &name);
+ if (server) {
+ server->d_op = &novfs_dentry_operations;
+ server->d_time = 0xffffffff;
+ d_add(server, inode);
- DbgPrint("novfs_fill_super: d_add %s 0x%p\n",
++ DbgPrint("d_add %s 0x%p",
+ SERVER_DIRECTORY_NAME, server);
+ novfs_add_inode_entry(novfs_root->d_inode,
+ &name, inode->i_ino,
+ &info);
+ }
+ }
+
+ name.name = TREE_DIRECTORY_NAME;
+ name.len = strlen(TREE_DIRECTORY_NAME);
+ name.hash = novfs_internal_hash(&name);
+
+ inode = novfs_get_inode(SB, S_IFDIR | 0777, 0, 0, 0, &name);
+ if (inode) {
+ info.mode = inode->i_mode;
+ info.namelength = 0;
+ inode->i_size = info.size = 0;
+ inode->i_uid = info.uid = 0;
+ inode->i_gid = info.gid = 0;
+ inode->i_atime = info.atime =
+ inode->i_ctime = info.ctime =
+ inode->i_mtime = info.mtime = CURRENT_TIME;
+ tree = d_alloc(novfs_root, &name);
+ if (tree) {
+ tree->d_op = &novfs_dentry_operations;
+ tree->d_time = 0xffffffff;
+
+ d_add(tree, inode);
- DbgPrint("novfs_fill_super: d_add %s 0x%p\n",
++ DbgPrint("d_add %s 0x%p",
+ TREE_DIRECTORY_NAME, tree);
+ novfs_add_inode_entry(novfs_root->d_inode,
+ &name, inode->i_ino,
+ &info);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int novfs_get_sb(struct file_system_type *Fstype, int Flags,
+ const char *Dev_name, void *Data, struct vfsmount *Mnt)
+{
- DbgPrint("novfs_get_sb: Fstype=0x%x Dev_name=%s\n", Fstype, Dev_name);
++ DbgPrint("Fstype=0x%x Dev_name=%s", Fstype, Dev_name);
+ return get_sb_nodev(Fstype, Flags, Data, novfs_fill_super, Mnt);
+}
+
+static void novfs_kill_sb(struct super_block *super)
+{
+ shrink_dcache_sb(super);
+ kill_litter_super(super);
+}
+
+ssize_t novfs_Control_read(struct file *file, char *buf, size_t nbytes,
+ loff_t * ppos)
+{
+ ssize_t retval = 0;
+
- DbgPrint("novfs_Control_read: kernel_locked 0x%x\n", kernel_locked());
++ DbgPrint("kernel_locked 0x%x", kernel_locked());
+
+ return retval;
+}
+
+ssize_t novfs_Control_write(struct file * file, const char *buf, size_t nbytes,
+ loff_t * ppos)
+{
+ ssize_t retval = 0;
+
- DbgPrint("novfs_Control_write: kernel_locked 0x%x\n", kernel_locked());
++ DbgPrint("kernel_locked 0x%x", kernel_locked());
+ if (buf && nbytes) {
+ }
+
+ return (retval);
+}
+
+int novfs_Control_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ int retval = 0;
+
- DbgPrint("novfs_Control_ioctl: kernel_locked 0x%x\n", kernel_locked());
++ DbgPrint("kernel_locked 0x%x", kernel_locked());
+
+ return (retval);
+}
+
+static struct file_system_type novfs_fs_type = {
+ .name = "novfs",
+ .get_sb = novfs_get_sb,
+ .kill_sb = novfs_kill_sb,
+ .owner = THIS_MODULE,
+};
+
+int __init init_novfs(void)
+{
+ int retCode;
+
+ lastDir[0] = 0;
+ lastTime = get_nanosecond_time();
+
+ inHAX = 0;
+ inHAXTime = get_nanosecond_time();
+
+ retCode = novfs_proc_init();
+
+ novfs_profile_init();
+
+ if (!retCode) {
- DbgPrint("init_novfs: %s %s %s\n", __DATE__, __TIME__,
- NOVFS_VERSION_STRING);
++ DbgPrint("%s %s %s", __DATE__, __TIME__, NOVFS_VERSION_STRING);
+ novfs_daemon_queue_init();
+ novfs_scope_init();
+ retCode = register_filesystem(&novfs_fs_type);
+ if (retCode) {
+ novfs_proc_exit();
+ novfs_daemon_queue_exit();
+ novfs_scope_exit();
+ }
+ }
+ return (retCode);
+}
+
+void __exit exit_novfs(void)
+{
+ novfs_scope_exit();
+ novfs_daemon_queue_exit();
+ novfs_profile_exit();
+ novfs_proc_exit();
+ unregister_filesystem(&novfs_fs_type);
+
+ if (novfs_current_mnt) {
+ kfree(novfs_current_mnt);
+ novfs_current_mnt = NULL;
+ }
+}
+
+int novfs_lock_inode_cache(struct inode *i)
+{
+ struct inode_data *id;
+ int retVal = 0;
+
- DbgPrint("novfs_lock_inode_cache: 0x%p\n", i);
++ DbgPrint("0x%p", i);
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ down(&id->DirCacheLock);
+ retVal = 1;
+ }
- DbgPrint("novfs_lock_inode_cache: return %d\n", retVal);
++ DbgPrint("return %d", retVal);
+ return (retVal);
+}
+
+void novfs_unlock_inode_cache(struct inode *i)
+{
+ struct inode_data *id;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ up(&id->DirCacheLock);
+ }
+}
+
+int novfs_enumerate_inode_cache(struct inode *i, struct list_head **iteration,
+ ino_t * ino, struct novfs_entry_info *info)
+/*
+ * Arguments: struct inode *i - pointer to directory inode
+ *
+ * Returns: 0 - item found
+ * -1 - done
+ *
+ * Abstract: Returns the next entry from the directory cache and
+ * advances *iteration.
+ *
+ * Notes: DirCacheLock should be held before calling this routine.
+ *========================================================================*/
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *l = NULL;
+ int retVal = -1;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ if ((NULL == iteration) || (NULL == *iteration)) {
+ l = id->DirCache.next;
+ } else {
+ l = *iteration;
+ }
+
+ if (l == &id->DirCache) {
+ l = NULL;
+ } else {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+
+ *ino = dc->ino;
+ info->type = 0;
+ info->mode = dc->mode;
+ info->size = dc->size;
+ info->atime = dc->atime;
+ info->mtime = dc->mtime;
+ info->ctime = dc->ctime;
+ info->namelength = dc->nameLen;
+ memcpy(info->name, dc->name, dc->nameLen);
+ info->name[dc->nameLen] = '\0';
+ retVal = 0;
+
+ l = l->next;
+ }
+ }
+ *iteration = l;
+ return (retVal);
+}
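+
+/*
+ * Illustrative caller sketch (hypothetical, not part of the driver):
+ * a full walk of a directory's cache takes DirCacheLock through
+ * novfs_lock_inode_cache(), starts from a NULL iterator and stops
+ * when the enumerator returns -1.
+ *
+ *   struct list_head *iter = NULL;
+ *   ino_t ino;
+ *
+ *   if (novfs_lock_inode_cache(dir)) {
+ *       while (!novfs_enumerate_inode_cache(dir, &iter, &ino, info))
+ *           use_entry(ino, info);   (hypothetical consumer)
+ *       novfs_unlock_inode_cache(dir);
+ *   }
+ */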
+
+/* DirCacheLock should be held before calling this routine. */
+int novfs_get_entry(struct inode *i, struct qstr *name, ino_t * ino,
+ struct novfs_entry_info *info)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ int retVal = -1;
+ char *n = "<NULL>";
+ int nl = 6;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ if (name && name->len) {
+ n = (char *)name->name;
+ nl = name->len;
+ }
+
+ dc = novfs_lookup_inode_cache(i, name, *ino);
+ if (dc) {
+ dc->flags |= ENTRY_VALID;
+ retVal = 0;
+ *ino = dc->ino;
+ info->type = 0;
+ info->mode = dc->mode;
+ info->size = dc->size;
+ info->atime = dc->atime;
+ info->mtime = dc->mtime;
+ info->ctime = dc->ctime;
+ info->namelength = dc->nameLen;
+ memcpy(info->name, dc->name, dc->nameLen);
+ info->name[dc->nameLen] = '\0';
+ }
+
- DbgPrint("novfs_get_entry:\n"
- " inode: 0x%p\n"
- " name: %.*s\n" " ino: %d\n", i, nl, n, *ino);
++ DbgPrint("inode: 0x%p; name: %.*s; ino: %d\n", i, nl, n, *ino);
+ }
- DbgPrint("novfs_get_entry: return %d\n", retVal);
++ DbgPrint("return %d", retVal);
+ return (retVal);
+}
+
+/* DirCacheLock should be held before calling this routine. */
+int novfs_get_entry_by_pos(struct inode *i, loff_t pos, ino_t * ino,
+ struct novfs_entry_info *info)
+{
+ int retVal = -1;
+ loff_t count = 0;
+ loff_t i_pos = pos - 2;
+ struct list_head *inter = NULL;
+ while (!novfs_enumerate_inode_cache(i, &inter, ino, info)) {
- DbgPrint
- ("novfs_dir_readdir : novfs_get_entry_by_pos : info->name = %s\n",
- info->name);
++ DbgPrint("info->name = %s", info->name);
+ if (count == i_pos) {
+ retVal = 0;
+ break;
+ } else
+ count++;
+ }
+
+ return retVal;
+}
+
+/* DirCacheLock should be held before calling this routine. */
+int novfs_get_entry_time(struct inode *i, struct qstr *name, ino_t * ino,
+ struct novfs_entry_info *info, u64 * EntryTime)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ int retVal = -1;
+ char *n = "<NULL>";
+ int nl = 6;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ if (name && name->len) {
+ n = (char *)name->name;
+ nl = name->len;
+ }
- DbgPrint("novfs_get_entry_time:\n"
- " inode: 0x%p\n"
- " name: %.*s\n" " ino: %d\n", i, nl, n, *ino);
++ DbgPrint("inode: 0x%p; name: %.*s; ino: %d", i, nl, n, *ino);
+
+ dc = novfs_lookup_inode_cache(i, name, *ino);
+ if (dc) {
+ retVal = 0;
+ *ino = dc->ino;
+ info->type = 0;
+ info->mode = dc->mode;
+ info->size = dc->size;
+ info->atime = dc->atime;
+ info->mtime = dc->mtime;
+ info->ctime = dc->ctime;
+ info->namelength = dc->nameLen;
+ memcpy(info->name, dc->name, dc->nameLen);
+ info->name[dc->nameLen] = '\0';
+ if (EntryTime) {
+ *EntryTime = dc->jiffies;
+ }
+ }
+ }
- DbgPrint("novfs_get_entry_time: return %d\n", retVal);
++ DbgPrint("return %d", retVal);
+ return (retVal);
+}
+
+/*
+ * Abstract: This routine will return the first entry on the list
+ * and then remove it.
+ *
+ * Notes: DirCacheLock should be held before calling this routine.
+ *
+ */
+int novfs_get_remove_entry(struct inode *i, ino_t * ino, struct novfs_entry_info *info)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *l = NULL;
+ int retVal = -1;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ l = id->DirCache.next;
+
+ if (l != &id->DirCache) {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+
+ *ino = dc->ino;
+ info->type = 0;
+ info->mode = dc->mode;
+ info->size = dc->size;
+ info->atime = dc->atime;
+ info->mtime = dc->mtime;
+ info->ctime = dc->ctime;
+ info->namelength = dc->nameLen;
+ memcpy(info->name, dc->name, dc->nameLen);
+ info->name[dc->nameLen] = '\0';
+ retVal = 0;
+
+ list_del(&dc->list);
+ kfree(dc);
+ DCCount--;
+
+ id->cntDC--;
+ }
+ }
+ return (retVal);
+}
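+
+/*
+ * A minimal drain loop built on the helper above (sketch only; dir,
+ * ino and info are hypothetical caller-owned variables).  Each call
+ * pops the head entry and fixes up DCCount and cntDC, so the loop
+ * empties the cache:
+ *
+ *   while (!novfs_get_remove_entry(dir, &ino, info))
+ *       ;   (-1 means the list is empty)
+ */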
+
+/*
+ * Abstract: Marks all entries in the directory cache as invalid.
+ *
+ * Notes: DirCacheLock should be held before calling this routine.
+ *
+ *========================================================================*/
+void novfs_invalidate_inode_cache(struct inode *i)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *l;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ list_for_each(l, &id->DirCache) {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+ dc->flags &= ~ENTRY_VALID;
+ }
+ }
+}
+
+/*++======================================================================*/
+struct novfs_dir_cache *novfs_lookup_inode_cache(struct inode *i, struct qstr *name,
+ ino_t ino)
+/*
+ * Returns: struct novfs_dir_cache entry if match
+ * NULL - if there is no match.
+ *
+ * Abstract: Checks an inode directory to see if there are any entries
+ * matching name or ino. If name is specified then ino is
+ * not used. ino is used if name is not specified.
+ *
+ * Notes: DirCacheLock should be held before calling this routine.
+ *
+ *========================================================================*/
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc, *retVal = NULL;
+ struct list_head *l;
+ char *n = "<NULL>";
+ int nl = 6;
+ int hash = 0;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ if (name && name->name) {
+ nl = name->len;
+ n = (char *)name->name;
+ hash = name->hash;
+ }
- DbgPrint("novfs_lookup_inode_cache:\n"
- " inode: 0x%p\n"
- " name: %.*s\n"
- " hash: 0x%x\n"
- " len: %d\n"
- " ino: %d\n", i, nl, n, hash, nl, ino);
++ DbgPrint("inode: 0x%p; name: %.*s; hash: 0x%x;\n"
++ " len: %d; ino: %d", i, nl, n, hash, nl, ino);
+
+ list_for_each(l, &id->DirCache) {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+ if (name) {
+ if ((name->hash == dc->hash) &&
+ (name->len == dc->nameLen) &&
+ (0 ==
+ memcmp(name->name, dc->name, name->len))) {
+ retVal = dc;
+ break;
+ }
+ } else {
+ if (ino == dc->ino) {
+ retVal = dc;
+ break;
+ }
+ }
+ }
+ }
+
- DbgPrint("novfs_lookup_inode_cache: return 0x%p\n", retVal);
++ DbgPrint("return 0x%p", retVal);
+ return (retVal);
+}
+
+/*
+ * Checks an inode directory to see if there are any entries matching name
+ * or ino. If entry is found the valid bit is set.
+ *
+ * DirCacheLock should be held before calling this routine.
+ */
+int novfs_lookup_validate(struct inode *i, struct qstr *name, ino_t ino)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ int retVal = -1;
+ char *n = "<NULL>";
+ int nl = 6;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ if (name && name->len) {
+ n = (char *)name->name;
+ nl = name->len;
+ }
- DbgPrint("novfs_update_entry:\n"
- " inode: 0x%p\n"
- " name: %.*s\n" " ino: %d\n", i, nl, n, ino);
++ DbgPrint("inode: 0x%p; name: %.*s; ino: %d", i, nl, n, ino);
+
+ dc = novfs_lookup_inode_cache(i, name, ino);
+ if (dc) {
+ dc->flags |= ENTRY_VALID;
+ retVal = 0;
+ }
+ }
+ return (retVal);
+}
+
+/*
+ * Adds an entry to the directory cache.
+ *
+ * DirCacheLock should be held before calling this routine.
+ */
+int novfs_add_inode_entry(struct inode *i,
+ struct qstr *name, ino_t ino, struct novfs_entry_info *info)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *new;
+ int retVal = -ENOMEM;
+ struct novfs_dir_cache *todel;
+ struct list_head *todeltmp;
+
- DbgPrint("novfs_add_inode_entry:\n" " i: %u\n", i);
++ DbgPrint("i: %p", i);
+ if ((id = i->i_private)) {
- DbgPrint(" i->i_private: %p\n", id);
++ DbgPrint("i->i_private: %p", id);
+ if (id->DirCache.next)
- DbgPrint(" id->DirCache.next: %p\n",
- id->DirCache.next);
++ DbgPrint("id->DirCache.next: %p", id->DirCache.next);
+ }
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ new = kmalloc(sizeof(struct novfs_dir_cache) + name->len, GFP_KERNEL);
+ if (new) {
+ id->cntDC++;
+
+ DCCount++;
- DbgPrint("novfs_add_inode_entry:\n"
- " inode: 0x%p\n"
- " id: 0x%p\n"
- " DC: 0x%p\n"
- " new: 0x%p\n"
- " name: %.*s\n"
- " ino: %d\n"
- " size: %lld\n"
- " mode: 0x%x\n",
++ DbgPrint("inode: 0x%p; id: 0x%p; DC: 0x%p; new: 0x%p; "
++ "name: %.*s; ino: %d; size: %lld; mode: 0x%x",
+ i, id, &id->DirCache, new, name->len,
+ name->name, ino, info->size, info->mode);
+
+ retVal = 0;
+ new->flags = ENTRY_VALID;
+ new->jiffies = get_jiffies_64();
+ new->size = info->size;
+ new->mode = info->mode;
+ new->atime = info->atime;
+ new->mtime = info->mtime;
+ new->ctime = info->ctime;
+ new->ino = ino;
+ new->hash = name->hash;
+ new->nameLen = name->len;
+ memcpy(new->name, name->name, name->len);
+ new->name[new->nameLen] = '\0';
+ list_add(&new->list, &id->DirCache);
+
+ if (id->cntDC > 20) {
+ todeltmp = id->DirCache.prev;
+ todel = list_entry(todeltmp, struct novfs_dir_cache, list);
+
+ list_del(&todel->list);
+
+ kfree(todel);
+
+ DCCount--;
+ id->cntDC--;
+ }
+
+ }
+ }
+ return (retVal);
+}
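+
+/*
+ * Note on the cache policy above: list_add() inserts at the head of
+ * id->DirCache, so entries stay in most-recently-added order and
+ * DirCache.prev is always the oldest entry.  Once a directory holds
+ * more than 20 cached entries the tail entry is freed immediately,
+ * bounding per-directory memory use.
+ */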
+
+/*
+ * DirCacheLock should be held before calling this routine.
+ */
+int novfs_update_entry(struct inode *i, struct qstr *name, ino_t ino,
+ struct novfs_entry_info *info)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ int retVal = -1;
+ char *n = "<NULL>";
+ int nl = 6;
+ char atime_buf[32];
+ char mtime_buf[32];
+ char ctime_buf[32];
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+
+ if (name && name->len) {
+ n = (char *)name->name;
+ nl = name->len;
+ }
+ ctime_r(&info->atime.tv_sec, atime_buf);
+ ctime_r(&info->mtime.tv_sec, mtime_buf);
+ ctime_r(&info->ctime.tv_sec, ctime_buf);
- DbgPrint("novfs_update_entry:\n"
- " inode: 0x%p\n"
- " name: %.*s\n"
- " ino: %d\n"
- " size: %lld\n"
- " atime: %s\n"
- " mtime: %s\n"
- " ctime: %s\n",
++ DbgPrint("inode: 0x%p; name: %.*s; ino: %d; size: %lld; "
++ "atime: %s; mtime: %s; ctime: %s",
+ i, nl, n, ino, info->size, atime_buf, mtime_buf,
+ ctime_buf);
+
+ dc = novfs_lookup_inode_cache(i, name, ino);
+ if (dc) {
+ retVal = 0;
+ dc->flags = ENTRY_VALID;
+ dc->jiffies = get_jiffies_64();
+ dc->size = info->size;
+ dc->mode = info->mode;
+ dc->atime = info->atime;
+ dc->mtime = info->mtime;
+ dc->ctime = info->ctime;
+
+ ctime_r(&dc->atime.tv_sec, atime_buf);
+ ctime_r(&dc->mtime.tv_sec, mtime_buf);
+ ctime_r(&dc->ctime.tv_sec, ctime_buf);
- DbgPrint("novfs_update_entry entry: 0x%p\n"
- " flags: 0x%x\n"
- " jiffies: %lld\n"
- " ino: %d\n"
- " size: %lld\n"
- " mode: 0%o\n"
- " atime: %s\n"
- " mtime: %s %d\n"
- " ctime: %s\n"
- " hash: 0x%x\n"
- " nameLen: %d\n"
- " name: %s\n",
++ DbgPrint("entry: 0x%p; flags: 0x%x; jiffies: %lld; "
++ "ino: %d; size: %lld; mode: 0%o; atime: %s; "
++ "mtime: %s %d; ctime: %s; hash: 0x%x; "
++ " nameLen: %d; name: %s",
+ dc, dc->flags, dc->jiffies, dc->ino, dc->size,
+ dc->mode, atime_buf, mtime_buf,
+ dc->mtime.tv_nsec, ctime_buf, dc->hash,
+ dc->nameLen, dc->name);
+ }
+ }
- DbgPrint("novfs_update_entry: return %d\n", retVal);
++ DbgPrint("return %d", retVal);
+ return (retVal);
+}
+
+/*
+ * Removes entry from directory cache. You can specify a name
+ * or an inode number.
+ *
+ * DirCacheLock should be held before calling this routine.
+ */
+void novfs_remove_inode_entry(struct inode *i, struct qstr *name, ino_t ino)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ char *n = "<NULL>";
+ int nl = 6;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ dc = novfs_lookup_inode_cache(i, name, ino);
+ if (dc) {
+ if (name && name->name) {
+ nl = name->len;
+ n = (char *)name->name;
+ }
- DbgPrint("novfs_remove_inode_entry:\n"
- " inode: 0x%p\n"
- " id: 0x%p\n"
- " DC: 0x%p\n"
- " name: %.*s\n"
- " ino: %d\n"
- " entry: 0x%p\n"
- " name: %.*s\n"
- " ino: %d\n"
- " next: 0x%p\n"
- " prev: 0x%p\n",
++ DbgPrint("inode: 0x%p; id: 0x%p; DC: 0x%p; "
++ "name: %.*s; ino: %d entry: 0x%p "
++ "[name: %.*s; ino: %d; next: 0x%p; "
++ "prev: 0x%p]",
+ i, id, &id->DirCache, nl, n, ino, dc,
+ dc->nameLen, dc->name, dc->ino, dc->list.next,
+ dc->list.prev);
+ list_del(&dc->list);
+ kfree(dc);
+ DCCount--;
+
+ id->cntDC--;
+ }
+ }
+}
+
+/*
+ * Frees all invalid entries in the directory cache.
+ *
+ * DirCacheLock should be held before calling this routine.
+ */
+void novfs_free_invalid_entries(struct inode *i)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *l;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ list_for_each(l, &id->DirCache) {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+ if (0 == (dc->flags & ENTRY_VALID)) {
- DbgPrint("novfs_free_invalid_entries:\n"
- " inode: 0x%p\n"
- " id: 0x%p\n"
- " entry: 0x%p\n"
- " name: %.*s\n"
- " ino: %d\n",
++ DbgPrint("inode: 0x%p; id: 0x%p; entry: 0x%p; "
++ "name: %.*s; ino: %d",
+ i, id, dc, dc->nameLen, dc->name,
+ dc->ino);
+ l = l->prev;
+ list_del(&dc->list);
+ kfree(dc);
+ DCCount--;
+
+ id->cntDC--;
+ }
+ }
+ }
+}
+
+/*
+ * Frees all entries in the inode cache.
+ *
+ * DirCacheLock should be held before calling this routine.
+ */
+void novfs_free_inode_cache(struct inode *i)
+{
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *l;
+
+ if (i && (id = i->i_private) && id->DirCache.next) {
+ list_for_each(l, &id->DirCache) {
+ dc = list_entry(l, struct novfs_dir_cache, list);
+ l = l->prev;
+ list_del(&dc->list);
+ kfree(dc);
+ DCCount--;
+
+ id->cntDC--;
+ }
+ }
+}
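+
+/*
+ * In both free routines above, stepping the cursor back (l = l->prev)
+ * before list_del() keeps the list_for_each() iteration valid after
+ * the current node is freed; list_for_each_safe() would express the
+ * same idea more idiomatically.
+ */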
+
+void novfs_dump_inode(void *pf)
+{
+ struct inode *inode;
+ void (*pfunc) (char *Fmt, ...) = pf;
+ struct inode_data *id;
+ struct novfs_dir_cache *dc;
+ struct list_head *il, *l;
+ char atime_buf[32];
+ char mtime_buf[32];
+ char ctime_buf[32];
+ unsigned long icnt = 0, dccnt = 0;
+
+ down(&InodeList_lock);
+ list_for_each(il, &InodeList) {
+ id = list_entry(il, struct inode_data, IList);
+ inode = id->Inode;
+ if (inode) {
+ icnt++;
+
+ pfunc("Inode=0x%p I_ino=%d\n", inode, inode->i_ino);
+
+ pfunc(" atime=%s\n",
+ ctime_r(&inode->i_atime.tv_sec, atime_buf));
+ pfunc(" ctime=%s\n",
+ ctime_r(&inode->i_mtime.tv_sec, atime_buf));
+ pfunc(" mtime=%s\n",
+ ctime_r(&inode->i_ctime.tv_sec, atime_buf));
+ pfunc(" size=%lld\n", inode->i_size);
+ pfunc(" mode=0%o\n", inode->i_mode);
+ pfunc(" count=0%o\n", atomic_read(&inode->i_count));
+ }
+
+ pfunc(" nofs_inode_data: 0x%p Name=%s Scope=0x%p\n", id, id->Name,
+ id->Scope);
+
+ if (id->DirCache.next) {
+ list_for_each(l, &id->DirCache) {
+ dccnt++;
+ dc = list_entry(l, struct novfs_dir_cache,
+ list);
+ ctime_r(&dc->atime.tv_sec, atime_buf);
+ ctime_r(&dc->mtime.tv_sec, mtime_buf);
+ ctime_r(&dc->ctime.tv_sec, ctime_buf);
+
+ pfunc(" Cache Entry: 0x%p\n"
+ " flags: 0x%x\n"
+ " jiffies: %llu\n"
+ " ino: %u\n"
+ " size: %llu\n"
+ " mode: 0%o\n"
+ " atime: %s\n"
+ " mtime: %s\n"
+ " ctime: %s\n"
+ " hash: 0x%x\n"
+ " len: %d\n"
+ " name: %s\n",
+ dc, dc->flags, dc->jiffies,
+ dc->ino, dc->size, dc->mode,
+ atime_buf, mtime_buf, ctime_buf,
+ dc->hash, dc->nameLen, dc->name);
+ }
+ }
+ }
+ up(&InodeList_lock);
+
+ pfunc("Inodes: %d(%d) DirCache: %d(%d)\n", InodeCount, icnt, DCCount,
+ dccnt);
+
+}
+
+module_init(init_novfs);
+module_exit(exit_novfs);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Novell Inc.");
+MODULE_DESCRIPTION("Novell NetWare Client for Linux");
+MODULE_VERSION(NOVFS_VERSION_STRING);
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner/Richard Williams
+ *
+ * This file contains functions used to interface to the library interface of
+ * the daemon.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/poll.h>
+#include <linux/semaphore.h>
+#include <asm/uaccess.h>
+
+#include "nwcapi.h"
+#include "nwerror.h"
+#include "vfs.h"
+#include "commands.h"
+
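+/*
+ * Fallback for kernels that do not provide strlen_user(): bound the
+ * scan with the largest positive long.  Like strlen_user(), the
+ * strnlen_user() result counts the terminating NUL, which is why the
+ * strings marshalled below travel NUL-terminated.
+ */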
+#ifndef strlen_user
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+#endif
+
+static void GetUserData(struct nwc_scan_conn_info * connInfo, struct novfs_xplat_call_request *cmd, struct novfs_xplat_call_reply *reply);
+static void GetConnData(struct nwc_get_conn_info * connInfo, struct novfs_xplat_call_request *cmd, struct novfs_xplat_call_reply *reply);
+
+/*++======================================================================*/
+int novfs_open_conn_by_name(struct novfs_xplat *pdata, void ** Handle, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_open_conn_by_name *openConn, *connReply;
+ struct nwc_open_conn_by_name ocbn;
+ int retCode = 0;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+ char *data;
+
+ cpylen = copy_from_user(&ocbn, pdata->reqData, sizeof(ocbn));
+ datalen = sizeof(*openConn) + strlen_user(ocbn.pName->pString) + strlen_user(ocbn.pServiceType);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_OPEN_CONN_BY_NAME;
+
+ cmd->dataLen = datalen;
+ openConn = (struct nwd_open_conn_by_name *) cmd->data;
+
+ openConn->nameLen = strlen_user(ocbn.pName->pString);
+ openConn->serviceLen = strlen_user(ocbn.pServiceType);
+ openConn->uConnFlags = ocbn.uConnFlags;
+ openConn->ConnHandle = Uint32toHandle(ocbn.ConnHandle);
+ data = (char *)openConn;
+ data += sizeof(*openConn);
+ openConn->oName = sizeof(*openConn);
+
+ openConn->oServiceType = openConn->oName + openConn->nameLen;
+ cpylen =
+ copy_from_user(data, ocbn.pName->pString,
+ openConn->nameLen);
+ data += openConn->nameLen;
+ cpylen =
+ copy_from_user(data, ocbn.pServiceType,
+ openConn->serviceLen);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ /*
+ * we got reply data from the daemon
+ */
+ connReply = (struct nwd_open_conn_by_name *) reply->data;
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ /*
+ * we got valid data.
+ */
+ connReply = (struct nwd_open_conn_by_name *) reply->data;
+ ocbn.RetConnHandle = HandletoUint32(connReply->newConnHandle);
+ *Handle = connReply->newConnHandle;
+
+ cpylen = copy_to_user(pdata->reqData, &ocbn, sizeof(ocbn));
- DbgPrint("New Conn Handle = %X\n", connReply->newConnHandle);
++ DbgPrint("New Conn Handle = %X", connReply->newConnHandle);
+ }
+ kfree(reply);
+ }
+
+ kfree(cmd);
+ return ((int)retCode);
+
+}
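+
+/*
+ * Request layout marshalled above: the fixed struct
+ * nwd_open_conn_by_name header is followed in the same buffer by the
+ * connection name and then the service-type string; oName and
+ * oServiceType record the byte offsets of the two strings so the
+ * daemon can locate them without extra pointers.
+ */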
+
+int novfs_open_conn_by_addr(struct novfs_xplat *pdata, void ** Handle, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_open_conn_by_addr *openConn, *connReply;
+ struct nwc_open_conn_by_addr ocba;
+ struct nwc_tran_addr tranAddr;
+ int retCode = 0;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+ char addr[MAX_ADDRESS_LENGTH];
+
+ cpylen = copy_from_user(&ocba, pdata->reqData, sizeof(ocba));
+ datalen = sizeof(*openConn);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_OPEN_CONN_BY_ADDRESS;
+ cmd->dataLen = datalen;
+ openConn = (struct nwd_open_conn_by_addr *) cmd->data;
+
+ cpylen =
+ copy_from_user(&tranAddr, ocba.pTranAddr, sizeof(tranAddr));
+
- DbgPrint("NwOpenConnByAddr: tranAddr\n");
++ DbgPrint("tranAddr");
+ novfs_dump(sizeof(tranAddr), &tranAddr);
+
+ openConn->TranAddr.uTransportType = tranAddr.uTransportType;
+ openConn->TranAddr.uAddressLength = tranAddr.uAddressLength;
+ memset(addr, 0xcc, sizeof(addr) - 1);
+
+ cpylen =
+ copy_from_user(addr, tranAddr.puAddress,
+ tranAddr.uAddressLength);
+
- DbgPrint("NwOpenConnByAddr: addr\n");
++ DbgPrint("addr");
+ novfs_dump(sizeof(addr), addr);
+
+ openConn->TranAddr.oAddress = *(unsigned int *) (&addr[2]);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ /*
+ * we got reply data from the daemon
+ */
+ connReply = (struct nwd_open_conn_by_addr *) reply->data;
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ /*
+ * we got valid data.
+ */
+ connReply = (struct nwd_open_conn_by_addr *) reply->data;
+ ocba.ConnHandle =
+ HandletoUint32(connReply->ConnHandle);
+ *Handle = connReply->ConnHandle;
+ cpylen =
+ copy_to_user(pdata->reqData, &ocba,
+ sizeof(ocba));
- DbgPrint("New Conn Handle = %X\n",
- connReply->ConnHandle);
++ DbgPrint("New Conn Handle = %X", connReply->ConnHandle);
+ }
+ kfree(reply);
+ }
+
+ kfree(cmd);
+
+ return (retCode);
+
+}
+
+int novfs_open_conn_by_ref(struct novfs_xplat *pdata, void ** Handle, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_open_conn_by_ref *openConn;
+ struct nwc_open_conn_by_ref ocbr;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ cpylen = copy_from_user(&ocbr, pdata->reqData, sizeof(ocbr));
+ datalen = sizeof(*openConn);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_OPEN_CONN_BY_REFERENCE;
+ cmd->dataLen = datalen;
+ openConn = (struct nwd_open_conn_by_ref *) cmd->data;
+
+ openConn->uConnReference =
+ (void *) (unsigned long) ocbr.uConnReference;
+ openConn->uConnFlags = ocbr.uConnFlags;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ /*
+ * we got reply data from the daemon
+ */
+ openConn = (struct nwd_open_conn_by_ref *) reply->data;
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ /*
+ * we got valid data.
+ */
+ ocbr.ConnHandle =
+ HandletoUint32(openConn->ConnHandle);
+ *Handle = openConn->ConnHandle;
+
+ cpylen =
+ copy_to_user(pdata->reqData, &ocbr,
+ sizeof(ocbr));
- DbgPrint("New Conn Handle = %X\n",
- openConn->ConnHandle);
++ DbgPrint("New Conn Handle = %X", openConn->ConnHandle);
+ }
+ kfree(reply);
+ }
+
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_raw_send(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct nwc_request xRequest;
+ struct nwc_frag *frag, *cFrag, *reqFrag;
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen, totalLen;
+ unsigned int x;
+ struct nwd_ncp_req *ncpData;
+ struct nwd_ncp_rep *ncpReply;
+ unsigned char *reqData;
+ unsigned long actualReplyLength = 0;
+
- DbgPrint("[XPLAT] Process Raw NCP Send\n");
++ DbgPrint("[XPLAT] Process Raw NCP Send");
+ cpylen = copy_from_user(&xRequest, pdata->reqData, sizeof(xRequest));
+
+ /*
+ * Figure out the length of the request
+ */
+ frag =
+ kmalloc(xRequest.uNumReplyFrags * sizeof(struct nwc_frag), GFP_KERNEL);
+
- DbgPrint("[XPLAT RawNCP] - Reply Frag Count 0x%X\n",
++ DbgPrint("[XPLAT RawNCP] - Reply Frag Count 0x%X",
+ xRequest.uNumReplyFrags);
+
+ if (!frag)
+ return (retCode);
+
+ cpylen =
+ copy_from_user(frag, xRequest.pReplyFrags,
+ xRequest.uNumReplyFrags * sizeof(struct nwc_frag));
+ totalLen = 0;
+
+ cFrag = frag;
+ for (x = 0; x < xRequest.uNumReplyFrags; x++) {
- DbgPrint("[XPLAT - RawNCP] - Frag Len = %d\n", cFrag->uLength);
++ DbgPrint("[XPLAT - RawNCP] - Frag Len = %d", cFrag->uLength);
+ totalLen += cFrag->uLength;
+ cFrag++;
+ }
+
- DbgPrint("[XPLAT - RawNCP] - totalLen = %d\n", totalLen);
++ DbgPrint("[XPLAT - RawNCP] - totalLen = %d", totalLen);
+ datalen = 0;
+ reqFrag =
+ kmalloc(xRequest.uNumRequestFrags * sizeof(struct nwc_frag),
+ GFP_KERNEL);
+ if (!reqFrag) {
+ kfree(frag);
+ return (retCode);
+ }
+
+ cpylen =
+ copy_from_user(reqFrag, xRequest.pRequestFrags,
+ xRequest.uNumRequestFrags * sizeof(struct nwc_frag));
+ cFrag = reqFrag;
+ for (x = 0; x < xRequest.uNumRequestFrags; x++) {
+ datalen += cFrag->uLength;
+ cFrag++;
+ }
+
+ /*
+ * Allocate the cmd Request
+ */
+ cmdlen = datalen + sizeof(*cmd) + sizeof(*ncpData);
- DbgPrint("[XPLAT RawNCP] - Frag Count 0x%X\n",
++ DbgPrint("[XPLAT RawNCP] - Frag Count 0x%X",
+ xRequest.uNumRequestFrags);
- DbgPrint("[XPLAT RawNCP] - Total Command Data Len = %x\n", cmdlen);
++ DbgPrint("[XPLAT RawNCP] - Total Command Data Len = %x", cmdlen);
+
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd) {
+ kfree(reqFrag);
+ kfree(frag);
+ return -ENOMEM;
+ }
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_RAW_NCP_REQUEST;
+
+ /*
+ * build the NCP Request
+ */
+ cmd->dataLen = cmdlen - sizeof(*cmd);
+ ncpData = (struct nwd_ncp_req *) cmd->data;
+ ncpData->replyLen = totalLen;
+ ncpData->requestLen = datalen;
+ ncpData->ConnHandle = (void *) (unsigned long) xRequest.ConnHandle;
+ ncpData->function = xRequest.uFunction;
+
+ reqData = ncpData->data;
+ cFrag = reqFrag;
+
+ for (x = 0; x < xRequest.uNumRequestFrags; x++) {
+ cpylen =
+ copy_from_user(reqData, cFrag->pData,
+ cFrag->uLength);
+ reqData += cFrag->uLength;
+ cFrag++;
+ }
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
- DbgPrint("RawNCP - reply = %x\n", reply);
- DbgPrint("RawNCP - retCode = %x\n", retCode);
++ DbgPrint("RawNCP - reply = %x", reply);
++ DbgPrint("RawNCP - retCode = %x", retCode);
+
+ if (reply) {
+ /*
+ * we got reply data from the daemon
+ */
+ ncpReply = (struct nwd_ncp_rep *) reply->data;
+ retCode = reply->Reply.ErrorCode;
+
- DbgPrint("RawNCP - Reply Frag Count 0x%X\n",
++ DbgPrint("RawNCP - Reply Frag Count 0x%X",
+ xRequest.uNumReplyFrags);
+
+ /*
+ * We need to copy the reply frags to the packet.
+ */
+ reqData = ncpReply->data;
+ cFrag = frag;
+
+ totalLen = ncpReply->replyLen;
+ for (x = 0; x < xRequest.uNumReplyFrags; x++) {
+
- DbgPrint("RawNCP - Copy Frag %d: 0x%X\n", x,
++ DbgPrint("RawNCP - Copy Frag %d: 0x%X", x,
+ cFrag->uLength);
+
+ datalen =
+ min((unsigned long) cFrag->uLength, totalLen);
+
+ cpylen =
+ copy_to_user(cFrag->pData, reqData,
+ datalen);
+ totalLen -= datalen;
+ reqData += datalen;
+ actualReplyLength += datalen;
+
+ cFrag++;
+ }
+
+ kfree(reply);
+ } else {
+ retCode = -EIO;
+ }
+
+ kfree(cmd);
+ xRequest.uActualReplyLength = actualReplyLength;
+ cpylen = copy_to_user(pdata->reqData, &xRequest, sizeof(xRequest));
+
+ kfree(reqFrag);
+ kfree(frag);
+
+ return (retCode);
+}
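+
+/*
+ * Fragment flow in novfs_raw_send() above: the request fragments are
+ * gathered from user space into one contiguous NCP request, and the
+ * daemon's single reply buffer is scattered back over the caller's
+ * reply fragments, each copy clamped with min() to the reply bytes
+ * still unconsumed; uActualReplyLength reports the total delivered.
+ */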
+
+int novfs_conn_close(struct novfs_xplat *pdata, void ** Handle, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_close_conn cc;
+ struct nwd_close_conn *nwdClose;
+ int retCode = 0;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ cpylen = copy_from_user(&cc, pdata->reqData, sizeof(cc));
+
+ datalen = sizeof(*nwdClose);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_CLOSE_CONN;
+
+ nwdClose = (struct nwd_close_conn *) cmd->data;
+ cmd->dataLen = sizeof(*nwdClose);
+ *Handle = nwdClose->ConnHandle = Uint32toHandle(cc.ConnHandle);
+
+ /*
+ * send the request
+ */
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen, 0);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_sys_conn_close(struct novfs_xplat *pdata, unsigned long *Handle, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_close_conn cc;
+ struct nwd_close_conn *nwdClose;
+ unsigned int retCode = 0;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ cpylen = copy_from_user(&cc, pdata->reqData, sizeof(cc));
+
+ datalen = sizeof(*nwdClose);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SYS_CLOSE_CONN;
+
+ nwdClose = (struct nwd_close_conn *) cmd->data;
+ cmd->dataLen = sizeof(*nwdClose);
+ nwdClose->ConnHandle = (void *) (unsigned long) cc.ConnHandle;
+ *Handle = (unsigned long) cc.ConnHandle;
+
+ /*
+ * send the request
+ */
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0, (void **)&reply, &replylen, 0);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_login_id(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct nwc_login_id lgn, *plgn;
+ int retCode = -ENOMEM;
+ struct ncl_string server;
+ struct ncl_string username;
+ struct ncl_string password;
+ unsigned long cpylen;
+ struct nwc_string nwcStr;
+
+ cpylen = copy_from_user(&lgn, pdata->reqData, sizeof(lgn));
+
- DbgPrint("NwLoginIdentity:\n");
++ DbgPrint("");
+ novfs_dump(sizeof(lgn), &lgn);
+
+ cpylen = copy_from_user(&nwcStr, lgn.pDomainName, sizeof(nwcStr));
- DbgPrint("NwLoginIdentity: DomainName\n");
++ DbgPrint("DomainName\n");
+ novfs_dump(sizeof(nwcStr), &nwcStr);
+
+ if ((server.buffer = kmalloc(nwcStr.DataLen, GFP_KERNEL))) {
+ server.type = nwcStr.DataType;
+ server.len = nwcStr.DataLen;
+ if (!copy_from_user((void *)server.buffer, nwcStr.pBuffer, server.len)) {
- DbgPrint("NwLoginIdentity: Server\n");
++ DbgPrint("Server");
+ novfs_dump(server.len, server.buffer);
+
+ cpylen = copy_from_user(&nwcStr, lgn.pObjectName, sizeof(nwcStr));
- DbgPrint("NwLoginIdentity: ObjectName\n");
++ DbgPrint("ObjectName");
+ novfs_dump(sizeof(nwcStr), &nwcStr);
+
+ if ((username.buffer = kmalloc(nwcStr.DataLen, GFP_KERNEL))) {
+ username.type = nwcStr.DataType;
+ username.len = nwcStr.DataLen;
+ if (!copy_from_user((void *)username.buffer, nwcStr.pBuffer, username.len)) {
- DbgPrint("NwLoginIdentity: User\n");
++ DbgPrint("User");
+ novfs_dump(username.len, username.buffer);
+
+ cpylen = copy_from_user(&nwcStr, lgn.pPassword, sizeof(nwcStr));
- DbgPrint("NwLoginIdentity: Password\n");
++ DbgPrint("Password");
+ novfs_dump(sizeof(nwcStr), &nwcStr);
+
+ if ((password.buffer = kmalloc(nwcStr.DataLen, GFP_KERNEL))) {
+ password.type = nwcStr.DataType;
+ password.len = nwcStr.DataLen;
+ if (!copy_from_user((void *)password.buffer, nwcStr.pBuffer, password.len)) {
+ retCode = novfs_do_login(&server, &username, &password, (void **)&lgn.AuthenticationId, &Session);
+ if (retCode) {
+ lgn.AuthenticationId = 0;
+ }
+
+ plgn = (struct nwc_login_id *)pdata->reqData;
+ cpylen = copy_to_user(&plgn->AuthenticationId, &lgn.AuthenticationId, sizeof(plgn->AuthenticationId));
+ }
+ memset(password.buffer, 0, password.len);
+ kfree(password.buffer);
+ }
+ }
+ memset(username.buffer, 0, username.len);
+ kfree(username.buffer);
+ }
+ }
+ kfree(server.buffer);
+ }
+ return (retCode);
+}
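+
+/*
+ * Note: the username and password staging buffers above are zeroed
+ * with memset() before kfree() so that credential bytes do not linger
+ * in freed kernel memory.
+ */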
+
+int novfs_auth_conn(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct nwc_auth_with_id pauth;
+ struct nwc_auth_wid *pDauth;
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ datalen = sizeof(*pDauth);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_AUTHENTICATE_CONN_WITH_ID;
+
+ cpylen = copy_from_user(&pauth, pdata->reqData, sizeof(pauth));
+
+ pDauth = (struct nwc_auth_wid *) cmd->data;
+ cmd->dataLen = datalen;
+ pDauth->AuthenticationId = pauth.AuthenticationId;
+ pDauth->ConnHandle = (void *) (unsigned long) pauth.ConnHandle;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+int novfs_license_conn(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_license_conn lisc;
+ struct nwc_lisc_id * pDLisc;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ datalen = sizeof(*pDLisc);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_LICENSE_CONN;
+
+ cpylen = copy_from_user(&lisc, pdata->reqData, sizeof(lisc));
+
+ pDLisc = (struct nwc_lisc_id *) cmd->data;
+ cmd->dataLen = datalen;
+ pDLisc->ConnHandle = (void *) (unsigned long) lisc.ConnHandle;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+int novfs_logout_id(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_lo_id logout, *pDLogout;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ datalen = sizeof(*pDLogout);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_LOGOUT_IDENTITY;
+
+ cpylen =
+ copy_from_user(&logout, pdata->reqData, sizeof(logout));
+
+ pDLogout = (struct nwc_lo_id *) cmd->data;
+ cmd->dataLen = datalen;
+ pDLogout->AuthenticationId = logout.AuthenticationId;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+int novfs_unlicense_conn(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_unlic_conn *pUconn, ulc;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ cpylen = copy_from_user(&ulc, pdata->reqData, sizeof(ulc));
+ datalen = sizeof(*pUconn);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_UNLICENSE_CONN;
+ cmd->dataLen = datalen;
+ pUconn = (struct nwc_unlic_conn *) cmd->data;
+
+ pUconn->ConnHandle = (void *) (unsigned long) ulc.ConnHandle;
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ /*
+ * we got reply data from the daemon
+ */
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_unauthenticate(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_unauthenticate auth, *pDAuth;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ datalen = sizeof(*pDAuth);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_UNAUTHENTICATE_CONN;
+
+ cpylen = copy_from_user(&auth, pdata->reqData, sizeof(auth));
+
+ pDAuth = (struct nwc_unauthenticate *) cmd->data;
+ cmd->dataLen = datalen;
+ pDAuth->AuthenticationId = auth.AuthenticationId;
+ pDAuth->ConnHandle = (void *) (unsigned long) auth.ConnHandle;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_get_conn_info(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_get_conn_info connInfo;
+ struct nwd_conn_info *pDConnInfo;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, replylen, cpylen;
+
+ cmdlen = sizeof(*cmd) + sizeof(*pDConnInfo);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ cpylen =
+ copy_from_user(&connInfo, pdata->reqData, sizeof(struct nwc_get_conn_info));
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_CONN_INFO;
+
+ pDConnInfo = (struct nwd_conn_info *) cmd->data;
+
+ pDConnInfo->ConnHandle = (void *) (unsigned long) connInfo.ConnHandle;
+ pDConnInfo->uInfoLevel = connInfo.uInfoLevel;
+ pDConnInfo->uInfoLength = connInfo.uInfoLength;
+ cmd->dataLen = sizeof(*pDConnInfo);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ GetConnData(&connInfo, cmd, reply);
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_set_conn_info(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_set_conn_info connInfo;
+ struct nwd_set_conn_info *pDConnInfo;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, replylen, cpylen;
+
+ cmdlen = sizeof(*cmd) + sizeof(*pDConnInfo);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ cpylen =
+ copy_from_user(&connInfo, pdata->reqData, sizeof(struct nwc_set_conn_info));
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SET_CONN_INFO;
+
+ pDConnInfo = (struct nwd_set_conn_info *) cmd->data;
+
+ pDConnInfo->ConnHandle = (void *) (unsigned long) connInfo.ConnHandle;
+ pDConnInfo->uInfoLevel = connInfo.uInfoLevel;
+ pDConnInfo->uInfoLength = connInfo.uInfoLength;
+ cmd->dataLen = sizeof(*pDConnInfo);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_get_id_info(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_get_id_info qidInfo, *gId;
+ struct nwd_get_id_info *idInfo;
+ struct nwc_string xferStr;
+ char *str;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, replylen, cpylen;
+
+ cmdlen = sizeof(*cmd) + sizeof(*idInfo);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ cpylen = copy_from_user(&qidInfo, pdata->reqData, sizeof(qidInfo));
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_IDENTITY_INFO;
+
+ idInfo = (struct nwd_get_id_info *) cmd->data;
+
+ idInfo->AuthenticationId = qidInfo.AuthenticationId;
+ cmd->dataLen = sizeof(*idInfo);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+
+ if (!reply->Reply.ErrorCode) {
+ /*
+ * Save the return info to the user structure.
+ */
+ gId = pdata->reqData;
+ idInfo = (struct nwd_get_id_info *) reply->data;
+ cpylen =
+ copy_to_user(&gId->AuthenticationId,
+ &idInfo->AuthenticationId,
+ sizeof(idInfo->
+ AuthenticationId));
+ cpylen =
+ copy_to_user(&gId->AuthType,
+ &idInfo->AuthType,
+ sizeof(idInfo->AuthType));
+ cpylen =
+ copy_to_user(&gId->IdentityFlags,
+ &idInfo->IdentityFlags,
+ sizeof(idInfo->IdentityFlags));
+ cpylen =
+ copy_to_user(&gId->NameType,
+ &idInfo->NameType,
+ sizeof(idInfo->NameType));
+ cpylen =
+ copy_to_user(&gId->ObjectType,
+ &idInfo->ObjectType,
+ sizeof(idInfo->ObjectType));
+
+ cpylen =
+ copy_from_user(&xferStr, gId->pDomainName,
+ sizeof(struct nwc_string));
+ str =
+ (char *)((char *)reply->data +
+ idInfo->pDomainNameOffset);
+ cpylen =
+ copy_to_user(xferStr.pBuffer, str,
+ idInfo->domainLen);
+ xferStr.DataType = NWC_STRING_TYPE_ASCII;
+ xferStr.DataLen = idInfo->domainLen;
+ cpylen =
+ copy_to_user(gId->pDomainName, &xferStr,
+ sizeof(struct nwc_string));
+
+ cpylen =
+ copy_from_user(&xferStr, gId->pObjectName,
+ sizeof(struct nwc_string));
+ str =
+ (char *)((char *)reply->data +
+ idInfo->pObjectNameOffset);
+ cpylen =
+ copy_to_user(xferStr.pBuffer, str,
+ idInfo->objectLen);
+ xferStr.DataLen = idInfo->objectLen - 1;
+ xferStr.DataType = NWC_STRING_TYPE_ASCII;
+ cpylen =
+ copy_to_user(gId->pObjectName, &xferStr,
+ sizeof(struct nwc_string));
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+int novfs_scan_conn_info(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_scan_conn_info connInfo, *rInfo;
+ struct nwd_scan_conn_info *pDConnInfo;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, replylen, cpylen;
+ unsigned char *localData;
+
+ cpylen =
+ copy_from_user(&connInfo, pdata->reqData, sizeof(struct nwc_scan_conn_info));
+
+ cmdlen = sizeof(*cmd) + sizeof(*pDConnInfo) + connInfo.uScanInfoLen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SCAN_CONN_INFO;
+
+ pDConnInfo = (struct nwd_scan_conn_info *) cmd->data;
+
- DbgPrint("NwScanConnInfo: Input Data\n");
- DbgPrint("connInfo.uScanIndex = 0x%X\n", connInfo.uScanIndex);
- DbgPrint("connInfo.uConnectionReference = 0x%X\n",
++ DbgPrint("Input Data");
++ __DbgPrint(" connInfo.uScanIndex = 0x%X\n", connInfo.uScanIndex);
++ __DbgPrint(" connInfo.uConnectionReference = 0x%X\n",
+ connInfo.uConnectionReference);
- DbgPrint("connInfo.uScanInfoLevel = 0x%X\n",
++ __DbgPrint(" connInfo.uScanInfoLevel = 0x%X\n",
+ connInfo.uScanInfoLevel);
- DbgPrint("connInfo.uScanInfoLen = 0x%X\n",
++ __DbgPrint(" connInfo.uScanInfoLen = 0x%X\n",
+ connInfo.uScanInfoLen);
- DbgPrint("connInfo.uReturnInfoLength = 0x%X\n",
++ __DbgPrint(" connInfo.uReturnInfoLength = 0x%X\n",
+ connInfo.uReturnInfoLength);
- DbgPrint("connInfo.uReturnInfoLevel = 0x%X\n",
++ __DbgPrint(" connInfo.uReturnInfoLevel = 0x%X\n",
+ connInfo.uReturnInfoLevel);
- DbgPrint("connInfo.uScanFlags = 0x%X\n", connInfo.uScanFlags);
++ __DbgPrint(" connInfo.uScanFlags = 0x%X\n", connInfo.uScanFlags);
+
+ pDConnInfo->uScanIndex = connInfo.uScanIndex;
+ pDConnInfo->uConnectionReference =
+ connInfo.uConnectionReference;
+ pDConnInfo->uScanInfoLevel = connInfo.uScanInfoLevel;
+ pDConnInfo->uScanInfoLen = connInfo.uScanInfoLen;
+ pDConnInfo->uReturnInfoLength = connInfo.uReturnInfoLength;
+ pDConnInfo->uReturnInfoLevel = connInfo.uReturnInfoLevel;
+ pDConnInfo->uScanFlags = connInfo.uScanFlags;
+
+ if (pDConnInfo->uScanInfoLen) {
+ localData = (unsigned char *) pDConnInfo;
+ pDConnInfo->uScanConnInfoOffset = sizeof(*pDConnInfo);
+ localData += pDConnInfo->uScanConnInfoOffset;
+ cpylen =
+ copy_from_user(localData, connInfo.pScanConnInfo,
+ connInfo.uScanInfoLen);
+ } else {
+ pDConnInfo->uScanConnInfoOffset = 0;
+ }
+
+ cmd->dataLen = sizeof(*pDConnInfo);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
- DbgPrint("NwScanConnInfo: Reply recieved\n");
- DbgPrint(" NextIndex = %x\n", connInfo.uScanIndex);
- DbgPrint(" ErrorCode = %x\n", reply->Reply.ErrorCode);
- DbgPrint(" data = %p\n", reply->data);
++ DbgPrint("Reply recieved");
++ __DbgPrint(" NextIndex = %x\n", connInfo.uScanIndex);
++ __DbgPrint(" ErrorCode = %x\n", reply->Reply.ErrorCode);
++ __DbgPrint(" data = %p\n", reply->data);
+
+ pDConnInfo = (struct nwd_scan_conn_info *) reply->data;
+ retCode = (unsigned long) reply->Reply.ErrorCode;
+ if (!retCode) {
+ GetUserData(&connInfo, cmd, reply);
+ rInfo = (struct nwc_scan_conn_info *) pdata->repData;
+ cpylen =
+ copy_to_user(pdata->repData,
+ &pDConnInfo->uScanIndex,
+ sizeof(pDConnInfo->
+ uScanIndex));
+ cpylen =
+ copy_to_user(&rInfo->uConnectionReference,
+ &pDConnInfo->
+ uConnectionReference,
+ sizeof(pDConnInfo->
+ uConnectionReference));
+ } else {
+ unsigned long x;
+
+ x = 0;
+ rInfo = (struct nwc_scan_conn_info *) pdata->reqData;
+ cpylen =
+ copy_to_user(&rInfo->uConnectionReference,
+ &x,
+ sizeof(rInfo->
+ uConnectionReference));
+ }
+
+ kfree(reply);
+ } else {
+ retCode = -EIO;
+ }
+ kfree(cmd);
+ return (retCode);
+}
+
+/*
+ * Copies the user data out of the scan conn info call.
+ */
+static void GetUserData(struct nwc_scan_conn_info * connInfo, struct novfs_xplat_call_request *cmd, struct novfs_xplat_call_reply *reply)
+{
+ unsigned long uLevel;
+ struct nwd_scan_conn_info *pDConnInfo;
+
+ unsigned char *srcData = NULL;
+ unsigned long dataLen = 0, cpylen;
+
+ pDConnInfo = (struct nwd_scan_conn_info *) reply->data;
+ uLevel = pDConnInfo->uReturnInfoLevel;
- DbgPrint
- ("[GetUserData] uLevel = %d, reply = 0x%p, reply->data = 0x%X\n",
++ DbgPrint("uLevel = %d, reply = 0x%p, reply->data = 0x%X",
+ uLevel, reply, reply->data);
+
+ switch (uLevel) {
+ case NWC_CONN_INFO_RETURN_ALL:
+ case NWC_CONN_INFO_NDS_STATE:
+ case NWC_CONN_INFO_MAX_PACKET_SIZE:
+ case NWC_CONN_INFO_LICENSE_STATE:
+ case NWC_CONN_INFO_PUBLIC_STATE:
+ case NWC_CONN_INFO_SERVICE_TYPE:
+ case NWC_CONN_INFO_DISTANCE:
+ case NWC_CONN_INFO_SERVER_VERSION:
+ case NWC_CONN_INFO_AUTH_ID:
+ case NWC_CONN_INFO_SUSPENDED:
+ case NWC_CONN_INFO_WORKGROUP_ID:
+ case NWC_CONN_INFO_SECURITY_STATE:
+ case NWC_CONN_INFO_CONN_NUMBER:
+ case NWC_CONN_INFO_USER_ID:
+ case NWC_CONN_INFO_BCAST_STATE:
+ case NWC_CONN_INFO_CONN_REF:
+ case NWC_CONN_INFO_AUTH_STATE:
+ case NWC_CONN_INFO_TREE_NAME:
+ case NWC_CONN_INFO_SERVER_NAME:
+ case NWC_CONN_INFO_VERSION:
+ srcData = (unsigned char *) pDConnInfo;
+ srcData += pDConnInfo->uReturnConnInfoOffset;
+ dataLen = pDConnInfo->uReturnInfoLength;
+ break;
+
+ case NWC_CONN_INFO_TRAN_ADDR:
+ {
+ unsigned char *dstData = connInfo->pReturnConnInfo;
+ struct nwc_tran_addr tranAddr;
+
+ srcData = (unsigned char *) reply->data;
+ dataLen = reply->dataLen;
+
- DbgPrint
- ("GetUserData NWC_CONN_INFO_TRAN_ADDR 0x%p -> 0x%p :: 0x%X\n",
++ DbgPrint("NWC_CONN_INFO_TRAN_ADDR 0x%p -> 0x%p :: 0x%X",
+ srcData, connInfo->pReturnConnInfo, dataLen);
+
+ cpylen =
+ copy_from_user(&tranAddr, dstData,
+ sizeof(tranAddr));
+
+ srcData +=
+ ((struct nwd_scan_conn_info *) srcData)->
+ uReturnConnInfoOffset;
+
+ tranAddr.uTransportType =
+ ((struct nwd_tran_addr *) srcData)->uTransportType;
+ tranAddr.uAddressLength =
+ ((struct tagNwdTranAddrEx *) srcData)->uAddressLength;
+
+ cpylen =
+ copy_to_user(dstData, &tranAddr, sizeof(tranAddr));
+ cpylen =
+ copy_to_user(tranAddr.puAddress,
+ ((struct tagNwdTranAddrEx *) srcData)->Buffer,
+ ((struct tagNwdTranAddrEx *) srcData)->
+ uAddressLength);
+ dataLen = 0;
+ break;
+ }
+ case NWC_CONN_INFO_RETURN_NONE:
+ case NWC_CONN_INFO_TREE_NAME_UNICODE:
+ case NWC_CONN_INFO_SERVER_NAME_UNICODE:
+ case NWC_CONN_INFO_LOCAL_TRAN_ADDR:
+ case NWC_CONN_INFO_ALTERNATE_ADDR:
+ case NWC_CONN_INFO_SERVER_GUID:
+ default:
+ break;
+ }
+
+ if (srcData && dataLen) {
- DbgPrint("Copy Data in GetUserData 0x%p -> 0x%p :: 0x%X\n",
++ DbgPrint("Copy Data 0x%p -> 0x%p :: 0x%X",
+ srcData, connInfo->pReturnConnInfo, dataLen);
+ cpylen =
+ copy_to_user(connInfo->pReturnConnInfo, srcData, dataLen);
+ }
+
+ return;
+}
+
+/*
+ * Copies the connection data out of the get conn info call.
+ */
+static void GetConnData(struct nwc_get_conn_info * connInfo, struct novfs_xplat_call_request *cmd, struct novfs_xplat_call_reply *reply)
+{
+ unsigned long uLevel;
+ struct nwd_conn_info * pDConnInfo;
+
+ unsigned char *srcData = NULL;
+ unsigned long dataLen = 0, cpylen;
+
+ pDConnInfo = (struct nwd_conn_info *) cmd->data;
+ uLevel = pDConnInfo->uInfoLevel;
+
+ switch (uLevel) {
+ case NWC_CONN_INFO_RETURN_ALL:
+ srcData = (unsigned char *) reply->data;
+ dataLen = reply->dataLen;
+ break;
+
+ case NWC_CONN_INFO_RETURN_NONE:
+ dataLen = 0;
+ break;
+
+ case NWC_CONN_INFO_TRAN_ADDR:
+ {
+ unsigned char *dstData = connInfo->pConnInfo;
+ struct nwc_tran_addr tranAddr;
+
+ srcData = (unsigned char *) reply->data;
+
+ cpylen =
+ copy_from_user(&tranAddr, dstData,
+ sizeof(tranAddr));
+ tranAddr.uTransportType =
+ ((struct tagNwdTranAddrEx *) srcData)->uTransportType;
+ tranAddr.uAddressLength =
+ ((struct tagNwdTranAddrEx *) srcData)->uAddressLength;
+
+ cpylen =
+ copy_to_user(dstData, &tranAddr, sizeof(tranAddr));
+ cpylen =
+ copy_to_user(tranAddr.puAddress,
+ ((struct tagNwdTranAddrEx *) srcData)->Buffer,
+ ((struct tagNwdTranAddrEx *) srcData)->
+ uAddressLength);
+ dataLen = 0;
+ break;
+ }
+ case NWC_CONN_INFO_NDS_STATE:
+ case NWC_CONN_INFO_MAX_PACKET_SIZE:
+ case NWC_CONN_INFO_LICENSE_STATE:
+ case NWC_CONN_INFO_PUBLIC_STATE:
+ case NWC_CONN_INFO_SERVICE_TYPE:
+ case NWC_CONN_INFO_DISTANCE:
+ case NWC_CONN_INFO_SERVER_VERSION:
+ case NWC_CONN_INFO_AUTH_ID:
+ case NWC_CONN_INFO_SUSPENDED:
+ case NWC_CONN_INFO_WORKGROUP_ID:
+ case NWC_CONN_INFO_SECURITY_STATE:
+ case NWC_CONN_INFO_CONN_NUMBER:
+ case NWC_CONN_INFO_USER_ID:
+ case NWC_CONN_INFO_BCAST_STATE:
+ case NWC_CONN_INFO_CONN_REF:
+ case NWC_CONN_INFO_AUTH_STATE:
+ case NWC_CONN_INFO_VERSION:
+ case NWC_CONN_INFO_SERVER_NAME:
+ case NWC_CONN_INFO_TREE_NAME:
+ srcData = (unsigned char *) reply->data;
+ dataLen = reply->dataLen;
+ break;
+
+ case NWC_CONN_INFO_TREE_NAME_UNICODE:
+ case NWC_CONN_INFO_SERVER_NAME_UNICODE:
+ break;
+
+ case NWC_CONN_INFO_LOCAL_TRAN_ADDR:
+ break;
+
+ case NWC_CONN_INFO_ALTERNATE_ADDR:
+ break;
+
+ case NWC_CONN_INFO_SERVER_GUID:
+ break;
+
+ default:
+ break;
+ }
+
+ if (srcData && dataLen) {
+ cpylen =
+ copy_to_user(connInfo->pConnInfo, srcData,
+ connInfo->uInfoLength);
+ }
+
+ return;
+}
+
+int novfs_get_daemon_ver(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_get_reqversion *pDVersion;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+
+ datalen = sizeof(*pDVersion);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_REQUESTER_VERSION;
+ cmdlen = sizeof(*cmd);
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ pDVersion = (struct nwd_get_reqversion *) reply->data;
+ cpylen =
+ copy_to_user(pdata->reqData, pDVersion,
+ sizeof(*pDVersion));
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_get_preferred_DS_tree(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_get_pref_ds_tree *pDGetTree;
+ struct nwc_get_pref_ds_tree xplatCall, *p;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+ unsigned char *dPtr;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_get_pref_ds_tree));
+ datalen = sizeof(*pDGetTree) + xplatCall.uTreeLength;
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_PREFERRED_DS_TREE;
+ cmdlen = sizeof(*cmd);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ pDGetTree =
+ (struct nwd_get_pref_ds_tree *) reply->data;
+ dPtr =
+ reply->data + pDGetTree->DsTreeNameOffset;
+ p = (struct nwc_get_pref_ds_tree *) pdata->reqData;
+
- DbgPrint
- ("NwcGetPreferredDSTree: Reply recieved\n");
- DbgPrint(" TreeLen = %x\n",
++ DbgPrint("Reply recieved");
++ __DbgPrint(" TreeLen = %x\n",
+ pDGetTree->uTreeLength);
- DbgPrint(" TreeName = %s\n", dPtr);
++ __DbgPrint(" TreeName = %s\n", dPtr);
+
+ cpylen =
+ copy_to_user(p, &pDGetTree->uTreeLength, 4);
+ cpylen =
+ copy_to_user(xplatCall.pDsTreeName, dPtr,
+ pDGetTree->uTreeLength);
+ }
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_set_preferred_DS_tree(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwd_set_pref_ds_tree *pDSetTree;
+ struct nwc_set_pref_ds_tree xplatCall;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+ unsigned char *dPtr;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_set_pref_ds_tree));
+ datalen = sizeof(*pDSetTree) + xplatCall.uTreeLength;
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SET_PREFERRED_DS_TREE;
+
+ pDSetTree = (struct nwd_set_pref_ds_tree *) cmd->data;
+ pDSetTree->DsTreeNameOffset = sizeof(*pDSetTree);
+ pDSetTree->uTreeLength = xplatCall.uTreeLength;
+
+ dPtr = cmd->data + sizeof(*pDSetTree);
+ cpylen =
+ copy_from_user(dPtr, xplatCall.pDsTreeName,
+ xplatCall.uTreeLength);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_set_default_ctx(struct novfs_xplat *pdata,
+ struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_set_def_name_ctx xplatCall;
+ struct nwd_set_def_name_ctx * pDSet;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, datalen, replylen, cpylen;
+ unsigned char *dPtr;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_set_def_name_ctx));
+ datalen =
+ sizeof(*pDSet) + xplatCall.uTreeLength + xplatCall.uNameLength;
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SET_DEFAULT_NAME_CONTEXT;
+ cmd->dataLen =
+ sizeof(struct nwd_set_def_name_ctx) +
+ xplatCall.uTreeLength + xplatCall.uNameLength;
+
+ pDSet = (struct nwd_set_def_name_ctx *) cmd->data;
+ dPtr = cmd->data;
+
+ pDSet->TreeOffset = sizeof(struct nwd_set_def_name_ctx);
+ pDSet->uTreeLength = xplatCall.uTreeLength;
+ pDSet->NameContextOffset =
+ pDSet->TreeOffset + xplatCall.uTreeLength;
+ pDSet->uNameLength = xplatCall.uNameLength;
+
+	cpylen = copy_from_user(dPtr + pDSet->TreeOffset,
+				xplatCall.pDsTreeName, xplatCall.uTreeLength);
+ cpylen =
+ copy_from_user(dPtr + pDSet->NameContextOffset,
+ xplatCall.pNameContext,
+ xplatCall.uNameLength);
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_get_default_ctx(struct novfs_xplat *pdata,
+ struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_get_def_name_ctx xplatCall;
+ struct nwd_get_def_name_ctx * pGet;
+ char *dPtr;
+ int retCode = -ENOMEM;
+ unsigned long cmdlen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_get_def_name_ctx));
+ cmdlen =
+ sizeof(*cmd) + sizeof(struct nwd_get_def_name_ctx ) +
+ xplatCall.uTreeLength;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_DEFAULT_NAME_CONTEXT;
+ cmd->dataLen =
+ sizeof(struct nwd_get_def_name_ctx) + xplatCall.uTreeLength;
+
+ pGet = (struct nwd_get_def_name_ctx *) cmd->data;
+ dPtr = cmd->data;
+
+ pGet->TreeOffset = sizeof(struct nwd_get_def_name_ctx );
+ pGet->uTreeLength = xplatCall.uTreeLength;
+
+	cpylen = copy_from_user(dPtr + pGet->TreeOffset,
+				xplatCall.pDsTreeName, xplatCall.uTreeLength);
+ dPtr[pGet->TreeOffset + pGet->uTreeLength] = 0;
+
+ retCode =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ retCode = reply->Reply.ErrorCode;
+ if (!retCode) {
+ pGet = (struct nwd_get_def_name_ctx *) reply->data;
+
- DbgPrint
- ("NwcGetDefaultNameCtx: retCode=0x%x uNameLength1=%d uNameLength2=%d\n",
++ DbgPrint("retCode=0x%x uNameLength1=%d uNameLength2=%d",
+ retCode, pGet->uNameLength,
+ xplatCall.uNameLength);
+ if (xplatCall.uNameLength < pGet->uNameLength) {
+ pGet->uNameLength =
+ xplatCall.uNameLength;
+ retCode = NWE_BUFFER_OVERFLOW;
+ }
+ dPtr = (char *)pGet + pGet->NameContextOffset;
+ cpylen =
+ copy_to_user(xplatCall.pNameContext, dPtr,
+ pGet->uNameLength);
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (retCode);
+
+}
+
+int novfs_query_feature(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct nwc_query_feature xpCall;
+ int status = 0;
+ unsigned long cpylen;
+
+ cpylen =
+ copy_from_user(&xpCall, pdata->reqData, sizeof(struct nwc_query_feature));
+	switch (xpCall.Feature) {
+	case NWC_FEAT_NDS:
+	case NWC_FEAT_NDS_MTREE:
+	case NWC_FEAT_PRN_CAPTURE:
+	case NWC_FEAT_NDS_RESOLVE:
+		status = NWE_REQUESTER_FAILURE;
+		break;
+	}
+ return (status);
+}
+
+int novfs_get_tree_monitored_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_get_tree_monitored_conn_ref xplatCall, *p;
+ struct nwd_get_tree_monitored_conn_ref *pDConnRef;
+ char *dPtr;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_get_tree_monitored_conn_ref));
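+	/* FIXME: pTreeName is a user-space pointer; the nwc_string header
+	 * should be copied in with copy_from_user() before its fields are
+	 * used here (the same applies to the key handlers below). */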
+ datalen = sizeof(*pDConnRef) + xplatCall.pTreeName->DataLen;
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_GET_TREE_MONITORED_CONN_REF;
+
+ pDConnRef = (struct nwd_get_tree_monitored_conn_ref *) cmd->data;
+ pDConnRef->TreeName.boffset = sizeof(*pDConnRef);
+ pDConnRef->TreeName.len = xplatCall.pTreeName->DataLen;
+ pDConnRef->TreeName.type = xplatCall.pTreeName->DataType;
+
+ dPtr = cmd->data + sizeof(*pDConnRef);
+ cpylen =
+ copy_from_user(dPtr, xplatCall.pTreeName->pBuffer,
+ pDConnRef->TreeName.len);
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ pDConnRef = (struct nwd_get_tree_monitored_conn_ref *) reply->data;
+ dPtr = reply->data + pDConnRef->TreeName.boffset;
+ p = (struct nwc_get_tree_monitored_conn_ref *) pdata->reqData;
+ cpylen =
+ copy_to_user(&p->uConnReference,
+ &pDConnRef->uConnReference, 4);
+
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_enum_ids(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_enum_ids xplatCall, *eId;
+ struct nwd_enum_ids *pEnum;
+ struct nwc_string xferStr;
+ char *str;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_enum_ids));
+ datalen = sizeof(*pEnum);
+ cmdlen = datalen + sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_ENUMERATE_IDENTITIES;
+
- DbgPrint("NwcEnumIdentities: Send Request\n");
- DbgPrint(" iterator = %x\n", xplatCall.Iterator);
- DbgPrint(" cmdlen = %d\n", cmdlen);
++ DbgPrint("Send Request");
++ __DbgPrint(" iterator = %x\n", xplatCall.Iterator);
++ __DbgPrint(" cmdlen = %d\n", cmdlen);
+
+ pEnum = (struct nwd_enum_ids *) cmd->data;
+ pEnum->Iterator = xplatCall.Iterator;
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+
+ eId = pdata->repData;
+ pEnum = (struct nwd_enum_ids *) reply->data;
+ cpylen =
+ copy_to_user(&eId->Iterator, &pEnum->Iterator,
+ sizeof(pEnum->Iterator));
- DbgPrint("[XPLAT NWCAPI] Found AuthId 0x%X\n",
++ DbgPrint("[XPLAT NWCAPI] Found AuthId 0x%X",
+ pEnum->AuthenticationId);
+ cpylen =
+ copy_to_user(&eId->AuthenticationId,
+ &pEnum->AuthenticationId,
+ sizeof(pEnum->AuthenticationId));
+ cpylen =
+ copy_to_user(&eId->AuthType, &pEnum->AuthType,
+ sizeof(pEnum->AuthType));
+ cpylen =
+ copy_to_user(&eId->IdentityFlags,
+ &pEnum->IdentityFlags,
+ sizeof(pEnum->IdentityFlags));
+ cpylen =
+ copy_to_user(&eId->NameType, &pEnum->NameType,
+ sizeof(pEnum->NameType));
+ cpylen =
+ copy_to_user(&eId->ObjectType, &pEnum->ObjectType,
+ sizeof(pEnum->ObjectType));
+
+ if (!status) {
+ cpylen =
+ copy_from_user(&xferStr, eId->pDomainName,
+ sizeof(struct nwc_string));
+ str =
+ (char *)((char *)reply->data +
+ pEnum->domainNameOffset);
- DbgPrint("[XPLAT NWCAPI] Found Domain %s\n",
++ DbgPrint("[XPLAT NWCAPI] Found Domain %s",
+ str);
+ cpylen =
+ copy_to_user(xferStr.pBuffer, str,
+ pEnum->domainNameLen);
+ xferStr.DataType = NWC_STRING_TYPE_ASCII;
+ xferStr.DataLen = pEnum->domainNameLen - 1;
+ cpylen =
+ copy_to_user(eId->pDomainName, &xferStr,
+ sizeof(struct nwc_string));
+
+ cpylen =
+ copy_from_user(&xferStr, eId->pObjectName,
+ sizeof(struct nwc_string));
+ str =
+ (char *)((char *)reply->data +
+ pEnum->objectNameOffset);
- DbgPrint("[XPLAT NWCAPI] Found User %s\n", str);
++ DbgPrint("[XPLAT NWCAPI] Found User %s", str);
+ cpylen =
+ copy_to_user(xferStr.pBuffer, str,
+ pEnum->objectNameLen);
+ xferStr.DataType = NWC_STRING_TYPE_ASCII;
+ xferStr.DataLen = pEnum->objectNameLen - 1;
+ cpylen =
+ copy_to_user(eId->pObjectName, &xferStr,
+ sizeof(struct nwc_string));
+ }
+
+ kfree(reply);
+
+ }
+ kfree(cmd);
+ return (status);
+}
+
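+/*
+ * Packs four variable-length strings (domain, object, new password,
+ * verify password) back to back after the fixed header; each offset is
+ * the previous offset plus the previous length.
+ */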
+int novfs_change_auth_key(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_change_key xplatCall;
+ struct nwd_change_key *pNewKey;
+ struct nwc_string xferStr;
+ char *str;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData, sizeof(struct nwc_change_key));
+
+ datalen =
+ sizeof(struct nwd_change_key) + xplatCall.pDomainName->DataLen +
+ xplatCall.pObjectName->DataLen + xplatCall.pNewPassword->DataLen +
+ xplatCall.pVerifyPassword->DataLen;
+
+ cmdlen = sizeof(*cmd) + datalen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ pNewKey = (struct nwd_change_key *) cmd->data;
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_CHANGE_KEY;
+
+ pNewKey->NameType = xplatCall.NameType;
+ pNewKey->ObjectType = xplatCall.ObjectType;
+ pNewKey->AuthType = xplatCall.AuthType;
+ str = (char *)pNewKey;
+
+ /*
+ * Get the tree name
+ */
+ str += sizeof(*pNewKey);
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pDomainName,
+ sizeof(struct nwc_string));
+ pNewKey->domainNameOffset = sizeof(*pNewKey);
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->domainNameLen = xferStr.DataLen;
+
+ /*
+ * Get the User Name
+ */
+ str += pNewKey->domainNameLen;
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pObjectName,
+ sizeof(struct nwc_string));
+ pNewKey->objectNameOffset =
+ pNewKey->domainNameOffset + pNewKey->domainNameLen;
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->objectNameLen = xferStr.DataLen;
+
+ /*
+ * Get the New Password
+ */
+ str += pNewKey->objectNameLen;
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pNewPassword,
+ sizeof(struct nwc_string));
+ pNewKey->newPasswordOffset =
+ pNewKey->objectNameOffset + pNewKey->objectNameLen;
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->newPasswordLen = xferStr.DataLen;
+
+ /*
+ * Get the Verify Password
+ */
+ str += pNewKey->newPasswordLen;
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pVerifyPassword,
+ sizeof(struct nwc_string));
+ pNewKey->verifyPasswordOffset =
+ pNewKey->newPasswordOffset + pNewKey->newPasswordLen;
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->verifyPasswordLen = xferStr.DataLen;
+
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ memset(cmd, 0, cmdlen);
+
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_set_pri_conn(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_set_primary_conn xplatCall;
+ struct nwd_set_primary_conn *pConn;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData,
+ sizeof(struct nwc_set_primary_conn));
+
+ datalen = sizeof(struct nwd_set_primary_conn);
+ cmdlen = sizeof(*cmd) + datalen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ pConn = (struct nwd_set_primary_conn *) cmd->data;
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SET_PRIMARY_CONN;
+ pConn->ConnHandle = (void *) (unsigned long) xplatCall.ConnHandle;
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_get_pri_conn(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request cmd;
+ struct novfs_xplat_call_reply *reply;
+ unsigned long status = -ENOMEM, cmdlen, replylen, cpylen;
+
+	cmdlen = offsetof(struct novfs_xplat_call_request, data);
+
+ cmd.dataLen = 0;
+ cmd.Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd.Command.SequenceNumber = 0;
+ cmd.Command.SessionId = Session;
+ cmd.NwcCommand = NWC_GET_PRIMARY_CONN;
+
+ status =
+ Queue_Daemon_Command((void *)&cmd, cmdlen, NULL, 0, (void **)&reply,
+ &replylen, INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ if (!status) {
+ cpylen =
+ copy_to_user(pdata->repData, reply->data,
+ sizeof(unsigned long));
+ }
+
+ kfree(reply);
+ }
+
+ return (status);
+}
+
+int novfs_set_map_drive(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ unsigned long status = 0, datalen, cmdlen, replylen;
+ struct nwc_map_drive_ex symInfo;
+
- DbgPrint("Call to NwcSetMapDrive\n");
++ DbgPrint("");
+ cmdlen = sizeof(*cmd);
+ if (copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo)))
+ return -EFAULT;
+ datalen = sizeof(symInfo) + symInfo.dirPathOffsetLength +
+ symInfo.linkOffsetLength;
+
- DbgPrint(" cmdlen = %d\n", cmdlen);
- DbgPrint(" dataLen = %d\n", datalen);
- DbgPrint(" symInfo.dirPathOffsetLength = %d\n",
++ __DbgPrint(" cmdlen = %d\n", cmdlen);
++ __DbgPrint(" dataLen = %d\n", datalen);
++ __DbgPrint(" symInfo.dirPathOffsetLength = %d\n",
+ symInfo.dirPathOffsetLength);
- DbgPrint(" symInfo.linkOffsetLength = %d\n", symInfo.linkOffsetLength);
- DbgPrint(" pdata->datalen = %d\n", pdata->reqLen);
++ __DbgPrint(" symInfo.linkOffsetLength = %d\n", symInfo.linkOffsetLength);
++ __DbgPrint(" pdata->datalen = %d\n", pdata->reqLen);
+
+ novfs_dump(sizeof(symInfo), &symInfo);
+
+ cmdlen += datalen;
+
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_MAP_DRIVE;
+
+ if (copy_from_user(cmd->data, pdata->reqData, datalen)) {
+ kfree(cmd);
+ return -EFAULT;
+ }
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+
+}
+
+int novfs_unmap_drive(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ unsigned long status = 0, datalen, cmdlen, replylen, cpylen;
+ struct nwc_unmap_drive_ex symInfo;
+
- DbgPrint("Call to NwcUnMapDrive\n");
++ DbgPrint("");
+
+ cpylen = copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo));
+ cmdlen = sizeof(*cmd);
+ datalen = sizeof(symInfo) + symInfo.linkLen;
+
+ cmdlen += datalen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_UNMAP_DRIVE;
+
+ cpylen = copy_from_user(cmd->data, pdata->reqData, datalen);
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_enum_drives(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ unsigned long status = 0, cmdlen, replylen, cpylen;
+ unsigned long offset;
+ char *cp;
+
- DbgPrint("Call to NwcEnumerateDrives\n");
++ DbgPrint("");
+
+ cmdlen = sizeof(*cmd);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->dataLen = 0;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_ENUMERATE_DRIVES;
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
- DbgPrint("Status Code = 0x%X\n", status);
++ DbgPrint("Status Code = 0x%X", status);
+ if (!status) {
+ offset =
+ sizeof(((struct nwc_get_mapped_drives *) pdata->
+ repData)->MapBuffLen);
+ cp = reply->data;
+ replylen =
+ ((struct nwc_get_mapped_drives *) pdata->repData)->
+ MapBuffLen;
+ cpylen =
+ copy_to_user(pdata->repData, cp, offset);
+ cp += offset;
+ cpylen =
+ copy_to_user(((struct nwc_get_mapped_drives *) pdata->
+ repData)->MapBuffer, cp,
+ min(replylen - offset,
+ reply->dataLen - offset));
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_get_bcast_msg(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ unsigned long cmdlen, replylen;
+ int status = 0x8866, cpylen;
+ struct nwc_get_bcast_notification msg;
+ struct nwd_get_bcast_notification *dmsg;
+
+ cmdlen = sizeof(*cmd) + sizeof(*dmsg);
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cpylen = copy_from_user(&msg, pdata->reqData, sizeof(msg));
+ cmd->dataLen = sizeof(*dmsg);
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+
+ cmd->NwcCommand = NWC_GET_BROADCAST_MESSAGE;
+ dmsg = (struct nwd_get_bcast_notification *) cmd->data;
+ dmsg->uConnReference = (void *) (unsigned long) msg.uConnReference;
+
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+
+ if (!status) {
+ char *cp = pdata->repData;
+
+ dmsg =
+ (struct nwd_get_bcast_notification *) reply->data;
+ if (pdata->repLen < dmsg->messageLen) {
+ dmsg->messageLen = pdata->repLen;
+ }
+ msg.messageLen = dmsg->messageLen;
+ cpylen =
+ offsetof(struct
+ nwc_get_bcast_notification,
+ message);
+ cp += cpylen;
+ cpylen =
+ copy_to_user(pdata->repData, &msg, cpylen);
+ cpylen =
+ copy_to_user(cp, dmsg->message,
+ msg.messageLen);
+ } else {
+ msg.messageLen = 0;
+ msg.message[0] = 0;
+ cpylen = offsetof(struct
+ nwc_get_bcast_notification,
+ message);
+ cpylen =
+ copy_to_user(pdata->repData, &msg,
+ sizeof(msg));
+ }
+
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_set_key_value(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_set_key xplatCall;
+ struct nwd_set_key *pNewKey;
+ struct nwc_string cstrObjectName, cstrPassword;
+ char *str;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen = copy_from_user(&xplatCall, pdata->reqData, sizeof(struct nwc_set_key));
+ cpylen =
+ copy_from_user(&cstrObjectName, xplatCall.pObjectName,
+ sizeof(struct nwc_string));
+ cpylen =
+ copy_from_user(&cstrPassword, xplatCall.pNewPassword,
+ sizeof(struct nwc_string));
+
+ datalen =
+ sizeof(struct nwd_set_key ) + cstrObjectName.DataLen + cstrPassword.DataLen;
+
+ cmdlen = sizeof(*cmd) + datalen;
+ cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ pNewKey = (struct nwd_set_key *) cmd->data;
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_SET_KEY;
+
+ pNewKey->ObjectType = xplatCall.ObjectType;
+ pNewKey->AuthenticationId = xplatCall.AuthenticationId;
+ pNewKey->ConnHandle = (void *) (unsigned long) xplatCall.ConnHandle;
+ str = (char *)pNewKey;
+
+ /*
+ * Get the User Name
+ */
+ str += sizeof(struct nwd_set_key );
+ cpylen =
+ copy_from_user(str, cstrObjectName.pBuffer,
+ cstrObjectName.DataLen);
+
+ str += pNewKey->objectNameLen = cstrObjectName.DataLen;
+ pNewKey->objectNameOffset = sizeof(struct nwd_set_key );
+
+ /*
+ * Get the Verify Password
+ */
+ cpylen =
+ copy_from_user(str, cstrPassword.pBuffer,
+ cstrPassword.DataLen);
+
+ pNewKey->newPasswordLen = cstrPassword.DataLen;
+ pNewKey->newPasswordOffset =
+ pNewKey->objectNameOffset + pNewKey->objectNameLen;
+
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
+
+int novfs_verify_key_value(struct novfs_xplat *pdata, struct novfs_schandle Session)
+{
+ struct novfs_xplat_call_request *cmd;
+ struct novfs_xplat_call_reply *reply;
+ struct nwc_verify_key xplatCall;
+ struct nwd_verify_key *pNewKey;
+ struct nwc_string xferStr;
+ char *str;
+ unsigned long status = -ENOMEM, cmdlen, datalen, replylen, cpylen;
+
+ cpylen =
+ copy_from_user(&xplatCall, pdata->reqData, sizeof(struct nwc_verify_key));
+
+ datalen =
+ sizeof(struct nwd_verify_key) + xplatCall.pDomainName->DataLen +
+ xplatCall.pObjectName->DataLen + xplatCall.pVerifyPassword->DataLen;
+
+ cmdlen = sizeof(*cmd) + datalen;
+	cmd = kmalloc(cmdlen, GFP_KERNEL);
+
+ if (!cmd)
+ return -ENOMEM;
+
+ pNewKey = (struct nwd_verify_key *) cmd->data;
+ cmd->dataLen = datalen;
+ cmd->Command.CommandType = VFS_COMMAND_XPLAT_CALL;
+ cmd->Command.SequenceNumber = 0;
+ cmd->Command.SessionId = Session;
+ cmd->NwcCommand = NWC_VERIFY_KEY;
+
+ pNewKey->NameType = xplatCall.NameType;
+ pNewKey->ObjectType = xplatCall.ObjectType;
+ pNewKey->AuthType = xplatCall.AuthType;
+ str = (char *)pNewKey;
+
+ /*
+ * Get the tree name
+ */
+ str += sizeof(*pNewKey);
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pDomainName,
+ sizeof(struct nwc_string));
+ pNewKey->domainNameOffset = sizeof(*pNewKey);
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->domainNameLen = xferStr.DataLen;
+
+ /*
+ * Get the User Name
+ */
+ str += pNewKey->domainNameLen;
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pObjectName,
+ sizeof(struct nwc_string));
+ pNewKey->objectNameOffset =
+ pNewKey->domainNameOffset + pNewKey->domainNameLen;
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->objectNameLen = xferStr.DataLen;
+
+ /*
+ * Get the Verify Password
+ */
+ str += pNewKey->objectNameLen;
+ cpylen =
+ copy_from_user(&xferStr, xplatCall.pVerifyPassword,
+ sizeof(struct nwc_string));
+ pNewKey->verifyPasswordOffset =
+ pNewKey->objectNameOffset + pNewKey->objectNameLen;
+ cpylen = copy_from_user(str, xferStr.pBuffer, xferStr.DataLen);
+ pNewKey->verifyPasswordLen = xferStr.DataLen;
+
+ status =
+ Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0,
+ (void **)&reply, &replylen,
+ INTERRUPTIBLE);
+ if (reply) {
+ status = reply->Reply.ErrorCode;
+ kfree(reply);
+ }
+ kfree(cmd);
+ return (status);
+}
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * This module contains functions that create the interface to the proc
+ * filesystem.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+
+#include "vfs.h"
+
+struct proc_dir_entry *novfs_procfs_dir;
+struct proc_dir_entry *Novfs_Control;
+struct proc_dir_entry *Novfs_Library;
+struct proc_dir_entry *Novfs_Version;
+
+static struct file_operations novfs_daemon_proc_fops;
+static struct file_operations novfs_lib_proc_fops;
+
+/*===[ Code ]=============================================================*/
+
+static int Novfs_Get_Version(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ char *buf, tbuf[48];
+ int len = 0, i;
+
+ if (!off) {
+ buf = page + off;
+ *start = buf;
+ len = sprintf(buf, "Novfs Version=%s\n", NOVFS_VERSION_STRING);
+ i = novfs_daemon_getversion(tbuf, sizeof(tbuf));
+ if ((i > 0) && i < (count - len)) {
+ len += sprintf(buf + len, "Novfsd Version=%s\n", tbuf);
+ }
+
+ if (novfs_current_mnt) {
+ i = strlen(novfs_current_mnt);
+ if ((i > 0) && i < (count - len)) {
+ len +=
+ sprintf(buf + len, "Novfs mount=%s\n",
+ novfs_current_mnt);
+ }
+ }
- DbgPrint("Novfs_Get_Version:\n%s\n", buf);
++ DbgPrint("%s", buf);
+ }
+ *eof = 1;
+ return (len);
+}
+
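+/*
+ * Create /proc/novfs with three entries: Control (the daemon command
+ * channel), Library (the user-space library channel) and Version
+ * (read-only version information).  Partial setups are torn down on
+ * failure.
+ */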
+int novfs_proc_init(void)
+{
+ int retCode = 0;
+
+ novfs_procfs_dir = proc_mkdir(MODULE_NAME, NULL);
+ if (novfs_procfs_dir) {
+ novfs_procfs_dir->owner = THIS_MODULE;
+
+ Novfs_Control = create_proc_entry("Control", 0600, novfs_procfs_dir);
+
+ if (Novfs_Control) {
+ Novfs_Control->owner = THIS_MODULE;
+ Novfs_Control->size = 0;
+ memcpy(&novfs_daemon_proc_fops,
+ Novfs_Control->proc_fops,
+ sizeof(struct file_operations));
+
+ /*
+ * Setup our functions
+ */
+ novfs_daemon_proc_fops.owner = THIS_MODULE;
+ novfs_daemon_proc_fops.open = novfs_daemon_open_control;
+ novfs_daemon_proc_fops.release = novfs_daemon_close_control;
+ novfs_daemon_proc_fops.read = novfs_daemon_cmd_send;
+ novfs_daemon_proc_fops.write = novfs_daemon_recv_reply;
+ novfs_daemon_proc_fops.ioctl = novfs_daemon_ioctl;
+
+ Novfs_Control->proc_fops = &novfs_daemon_proc_fops;
+ } else {
+ remove_proc_entry(MODULE_NAME, NULL);
+ return (-ENOENT);
+ }
+
+ Novfs_Library = create_proc_entry("Library", 0666, novfs_procfs_dir);
+ if (Novfs_Library) {
+ Novfs_Library->owner = THIS_MODULE;
+ Novfs_Library->size = 0;
+
+ /*
+ * Setup our file functions
+ */
+ memcpy(&novfs_lib_proc_fops, Novfs_Library->proc_fops,
+ sizeof(struct file_operations));
+ novfs_lib_proc_fops.owner = THIS_MODULE;
+ novfs_lib_proc_fops.open = novfs_daemon_lib_open;
+ novfs_lib_proc_fops.release = novfs_daemon_lib_close;
+ novfs_lib_proc_fops.read = novfs_daemon_lib_read;
+ novfs_lib_proc_fops.write = novfs_daemon_lib_write;
+ novfs_lib_proc_fops.llseek = novfs_daemon_lib_llseek;
+ novfs_lib_proc_fops.ioctl = novfs_daemon_lib_ioctl;
+ Novfs_Library->proc_fops = &novfs_lib_proc_fops;
+ } else {
+ remove_proc_entry("Control", novfs_procfs_dir);
+ remove_proc_entry(MODULE_NAME, NULL);
+ return (-ENOENT);
+ }
+
+ Novfs_Version =
+ create_proc_read_entry("Version", 0444, novfs_procfs_dir,
+ Novfs_Get_Version, NULL);
+ if (Novfs_Version) {
+ Novfs_Version->owner = THIS_MODULE;
+ Novfs_Version->size = 0;
+ } else {
+ remove_proc_entry("Library", novfs_procfs_dir);
+ remove_proc_entry("Control", novfs_procfs_dir);
+ remove_proc_entry(MODULE_NAME, NULL);
+ retCode = -ENOENT;
+ }
+ } else {
+ retCode = -ENOENT;
+ }
+ return (retCode);
+}
+
+void novfs_proc_exit(void)
+{
+
- DbgPrint("Uninit_Procfs_Interface remove_proc_entry(Version, NULL)\n");
++	DbgPrint("remove_proc_entry(Version, NULL)");
+ remove_proc_entry("Version", novfs_procfs_dir);
+
- DbgPrint("Uninit_Procfs_Interface remove_proc_entry(Control, NULL)\n");
++	DbgPrint("remove_proc_entry(Control, NULL)");
+ remove_proc_entry("Control", novfs_procfs_dir);
+
- DbgPrint("Uninit_Procfs_Interface remove_proc_entry(Library, NULL)\n");
++	DbgPrint("remove_proc_entry(Library, NULL)");
+ remove_proc_entry("Library", novfs_procfs_dir);
+
- DbgPrint("Uninit_Procfs_Interface remove_proc_entry(%s, NULL)\n",
++	DbgPrint("remove_proc_entry(%s, NULL)", MODULE_NAME);
+ remove_proc_entry(MODULE_NAME, NULL);
+
- DbgPrint("Uninit_Procfs_Interface done\n");
++	DbgPrint("done");
+}
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * This file contains debugging code for the novfs VFS.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+
+#include <linux/profile.h>
+#include <linux/notifier.h>
+
+#include "vfs.h"
+
+/*===[ Manifest constants ]===============================================*/
+#define DBGBUFFERSIZE (1024*1024*32)
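+/* 32 MiB in-memory trace buffer; it is released with vfree() below, so it
+ * is expected to be allocated with vmalloc() at module init. */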
+
+/*===[ Type definitions ]=================================================*/
+struct local_rtc_time {
+ int tm_sec;
+ int tm_min;
+ int tm_hour;
+ int tm_mday;
+ int tm_mon;
+ int tm_year;
+ int tm_wday;
+ int tm_yday;
+ int tm_isdst;
+};
+
+char *DbgPrintBuffer = NULL;
+char DbgPrintOn = 0;
+char DbgSyslogOn = 0;
+char DbgProfileOn = 0;
+
+static unsigned long DbgPrintBufferOffset = 0;
+static unsigned long DbgPrintBufferReadOffset = 0;
+static unsigned long DbgPrintBufferSize = DBGBUFFERSIZE;
+
+static struct file_operations Dbg_proc_file_operations;
+static struct file_operations dentry_proc_file_ops;
+static struct file_operations inode_proc_file_ops;
+
+static struct proc_dir_entry *dbg_dir = NULL;
+static struct proc_dir_entry *dbg_file = NULL;
+static struct proc_dir_entry *dentry_file = NULL;
+static struct proc_dir_entry *inode_file = NULL;
+
+static DECLARE_MUTEX(LocalPrint_lock);
+
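+/*
+ * Commands accepted on the Debug proc file (e.g. /proc/novfs/Debug):
+ *   on | off | reset           - control the in-memory trace buffer
+ *   syslog on|off              - mirror trace output to the kernel log
+ *   novfsd <cmd>               - forward <cmd> to the novfsd daemon
+ *   file_update_timeout <n>    - set the file update timeout
+ *   cache on|off               - toggle use of the page cache
+ *   profile on|off             - toggle profiling output
+ */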
+static ssize_t User_proc_write_DbgBuffer(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
+{
+ ssize_t retval = nbytes;
+ u_char *lbuf, *p;
+ int i;
+ u_long cpylen;
+
+ lbuf = kmalloc(nbytes + 1, GFP_KERNEL);
+ if (lbuf) {
+ cpylen = copy_from_user(lbuf, buf, nbytes);
+
+ lbuf[nbytes] = 0;
- DbgPrint("User_proc_write_DbgBuffer: %s\n", lbuf);
++ DbgPrint("%s", lbuf);
+
+ for (i = 0; lbuf[i] && lbuf[i] != '\n'; i++) ;
+
+ if ('\n' == lbuf[i]) {
+ lbuf[i] = '\0';
+ }
+
+ if (!strcmp("on", lbuf)) {
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+ DbgPrintOn = 1;
+ } else if (!strcmp("off", lbuf)) {
+ DbgPrintOn = 0;
+ } else if (!strcmp("reset", lbuf)) {
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+ } else if (NULL != (p = strchr(lbuf, ' '))) {
+ *p++ = '\0';
+ if (!strcmp("syslog", lbuf)) {
+
+ if (!strcmp("on", p)) {
+ DbgSyslogOn = 1;
+ } else if (!strcmp("off", p)) {
+ DbgSyslogOn = 0;
+ }
+ } else if (!strcmp("novfsd", lbuf)) {
+ novfs_daemon_debug_cmd_send(p);
+ } else if (!strcmp("file_update_timeout", lbuf)) {
+ novfs_update_timeout =
+ simple_strtoul(p, NULL, 0);
+ } else if (!strcmp("cache", lbuf)) {
+ if (!strcmp("on", p)) {
+ novfs_page_cache = 1;
+ } else if (!strcmp("off", p)) {
+ novfs_page_cache = 0;
+ }
+ } else if (!strcmp("profile", lbuf)) {
+ if (!strcmp("on", p)) {
+ DbgProfileOn = 1;
+ } else if (!strcmp("off", p)) {
+ DbgProfileOn = 0;
+ }
+ }
+ }
+ kfree(lbuf);
+ }
+
+ return (retval);
+}
+
+static ssize_t User_proc_read_DbgBuffer(struct file *file, char *buf, size_t nbytes, loff_t * ppos)
+{
+ ssize_t retval = 0;
+ size_t count;
+
+ if (0 != (count = DbgPrintBufferOffset - DbgPrintBufferReadOffset)) {
+
+ if (count > nbytes) {
+ count = nbytes;
+ }
+
+ count -=
+ copy_to_user(buf, &DbgPrintBuffer[DbgPrintBufferReadOffset],
+ count);
+
+ if (count == 0) {
+ if (retval == 0)
+ retval = -EFAULT;
+ } else {
+ DbgPrintBufferReadOffset += count;
+ if (DbgPrintBufferReadOffset >= DbgPrintBufferOffset) {
+ DbgPrintBufferOffset =
+ DbgPrintBufferReadOffset = 0;
+ }
+ retval = count;
+ }
+ }
+
+ return retval;
+}
+
+static int proc_read_DbgBuffer(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ int len;
+
+ printk(KERN_ALERT "proc_read_DbgBuffer: off=%ld count=%d DbgPrintBufferOffset=%lu DbgPrintBufferReadOffset=%lu\n", off, count, DbgPrintBufferOffset, DbgPrintBufferReadOffset);
+
+ len = DbgPrintBufferOffset - DbgPrintBufferReadOffset;
+
+ if ((int)(DbgPrintBufferOffset - DbgPrintBufferReadOffset) > count)
+ len = count;
+
+ if (len) {
+ memcpy(page, &DbgPrintBuffer[DbgPrintBufferReadOffset], len);
+ DbgPrintBufferReadOffset += len;
+ }
+
+ if (DbgPrintBufferReadOffset >= DbgPrintBufferOffset)
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+
+ printk(KERN_ALERT "proc_read_DbgBuffer: return %d\n", len);
+
+ return len;
+}
+
+#define DBG_BUFFER_SIZE (2*1024)
+
+static int LocalPrint(char *Fmt, ...)
+{
+ int len = 0;
+ va_list args;
+
+ if (DbgPrintBuffer) {
+ va_start(args, Fmt);
+ len += vsnprintf(DbgPrintBuffer + DbgPrintBufferOffset,
+ DbgPrintBufferSize - DbgPrintBufferOffset,
+ Fmt, args);
+ DbgPrintBufferOffset += len;
+ }
+
+ return (len);
+}
+
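+/*
+ * The 'site' argument carries the caller's identity; it is supplied by
+ * the DbgPrint()/__DbgPrint() wrapper macros in vfs.h and is empty for
+ * continuation lines.
+ */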
- int DbgPrint(char *Fmt, ...)
++int ___DbgPrint(const char *site, const char *Fmt, ...)
+{
+ char *buf;
+ int len = 0;
+ unsigned long offset;
+ va_list args;
+
+ if ((DbgPrintBuffer && DbgPrintOn) || DbgSyslogOn) {
+ buf = kmalloc(DBG_BUFFER_SIZE, GFP_KERNEL);
+
+ if (buf) {
+ va_start(args, Fmt);
+ len = sprintf(buf, "[%d] ", current->pid);
++			len += scnprintf(buf + len, DBG_BUFFER_SIZE - len,
++					 "%s", site);
+
- len +=
- vsnprintf(buf + len, DBG_BUFFER_SIZE - len, Fmt,
++ len += vsnprintf(buf + len, DBG_BUFFER_SIZE - len, Fmt,
+ args);
+			if (len >= DBG_BUFFER_SIZE) {
+				len = DBG_BUFFER_SIZE - 1;
+				buf[len] = '\0';
+			}
+
+ if (len) {
+ if (DbgSyslogOn) {
+ printk("<6>%s", buf);
+ }
+
+ if (DbgPrintBuffer && DbgPrintOn) {
+ if ((DbgPrintBufferOffset + len) >
+ DbgPrintBufferSize) {
+ offset = DbgPrintBufferOffset;
+ DbgPrintBufferOffset = 0;
+ memset(&DbgPrintBuffer[offset],
+ 0,
+ DbgPrintBufferSize -
+ offset);
+ }
+
+ mb();
+
+ if ((DbgPrintBufferOffset + len) <
+ DbgPrintBufferSize) {
+ DbgPrintBufferOffset += len;
+ offset =
+ DbgPrintBufferOffset - len;
+ memcpy(&DbgPrintBuffer[offset],
+ buf, len + 1);
+ }
+ }
+ }
+ kfree(buf);
+ }
+ }
+
+ return (len);
+}
+
+static void doline(unsigned char *b, unsigned char *e, unsigned char *l)
+{
+ unsigned char c;
+
+ *b++ = ' ';
+
+ while (l < e) {
+ c = *l++;
+ if ((c < ' ') || (c > '~')) {
+ c = '.';
+ }
+ *b++ = c;
+ *b = '\0';
+ }
+}
+
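+/*
+ * Hex dump helper: 16 bytes per line, each line prefixed with the source
+ * address and followed by the printable-ASCII column built by doline().
+ */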
+void novfs_dump(int size, void *dumpptr)
+{
+ unsigned char *ptr = (unsigned char *)dumpptr;
+ unsigned char *line = NULL, buf[100], *bptr = buf;
+ int i;
+
+ if (DbgPrintBuffer || DbgSyslogOn) {
+ if (size) {
+ for (i = 0; i < size; i++) {
+ if (0 == (i % 16)) {
+ if (line) {
+ doline(bptr, ptr, line);
- DbgPrint("%s\n", buf);
++ __DbgPrint("%s\n", buf);
+ bptr = buf;
+ }
+ bptr += sprintf(bptr, "0x%p: ", ptr);
+ line = ptr;
+ }
+ bptr += sprintf(bptr, "%02x ", *ptr++);
+ }
+ doline(bptr, ptr, line);
- DbgPrint("%s\n", buf);
++ __DbgPrint("%s\n", buf);
+ }
+ }
+}
+
+#define FEBRUARY 2
+#define STARTOFTIME 1970
+#define SECDAY 86400L
+#define SECYR (SECDAY * 365)
+#define leapyear(year) ((year) % 4 == 0)
+#define days_in_year(a) (leapyear(a) ? 366 : 365)
+#define days_in_month(a) (month_days[(a) - 1])
+
+static int month_days[12] = {
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
+ */
+static void NovfsGregorianDay(struct local_rtc_time *tm)
+{
+ int leapsToDate;
+ int lastYear;
+ int day;
+ int MonthOffset[] =
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
+
+ lastYear = tm->tm_year - 1;
+
+ /*
+ * Number of leap corrections to apply up to end of last year
+ */
+ leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
+
+ /*
+ * This year is a leap year if it is divisible by 4 except when it is
+ * divisible by 100 unless it is divisible by 400
+ *
+ * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 will be
+ */
+ if ((tm->tm_year % 4 == 0) &&
+ ((tm->tm_year % 100 != 0) || (tm->tm_year % 400 == 0)) &&
+ (tm->tm_mon > 2)) {
+ /*
+ * We are past Feb. 29 in a leap year
+ */
+ day = 1;
+ } else {
+ day = 0;
+ }
+
+ day += lastYear * 365 + leapsToDate + MonthOffset[tm->tm_mon - 1] +
+ tm->tm_mday;
+
+ tm->tm_wday = day % 7;
+}
+
+static void private_to_tm(int tim, struct local_rtc_time *tm)
+{
+ register int i;
+ register long hms, day;
+
+ day = tim / SECDAY;
+ hms = tim % SECDAY;
+
+ /* Hours, minutes, seconds are easy */
+ tm->tm_hour = hms / 3600;
+ tm->tm_min = (hms % 3600) / 60;
+ tm->tm_sec = (hms % 3600) % 60;
+
+ /* Number of years in days */
+ for (i = STARTOFTIME; day >= days_in_year(i); i++)
+ day -= days_in_year(i);
+ tm->tm_year = i;
+
+ /* Number of months in days left */
+ if (leapyear(tm->tm_year))
+ days_in_month(FEBRUARY) = 29;
+ for (i = 1; day >= days_in_month(i); i++)
+ day -= days_in_month(i);
+ days_in_month(FEBRUARY) = 28;
+ tm->tm_mon = i;
+
+ /* Days are what is left over (+1) from all that. */
+ tm->tm_mday = day + 1;
+
+ /*
+ * Determine the day of week
+ */
+ NovfsGregorianDay(tm);
+}
+
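+/*
+ * Minimal ctime_r()-style formatter: the kernel provides no libc time
+ * formatting, so novfs carries its own epoch-to-calendar conversion.
+ */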
+char *ctime_r(time_t * clock, char *buf)
+{
+ struct local_rtc_time tm;
+ static char *DAYOFWEEK[] =
+ { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+ static char *MONTHOFYEAR[] =
+ { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
+"Oct", "Nov", "Dec" };
+
+ private_to_tm(*clock, &tm);
+
+ sprintf(buf, "%s %s %d %d:%02d:%02d %d", DAYOFWEEK[tm.tm_wday],
+ MONTHOFYEAR[tm.tm_mon - 1], tm.tm_mday, tm.tm_hour, tm.tm_min,
+ tm.tm_sec, tm.tm_year);
+ return (buf);
+}
+
+static void dump(struct dentry *parent, void *pf)
+{
+ void (*pfunc) (char *Fmt, ...) = pf;
+ struct l {
+ struct l *next;
+ struct dentry *dentry;
+ } *l, *n, *start;
+ struct list_head *p;
+ struct dentry *d;
+ char *buf, *path, *sd;
+ char inode_number[16];
+
+	buf = kmalloc(PATH_LENGTH_BUFFER, GFP_KERNEL);
+
+ if (NULL == buf) {
+ return;
+ }
+
+ if (parent) {
+ pfunc("starting 0x%p %.*s\n", parent, parent->d_name.len,
+ parent->d_name.name);
+ if (parent->d_subdirs.next == &parent->d_subdirs) {
+ pfunc("No children...\n");
+ } else {
+ start = kmalloc(sizeof(*start), GFP_KERNEL);
+ if (start) {
+ start->next = NULL;
+ start->dentry = parent;
+ l = start;
+ while (l) {
+ p = l->dentry->d_subdirs.next;
+ while (p != &l->dentry->d_subdirs) {
+ d = list_entry(p, struct dentry,
+ d_u.d_child);
+ p = p->next;
+
+ if (d->d_subdirs.next !=
+ &d->d_subdirs) {
+						n = kmalloc(sizeof(*n),
+							    GFP_KERNEL);
+ if (n) {
+ n->next =
+ l->next;
+ l->next = n;
+ n->dentry = d;
+ }
+ } else {
+ path = novfs_scope_dget_path(d, buf, PATH_LENGTH_BUFFER, 1);
+ if (path) {
+						pfunc("1-0x%p %s\n"
+						      " d_name: %.*s\n"
+						      " d_parent: 0x%p\n"
+						      " d_count: %d\n"
+						      " d_flags: 0x%x\n"
+						      " d_subdirs: 0x%p\n"
+						      " d_inode: 0x%p\n",
+						      d, path,
+						      d->d_name.len,
+						      d->d_name.name,
+						      d->d_parent,
+						      atomic_read(&d->d_count),
+						      d->d_flags,
+						      d->d_subdirs.next,
+						      d->d_inode);
+ }
+ }
+ }
+ l = l->next;
+ }
+ l = start;
+ while (l) {
+ d = l->dentry;
+ path =
+ novfs_scope_dget_path(d, buf,
+ PATH_LENGTH_BUFFER,
+ 1);
+ if (path) {
+ sd = " (None)";
+ if (&d->d_subdirs !=
+ d->d_subdirs.next) {
+ sd = "";
+ }
+ inode_number[0] = '\0';
+ if (d->d_inode) {
+ sprintf(inode_number,
+ " (%lu)",
+ d->d_inode->
+ i_ino);
+ }
+ pfunc("0x%p %s\n"
+ " d_parent: 0x%p\n"
+ " d_count: %d\n"
+ " d_flags: 0x%x\n"
+ " d_subdirs: 0x%p%s\n"
+ " d_inode: 0x%p%s\n",
+ d, path, d->d_parent,
+ atomic_read(&d->d_count),
+ d->d_flags,
+ d->d_subdirs.next, sd,
+ d->d_inode, inode_number);
+ }
+
+ n = l;
+ l = l->next;
+ kfree(n);
+ }
+ }
+ }
+ }
+
+ kfree(buf);
+
+}
+
+static ssize_t common_read(char *buf, size_t len, loff_t * off)
+{
+ ssize_t retval = 0;
+ size_t count;
+ unsigned long offset = *off;
+
+ if (0 != (count = DbgPrintBufferOffset - offset)) {
+ if (count > len) {
+ count = len;
+ }
+
+ count -= copy_to_user(buf, &DbgPrintBuffer[offset], count);
+
+ if (count == 0) {
+ retval = -EFAULT;
+ } else {
+ *off += (loff_t) count;
+ retval = count;
+ }
+ }
+ return retval;
+
+}
+
+static ssize_t novfs_profile_read_inode(struct file * file, char *buf, size_t len,
+ loff_t * off)
+{
+ ssize_t retval = 0;
+ unsigned long offset = *off;
+ static char save_DbgPrintOn;
+
+ if (offset == 0) {
+ down(&LocalPrint_lock);
+ save_DbgPrintOn = DbgPrintOn;
+ DbgPrintOn = 0;
+
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+ novfs_dump_inode(LocalPrint);
+ }
+
+ retval = common_read(buf, len, off);
+
+ if (0 == retval) {
+ DbgPrintOn = save_DbgPrintOn;
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+
+ up(&LocalPrint_lock);
+ }
+
+ return retval;
+
+}
+
+static ssize_t novfs_profile_dentry_read(struct file * file, char *buf, size_t len,
+ loff_t * off)
+{
+ ssize_t retval = 0;
+ unsigned long offset = *off;
+ static char save_DbgPrintOn;
+
+ if (offset == 0) {
+ down(&LocalPrint_lock);
+ save_DbgPrintOn = DbgPrintOn;
+ DbgPrintOn = 0;
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+ dump(novfs_root, LocalPrint);
+ }
+
+ retval = common_read(buf, len, off);
+
+ if (0 == retval) {
+ DbgPrintBufferOffset = DbgPrintBufferReadOffset = 0;
+ DbgPrintOn = save_DbgPrintOn;
+
+ up(&LocalPrint_lock);
+ }
+
+ return retval;
+
+}
+
+uint64_t get_nanosecond_time(void)
+{
+ struct timespec ts;
+ uint64_t retVal;
+
+ ts = current_kernel_time();
+
+ retVal = (uint64_t) NSEC_PER_SEC;
+ retVal *= (uint64_t) ts.tv_sec;
+ retVal += (uint64_t) ts.tv_nsec;
+
+ return (retVal);
+}
+
+void novfs_profile_init(void)
+{
+ if (novfs_procfs_dir)
+ dbg_dir = novfs_procfs_dir;
+ else
+ dbg_dir = proc_mkdir(MODULE_NAME, NULL);
+
+ if (dbg_dir) {
+ dbg_dir->owner = THIS_MODULE;
+ dbg_file = create_proc_read_entry("Debug",
+ 0600,
+ dbg_dir,
+ proc_read_DbgBuffer, NULL);
+ if (dbg_file) {
+ dbg_file->owner = THIS_MODULE;
+ dbg_file->size = DBGBUFFERSIZE;
+ memcpy(&Dbg_proc_file_operations, dbg_file->proc_fops,
+ sizeof(struct file_operations));
+ Dbg_proc_file_operations.read =
+ User_proc_read_DbgBuffer;
+ Dbg_proc_file_operations.write =
+ User_proc_write_DbgBuffer;
+ dbg_file->proc_fops = &Dbg_proc_file_operations;
+ } else {
+ remove_proc_entry(MODULE_NAME, NULL);
+ vfree(DbgPrintBuffer);
+ DbgPrintBuffer = NULL;
+ }
+ }
+
+ if (DbgPrintBuffer) {
+ if (dbg_dir) {
+ inode_file = create_proc_entry("inode", 0600, dbg_dir);
+ if (inode_file) {
+ inode_file->owner = THIS_MODULE;
+ inode_file->size = 0;
+ memcpy(&inode_proc_file_ops,
+ inode_file->proc_fops,
+ sizeof(struct file_operations));
+ inode_proc_file_ops.owner = THIS_MODULE;
+ inode_proc_file_ops.read =
+ novfs_profile_read_inode;
+ inode_file->proc_fops = &inode_proc_file_ops;
+ }
+
+ dentry_file = create_proc_entry("dentry",
+ 0600, dbg_dir);
+ if (dentry_file) {
+ dentry_file->owner = THIS_MODULE;
+ dentry_file->size = 0;
+ memcpy(&dentry_proc_file_ops,
+ dentry_file->proc_fops,
+ sizeof(struct file_operations));
+ dentry_proc_file_ops.owner = THIS_MODULE;
+ dentry_proc_file_ops.read = novfs_profile_dentry_read;
+ dentry_file->proc_fops = &dentry_proc_file_ops;
+ }
+
+ } else {
+ vfree(DbgPrintBuffer);
+ DbgPrintBuffer = NULL;
+ }
+ }
+}
+
+void novfs_profile_exit(void)
+{
+	if (dbg_file) {
+		DbgPrint("Calling remove_proc_entry(Debug, NULL)");
+		remove_proc_entry("Debug", dbg_dir);
+	}
+	if (inode_file) {
+		DbgPrint("Calling remove_proc_entry(inode, NULL)");
+		remove_proc_entry("inode", dbg_dir);
+	}
+	if (dentry_file) {
+		DbgPrint("Calling remove_proc_entry(dentry, NULL)");
+		remove_proc_entry("dentry", dbg_dir);
+	}
+
+ if (dbg_dir && (dbg_dir != novfs_procfs_dir)) {
+		DbgPrint("Calling remove_proc_entry(%s, NULL)", MODULE_NAME);
+ remove_proc_entry(MODULE_NAME, NULL);
+ }
+}
--- /dev/null
+/*
+ * Novell NCP Redirector for Linux
+ * Author: James Turner
+ *
+ * Include file for novfs.
+ *
+ * Copyright (C) 2005 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+#ifndef __NOVFS_H
+#define __NOVFS_H
+
+#ifndef __STDC_VERSION__
+#define __STDC_VERSION__ 0L
+#endif
+
+#include <linux/version.h>
+#include <linux/namei.h>
+
+#include "nwcapi.h"
+
+
+#ifndef XTIER_SCHANDLE
+struct novfs_schandle {
+ void * hTypeId;
+ void * hId;
+
+};
+
+#include "commands.h"
+
+#define SC_PRESENT(X) (((X.hTypeId != NULL) || (X.hId != NULL)) ? 1 : 0)
+#define SC_EQUAL(X, Y) (((X.hTypeId == Y.hTypeId) && (X.hId == Y.hId)) ? 1 : 0)
+#define SC_INITIALIZE(X) {X.hTypeId = X.hId = NULL;}
+
+#define UID_TO_SCHANDLE(hSC, uid) \
+ { \
+ hSC.hTypeId = NULL; \
+ hSC.hId = (void *)(unsigned long)(uid); \
+ }
+
+#define XTIER_SCHANDLE
+#endif
+
+
+/*===[ Manifest constants ]===============================================*/
+#define NOVFS_MAGIC 0x4e574653
+#define MODULE_NAME "novfs"
+
+#define TREE_DIRECTORY_NAME ".Trees"
+#define SERVER_DIRECTORY_NAME ".Servers"
+
+#define PATH_LENGTH_BUFFER PATH_MAX
+#define NW_MAX_PATH_LENGTH 255
+
+#define XA_BUFFER (8 * 1024)
+
+#define IOC_LOGIN 0x4a540000
+#define IOC_LOGOUT 0x4a540001
+#define IOC_XPLAT 0x4a540002
+#define IOC_SESSION 0x4a540003
+#define IOC_DEBUGPRINT 0x4a540004
+
+/*
+ * NetWare file attributes
+ */
+
+#define NW_ATTRIBUTE_NORMAL 0x00
+#define NW_ATTRIBUTE_READ_ONLY 0x01
+#define NW_ATTRIBUTE_HIDDEN 0x02
+#define NW_ATTRIBUTE_SYSTEM 0x04
+#define NW_ATTRIBUTE_EXECUTE_ONLY 0x08
+#define NW_ATTRIBUTE_DIRECTORY 0x10
+#define NW_ATTRIBUTE_ARCHIVE 0x20
+#define NW_ATTRIBUTE_EXECUTE 0x40
+#define NW_ATTRIBUTE_SHAREABLE 0x80
+
+/*
+ * Define READ/WRITE flag for DATA_LIST
+ */
+#define DLREAD 0
+#define DLWRITE 1
+
+/*
+ * Define list type
+ */
+#define USER_LIST 1
+#define SERVER_LIST 2
+#define VOLUME_LIST 3
+
+/*
+ * Define flags used in for inodes
+ */
+#define USER_INODE 1
+#define UPDATE_INODE 2
+
+/*
+ * Define flags for directory cache flags
+ */
+#define ENTRY_VALID 0x00000001
+
+#ifdef INTENT_MAGIC
+#define NDOPENFLAGS intent.it_flags
+#else
+#define NDOPENFLAGS intent.open.flags
+#endif
+
+/*
+ * daemon_command_t flags values
+ */
+#define INTERRUPTIBLE 1
+
+#ifndef NOVFS_VFS_MAJOR
+#define NOVFS_VFS_MAJOR 0
+#endif
+
+#ifndef NOVFS_VFS_MINOR
+#define NOVFS_VFS_MINOR 0
+#endif
+
+#ifndef NOVFS_VFS_SUB
+#define NOVFS_VFS_SUB 0
+#endif
+
+#ifndef NOVFS_VFS_RELEASE
+#define NOVFS_VFS_RELEASE 0
+#endif
+
+#define VALUE_TO_STR(value) #value
+#define DEFINE_TO_STR(value) VALUE_TO_STR(value)
+
+#define NOVFS_VERSION_STRING \
+ DEFINE_TO_STR(NOVFS_VFS_MAJOR)"." \
+ DEFINE_TO_STR(NOVFS_VFS_MINOR)"." \
+ DEFINE_TO_STR(NOVFS_VFS_SUB)"-" \
+ DEFINE_TO_STR(NOVFS_VFS_RELEASE) \
+ "\0"
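+/*
+ * VALUE_TO_STR/DEFINE_TO_STR is the usual two-step preprocessor idiom
+ * for stringifying the value of a macro rather than its name; with the
+ * defaults above NOVFS_VERSION_STRING expands to "0.0.0-0".
+ */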
+
+/*===[ Type definitions ]=================================================*/
+struct novfs_entry_info {
+ int type;
+ umode_t mode;
+ uid_t uid;
+ gid_t gid;
+ loff_t size;
+ struct timespec atime;
+ struct timespec mtime;
+ struct timespec ctime;
+ int namelength;
+ unsigned char name[1];
+};
+
+struct novfs_string {
+ int length;
+ unsigned char *data;
+};
+
+struct novfs_login {
+ struct novfs_string Server;
+ struct novfs_string UserName;
+ struct novfs_string Password;
+};
+
+struct novfs_logout {
+ struct novfs_string Server;
+};
+
+struct novfs_dir_cache {
+ struct list_head list;
+ int flags;
+ u64 jiffies;
+ ino_t ino;
+ loff_t size;
+ umode_t mode;
+ struct timespec atime;
+ struct timespec mtime;
+ struct timespec ctime;
+ unsigned long hash;
+ int nameLen;
+ char name[1];
+};
+
+struct novfs_data_list {
+ void *page;
+ void *offset;
+ int len;
+ int rwflag;
+};
+
+
+extern char *ctime_r(time_t * clock, char *buf);
+
+/*
+ * Converts a HANDLE to a u32 type.
+ */
+static inline u32 HandletoUint32(void * h)
+{
+ return (u32) ((unsigned long) h);
+}
+
+/*
+ * Converts a u32 to a HANDLE type.
+ */
+static inline void *Uint32toHandle(u32 ui32)
+{
+ return ((void *) (unsigned long) ui32);
+}
+
+/* Global variables */
+
+extern struct dentry *novfs_root;
+extern struct proc_dir_entry *novfs_procfs_dir;
+extern unsigned long novfs_update_timeout;
+extern int novfs_page_cache;
+extern char *novfs_current_mnt;
+extern int novfs_max_iosize;
+
+
+/* Global functions */
+extern int novfs_remove_from_root(char *);
+extern void novfs_dump_inode(void *pf);
+
+extern void novfs_dump(int size, void *dumpptr);
+
+extern int Queue_Daemon_Command(void *request, unsigned long reqlen, void *data,
+ int dlen, void **reply, unsigned long * replen,
+ int interruptible);
+extern int novfs_do_login(struct ncl_string * Server, struct ncl_string* Username, struct ncl_string * Password, void **lgnId, struct novfs_schandle *Session);
+
+extern int novfs_proc_init(void);
+extern void novfs_proc_exit(void);
+
+/*
+ * daemon.c functions
+ */
+extern void novfs_daemon_queue_init(void);
+extern void novfs_daemon_queue_exit(void);
+extern int novfs_daemon_logout(struct qstr *Server, struct novfs_schandle *Session);
+extern int novfs_daemon_set_mnt_point(char *Path);
+extern int novfs_daemon_create_sessionId(struct novfs_schandle * SessionId);
+extern int novfs_daemon_destroy_sessionId(struct novfs_schandle SessionId);
+extern int novfs_daemon_getpwuid(uid_t uid, int unamelen, char *uname);
+extern int novfs_daemon_get_userspace(struct novfs_schandle SessionId,
+ uint64_t * TotalSize, uint64_t * TotalFree,
+ uint64_t * TotalDirectoryEnties,
+ uint64_t * FreeDirectoryEnties);
+extern int novfs_daemon_debug_cmd_send(char *Command);
+extern ssize_t novfs_daemon_recv_reply(struct file *file,
+ const char *buf, size_t nbytes, loff_t * ppos);
+extern ssize_t novfs_daemon_cmd_send(struct file *file, char *buf,
+ size_t len, loff_t * off);
+extern int novfs_daemon_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+extern int novfs_daemon_lib_close(struct inode *inode, struct file *file);
+extern int novfs_daemon_lib_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+extern int novfs_daemon_lib_open(struct inode *inode, struct file *file);
+extern ssize_t novfs_daemon_lib_read(struct file *file, char *buf,
+ size_t len, loff_t * off);
+extern ssize_t novfs_daemon_lib_write(struct file *file, const char *buf,
+ size_t len, loff_t * off);
+extern loff_t novfs_daemon_lib_llseek(struct file *file, loff_t offset,
+ int origin);
+extern int novfs_daemon_open_control(struct inode *Inode, struct file *File);
+extern int novfs_daemon_close_control(struct inode *Inode, struct file *File);
+extern int novfs_daemon_getversion(char *Buf, int Length);
+
+
+/*
+ * file.c functions
+ */
+extern int novfs_verify_file(struct qstr *Path, struct novfs_schandle SessionId);
+extern int novfs_get_alltrees(struct dentry *parent);
+extern int novfs_get_servers(unsigned char **ServerList,
+ struct novfs_schandle SessionId);
+extern int novfs_get_vols(struct qstr *Server,
+ unsigned char **VolumeList, struct novfs_schandle SessionId);
+extern int novfs_get_file_info(unsigned char *Path,
+ struct novfs_entry_info *Info, struct novfs_schandle SessionId);
+extern int novfs_getx_file_info(char *Path, const char *Name,
+ char *buffer, ssize_t buffer_size, ssize_t *dataLen,
+ struct novfs_schandle SessionId);
+extern int novfs_listx_file_info(char *Path, char *buffer,
+ ssize_t buffer_size, ssize_t *dataLen,
+ struct novfs_schandle SessionId);
+extern int novfs_setx_file_info(char *Path, const char *Name, const void *Value,
+ unsigned long valueLen,
+ unsigned long *bytesWritten, int flags,
+ struct novfs_schandle SessionId);
+
+extern int novfs_get_dir_listex(unsigned char *Path, void **EnumHandle,
+ int *Count, struct novfs_entry_info **Info,
+ struct novfs_schandle SessionId);
+extern int novfs_open_file(unsigned char *Path, int Flags,
+ struct novfs_entry_info * Info, void **Handle,
+ struct novfs_schandle SessionId);
+extern int novfs_create(unsigned char *Path, int DirectoryFlag,
+ struct novfs_schandle SessionId);
+extern int novfs_close_file(void * Handle, struct novfs_schandle SessionId);
+extern int novfs_read_file(void * Handle, unsigned char *Buffer,
+ size_t * Bytes, loff_t * Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_read_pages(void * Handle, struct novfs_data_list *DList,
+ int DList_Cnt, size_t * Bytes, loff_t * Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_write_file(void * Handle, unsigned char *Buffer,
+ size_t * Bytes, loff_t * Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_write_page(void * Handle, struct page *Page,
+ struct novfs_schandle SessionId);
+extern int novfs_write_pages(void * Handle, struct novfs_data_list *DList,
+ int DList_Cnt, size_t Bytes, loff_t Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_delete(unsigned char *Path, int DirectoryFlag,
+ struct novfs_schandle SessionId);
+extern int novfs_trunc(unsigned char *Path, int PathLen,
+ struct novfs_schandle SessionId);
+extern int novfs_trunc_ex(void * Handle, loff_t Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_rename_file(int DirectoryFlag, unsigned char *OldName,
+ int OldLen, unsigned char *NewName, int NewLen,
+ struct novfs_schandle SessionId);
+extern int novfs_set_attr(unsigned char *Path, struct iattr *Attr,
+ struct novfs_schandle SessionId);
+extern int novfs_get_file_cache_flag(unsigned char * Path,
+ struct novfs_schandle SessionId);
+extern int novfs_set_file_lock(struct novfs_schandle SessionId, void * fhandle,
+ unsigned char fl_type, loff_t fl_start,
+ loff_t len);
+
+extern struct inode *novfs_get_inode(struct super_block *sb, int mode,
+ int dev, uid_t uid, ino_t ino, struct qstr *name);
+extern int novfs_read_stream(void * ConnHandle, unsigned char * Handle,
+ unsigned char * Buffer, size_t * Bytes, loff_t * Offset,
+ int User, struct novfs_schandle SessionId);
+extern int novfs_write_stream(void * ConnHandle, unsigned char * Handle,
+ unsigned char * Buffer, size_t * Bytes, loff_t * Offset,
+ struct novfs_schandle SessionId);
+extern int novfs_close_stream(void * ConnHandle, unsigned char * Handle,
+ struct novfs_schandle SessionId);
+
+extern int novfs_add_to_root(char *);
+extern int novfs_end_directory_enumerate(void *EnumHandle,
+ struct novfs_schandle SessionId);
+
+/*
+ * scope.c functions
+ */
+extern void novfs_scope_init(void);
+extern void novfs_scope_exit(void);
+extern void *novfs_scope_lookup(void);
+extern uid_t novfs_scope_get_uid(struct novfs_scope_list *);
+extern struct novfs_schandle novfs_scope_get_sessionId(struct
+ novfs_scope_list *);
+extern char *novfs_get_scopeusers(void);
+extern int novfs_scope_set_userspace(uint64_t * TotalSize, uint64_t * Free,
+ uint64_t * TotalEnties, uint64_t * FreeEnties);
+extern int novfs_scope_get_userspace(uint64_t * TotalSize, uint64_t * Free,
+ uint64_t * TotalEnties, uint64_t * FreeEnties);
+extern char *novfs_scope_dget_path(struct dentry *Dentry, char *Buf,
+ unsigned int Buflen, int Flags);
+extern void novfs_scope_cleanup(void);
+extern struct novfs_scope_list *novfs_get_scope_from_name(struct qstr *);
+extern struct novfs_scope_list *novfs_get_scope(struct dentry *);
+extern char *novfs_scope_get_username(void);
+
+/*
+ * profile.c functions
+ */
+extern u64 get_nanosecond_time(void);
- extern int DbgPrint(char *Fmt, ...);
++extern int ___DbgPrint(const char *site, const char *Fmt, ...);
++#define DbgPrint(fmt, args...) ___DbgPrint(__func__, ": " fmt "\n", ##args)
++#define __DbgPrint(fmt, args...) ___DbgPrint("", fmt, ##args)
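++/*
++ * DbgPrint() inserts the calling function's name and appends a newline;
++ * __DbgPrint() emits an unprefixed continuation line for multi-line
++ * output such as hex dumps.
++ */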
++
+extern void novfs_profile_init(void);
+extern void novfs_profile_exit(void);
+
+/*
+ * nwcapi.c functions
+ */
+extern int novfs_auth_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_conn_close(struct novfs_xplat *pdata,
+ void **Handle, struct novfs_schandle Session);
+extern int novfs_get_conn_info(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_conn_info(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_daemon_ver(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_id_info(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_license_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_login_id(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_logout_id(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_open_conn_by_addr(struct novfs_xplat *pdata,
+ void **Handle, struct novfs_schandle Session);
+extern int novfs_open_conn_by_name(struct novfs_xplat *pdata,
+ void **Handle, struct novfs_schandle Session);
+extern int novfs_open_conn_by_ref(struct novfs_xplat *pdata,
+ void **Handle, struct novfs_schandle Session);
+extern int novfs_query_feature(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_raw_send(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_scan_conn_info(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_sys_conn_close(struct novfs_xplat *pdata,
+ unsigned long *Handle, struct novfs_schandle Session);
+extern int novfs_unauthenticate(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_unlicense_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_change_auth_key(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_enum_ids(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_default_ctx(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_preferred_DS_tree(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_tree_monitored_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_default_ctx(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_preferred_DS_tree(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_pri_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_pri_conn(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_map_drive(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_unmap_drive(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_enum_drives(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_get_bcast_msg(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_set_key_value(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+extern int novfs_verify_key_value(struct novfs_xplat *pdata,
+ struct novfs_schandle Session);
+
+
+#endif /* __NOVFS_H */
+
}
EXPORT_SYMBOL(nonseekable_open);
++DEFINE_TRACE(fs_open);
++DEFINE_TRACE(fs_close);
task_tgid_nr_ns(p, ns),
pid_nr_ns(pid, ns),
ppid, tpid,
- p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ cred->uid, cred->euid, cred->suid, cred->fsuid,
+ cred->gid, cred->egid, cred->sgid, cred->fsgid);
+ task_utrace_proc_status(m, p);
+
task_lock(p);
if (p->files)
fdt = files_fdtable(p->files);
--- /dev/null
+ #include <linux/fs.h>
+ #include <linux/hugetlb.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/mman.h>
+ #include <linux/mmzone.h>
+ #include <linux/proc_fs.h>
+ #include <linux/quicklist.h>
+ #include <linux/seq_file.h>
+ #include <linux/swap.h>
+ #include <linux/vmstat.h>
+ #include <asm/atomic.h>
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include "internal.h"
+
+ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
+ {
+ }
++#ifdef CONFIG_KDB
++#include <linux/kdb.h>
++#include <linux/kdbprivate.h>
++/* Like meminfo_read_proc() but without the locks and using kdb_printf() */
++void
++kdb_meminfo_read_proc(void)
++{
++ struct sysinfo i;
++ unsigned long committed;
++ unsigned long allowed;
++ struct vmalloc_info vmi;
++ long cached;
++ unsigned long pages[NR_LRU_LISTS];
++ int lru;
++
++/*
++ * display in kilobytes.
++ */
++#define K(x) ((x) << (PAGE_SHIFT - 10))
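++/* e.g. with 4 KiB pages (PAGE_SHIFT == 12), K(x) == x << 2,
++ i.e. page counts are multiplied by 4 to get kB */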
++ si_meminfo(&i);
++ kdb_si_swapinfo(&i);
++ committed = atomic_read(&vm_committed_space);
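++ /* overcommit limit: (MemTotal - Hugetlb) * overcommit_ratio / 100
++ + SwapTotal, in pages; reported below as CommitLimit */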
++ allowed = ((totalram_pages - hugetlb_total_pages())
++ * sysctl_overcommit_ratio / 100) + total_swap_pages;
++
++ cached = global_page_state(NR_FILE_PAGES) -
++ total_swapcache_pages - i.bufferram;
++ if (cached < 0)
++ cached = 0;
++
++ get_vmalloc_info(&vmi);
++
++ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
++ pages[lru] = global_page_state(NR_LRU_BASE + lru);
++
++ kdb_printf(
++ "MemTotal: %8lu kB\n"
++ "MemFree: %8lu kB\n"
++ "Buffers: %8lu kB\n"
++ "Cached: %8lu kB\n"
++ "SwapCached: %8lu kB\n"
++ "Active: %8lu kB\n"
++ "Inactive: %8lu kB\n"
++ "Active(anon): %8lu kB\n"
++ "Inactive(anon): %8lu kB\n"
++ "Active(file): %8lu kB\n"
++ "Inactive(file): %8lu kB\n"
++#ifdef CONFIG_UNEVICTABLE_LRU
++ "Unevictable: %8lu kB\n"
++ "Mlocked: %8lu kB\n"
++#endif
++#ifdef CONFIG_HIGHMEM
++ "HighTotal: %8lu kB\n"
++ "HighFree: %8lu kB\n"
++ "LowTotal: %8lu kB\n"
++ "LowFree: %8lu kB\n"
++#endif
++ "SwapTotal: %8lu kB\n"
++ "SwapFree: %8lu kB\n"
++ "Dirty: %8lu kB\n"
++ "Writeback: %8lu kB\n"
++ "AnonPages: %8lu kB\n"
++ "Mapped: %8lu kB\n"
++ "Slab: %8lu kB\n"
++ "SReclaimable: %8lu kB\n"
++ "SUnreclaim: %8lu kB\n"
++ "PageTables: %8lu kB\n"
++#ifdef CONFIG_QUICKLIST
++ "Quicklists: %8lu kB\n"
++#endif
++ "NFS_Unstable: %8lu kB\n"
++ "Bounce: %8lu kB\n"
++ "WritebackTmp: %8lu kB\n"
++ "CommitLimit: %8lu kB\n"
++ "Committed_AS: %8lu kB\n"
++ "VmallocTotal: %8lu kB\n"
++ "VmallocUsed: %8lu kB\n"
++ "VmallocChunk: %8lu kB\n",
++ K(i.totalram),
++ K(i.freeram),
++ K(i.bufferram),
++ K(cached),
++ K(total_swapcache_pages),
++ K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
++ K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
++ K(pages[LRU_ACTIVE_ANON]),
++ K(pages[LRU_INACTIVE_ANON]),
++ K(pages[LRU_ACTIVE_FILE]),
++ K(pages[LRU_INACTIVE_FILE]),
++#ifdef CONFIG_UNEVICTABLE_LRU
++ K(pages[LRU_UNEVICTABLE]),
++ K(global_page_state(NR_MLOCK)),
++#endif
++#ifdef CONFIG_HIGHMEM
++ K(i.totalhigh),
++ K(i.freehigh),
++ K(i.totalram-i.totalhigh),
++ K(i.freeram-i.freehigh),
++#endif
++ K(i.totalswap),
++ K(i.freeswap),
++ K(global_page_state(NR_FILE_DIRTY)),
++ K(global_page_state(NR_WRITEBACK)),
++ K(global_page_state(NR_ANON_PAGES)),
++ K(global_page_state(NR_FILE_MAPPED)),
++ K(global_page_state(NR_SLAB_RECLAIMABLE) +
++ global_page_state(NR_SLAB_UNRECLAIMABLE)),
++ K(global_page_state(NR_SLAB_RECLAIMABLE)),
++ K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
++ K(global_page_state(NR_PAGETABLE)),
++#ifdef CONFIG_QUICKLIST
++ K(quicklist_total_size()),
++#endif
++ K(global_page_state(NR_UNSTABLE_NFS)),
++ K(global_page_state(NR_BOUNCE)),
++ K(global_page_state(NR_WRITEBACK_TEMP)),
++ K(allowed),
++ K(committed),
++ (unsigned long)VMALLOC_TOTAL >> 10,
++ vmi.used >> 10,
++ vmi.largest_chunk >> 10
++ );
++
++#ifdef CONFIG_HUGETLBFS
++ kdb_hugetlb_report_meminfo();
++#endif
++}
++#endif /* CONFIG_KDB */
+
+ static int meminfo_proc_show(struct seq_file *m, void *v)
+ {
+ struct sysinfo i;
+ unsigned long committed;
+ unsigned long allowed;
+ struct vmalloc_info vmi;
+ long cached;
+ unsigned long pages[NR_LRU_LISTS];
+ int lru;
+
+ /*
+ * display in kilobytes.
+ */
+ #define K(x) ((x) << (PAGE_SHIFT - 10))
+ si_meminfo(&i);
+ si_swapinfo(&i);
+ committed = atomic_long_read(&vm_committed_space);
+ allowed = ((totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
+
+ cached = global_page_state(NR_FILE_PAGES) -
+ total_swapcache_pages - i.bufferram;
+ if (cached < 0)
+ cached = 0;
+
+ get_vmalloc_info(&vmi);
+
+ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+ pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
+ /*
+ * Tagged format, for easy grepping and expansion.
+ */
+ seq_printf(m,
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "Buffers: %8lu kB\n"
+ "Cached: %8lu kB\n"
+ "SwapCached: %8lu kB\n"
+ "Active: %8lu kB\n"
+ "Inactive: %8lu kB\n"
+ "Active(anon): %8lu kB\n"
+ "Inactive(anon): %8lu kB\n"
+ "Active(file): %8lu kB\n"
+ "Inactive(file): %8lu kB\n"
+ #ifdef CONFIG_UNEVICTABLE_LRU
+ "Unevictable: %8lu kB\n"
+ "Mlocked: %8lu kB\n"
+ #endif
+ #ifdef CONFIG_HIGHMEM
+ "HighTotal: %8lu kB\n"
+ "HighFree: %8lu kB\n"
+ "LowTotal: %8lu kB\n"
+ "LowFree: %8lu kB\n"
+ #endif
+ #ifndef CONFIG_MMU
+ "MmapCopy: %8lu kB\n"
+ #endif
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n"
+ "Dirty: %8lu kB\n"
+ "Writeback: %8lu kB\n"
+ "AnonPages: %8lu kB\n"
+ "Mapped: %8lu kB\n"
+ "Slab: %8lu kB\n"
+ "SReclaimable: %8lu kB\n"
+ "SUnreclaim: %8lu kB\n"
+ "PageTables: %8lu kB\n"
+ #ifdef CONFIG_QUICKLIST
+ "Quicklists: %8lu kB\n"
+ #endif
+ "NFS_Unstable: %8lu kB\n"
+ "Bounce: %8lu kB\n"
+ "WritebackTmp: %8lu kB\n"
+ "CommitLimit: %8lu kB\n"
+ "Committed_AS: %8lu kB\n"
+ "VmallocTotal: %8lu kB\n"
+ "VmallocUsed: %8lu kB\n"
+ "VmallocChunk: %8lu kB\n",
+ K(i.totalram),
+ K(i.freeram),
+ K(i.bufferram),
+ K(cached),
+ K(total_swapcache_pages),
+ K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+ K(pages[LRU_ACTIVE_ANON]),
+ K(pages[LRU_INACTIVE_ANON]),
+ K(pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_FILE]),
+ #ifdef CONFIG_UNEVICTABLE_LRU
+ K(pages[LRU_UNEVICTABLE]),
+ K(global_page_state(NR_MLOCK)),
+ #endif
+ #ifdef CONFIG_HIGHMEM
+ K(i.totalhigh),
+ K(i.freehigh),
+ K(i.totalram-i.totalhigh),
+ K(i.freeram-i.freehigh),
+ #endif
+ #ifndef CONFIG_MMU
+ K((unsigned long) atomic_read(&mmap_pages_allocated)),
+ #endif
+ K(i.totalswap),
+ K(i.freeswap),
+ K(global_page_state(NR_FILE_DIRTY)),
+ K(global_page_state(NR_WRITEBACK)),
+ K(global_page_state(NR_ANON_PAGES)),
+ K(global_page_state(NR_FILE_MAPPED)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE)),
+ K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
+ K(global_page_state(NR_PAGETABLE)),
+ #ifdef CONFIG_QUICKLIST
+ K(quicklist_total_size()),
+ #endif
+ K(global_page_state(NR_UNSTABLE_NFS)),
+ K(global_page_state(NR_BOUNCE)),
+ K(global_page_state(NR_WRITEBACK_TEMP)),
+ K(allowed),
+ K(committed),
+ (unsigned long)VMALLOC_TOTAL >> 10,
+ vmi.used >> 10,
+ vmi.largest_chunk >> 10
+ );
+
+ hugetlb_report_meminfo(m);
+
+ arch_report_meminfo(m);
+
+ return 0;
+ #undef K
+ }
+
+ static int meminfo_proc_open(struct inode *inode, struct file *file)
+ {
+ return single_open(file, meminfo_proc_show, NULL);
+ }
+
+ static const struct file_operations meminfo_proc_fops = {
+ .open = meminfo_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ static int __init proc_meminfo_init(void)
+ {
+ proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+ return 0;
+ }
+ module_init(proc_meminfo_init);
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
++
++DEFINE_TRACE(fs_lseek);
++DEFINE_TRACE(fs_llseek);
++DEFINE_TRACE(fs_read);
++DEFINE_TRACE(fs_write);
++DEFINE_TRACE(fs_readv);
++DEFINE_TRACE(fs_writev);
++DEFINE_TRACE(fs_pread64);
++DEFINE_TRACE(fs_pwrite64);
if we return non-zero, we also end the transaction. */
int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
struct inode *dir, int mode, const char *symname,
- /* 0 for regular, EMTRY_DIR_SIZE for dirs,
+ /* 0 for regular, EMPTY_DIR_SIZE for dirs,
strlen (symname) for symlinks) */
loff_t i_size, struct dentry *dentry,
- struct inode *inode)
+ struct inode *inode,
+ struct reiserfs_security_handle *security)
{
struct super_block *sb;
+ struct reiserfs_iget_args args;
INITIALIZE_PATH(path_to_key);
struct cpu_key key;
struct item_head ih;
err = -ENOMEM;
goto out_bad_inode;
}
+ args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
+ if (old_format_only(sb))
+ make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
+ TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
+ else
+ make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
+ TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
+ memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
+ args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
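+ /* Hash the inode in locked (I_NEW) state before it is fully set
+ up, so a racing reiserfs_iget() on the same key waits for us
+ instead of finding a half-initialized inode. */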
+ if (insert_inode_locked4(inode, args.objectid,
+ reiserfs_find_actor, &args) < 0) {
+ err = -EINVAL;
+ goto out_bad_inode;
+ }
if (old_format_only(sb))
- /* not a perfect generation count, as object ids can be reused, but
+ /* not a perfect generation count, as object ids can be reused, but
** this is as good as reiserfs can do right now.
** note that the private part of inode isn't filled in yet, we have
** to use the directory.
REISERFS_I(inode)->i_attrs =
REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
- mutex_init(&(REISERFS_I(inode)->i_mmap));
- reiserfs_init_acl_access(inode);
- reiserfs_init_acl_default(inode);
- reiserfs_init_xattr_rwsem(inode);
- if (old_format_only(sb))
- make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
- TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
- else
- make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
- TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
-
/* key to search for correct place for new stat data */
_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
goto out_inserted_sd;
}
} else if (inode->i_sb->s_flags & MS_POSIXACL) {
- reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
+ reiserfs_warning(inode->i_sb, "jdm-13090",
+ "ACLs aren't enabled in the fs, "
"but vfs thinks they are!");
- } else if (is_reiserfs_priv_object(dir)) {
- reiserfs_mark_inode_private(inode);
+ } else if (IS_PRIVATE(dir))
+ inode->i_flags |= S_PRIVATE;
+
+ if (security->name) {
+ retval = reiserfs_security_write(th, inode, security);
+ if (retval) {
+ err = retval;
+ reiserfs_check_path(&path_to_key);
+ retval = journal_end(th, th->t_super,
+ th->t_blocks_allocated);
+ if (retval)
+ err = retval;
+ goto out_inserted_sd;
+ }
}
- insert_inode_hash(inode);
reiserfs_update_sd(th, inode);
reiserfs_check_path(&path_to_key);
out_inserted_sd:
inode->i_nlink = 0;
th->t_trans_id = 0; /* so the caller can't use this handle later */
+ unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
-
- /* If we were inheriting an ACL, we need to release the lock so that
- * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
- * code really needs to be reworked, but this will take care of it
- * for now. -jeffm */
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
- if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
- reiserfs_write_unlock_xattrs(dir->i_sb);
- iput(inode);
- reiserfs_write_lock_xattrs(dir->i_sb);
- } else
-#endif
- iput(inode);
+ iput(inode);
return err;
}
#define LIST_TOUCHED 1
#define LIST_DIRTY 2
#define LIST_COMMIT_PENDING 4 /* someone will commit this list */
+#define LIST_DEAD 8
- #define LIST_CURRENT 16
/* flags for do_journal_end */
#define FLUSH_ALL 1 /* flush commit and real blocks */
/* before we can put our commit blocks on disk, we have to make sure everyone older than
** us is on disk too
*/
+ if (jl->j_len <= 0) {
- reiserfs_warning(s, "journal-d1", "%j; "
- "trans_id = %u; "
++ reiserfs_warning(s, "journal-d1",
++ "jl->j_len = %lu; jl->j_state = %lx; "
++ "jl->j_trans_id = %u; "
++ "jl->j_refcount = %d; "
+ "journal->trans_id = %u; "
+ "oldest live jl->j_trans_id = %u\n",
- jl, trans_id, journal->j_trans_id,
++ jl->j_len, jl->j_state,
++ trans_id, jl->j_refcount,
++ journal->j_trans_id,
+ JOURNAL_LIST_ENTRY(journal->j_journal_list.next)->j_trans_id);
+ }
BUG_ON(jl->j_len <= 0);
BUG_ON(trans_id == journal->j_trans_id);
brelse(bhjh);
journal->j_list_bitmap_index = 0;
- journal_list_init(p_s_sb);
+ journal_list_init(sb);
- journal->j_current_jl->j_state |= LIST_CURRENT;
memset(journal->j_list_hash_table, 0,
JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
*/
- journal->j_current_jl = alloc_journal_list(p_s_sb);
+ journal->j_current_jl = alloc_journal_list(sb);
- journal->j_current_jl->j_state |= LIST_CURRENT;
- jl->j_state &= LIST_CURRENT;
/* now it is safe to insert this transaction on the main list */
list_add_tail(&jl->j_list, &journal->j_journal_list);
inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
reiserfs_write_unlock(dir->i_sb);
- if (!inode || IS_ERR(inode)) {
- return ERR_PTR(-EACCES);
- }
- parent = d_alloc_anon(inode);
- if (!parent) {
- iput(inode);
- parent = ERR_PTR(-ENOMEM);
- }
- return parent;
+ return d_obtain_alias(inode);
}
-/* add entry to the directory (entry can be hidden).
+/* add entry to the directory (entry can be hidden).
insert definition of when hidden directories are used here -Hans
reiserfs_update_sd(&th, dir);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
out_failed:
- if (locked)
- reiserfs_write_unlock_xattrs(dir->i_sb);
reiserfs_write_unlock(dir->i_sb);
return retval;
}
dc_size(dc));
}
- static void sprintf_journal_list(char *buf, struct reiserfs_journal_list *jl)
- {
- sprintf(buf, "[j_start=%lu, j_state=%lu, j_len=%lu, j_nonzerolen=%d, "
- "j_commit_left=%u, j_older_commits_done=%u, j_trans_id=%u, "
- "j_timestamp=%ld, j_refcount=%d (%08x%08x%08x%08x%08x%08x)]",
- jl->j_start, jl->j_state, jl->j_len,
- atomic_read(&jl->j_nonzerolen),
- atomic_read(&jl->j_commit_left),
- atomic_read(&jl->j_older_commits_done),
- jl->j_trans_id, jl->j_timestamp, jl->j_refcount,
- jl->j_magic1, jl->j_magic2, jl->j_magic3, jl->j_magic4,
- jl->j_magic5, jl->j_magic6);
- }
-
-static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip)
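+/* Scan fmt for the first reiserfs-specific conversion specifier
+ (%k, %K, %h, %t, %z, %b, %y or %a) and report which was found
+ in *what. */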
+static char *is_there_reiserfs_struct(char *fmt, int *what)
{
char *k = fmt;
- *skip = 0;
-
while ((k = strchr(k, '%')) != NULL) {
if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
- k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a' ||
- k[1] == 'j') {
+ k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
*what = k[1];
break;
}
/* Journaling quota? */
if (REISERFS_SB(sb)->s_qf_names[type]) {
/* Quotafile not of fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ if (path.dentry->d_parent != sb->s_root)
- reiserfs_warning(sb,
- "reiserfs: Quota file not on filesystem root. "
+ reiserfs_warning(sb, "super-6521",
+ "Quota file not on filesystem root. "
"Journalled quota will not work.");
}
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
+ #include <linux/hrtimer.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
- trace_fs_select(i, *timeout);
++ trace_fs_select(i, end_time);
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll)
mask = (*f_op->poll)(file, retval ? NULL : wait);
return ret;
}
#endif /* HAVE_SET_RESTORE_SIGMASK */
++
++DEFINE_TRACE(fs_select);
++DEFINE_TRACE(fs_poll);
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write_file(f->f_path.mnt, f);
if (!error) {
- error = setxattr(dentry, f->f_vfsmnt, name, value, size, flags,
- f);
+ error = setxattr(dentry, name, value, size, flags);
mnt_drop_write(f->f_path.mnt);
}
fput(f);
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write_file(f->f_path.mnt, f);
if (!error) {
- error = removexattr(dentry, f->f_path.mnt, name, f);
+ error = removexattr(dentry, name);
mnt_drop_write(f->f_path.mnt);
}
fput(f);
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
- xfs_vnode.o \
+ xfs_sync.o \
- xfs_xattr.o)
+ xfs_xattr.o \
+ xfs_ksyms.o)
# Objects in support/
xfs-y += $(addprefix support/, \
--- /dev/null
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
- #include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_inode_item.h"
- #include "xfs_vfsops.h"
+#include "xfs_vnodeops.h"
+#include <dmapi.h>
+#include <dmapi_kern.h>
+#include "xfs_dm.h"
+
+#include <linux/mount.h>
+
+#define MAXNAMLEN MAXNAMELEN
+
+#define MIN_DIO_SIZE(mp) ((mp)->m_sb.sb_sectsize)
+#define MAX_DIO_SIZE(mp) (INT_MAX & ~(MIN_DIO_SIZE(mp) - 1))
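+/* e.g. with 512-byte sectors, MAX_DIO_SIZE is INT_MAX rounded down
+ to a sector multiple: 0x7FFFFE00, i.e. 2 GiB minus 512 bytes */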
+
+static void up_rw_sems(struct inode *ip, int flags)
+{
+ if (flags & DM_FLAGS_IALLOCSEM_WR)
+ up_write(&ip->i_alloc_sem);
+ if (flags & DM_FLAGS_IMUX)
+ mutex_unlock(&ip->i_mutex);
+}
+
+static void down_rw_sems(struct inode *ip, int flags)
+{
+ if (flags & DM_FLAGS_IMUX)
+ mutex_lock(&ip->i_mutex);
+ if (flags & DM_FLAGS_IALLOCSEM_WR)
+ down_write(&ip->i_alloc_sem);
+}
+
+
+/* Structure used to hold the on-disk version of a dm_attrname_t. All
+ on-disk attribute names start with the 8-byte string "SGI_DMI_".
+*/
+
+typedef struct {
+ char dan_chars[DMATTR_PREFIXLEN + DM_ATTR_NAME_SIZE + 1];
+} dm_dkattrname_t;
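+/* Example (hypothetical attribute name): the user attribute "backup"
+ is stored on disk as "SGI_DMI_backup". */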
+
+/* Structure used by xfs_dm_get_bulkall(), used as the "private_data"
+ * that we want xfs_bulkstat to send to our formatter.
+ */
+typedef struct {
+ dm_fsid_t fsid;
+ void __user *laststruct;
+ dm_dkattrname_t attrname;
+} dm_bulkstat_one_t;
+
+/* In the on-disk inode, DMAPI attribute names consist of the user-provided
+ name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be
+ changed!
+*/
+
+static const char dmattr_prefix[DMATTR_PREFIXLEN + 1] = DMATTR_PREFIXSTRING;
+
+static dm_size_t dm_min_dio_xfer = 0; /* direct I/O disabled for now */
+
+
+/* See xfs_dm_get_dmattr() for a description of why this is needed. */
+
+#define XFS_BUG_KLUDGE 256 /* max size of an in-inode attribute value */
+
+#define DM_MAX_ATTR_BYTES_ON_DESTROY 256
+
+#define DM_STAT_SIZE(dmtype,namelen) \
+ (sizeof(dmtype) + sizeof(dm_handle_t) + namelen)
+
+#define DM_STAT_ALIGN (sizeof(__uint64_t))
+
+/* DMAPI's E2BIG == EA's ERANGE */
+#define DM_EA_XLATE_ERR(err) { if (err == ERANGE) err = E2BIG; }
+
+static inline size_t dm_stat_align(size_t size)
+{
+ return (size + (DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+}
+
+static inline size_t dm_stat_size(size_t namelen)
+{
+ return dm_stat_align(sizeof(dm_stat_t) + sizeof(dm_handle_t) + namelen);
+}
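+
+/* Example: with DM_STAT_ALIGN == 8, dm_stat_align(100) returns 104,
+ keeping each variable-length stat record 64-bit aligned. */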
+
+/*
+ * xfs_dm_send_data_event()
+ *
+ * Send data event to DMAPI. Drop IO lock (if specified) before
+ * the dm_send_data_event() call and reacquire it afterwards.
+ */
+int
+xfs_dm_send_data_event(
+ dm_eventtype_t event,
+ xfs_inode_t *ip,
+ xfs_off_t offset,
+ size_t length,
+ int flags,
+ int *lock_flags)
+{
- struct inode *inode = ip->i_vnode;
++ struct inode *inode = &ip->i_vnode;
+ int error;
+ uint16_t dmstate;
+
+ /* Returns positive errors to XFS */
+
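+ /*
+ * Drop the locks, deliver the event, then retake the locks. If
+ * di_dmstate changed while we were unlocked, the event must be
+ * sent again, so loop until the state is stable or an error occurs.
+ */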
+ do {
+ dmstate = ip->i_d.di_dmstate;
+ if (lock_flags)
+ xfs_iunlock(ip, *lock_flags);
+
+ up_rw_sems(inode, flags);
+
+ error = dm_send_data_event(event, inode, DM_RIGHT_NULL,
+ offset, length, flags);
+ error = -error; /* DMAPI returns negative errors */
+
+ down_rw_sems(inode, flags);
+
+ if (lock_flags)
+ xfs_ilock(ip, *lock_flags);
+ } while (!error && (ip->i_d.di_dmstate != dmstate));
+
+ return error;
+}
+
+/* prohibited_mr_events
+ *
+ * Return event bits representing any events which cannot have managed
+ * region events set due to memory mapping of the file. If the maximum
+ * protection allowed in any pregion includes PROT_WRITE, and the region
+ * is shared and not text, then neither READ nor WRITE events can be set.
+ * Otherwise if the file is memory mapped, no READ event can be set.
+ *
+ */
+STATIC int
+prohibited_mr_events(
+ struct address_space *mapping)
+{
+ int prohibited = (1 << DM_EVENT_READ);
+
+ if (!mapping_mapped(mapping))
+ return 0;
+
+ spin_lock(&mapping->i_mmap_lock);
+ if (mapping_writably_mapped(mapping))
+ prohibited |= (1 << DM_EVENT_WRITE);
+ spin_unlock(&mapping->i_mmap_lock);
+
+ return prohibited;
+}
+
+#ifdef DEBUG_RIGHTS
+STATIC int
+xfs_vp_to_hexhandle(
+ struct inode *inode,
+ u_int type,
+ char *buffer)
+{
+ dm_handle_t handle;
+ u_char *ip;
+ int length;
+ int error;
+ int i;
+
+ /*
+ * XXX: dm_vp_to_handle doesn't exist.
+ * Looks like this debug code is rather dead.
+ */
+ if ((error = dm_vp_to_handle(inode, &handle)))
+ return(error);
+
+ if (type == DM_FSYS_OBJ) { /* a filesystem handle */
+ length = DM_FSHSIZE;
+ } else {
+ length = DM_HSIZE(handle);
+ }
+ for (ip = (u_char *)&handle, i = 0; i < length; i++) {
+ *buffer++ = "0123456789abcdef"[ip[i] >> 4];
+ *buffer++ = "0123456789abcdef"[ip[i] & 0xf];
+ }
+ *buffer = '\0';
+ return(0);
+}
+#endif /* DEBUG_RIGHTS */
+
+
+
+
+/* Copy in and validate an attribute name from user space. It should be a
+ string of at least one and at most DM_ATTR_NAME_SIZE characters. Because
+ the dm_attrname_t structure doesn't provide room for the trailing NULL
+ byte, we just copy in one extra character and then zero it if it
+ happens to be non-NULL.
+*/
+
+STATIC int
+xfs_copyin_attrname(
+ dm_attrname_t __user *from, /* dm_attrname_t in user space */
+ dm_dkattrname_t *to) /* name buffer in kernel space */
+{
+ int error = 0;
+ size_t len;
+
+ strcpy(to->dan_chars, dmattr_prefix);
+
+ len = strnlen_user((char __user *)from, DM_ATTR_NAME_SIZE);
+ if (len == 0)
+ error = EFAULT;
+ else {
+ if (copy_from_user(&to->dan_chars[DMATTR_PREFIXLEN], from, len))
+ to->dan_chars[sizeof(to->dan_chars) - 1] = '\0';
+ else if (to->dan_chars[DMATTR_PREFIXLEN] == '\0')
+ error = EINVAL;
+ else
+ to->dan_chars[DMATTR_PREFIXLEN + len - 1] = '\0';
+ }
+
+ return error;
+}
+
+
+/*
+ * Convert the XFS flags into their DMAPI flag equivalent for export
+ */
+STATIC uint
+_xfs_dic2dmflags(
+ __uint16_t di_flags)
+{
+ uint flags = 0;
+
+ if (di_flags & XFS_DIFLAG_ANY) {
+ if (di_flags & XFS_DIFLAG_REALTIME)
+ flags |= DM_XFLAG_REALTIME;
+ if (di_flags & XFS_DIFLAG_PREALLOC)
+ flags |= DM_XFLAG_PREALLOC;
+ if (di_flags & XFS_DIFLAG_IMMUTABLE)
+ flags |= DM_XFLAG_IMMUTABLE;
+ if (di_flags & XFS_DIFLAG_APPEND)
+ flags |= DM_XFLAG_APPEND;
+ if (di_flags & XFS_DIFLAG_SYNC)
+ flags |= DM_XFLAG_SYNC;
+ if (di_flags & XFS_DIFLAG_NOATIME)
+ flags |= DM_XFLAG_NOATIME;
+ if (di_flags & XFS_DIFLAG_NODUMP)
+ flags |= DM_XFLAG_NODUMP;
+ }
+ return flags;
+}
+
+STATIC uint
+xfs_ip2dmflags(
+ xfs_inode_t *ip)
+{
+ return _xfs_dic2dmflags(ip->i_d.di_flags) |
+ (XFS_IFORK_Q(ip) ? DM_XFLAG_HASATTR : 0);
+}
+
+STATIC uint
+xfs_dic2dmflags(
+ xfs_dinode_t *dip)
+{
- return _xfs_dic2dmflags(be16_to_cpu(dip->di_core.di_flags)) |
++ return _xfs_dic2dmflags(be16_to_cpu(dip->di_flags)) |
+ (XFS_DFORK_Q(dip) ? DM_XFLAG_HASATTR : 0);
+}
+
+/*
+ * This copies selected fields in an inode into a dm_stat structure. Because
+ * these fields must return the same values as they would in stat(), the
+ * majority of this code was copied directly from xfs_getattr(). Any future
+ * changes to xfs_getattr() must also be reflected here.
+ */
+STATIC void
+xfs_dip_to_stat(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_dinode_t *dip,
+ dm_stat_t *buf)
+{
- xfs_dinode_core_t *dic = &dip->di_core;
++ xfs_dinode_t *dic = dip;
+
+ /*
+ * The inode format changed when we moved the link count and
+ * made it 32 bits long. If this is an old format inode,
+ * convert it in memory to look like a new one. If it gets
+ * flushed to disk we will convert back before flushing or
+ * logging it. We zero out the new projid field and the old link
+ * count field. We'll handle clearing the pad field (the remains
+ * of the old uuid field) when we actually convert the inode to
+ * the new format. We don't change the version number so that we
+ * can distinguish this from a real new format inode.
+ */
- if (dic->di_version == XFS_DINODE_VERSION_1) {
++ if (dic->di_version == 1) {
+ buf->dt_nlink = be16_to_cpu(dic->di_onlink);
+ /*buf->dt_xfs_projid = 0;*/
+ } else {
+ buf->dt_nlink = be32_to_cpu(dic->di_nlink);
+ /*buf->dt_xfs_projid = be16_to_cpu(dic->di_projid);*/
+ }
+ buf->dt_ino = ino;
+ buf->dt_dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
+ buf->dt_mode = be16_to_cpu(dic->di_mode);
+ buf->dt_uid = be32_to_cpu(dic->di_uid);
+ buf->dt_gid = be32_to_cpu(dic->di_gid);
+ buf->dt_size = be64_to_cpu(dic->di_size);
+ buf->dt_atime = be32_to_cpu(dic->di_atime.t_sec);
+ buf->dt_mtime = be32_to_cpu(dic->di_mtime.t_sec);
+ buf->dt_ctime = be32_to_cpu(dic->di_ctime.t_sec);
+ buf->dt_xfs_xflags = xfs_dic2dmflags(dip);
+ buf->dt_xfs_extsize =
+ be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
+ buf->dt_xfs_extents = be32_to_cpu(dic->di_nextents);
+ buf->dt_xfs_aextents = be16_to_cpu(dic->di_anextents);
+ buf->dt_xfs_igen = be32_to_cpu(dic->di_gen);
+ buf->dt_xfs_dmstate = be16_to_cpu(dic->di_dmstate);
+
+ switch (dic->di_format) {
+ case XFS_DINODE_FMT_DEV:
- buf->dt_rdev = be32_to_cpu(dip->di_u.di_dev);
++ buf->dt_rdev = xfs_dinode_get_rdev(dic);
+ buf->dt_blksize = BLKDEV_IOSIZE;
+ buf->dt_blocks = 0;
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_UUID:
+ buf->dt_rdev = 0;
+ buf->dt_blksize = mp->m_sb.sb_blocksize;
+ buf->dt_blocks = 0;
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ buf->dt_rdev = 0;
+ buf->dt_blksize = mp->m_sb.sb_blocksize;
+ buf->dt_blocks =
+ XFS_FSB_TO_BB(mp, be64_to_cpu(dic->di_nblocks));
+ break;
+ }
+
+ memset(&buf->dt_pad1, 0, sizeof(buf->dt_pad1));
+ memset(&buf->dt_pad2, 0, sizeof(buf->dt_pad2));
+ memset(&buf->dt_pad3, 0, sizeof(buf->dt_pad3));
+
+ /* Finally fill in the DMAPI specific fields */
+ buf->dt_pers = 0;
+ buf->dt_change = 0;
+ buf->dt_nevents = DM_EVENT_MAX;
+ buf->dt_emask = be32_to_cpu(dic->di_dmevmask);
+ buf->dt_dtime = be32_to_cpu(dic->di_ctime.t_sec);
+ /* Set if one of READ, WRITE or TRUNCATE bits is set in emask */
+ buf->dt_pmanreg = (DMEV_ISSET(DM_EVENT_READ, buf->dt_emask) ||
+ DMEV_ISSET(DM_EVENT_WRITE, buf->dt_emask) ||
+ DMEV_ISSET(DM_EVENT_TRUNCATE, buf->dt_emask)) ? 1 : 0;
+}
+
+/*
+ * Pull out both on-disk and in-core fields; the in-core values take precedence.
+ * The inode must be kept locked SHARED by the caller.
+ */
+STATIC void
+xfs_ip_to_stat(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_inode_t *ip,
+ dm_stat_t *buf)
+{
+ xfs_icdinode_t *dic = &ip->i_d;
+
+ buf->dt_ino = ino;
+ buf->dt_nlink = dic->di_nlink;
+ /*buf->dt_xfs_projid = dic->di_projid;*/
+ buf->dt_mode = dic->di_mode;
+ buf->dt_uid = dic->di_uid;
+ buf->dt_gid = dic->di_gid;
+ buf->dt_size = XFS_ISIZE(ip);
+ buf->dt_dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
+ vn_atime_to_time_t(VFS_I(ip), &buf->dt_atime);
+ buf->dt_mtime = dic->di_mtime.t_sec;
+ buf->dt_ctime = dic->di_ctime.t_sec;
+ buf->dt_xfs_xflags = xfs_ip2dmflags(ip);
+ buf->dt_xfs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
+ buf->dt_xfs_extents = dic->di_nextents;
+ buf->dt_xfs_aextents = dic->di_anextents;
+ buf->dt_xfs_igen = dic->di_gen;
+ buf->dt_xfs_dmstate = dic->di_dmstate;
+
+ switch (dic->di_format) {
+ case XFS_DINODE_FMT_DEV:
+ buf->dt_rdev = ip->i_df.if_u2.if_rdev;
+ buf->dt_blksize = BLKDEV_IOSIZE;
+ buf->dt_blocks = 0;
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_UUID:
+ buf->dt_rdev = 0;
+ buf->dt_blksize = mp->m_sb.sb_blocksize;
+ buf->dt_blocks = 0;
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ buf->dt_rdev = 0;
+ buf->dt_blksize = mp->m_sb.sb_blocksize;
+ buf->dt_blocks = XFS_FSB_TO_BB(mp,
+ (dic->di_nblocks + ip->i_delayed_blks));
+ break;
+ }
+
+ memset(&buf->dt_pad1, 0, sizeof(buf->dt_pad1));
+ memset(&buf->dt_pad2, 0, sizeof(buf->dt_pad2));
+ memset(&buf->dt_pad3, 0, sizeof(buf->dt_pad3));
+
+ /* Finally fill in the DMAPI specific fields */
+ buf->dt_pers = 0;
+ buf->dt_change = 0;
+ buf->dt_nevents = DM_EVENT_MAX;
+ buf->dt_emask = dic->di_dmevmask;
+ buf->dt_dtime = dic->di_ctime.t_sec;
+ /* Set if one of READ, WRITE or TRUNCATE bits is set in emask */
+ buf->dt_pmanreg = (DMEV_ISSET(DM_EVENT_READ, buf->dt_emask) ||
+ DMEV_ISSET(DM_EVENT_WRITE, buf->dt_emask) ||
+ DMEV_ISSET(DM_EVENT_TRUNCATE, buf->dt_emask)) ? 1 : 0;
+}
+
+/*
+ * Take the handle and put it at the end of a dm_xstat buffer.
+ * dt_compname is unused in bulkstat - so we zero it out.
+ * Finally, update link in dm_xstat_t to point to next struct.
+ */
+STATIC void
+xfs_dm_handle_to_xstat(
+ dm_xstat_t *xbuf,
+ size_t xstat_sz,
+ dm_handle_t *handle,
+ size_t handle_sz)
+{
+ dm_stat_t *sbuf = &xbuf->dx_statinfo;
+
+ memcpy(xbuf + 1, handle, handle_sz);
+ sbuf->dt_handle.vd_offset = (ssize_t) sizeof(dm_xstat_t);
+ sbuf->dt_handle.vd_length = (size_t) DM_HSIZE(*handle);
+ memset(&sbuf->dt_compname, 0, sizeof(dm_vardata_t));
+ sbuf->_link = xstat_sz;
+}
+
+STATIC int
+xfs_dm_bulkall_iget_one(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_daddr_t bno,
+ int *value_lenp,
+ dm_xstat_t *xbuf,
+ u_int *xstat_szp,
+ char *attr_name,
+ caddr_t attr_buf)
+{
+ xfs_inode_t *ip;
+ dm_handle_t handle;
+ u_int xstat_sz = *xstat_szp;
+ int value_len = *value_lenp;
+ int error;
+
+ error = xfs_iget(mp, NULL, ino,
+ XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+ if (error)
+ return error;
+
+ xfs_ip_to_stat(mp, ino, ip, &xbuf->dx_statinfo);
- dm_ip_to_handle(ip->i_vnode, &handle);
++ dm_ip_to_handle(&ip->i_vnode, &handle);
+ xfs_dm_handle_to_xstat(xbuf, xstat_sz, &handle, sizeof(handle));
+
+ /* Drop ILOCK_SHARED for call to xfs_attr_get */
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ memset(&xbuf->dx_attrdata, 0, sizeof(dm_vardata_t));
+ error = xfs_attr_get(ip, attr_name, attr_buf, &value_len, ATTR_ROOT);
- iput(ip->i_vnode);
++ iput(&ip->i_vnode);
+
+ DM_EA_XLATE_ERR(error);
+ if (error && (error != ENOATTR)) {
+ if (error == E2BIG)
+ error = ENOMEM;
+ return error;
+ }
+
+ /* How much space was in the attr? */
+ if (error != ENOATTR) {
+ xbuf->dx_attrdata.vd_offset = xstat_sz;
+ xbuf->dx_attrdata.vd_length = value_len;
+ xstat_sz += (value_len+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+ }
+ *xstat_szp = xbuf->dx_statinfo._link = xstat_sz;
+ *value_lenp = value_len;
+ return 0;
+}
+
+
+STATIC int
+xfs_dm_inline_attr(
+ xfs_mount_t *mp,
+ xfs_dinode_t *dip,
+ char *attr_name,
+ caddr_t attr_buf,
+ int *value_lenp)
+{
- if (dip->di_core.di_aformat == XFS_DINODE_FMT_LOCAL) {
++ if (dip->di_aformat == XFS_DINODE_FMT_LOCAL) {
+ xfs_attr_shortform_t *sf;
+ xfs_attr_sf_entry_t *sfe;
+ unsigned int namelen = strlen(attr_name);
+ unsigned int valuelen = *value_lenp;
+ int i;
+
+ sf = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
+ sfe = &sf->list[0];
+ for (i = 0; i < sf->hdr.count;
+ sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+ if (sfe->namelen != namelen)
+ continue;
+ if (!(sfe->flags & XFS_ATTR_ROOT))
+ continue;
+ if (memcmp(attr_name, sfe->nameval, namelen) != 0)
+ continue;
+ if (valuelen < sfe->valuelen)
+ return ERANGE;
+ valuelen = sfe->valuelen;
+ memcpy(attr_buf, &sfe->nameval[namelen], valuelen);
+ *value_lenp = valuelen;
+ return 0;
+ }
+ }
+ *value_lenp = 0;
+ return ENOATTR;
+}
+
+STATIC void
+dm_dip_to_handle(
+ xfs_ino_t ino,
+ xfs_dinode_t *dip,
+ dm_fsid_t *fsid,
+ dm_handle_t *handlep)
+{
+ dm_fid_t fid;
+ int hsize;
+
+ fid.dm_fid_len = sizeof(struct dm_fid) - sizeof(fid.dm_fid_len);
+ fid.dm_fid_pad = 0;
+ fid.dm_fid_ino = ino;
- fid.dm_fid_gen = be32_to_cpu(dip->di_core.di_gen);
++ fid.dm_fid_gen = be32_to_cpu(dip->di_gen);
+
+ memcpy(&handlep->ha_fsid, fsid, sizeof(*fsid));
+ memcpy(&handlep->ha_fid, &fid, fid.dm_fid_len + sizeof(fid.dm_fid_len));
+ hsize = DM_HSIZE(*handlep);
+ memset((char *)handlep + hsize, 0, sizeof(*handlep) - hsize);
+}
+
+STATIC int
+xfs_dm_bulkall_inline_one(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_dinode_t *dip,
+ dm_fsid_t *fsid,
+ int *value_lenp,
+ dm_xstat_t *xbuf,
+ u_int *xstat_szp,
+ char *attr_name,
+ caddr_t attr_buf)
+{
+ dm_handle_t handle;
+ u_int xstat_sz = *xstat_szp;
+ int value_len = *value_lenp;
+ int error;
+
- if (dip->di_core.di_mode == 0)
++ if (dip->di_mode == 0)
+ return ENOENT;
+
+ xfs_dip_to_stat(mp, ino, dip, &xbuf->dx_statinfo);
+ dm_dip_to_handle(ino, dip, fsid, &handle);
+ xfs_dm_handle_to_xstat(xbuf, xstat_sz, &handle, sizeof(handle));
+
+ memset(&xbuf->dx_attrdata, 0, sizeof(dm_vardata_t));
+ error = xfs_dm_inline_attr(mp, dip, attr_name, attr_buf, &value_len);
+ DM_EA_XLATE_ERR(error);
+ if (error && (error != ENOATTR)) {
+ if (error == E2BIG)
+ error = ENOMEM;
+ return error;
+ }
+
+ /* How much space was in the attr? */
+ if (error != ENOATTR) {
+ xbuf->dx_attrdata.vd_offset = xstat_sz;
+ xbuf->dx_attrdata.vd_length = value_len;
+ xstat_sz += (value_len+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+ }
+ *xstat_szp = xbuf->dx_statinfo._link = xstat_sz;
+ *value_lenp = value_len;
+ return 0;
+}
+
+/*
+ * This is used by dm_get_bulkall().
+ * Given an inumber, it igets the inode and fills the given buffer
+ * with the dm_xstat structure for the file.
+ */
+STATIC int
+xfs_dm_bulkall_one(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode number to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ void *private_data, /* my private data */
+ xfs_daddr_t bno, /* starting block of inode cluster */
+ int *ubused, /* amount of buffer we used */
+ void *dibuff, /* on-disk inode buffer */
+ int *res) /* bulkstat result code */
+{
+ dm_xstat_t *xbuf;
+ u_int xstat_sz;
+ int error;
+ int value_len;
+ int kern_buf_sz;
+ int attr_buf_sz;
+ caddr_t attr_buf;
+ void __user *attr_user_buf;
+ dm_bulkstat_one_t *dmb = (dm_bulkstat_one_t*)private_data;
+
+ /* Returns positive errors to XFS */
+
+ *res = BULKSTAT_RV_NOTHING;
+
+ if (!buffer || xfs_internal_inum(mp, ino))
+ return EINVAL;
+
+ xstat_sz = DM_STAT_SIZE(*xbuf, 0);
+ xstat_sz = (xstat_sz + (DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+ if (xstat_sz > ubsize)
+ return ENOMEM;
+
+ kern_buf_sz = xstat_sz;
+ xbuf = kmem_alloc(kern_buf_sz, KM_SLEEP);
+
+ /* Determine place to drop attr value, and available space. */
+ value_len = ubsize - xstat_sz;
+ if (value_len > ATTR_MAX_VALUELEN)
+ value_len = ATTR_MAX_VALUELEN;
+
+ attr_user_buf = buffer + xstat_sz;
+ attr_buf_sz = value_len;
+ attr_buf = kmem_alloc(attr_buf_sz, KM_SLEEP);
+
+ if (!dibuff)
+ error = xfs_dm_bulkall_iget_one(mp, ino, bno,
+ &value_len, xbuf, &xstat_sz,
+ dmb->attrname.dan_chars,
+ attr_buf);
+ else
+ error = xfs_dm_bulkall_inline_one(mp, ino,
+ (xfs_dinode_t *)dibuff,
+ &dmb->fsid,
+ &value_len, xbuf, &xstat_sz,
+ dmb->attrname.dan_chars,
+ attr_buf);
+ if (error)
+ goto out_free_buffers;
+
+ if (copy_to_user(buffer, xbuf, kern_buf_sz)) {
+ error = EFAULT;
+ goto out_free_buffers;
+ }
+ if (copy_to_user(attr_user_buf, attr_buf, value_len)) {
+ error = EFAULT;
+ goto out_free_buffers;
+ }
+
+ kmem_free(attr_buf);
+ kmem_free(xbuf);
+
+ *res = BULKSTAT_RV_DIDONE;
+ if (ubused)
+ *ubused = xstat_sz;
+ dmb->laststruct = buffer;
+ return 0;
+
+ out_free_buffers:
+ kmem_free(attr_buf);
+ kmem_free(xbuf);
+ return error;
+}
+
+/*
+ * Take the handle and put it at the end of a dm_stat buffer.
+ * dt_compname is unused in bulkstat - so we zero it out.
+ * Finally, update link in dm_stat_t to point to next struct.
+ */
+STATIC void
+xfs_dm_handle_to_stat(
+ dm_stat_t *sbuf,
+ size_t stat_sz,
+ dm_handle_t *handle,
+ size_t handle_sz)
+{
+ memcpy(sbuf + 1, handle, handle_sz);
+ sbuf->dt_handle.vd_offset = (ssize_t) sizeof(dm_stat_t);
+ sbuf->dt_handle.vd_length = (size_t) DM_HSIZE(*handle);
+ memset(&sbuf->dt_compname, 0, sizeof(dm_vardata_t));
+ sbuf->_link = stat_sz;
+}
+
+STATIC int
+xfs_dm_bulkattr_iget_one(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_daddr_t bno,
+ dm_stat_t *sbuf,
+ u_int stat_sz)
+{
+ xfs_inode_t *ip;
+ dm_handle_t handle;
+ int error;
+
+ error = xfs_iget(mp, NULL, ino,
+ XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+ if (error)
+ return error;
+
+ xfs_ip_to_stat(mp, ino, ip, sbuf);
- dm_ip_to_handle(ip->i_vnode, &handle);
++ dm_ip_to_handle(&ip->i_vnode, &handle);
+ xfs_dm_handle_to_stat(sbuf, stat_sz, &handle, sizeof(handle));
+
+ xfs_iput(ip, XFS_ILOCK_SHARED);
+ return 0;
+}
+
+STATIC int
+xfs_dm_bulkattr_inline_one(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ xfs_dinode_t *dip,
+ dm_fsid_t *fsid,
+ dm_stat_t *sbuf,
+ u_int stat_sz)
+{
+ dm_handle_t handle;
+
- if (dip->di_core.di_mode == 0)
++ if (dip->di_mode == 0)
+ return ENOENT;
+ xfs_dip_to_stat(mp, ino, dip, sbuf);
+ dm_dip_to_handle(ino, dip, fsid, &handle);
+ xfs_dm_handle_to_stat(sbuf, stat_sz, &handle, sizeof(handle));
+ return 0;
+}
+
+/*
+ * This is used by dm_get_bulkattr().
+ * Given an inumber, it igets the inode and fills the given buffer
+ * with the dm_stat structure for the file.
+ */
+STATIC int
+xfs_dm_bulkattr_one(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode number to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ void *private_data, /* my private data */
+ xfs_daddr_t bno, /* starting block of inode cluster */
+ int *ubused, /* amount of buffer we used */
+ void *dibuff, /* on-disk inode buffer */
+ int *res) /* bulkstat result code */
+{
+ dm_stat_t *sbuf;
+ u_int stat_sz;
+ int error;
+ dm_bulkstat_one_t *dmb = (dm_bulkstat_one_t*)private_data;
+
+ /* Returns positive errors to XFS */
+
+ *res = BULKSTAT_RV_NOTHING;
+
+ if (!buffer || xfs_internal_inum(mp, ino))
+ return EINVAL;
+
+ stat_sz = DM_STAT_SIZE(*sbuf, 0);
+ stat_sz = (stat_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+ if (stat_sz > ubsize)
+ return ENOMEM;
+
+ sbuf = kmem_alloc(stat_sz, KM_SLEEP);
+
+ if (!dibuff)
+ error = xfs_dm_bulkattr_iget_one(mp, ino, bno, sbuf, stat_sz);
+ else
+ error = xfs_dm_bulkattr_inline_one(mp, ino,
+ (xfs_dinode_t *)dibuff,
+ &dmb->fsid, sbuf, stat_sz);
+ if (error)
+ goto out_free_buffer;
+
+ if (copy_to_user(buffer, sbuf, stat_sz)) {
+ error = EFAULT;
+ goto out_free_buffer;
+ }
+
+ kmem_free(sbuf);
+ *res = BULKSTAT_RV_DIDONE;
+ if (ubused)
+ *ubused = stat_sz;
+ dmb->laststruct = buffer;
+ return 0;
+
+ out_free_buffer:
+ kmem_free(sbuf);
+ return error;
+}
+
+/* xfs_dm_f_get_eventlist - return the dm_eventset_t mask for inode ip. */
+
+STATIC int
+xfs_dm_f_get_eventlist(
+ xfs_inode_t *ip,
+ dm_right_t right,
+ u_int nelem,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int *nelemp) /* in kernel space! */
+{
+ dm_eventset_t eventset;
+
+ if (right < DM_RIGHT_SHARED)
+ return(EACCES);
+
+ /* Note that we MUST return a regular file's managed region bits as
+ part of the mask because dm_get_eventlist is supposed to return the
+ union of all managed region flags in those bits. Since we only
+ support one region, we can just return the bits as they are. For
+ all other object types, the bits will already be zero. Handy, huh?
+ */
+
+ eventset = ip->i_d.di_dmevmask;
+
+ /* Now copy the event mask and event count back to the caller. We
+ return the lesser of nelem and DM_EVENT_MAX.
+ */
+
+ if (nelem > DM_EVENT_MAX)
+ nelem = DM_EVENT_MAX;
+ eventset &= (1 << nelem) - 1;
+
+ *eventsetp = eventset;
+ *nelemp = nelem;
+ return(0);
+}
+
+
+/* xfs_dm_f_set_eventlist - update the dm_eventset_t mask in the inode ip. Only the
+ bits from zero to maxevent-1 are being replaced; higher bits are preserved.
+*/
+
+STATIC int
+xfs_dm_f_set_eventlist(
+ xfs_inode_t *ip,
+ dm_right_t right,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int maxevent)
+{
+ dm_eventset_t eventset;
+ dm_eventset_t max_mask;
+ dm_eventset_t valid_events;
+ xfs_trans_t *tp;
+ xfs_mount_t *mp;
+ int error;
+
+ if (right < DM_RIGHT_EXCL)
+ return(EACCES);
+
+ eventset = *eventsetp;
+ if (maxevent >= sizeof(ip->i_d.di_dmevmask) * NBBY)
+ return(EINVAL);
+ max_mask = (1 << maxevent) - 1;
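+ /* e.g. maxevent == 4 gives max_mask == 0xf, selecting events 0..3 */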
+
+ if (S_ISDIR(ip->i_d.di_mode)) {
+ valid_events = DM_XFS_VALID_DIRECTORY_EVENTS;
+ } else { /* file or symlink */
+ valid_events = DM_XFS_VALID_FILE_EVENTS;
+ }
+ if ((eventset & max_mask) & ~valid_events)
+ return(EINVAL);
+
+ /* Adjust the event mask so that the managed region bits will not
+ be altered.
+ */
+
+ max_mask &= ~(1 <<DM_EVENT_READ); /* preserve current MR bits */
+ max_mask &= ~(1 <<DM_EVENT_WRITE);
+ max_mask &= ~(1 <<DM_EVENT_TRUNCATE);
+
+ mp = ip->i_mount;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return(error);
+ }
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
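+ /* Splice the new low-order bits into the mask; the managed-region
+ bits and any bits at or above maxevent keep their old values. */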
+ ip->i_d.di_dmevmask = (eventset & max_mask) | (ip->i_d.di_dmevmask & ~max_mask);
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- igrab(ip->i_vnode);
++ igrab(&ip->i_vnode);
+ xfs_trans_commit(tp, 0);
+
+ return(0);
+}
+
+
+/* xfs_dm_fs_get_eventlist - return the dm_eventset_t mask for the mount mp. */
+
+STATIC int
+xfs_dm_fs_get_eventlist(
+ xfs_mount_t *mp,
+ dm_right_t right,
+ u_int nelem,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int *nelemp) /* in kernel space! */
+{
+ dm_eventset_t eventset;
+
+ if (right < DM_RIGHT_SHARED)
+ return(EACCES);
+
+ eventset = mp->m_dmevmask;
+
+ /* Now copy the event mask and event count back to the caller. We
+ return the lesser of nelem and DM_EVENT_MAX.
+ */
+
+ if (nelem > DM_EVENT_MAX)
+ nelem = DM_EVENT_MAX;
+ eventset &= (1 << nelem) - 1;
+
+ *eventsetp = eventset;
+ *nelemp = nelem;
+ return(0);
+}
+
+
+/* xfs_dm_fs_set_eventlist - update the dm_eventset_t mask in the mount structure
+ mp. Only the bits from zero to maxevent-1 are being replaced;
+ higher bits are preserved.
+*/
+
+STATIC int
+xfs_dm_fs_set_eventlist(
+ xfs_mount_t *mp,
+ dm_right_t right,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int maxevent)
+{
+ dm_eventset_t eventset;
+ dm_eventset_t max_mask;
+
+ if (right < DM_RIGHT_EXCL)
+ return(EACCES);
+
+ eventset = *eventsetp;
+
+ if (maxevent >= sizeof(mp->m_dmevmask) * NBBY)
+ return(EINVAL);
+ max_mask = (1 << maxevent) - 1;
+
+ if ((eventset & max_mask) & ~DM_XFS_VALID_FS_EVENTS)
+ return(EINVAL);
+
+ mp->m_dmevmask = (eventset & max_mask) | (mp->m_dmevmask & ~max_mask);
+ return(0);
+}
+
+
+/* Code in this routine must exactly match the logic in xfs_diordwr() in
+ order for this to work!
+*/
+
+STATIC int
+xfs_dm_direct_ok(
+ xfs_inode_t *ip,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp)
+{
+ xfs_mount_t *mp;
+
+ mp = ip->i_mount;
+
+ /* Realtime files can ONLY do direct I/O. */
+
+ if (XFS_IS_REALTIME_INODE(ip))
+ return(1);
+
+ /* If direct I/O is disabled, or if the request is too small, use
+ buffered I/O.
+ */
+
+ if (!dm_min_dio_xfer || len < dm_min_dio_xfer)
+ return(0);
+
+#if 0
+ /* If the request is not well-formed or is too large, use
+ buffered I/O.
+ */
+
+ if ((__psint_t)bufp & scache_linemask) /* if buffer not aligned */
+ return(0);
+ if (off & mp->m_blockmask) /* if file offset not aligned */
+ return(0);
+ if (len & mp->m_blockmask) /* if xfer length not aligned */
+ return(0);
+ if (len > ctooff(v.v_maxdmasz - 1)) /* if transfer too large */
+ return(0);
+
+ /* A valid direct I/O candidate. */
+
+ return(1);
+#else
+ return(0);
+#endif
+}
+
+
+/* We need to be able to select various combinations of O_NONBLOCK,
+ O_DIRECT, and O_SYNC, yet we don't have a file descriptor and we don't have
+ the file's pathname. All we have is a handle.
+*/
+
+STATIC int
+xfs_dm_rdwr(
+ struct inode *inode,
+ uint fflag,
+ mode_t fmode,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp,
+ int *rvp)
+{
++ const struct cred *cred = current_cred();
+ xfs_inode_t *ip = XFS_I(inode);
+ int error;
+ int oflags;
+ ssize_t xfer;
+ struct file *file;
+ struct dentry *dentry;
+
+ if ((off < 0) || (off > i_size_read(inode)) || !S_ISREG(inode->i_mode))
+ return EINVAL;
+
+ if (fmode & FMODE_READ) {
+ oflags = O_RDONLY;
+ } else {
+ oflags = O_WRONLY;
+ }
+
+ /*
+ * Build file descriptor flags and I/O flags. O_NONBLOCK is needed so
+ * that we don't block on mandatory file locks. This is an invisible IO,
+ * don't change the atime.
+ */
+
+ oflags |= O_LARGEFILE | O_NONBLOCK | O_NOATIME;
+ if (xfs_dm_direct_ok(ip, off, len, bufp))
+ oflags |= O_DIRECT;
+
+ if (fflag & O_SYNC)
+ oflags |= O_SYNC;
+
+ if (inode->i_fop == NULL) {
+ /* no iput; caller did get, and will do put */
+ return EINVAL;
+ }
+
+ igrab(inode);
+
- dentry = d_alloc_anon(inode);
++ dentry = d_obtain_alias(inode);
++ if (IS_ERR(dentry)) {
++ /* d_obtain_alias() already dropped the inode reference */
++ return -PTR_ERR(dentry);
++ }
+
- file = dentry_open(dentry, mntget(ip->i_mount->m_vfsmount), oflags);
++ file = dentry_open(dentry, mntget(ip->i_mount->m_vfsmount), oflags,
++ cred);
+ if (IS_ERR(file)) {
+ return -PTR_ERR(file);
+ }
- file->f_op = &xfs_invis_file_operations;
++ file->f_mode |= FMODE_NOCMTIME;
+
+ if (fmode & FMODE_READ) {
+ xfer = file->f_op->read(file, bufp, len, (loff_t*)&off);
+ } else {
+ xfer = file->f_op->write(file, bufp, len, (loff_t*)&off);
+ }
+
+ if (xfer >= 0) {
+ *rvp = xfer;
+ error = 0;
+ } else {
+ /* xfs_read/xfs_write return negative error--flip it */
+ error = -(int)xfer;
+ }
+
+ fput(file);
+ return error;
+}
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_clear_inherit(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep)
+{
+ return(-ENOSYS); /* Return negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_create_by_handle(
+ struct inode *inode,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname)
+{
+ return(-ENOSYS); /* Return negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_downgrade_right(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type) /* DM_FSYS_OBJ or zero */
+{
+#ifdef DEBUG_RIGHTS
+ char buffer[sizeof(dm_handle_t) * 2 + 1];
+
+ if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+ printf("dm_downgrade_right: old %d new %d type %d handle %s\n",
+ right, DM_RIGHT_SHARED, type, buffer);
+ } else {
+ printf("dm_downgrade_right: old %d new %d type %d handle "
+ "<INVALID>\n", right, DM_RIGHT_SHARED, type);
+ }
+#endif /* DEBUG_RIGHTS */
+ return(0);
+}
+
+
+/* Note: xfs_dm_get_allocinfo() makes no attempt to coalesce two adjacent
+ extents when both are of type DM_EXTENT_RES; this is left to the caller.
+ XFS guarantees that there will never be two adjacent DM_EXTENT_HOLE extents.
+
+ In order to provide the caller with all extents in a file including
+ those beyond the file's last byte offset, we have to use the xfs_bmapi()
+ interface.
+*/
+
+STATIC int
+xfs_dm_get_allocinfo_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ dm_off_t __user *offp,
+ u_int nelem,
+ dm_extent_t __user *extentp,
+ u_int __user *nelemp,
+ int *rvp)
+{
+ xfs_inode_t *ip = XFS_I(inode);
+ xfs_mount_t *mp; /* file system mount point */
+ xfs_fileoff_t fsb_offset;
+ xfs_filblks_t fsb_length;
+ dm_off_t startoff;
+ int elem;
+ xfs_bmbt_irec_t *bmp = NULL;
+ u_int bmpcnt = 50;
+ u_int bmpsz = sizeof(xfs_bmbt_irec_t) * bmpcnt;
+ int error = 0;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ if ((inode->i_mode & S_IFMT) != S_IFREG)
+ return(-EINVAL);
+
+ if (copy_from_user( &startoff, offp, sizeof(startoff)))
+ return(-EFAULT);
+
+ mp = ip->i_mount;
+ ASSERT(mp);
+
+ if (startoff > XFS_MAXIOFFSET(mp))
+ return(-EINVAL);
+
+ if (nelem == 0)
+ return(-EINVAL);
+
+ /* Convert the caller's starting offset into filesystem allocation
+ units as required by xfs_bmapi(). Round the offset down so that
+ it is sure to be included in the reply.
+ */
+
+ fsb_offset = XFS_B_TO_FSBT(mp, startoff);
+ fsb_length = XFS_B_TO_FSB(mp, XFS_MAXIOFFSET(mp)) - fsb_offset;
+ elem = 0;
+
+ if (fsb_length)
+ bmp = kmem_alloc(bmpsz, KM_SLEEP);
+
+ while (fsb_length && elem < nelem) {
+ dm_extent_t extent;
+ xfs_filblks_t fsb_bias;
+ dm_size_t bias;
+ int lock;
+ int num;
+ int i;
+
+ /* Compute how many getbmap structures to use on the xfs_bmapi
+ call.
+ */
+
+ num = MIN((u_int)(nelem - elem), bmpcnt);
+
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ lock = xfs_ilock_map_shared(ip);
+
+ error = xfs_bmapi(NULL, ip, fsb_offset, fsb_length,
+ XFS_BMAPI_ENTIRE, NULL, 0, bmp, &num, NULL, NULL);
+
+ xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ if (error) {
+ error = -error; /* Return negative error to DMAPI */
+ goto finish_out;
+ }
+
+ /* Fill in the caller's extents, adjusting the bias in the
+ first entry if necessary.
+ */
+
+ for (i = 0; i < num; i++, extentp++) {
+ bias = startoff - XFS_FSB_TO_B(mp, bmp[i].br_startoff);
+ extent.ex_offset = startoff;
+ extent.ex_length =
+ XFS_FSB_TO_B(mp, bmp[i].br_blockcount) - bias;
+ if (bmp[i].br_startblock == HOLESTARTBLOCK) {
+ extent.ex_type = DM_EXTENT_HOLE;
+ } else {
+ extent.ex_type = DM_EXTENT_RES;
+ }
+ startoff = extent.ex_offset + extent.ex_length;
+
+ if (copy_to_user( extentp, &extent, sizeof(extent))) {
+ error = -EFAULT;
+ goto finish_out;
+ }
+
+ fsb_bias = fsb_offset - bmp[i].br_startoff;
+ fsb_offset += bmp[i].br_blockcount - fsb_bias;
+ fsb_length -= bmp[i].br_blockcount - fsb_bias;
+ elem++;
+ }
+ }
+
+ if (fsb_length == 0) {
+ startoff = 0;
+ }
+ if (copy_to_user( offp, &startoff, sizeof(startoff))) {
+ error = -EFAULT;
+ goto finish_out;
+ }
+
+ if (copy_to_user( nelemp, &elem, sizeof(elem))) {
+ error = -EFAULT;
+ goto finish_out;
+ }
+
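+ /* A non-zero return value tells the caller that extents remain
+ beyond the ones returned, so another call is needed. */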
+ *rvp = (fsb_length == 0 ? 0 : 1);
+
+finish_out:
+ if (bmp)
+ kmem_free(bmp);
+ return(error);
+}
+
+
+STATIC int
+xfs_dm_zero_xstatinfo_link(
+ dm_xstat_t __user *dxs)
+{
+ dm_xstat_t *ldxs;
+ int error = 0;
+
+ if (!dxs)
+ return 0;
+ ldxs = kmalloc(sizeof(*ldxs), GFP_KERNEL);
+ if (!ldxs)
+ return -ENOMEM;
+ if (copy_from_user(ldxs, dxs, sizeof(*dxs))) {
+ error = -EFAULT;
+ } else {
+ ldxs->dx_statinfo._link = 0;
+ if (copy_to_user(dxs, ldxs, sizeof(*dxs)))
+ error = -EFAULT;
+ }
+ kfree(ldxs);
+ return error;
+}
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_bulkall_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ u_int mask,
+ dm_attrname_t __user *attrnamep,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp, /* address of buffer in user space */
+ size_t __user *rlenp, /* user space address */
+ int *rvalp)
+{
+ int error, done;
+ int nelems;
+ u_int statstruct_sz;
+ dm_attrloc_t loc;
+ xfs_mount_t *mp = XFS_I(inode)->i_mount;
+ dm_attrname_t attrname;
+ dm_bulkstat_one_t dmb;
+
+ /* Returns negative errors to DMAPI */
+
+ if (copy_from_user(&attrname, attrnamep, sizeof(attrname)) ||
+ copy_from_user(&loc, locp, sizeof(loc)))
+ return -EFAULT;
+
+ if (attrname.an_chars[0] == '\0')
+ return(-EINVAL);
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ /* Because we will write directly to the user's buffer, make sure that
+ the buffer is properly aligned.
+ */
+
+ if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0)
+ return(-EFAULT);
+
+ /* Size of the handle is constant for this function.
+ * If there are no files with attributes, then this will be the
+ * maximum number of inodes we can get.
+ */
+
+ statstruct_sz = DM_STAT_SIZE(dm_xstat_t, 0);
+ statstruct_sz = (statstruct_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+
+ nelems = buflen / statstruct_sz;
+ if (nelems < 1) {
+ if (put_user( statstruct_sz, rlenp ))
+ return(-EFAULT);
+ return(-E2BIG);
+ }
+
+ /* Build the on-disk version of the attribute name. */
+ strcpy(dmb.attrname.dan_chars, dmattr_prefix);
+ strncpy(&dmb.attrname.dan_chars[DMATTR_PREFIXLEN],
+ attrname.an_chars, DM_ATTR_NAME_SIZE + 1);
+ dmb.attrname.dan_chars[sizeof(dmb.attrname.dan_chars) - 1] = '\0';
+
+ /*
+ * fill the buffer with dm_xstat_t's
+ */
+
+ dmb.laststruct = NULL;
+ memcpy(&dmb.fsid, mp->m_fixedfsid, sizeof(dm_fsid_t));
+ error = xfs_bulkstat(mp, (xfs_ino_t *)&loc, &nelems,
+ xfs_dm_bulkall_one, (void*)&dmb, statstruct_sz,
+ bufp, BULKSTAT_FG_INLINE, &done);
+ if (error)
+ return(-error); /* Return negative error to DMAPI */
+
+ *rvalp = !done ? 1 : 0;
+
+ if (put_user( statstruct_sz * nelems, rlenp ))
+ return(-EFAULT);
+
+ if (copy_to_user( locp, &loc, sizeof(loc)))
+ return(-EFAULT);
+ /*
+ * If we didn't do any, we must not have any more to do.
+ */
+ if (nelems < 1)
+ return(0);
+ /*
+ * Set _link in the last struct to zero
+ */
+ return xfs_dm_zero_xstatinfo_link((dm_xstat_t __user *)dmb.laststruct);
+}
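+
+/*
+ * Worked example of the sizing logic above (hypothetical numbers, not
+ * part of this patch): with DM_STAT_ALIGN == 8 and a 204-byte
+ * dm_xstat_t-plus-handle record, statstruct_sz rounds up to 208, so a
+ * 4096-byte user buffer yields nelems == 19 entries per call, while a
+ * buffer smaller than 208 bytes fails with -E2BIG after reporting the
+ * required size through rlenp.
+ */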
+
+
+STATIC int
+xfs_dm_zero_statinfo_link(
+ dm_stat_t __user *dxs)
+{
+ dm_stat_t *ldxs;
+ int error = 0;
+
+ if (!dxs)
+ return 0;
+ ldxs = kmalloc(sizeof(*ldxs), GFP_KERNEL);
+ if (!ldxs)
+ return -ENOMEM;
+ if (copy_from_user(ldxs, dxs, sizeof(*dxs))) {
+ error = -EFAULT;
+ } else {
+ ldxs->_link = 0;
+ if (copy_to_user(dxs, ldxs, sizeof(*dxs)))
+ error = -EFAULT;
+ }
+ kfree(ldxs);
+ return error;
+}
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_bulkattr_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ u_int mask,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp,
+ int *rvalp)
+{
+ int error, done;
+ int nelems;
+ u_int statstruct_sz;
+ dm_attrloc_t loc;
+ xfs_mount_t *mp = XFS_I(inode)->i_mount;
+ dm_bulkstat_one_t dmb;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ if (copy_from_user( &loc, locp, sizeof(loc)))
+ return(-EFAULT);
+
+ /* Because we will write directly to the user's buffer, make sure that
+ the buffer is properly aligned.
+ */
+
+ if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0)
+ return(-EFAULT);
+
+ /* size of the handle is constant for this function */
+
+ statstruct_sz = DM_STAT_SIZE(dm_stat_t, 0);
+ statstruct_sz = (statstruct_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+
+ nelems = buflen / statstruct_sz;
+ if (nelems < 1) {
+ if (put_user( statstruct_sz, rlenp ))
+ return(-EFAULT);
+ return(-E2BIG);
+ }
+
+ dmb.laststruct = NULL;
+ memcpy(&dmb.fsid, mp->m_fixedfsid, sizeof(dm_fsid_t));
+ error = xfs_bulkstat(mp, (xfs_ino_t *)&loc, &nelems,
+ xfs_dm_bulkattr_one, (void*)&dmb,
+ statstruct_sz, bufp, BULKSTAT_FG_INLINE, &done);
+ if (error)
+ return(-error); /* Return negative error to DMAPI */
+
+ *rvalp = !done ? 1 : 0;
+
+ if (put_user( statstruct_sz * nelems, rlenp ))
+ return(-EFAULT);
+
+ if (copy_to_user( locp, &loc, sizeof(loc)))
+ return(-EFAULT);
+
+ /*
+ * If we didn't do any, we must not have any more to do.
+ */
+ if (nelems < 1)
+ return(0);
+ /*
+ * Set _link in the last struct to zero
+ */
+ return xfs_dm_zero_statinfo_link((dm_stat_t __user *)dmb.laststruct);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_config(
+ struct inode *inode,
+ dm_right_t right,
+ dm_config_t flagname,
+ dm_size_t __user *retvalp)
+{
+ dm_size_t retval;
+
+ /* Returns negative errors to DMAPI */
+
+ switch (flagname) {
+ case DM_CONFIG_DTIME_OVERLOAD:
+ case DM_CONFIG_PERS_ATTRIBUTES:
+ case DM_CONFIG_PERS_EVENTS:
+ case DM_CONFIG_PERS_MANAGED_REGIONS:
+ case DM_CONFIG_PUNCH_HOLE:
+ case DM_CONFIG_WILL_RETRY:
+ retval = DM_TRUE;
+ break;
+
+ case DM_CONFIG_CREATE_BY_HANDLE: /* these will never be done */
+ case DM_CONFIG_LOCK_UPGRADE:
+ case DM_CONFIG_PERS_INHERIT_ATTRIBS:
+ retval = DM_FALSE;
+ break;
+
+ case DM_CONFIG_BULKALL:
+ retval = DM_TRUE;
+ break;
+ case DM_CONFIG_MAX_ATTR_ON_DESTROY:
+ retval = DM_MAX_ATTR_BYTES_ON_DESTROY;
+ break;
+
+ case DM_CONFIG_MAX_ATTRIBUTE_SIZE:
+ retval = ATTR_MAX_VALUELEN;
+ break;
+
+ case DM_CONFIG_MAX_HANDLE_SIZE:
+ retval = DM_MAX_HANDLE_SIZE;
+ break;
+
+ case DM_CONFIG_MAX_MANAGED_REGIONS:
+ retval = 1;
+ break;
+
+ case DM_CONFIG_TOTAL_ATTRIBUTE_SPACE:
+ retval = 0x7fffffff; /* actually it's unlimited */
+ break;
+
+ default:
+ return(-EINVAL);
+ }
+
+ /* Copy the results back to the user. */
+
+ if (copy_to_user( retvalp, &retval, sizeof(retval)))
+ return(-EFAULT);
+ return(0);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_config_events(
+ struct inode *inode,
+ dm_right_t right,
+ u_int nelem,
+ dm_eventset_t __user *eventsetp,
+ u_int __user *nelemp)
+{
+ dm_eventset_t eventset;
+
+ /* Returns negative errors to DMAPI */
+
+ if (nelem == 0)
+ return(-EINVAL);
+
+ eventset = DM_XFS_SUPPORTED_EVENTS;
+
+ /* Now copy the event mask and event count back to the caller. We
+ return the lesser of nelem and DM_EVENT_MAX.
+ */
+
+ if (nelem > DM_EVENT_MAX)
+ nelem = DM_EVENT_MAX;
+	eventset &= ((dm_eventset_t)1 << nelem) - 1;
+
+ if (copy_to_user( eventsetp, &eventset, sizeof(eventset)))
+ return(-EFAULT);
+
+ if (put_user(nelem, nelemp))
+ return(-EFAULT);
+ return(0);
+}
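+
+/*
+ * Illustrative user-space sketch (assumes the XDSM dm_get_config_events()
+ * binding and a handle obtained via dm_path_to_handle(); not part of this
+ * patch):
+ *
+ *	dm_eventset_t	eventset;
+ *	u_int		nelem;
+ *
+ *	if (dm_get_config_events(hanp, hlen, DM_EVENT_MAX,
+ *				 &eventset, &nelem) == 0 &&
+ *	    DMEV_ISSET(DM_EVENT_READ, eventset))
+ *		;	(DM_REGION_READ managed regions are usable)
+ */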
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_destroy_dmattr(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrname_t *attrnamep,
+ char **valuepp,
+ int *vlenp)
+{
+ dm_dkattrname_t dkattrname;
+ int alloc_size;
+ int value_len;
+ char *value;
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ *vlenp = -1; /* assume failure by default */
+
+ if (attrnamep->an_chars[0] == '\0')
+ return(-EINVAL);
+
+ /* Build the on-disk version of the attribute name. */
+
+ strcpy(dkattrname.dan_chars, dmattr_prefix);
+ strncpy(&dkattrname.dan_chars[DMATTR_PREFIXLEN],
+ (char *)attrnamep->an_chars, DM_ATTR_NAME_SIZE + 1);
+ dkattrname.dan_chars[sizeof(dkattrname.dan_chars) - 1] = '\0';
+
+ /* xfs_attr_get will not return anything if the buffer is too small,
+ and we don't know how big to make the buffer, so this may take
+ two tries to get it right. The initial try must use a buffer of
+ at least XFS_BUG_KLUDGE bytes to prevent buffer overflow because
+ of a bug in XFS.
+ */
+
+ alloc_size = XFS_BUG_KLUDGE;
+ value = kmalloc(alloc_size, GFP_KERNEL);
+ if (value == NULL)
+ return(-ENOMEM);
+
+ error = xfs_attr_get(XFS_I(inode), dkattrname.dan_chars, value,
+ &value_len, ATTR_ROOT);
+ if (error == ERANGE) {
+ kfree(value);
+ alloc_size = value_len;
+ value = kmalloc(alloc_size, GFP_KERNEL);
+ if (value == NULL)
+ return(-ENOMEM);
+
+ error = xfs_attr_get(XFS_I(inode), dkattrname.dan_chars, value,
+ &value_len, ATTR_ROOT);
+ }
+ if (error) {
+ kfree(value);
+ DM_EA_XLATE_ERR(error);
+ return(-error); /* Return negative error to DMAPI */
+ }
+
+ /* The attribute exists and has a value. Note that a value_len of
+ zero is valid!
+ */
+
+ if (value_len == 0) {
+ kfree(value);
+ *vlenp = 0;
+ return(0);
+ } else if (value_len > DM_MAX_ATTR_BYTES_ON_DESTROY) {
+ char *value2;
+
+ value2 = kmalloc(DM_MAX_ATTR_BYTES_ON_DESTROY, GFP_KERNEL);
+ if (value2 == NULL) {
+ kfree(value);
+ return(-ENOMEM);
+ }
+ memcpy(value2, value, DM_MAX_ATTR_BYTES_ON_DESTROY);
+ kfree(value);
+ value = value2;
+ value_len = DM_MAX_ATTR_BYTES_ON_DESTROY;
+ }
+ *vlenp = value_len;
+ *valuepp = value;
+ return(0);
+}
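+
+/*
+ * The probe-then-retry pattern above as a minimal sketch (assumes only
+ * xfs_attr_get()'s in/out value_len convention; allocation failures
+ * elided). On ERANGE, value_len comes back holding the needed size:
+ *
+ *	len = XFS_BUG_KLUDGE;
+ *	buf = kmalloc(len, GFP_KERNEL);
+ *	error = xfs_attr_get(ip, name, buf, &len, ATTR_ROOT);
+ *	if (error == ERANGE) {
+ *		kfree(buf);
+ *		buf = kmalloc(len, GFP_KERNEL);
+ *		error = xfs_attr_get(ip, name, buf, &len, ATTR_ROOT);
+ *	}
+ */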
+
+/* This code was taken from xfs_ioctl(XFS_IOC_DIOINFO) on Linux, itself
+   derived from xfs_fcntl(F_DIOINFO), and modified slightly because we
+   don't have a flags parameter (no open file).
+*/
+
+STATIC int
+xfs_dm_get_dioinfo(
+ struct inode *inode,
+ dm_right_t right,
+ dm_dioinfo_t __user *diop)
+{
+ dm_dioinfo_t dio;
+ xfs_mount_t *mp;
+ xfs_inode_t *ip = XFS_I(inode);
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ mp = ip->i_mount;
+
+ dio.d_miniosz = dio.d_mem = MIN_DIO_SIZE(mp);
+ dio.d_maxiosz = MAX_DIO_SIZE(mp);
+ dio.d_dio_only = DM_FALSE;
+
+ if (copy_to_user(diop, &dio, sizeof(dio)))
+ return(-EFAULT);
+ return(0);
+}
+
+typedef struct dm_readdir_cb {
+ xfs_mount_t *mp;
+ char __user *ubuf;
+ dm_stat_t __user *lastbuf;
+ size_t spaceleft;
+ size_t nwritten;
+ int error;
+ dm_stat_t kstat;
+} dm_readdir_cb_t;
+
+STATIC int
+dm_filldir(void *__buf, const char *name, int namelen, loff_t offset,
+ u64 ino, unsigned int d_type)
+{
+ dm_readdir_cb_t *cb = __buf;
+ dm_stat_t *statp = &cb->kstat;
+ size_t len;
+ int error;
+ int needed;
+
+ /*
+ * Make sure we have enough space.
+ */
+ needed = dm_stat_size(namelen + 1);
+ if (cb->spaceleft < needed) {
+ cb->spaceleft = 0;
+ return -ENOSPC;
+ }
+
+ error = -EINVAL;
+ if (xfs_internal_inum(cb->mp, ino))
+ goto out_err;
+
+ memset(statp, 0, dm_stat_size(MAXNAMLEN));
+ error = -xfs_dm_bulkattr_iget_one(cb->mp, ino, 0,
+ statp, needed);
+ if (error)
+ goto out_err;
+
+ /*
+	 * On return from bulkstat_one(), statp->_link points
+ * at the end of the handle in the stat structure.
+ */
+ statp->dt_compname.vd_offset = statp->_link;
+ statp->dt_compname.vd_length = namelen + 1;
+
+ len = statp->_link;
+
+ /* Word-align the record */
+ statp->_link = dm_stat_align(len + namelen + 1);
+
+ error = -EFAULT;
+ if (copy_to_user(cb->ubuf, statp, len))
+ goto out_err;
+ if (copy_to_user(cb->ubuf + len, name, namelen))
+ goto out_err;
+ if (put_user(0, cb->ubuf + len + namelen))
+ goto out_err;
+
+ cb->lastbuf = (dm_stat_t __user *)cb->ubuf;
+ cb->spaceleft -= statp->_link;
+ cb->nwritten += statp->_link;
+ cb->ubuf += statp->_link;
+
+ return 0;
+
+ out_err:
+ cb->error = error;
+ return error;
+}
+
+/* Returns negative errors to DMAPI */
+STATIC int
+xfs_dm_get_dirattrs_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ u_int mask,
+ dm_attrloc_t __user *locp,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp,
+ int *rvp)
+{
+ xfs_inode_t *dp = XFS_I(inode);
+ xfs_mount_t *mp = dp->i_mount;
+ dm_readdir_cb_t *cb;
+ dm_attrloc_t loc;
+ int error;
+
+ if (right < DM_RIGHT_SHARED)
+ return -EACCES;
+
+ /*
+ * Make sure that the buffer is properly aligned.
+ */
+ if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0)
+ return -EFAULT;
+
+ if (mask & ~(DM_AT_HANDLE|DM_AT_EMASK|DM_AT_PMANR|DM_AT_PATTR|
+ DM_AT_DTIME|DM_AT_CFLAG|DM_AT_STAT))
+ return -EINVAL;
+
+ if (!S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ /*
+ * bufp should be able to fit at least one dm_stat entry including
+ * dt_handle and full size MAXNAMLEN dt_compname.
+ */
+ if (buflen < dm_stat_size(MAXNAMLEN))
+ return -ENOMEM;
+
+ if (copy_from_user(&loc, locp, sizeof(loc)))
+ return -EFAULT;
+
+ cb = kzalloc(sizeof(*cb) + dm_stat_size(MAXNAMLEN), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+
+ cb->mp = mp;
+ cb->spaceleft = buflen;
+ cb->ubuf = bufp;
+
+ mutex_lock(&inode->i_mutex);
+ error = -ENOENT;
+ if (!IS_DEADDIR(inode)) {
+ error = -xfs_readdir(dp, cb, dp->i_size,
+ (xfs_off_t *)&loc, dm_filldir);
+ }
+ mutex_unlock(&inode->i_mutex);
+
+ if (error)
+ goto out_kfree;
+ if (cb->error) {
+ error = cb->error;
+ goto out_kfree;
+ }
+
+ error = -EFAULT;
+ if (cb->lastbuf && put_user(0, &cb->lastbuf->_link))
+ goto out_kfree;
+ if (put_user(cb->nwritten, rlenp))
+ goto out_kfree;
+ if (copy_to_user(locp, &loc, sizeof(loc)))
+ goto out_kfree;
+
+ if (cb->nwritten)
+ *rvp = 1;
+ else
+ *rvp = 0;
+ error = 0;
+
+ out_kfree:
+ kfree(cb);
+ return error;
+}
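+
+/*
+ * Illustrative consumer-side walk of the buffer packed by dm_filldir()
+ * (assumes the XDSM dm_stat_t layout; not part of this patch): records
+ * are chained by _link byte offsets and the final record has _link == 0.
+ *
+ *	dm_stat_t *sp = bufp;
+ *	for (;;) {
+ *		char *name = (char *)sp + sp->dt_compname.vd_offset;
+ *		use(name, sp);			(hypothetical consumer)
+ *		if (sp->_link == 0)
+ *			break;
+ *		sp = (dm_stat_t *)((char *)sp + sp->_link);
+ *	}
+ */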
+
+STATIC int
+xfs_dm_get_dmattr(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ dm_dkattrname_t name;
+ char *value;
+ int value_len;
+ int alloc_size;
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0)
+ return(-error); /* Return negative error to DMAPI */
+
+ /* Allocate a buffer to receive the attribute's value. We allocate
+ at least one byte even if the caller specified a buflen of zero.
+ (A buflen of zero is considered valid.)
+
+ Allocating a minimum of XFS_BUG_KLUDGE bytes temporarily works
+ around a bug within XFS in which in-inode attribute values are not
+ checked to see if they will fit in the buffer before they are
+ copied. Since no in-core attribute value can be larger than 256
+ bytes (an 8-bit size field), we allocate that minimum size here to
+ prevent buffer overrun in both the kernel's and user's buffers.
+ */
+
+ alloc_size = buflen;
+ if (alloc_size < XFS_BUG_KLUDGE)
+ alloc_size = XFS_BUG_KLUDGE;
+ if (alloc_size > ATTR_MAX_VALUELEN)
+ alloc_size = ATTR_MAX_VALUELEN;
+ value = kmem_alloc(alloc_size, KM_SLEEP | KM_LARGE);
+
+ /* Get the attribute's value. */
+
+ value_len = alloc_size; /* in/out parameter */
+
+ error = xfs_attr_get(XFS_I(inode), name.dan_chars, value, &value_len,
+ ATTR_ROOT);
+ DM_EA_XLATE_ERR(error);
+
+ /* DMAPI requires an errno of ENOENT if an attribute does not exist,
+ so remap ENOATTR here.
+ */
+
+ if (error == ENOATTR)
+ error = ENOENT;
+ if (!error && value_len > buflen)
+ error = E2BIG;
+ if (!error && copy_to_user(bufp, value, value_len))
+ error = EFAULT;
+ if (!error || error == E2BIG) {
+ if (put_user(value_len, rlenp))
+ error = EFAULT;
+ }
+
+ kmem_free(value);
+ return(-error); /* Return negative error to DMAPI */
+}
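+
+/*
+ * Caller-side retry sketch (XDSM convention assumed): on -E2BIG the
+ * required length was still stored through rlenp, so user space can
+ * reallocate its buffer to that size and repeat the dm_get_dmattr()
+ * call with buflen == rlen.
+ */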
+
+STATIC int
+xfs_dm_get_eventlist(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type,
+ u_int nelem,
+ dm_eventset_t *eventsetp,
+ u_int *nelemp)
+{
+ int error;
+ xfs_inode_t *ip = XFS_I(inode);
+
+ /* Returns negative errors to DMAPI */
+
+ if (type == DM_FSYS_OBJ) {
+ error = xfs_dm_fs_get_eventlist(ip->i_mount, right, nelem,
+ eventsetp, nelemp);
+ } else {
+ error = xfs_dm_f_get_eventlist(ip, right, nelem,
+ eventsetp, nelemp);
+ }
+ return(-error); /* Returns negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_get_fileattr(
+ struct inode *inode,
+ dm_right_t right,
+ u_int mask, /* not used; always return everything */
+ dm_stat_t __user *statp)
+{
+ dm_stat_t stat;
+ xfs_inode_t *ip = XFS_I(inode);
+ xfs_mount_t *mp;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ /* Find the mount point. */
+
+ mp = ip->i_mount;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ xfs_ip_to_stat(mp, ip->i_ino, ip, &stat);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ if (copy_to_user( statp, &stat, sizeof(stat)))
+ return(-EFAULT);
+ return(0);
+}
+
+
+/* We currently only support a maximum of one managed region per file, and
+ use the DM_EVENT_READ, DM_EVENT_WRITE, and DM_EVENT_TRUNCATE events in
+ the file's dm_eventset_t event mask to implement the DM_REGION_READ,
+ DM_REGION_WRITE, and DM_REGION_TRUNCATE flags for that single region.
+*/
+
+STATIC int
+xfs_dm_get_region(
+ struct inode *inode,
+ dm_right_t right,
+ u_int nelem,
+ dm_region_t __user *regbufp,
+ u_int __user *nelemp)
+{
+ dm_eventset_t evmask;
+ dm_region_t region;
+ xfs_inode_t *ip = XFS_I(inode);
+ u_int elem;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ evmask = ip->i_d.di_dmevmask; /* read the mask "atomically" */
+
+ /* Get the file's current managed region flags out of the
+ dm_eventset_t mask and use them to build a managed region that
+ covers the entire file, i.e. set rg_offset and rg_size to zero.
+ */
+
+	memset((char *)&region, 0, sizeof(region));
+
+ if (evmask & (1 << DM_EVENT_READ))
+ region.rg_flags |= DM_REGION_READ;
+ if (evmask & (1 << DM_EVENT_WRITE))
+ region.rg_flags |= DM_REGION_WRITE;
+ if (evmask & (1 << DM_EVENT_TRUNCATE))
+ region.rg_flags |= DM_REGION_TRUNCATE;
+
+ elem = (region.rg_flags ? 1 : 0);
+
+ if (copy_to_user( nelemp, &elem, sizeof(elem)))
+ return(-EFAULT);
+ if (elem > nelem)
+ return(-E2BIG);
+	if (elem && copy_to_user(regbufp, &region, sizeof(region)))
+ return(-EFAULT);
+ return(0);
+}
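+
+/*
+ * Illustrative user-space sketch (assumes the XDSM dm_get_region()
+ * binding; not part of this patch):
+ *
+ *	dm_region_t	rgn;
+ *	u_int		nret;
+ *
+ *	if (dm_get_region(sid, hanp, hlen, DM_NO_TOKEN,
+ *			  1, &rgn, &nret) == 0 &&
+ *	    nret == 1 && (rgn.rg_flags & DM_REGION_READ))
+ *		;	(whole file is read-managed: rg_offset == rg_size == 0)
+ */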
+
+
+STATIC int
+xfs_dm_getall_dmattr(
+ struct inode *inode,
+ dm_right_t right,
+ size_t buflen,
+ void __user *bufp,
+ size_t __user *rlenp)
+{
+ attrlist_cursor_kern_t cursor;
+ attrlist_t *attrlist;
+ dm_attrlist_t __user *ulist;
+ int *last_link;
+ int alignment;
+ int total_size;
+ int list_size = 8192; /* should be big enough */
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ /* Verify that the user gave us a buffer that is 4-byte aligned, lock
+ it down, and work directly within that buffer. As a side-effect,
+ values of buflen < sizeof(int) return EINVAL.
+ */
+
+ alignment = sizeof(int) - 1;
+ if ((((__psint_t)bufp & alignment) != 0) ||
+ !access_ok(VERIFY_WRITE, bufp, buflen)) {
+ return(-EFAULT);
+ }
+ buflen &= ~alignment; /* round down the alignment */
+
+ /* Initialize all the structures and variables for the main loop. */
+
+ memset(&cursor, 0, sizeof(cursor));
+ attrlist = (attrlist_t *)kmem_alloc(list_size, KM_SLEEP);
+ total_size = 0;
+ ulist = (dm_attrlist_t *)bufp;
+ last_link = NULL;
+
+	/* Use xfs_attr_list to get the names of the DMAPI attributes, and
+	   use xfs_attr_get to get their values.  There is a risk here that
+	   the DMAPI attributes could change between the xfs_attr_list and
+	   xfs_attr_get calls.  If we can detect that, we return EIO to
+	   notify the user.
+	*/
+
+ do {
+ int i;
+
+ /* Get a buffer full of attribute names. If there aren't any
+ more or if we encounter an error, then finish up.
+ */
+
+ error = xfs_attr_list(XFS_I(inode), (char *)attrlist, list_size,
+ ATTR_ROOT, &cursor);
+ DM_EA_XLATE_ERR(error);
+
+ if (error || attrlist->al_count == 0)
+ break;
+
+ for (i = 0; i < attrlist->al_count; i++) {
+ attrlist_ent_t *entry;
+ char *user_name;
+ int size_needed;
+ int value_len;
+
+ /* Skip over all non-DMAPI attributes. If the
+ attribute name is too long, we assume it is
+ non-DMAPI even if it starts with the correct
+ prefix.
+ */
+
+ entry = ATTR_ENTRY(attrlist, i);
+ if (strncmp(entry->a_name, dmattr_prefix, DMATTR_PREFIXLEN))
+ continue;
+ user_name = &entry->a_name[DMATTR_PREFIXLEN];
+ if (strlen(user_name) > DM_ATTR_NAME_SIZE)
+ continue;
+
+ /* We have a valid DMAPI attribute to return. If it
+ won't fit in the user's buffer, we still need to
+ keep track of the number of bytes for the user's
+ next call.
+ */
+
+
+ size_needed = sizeof(*ulist) + entry->a_valuelen;
+ size_needed = (size_needed + alignment) & ~alignment;
+
+ total_size += size_needed;
+ if (total_size > buflen)
+ continue;
+
+ /* Start by filling in all the fields in the
+ dm_attrlist_t structure.
+ */
+
+ strncpy((char *)ulist->al_name.an_chars, user_name,
+ DM_ATTR_NAME_SIZE);
+ ulist->al_data.vd_offset = sizeof(*ulist);
+ ulist->al_data.vd_length = entry->a_valuelen;
+ ulist->_link = size_needed;
+ last_link = &ulist->_link;
+
+ /* Next read the attribute's value into its correct
+ location after the dm_attrlist structure. Any sort
+ of error indicates that the data is moving under us,
+ so we return EIO to let the user know.
+ */
+
+ value_len = entry->a_valuelen;
+
+ error = xfs_attr_get(XFS_I(inode), entry->a_name,
+ (void *)(ulist + 1), &value_len,
+ ATTR_ROOT);
+ DM_EA_XLATE_ERR(error);
+
+ if (error || value_len != entry->a_valuelen) {
+ error = EIO;
+ break;
+ }
+
+ ulist = (dm_attrlist_t *)((char *)ulist + ulist->_link);
+ }
+ } while (!error && attrlist->al_more);
+ if (last_link)
+ *last_link = 0;
+
+ if (!error && total_size > buflen)
+ error = E2BIG;
+ if (!error || error == E2BIG) {
+ if (put_user(total_size, rlenp))
+ error = EFAULT;
+ }
+
+ kmem_free(attrlist);
+ return(-error); /* Return negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_getall_inherit(
+ struct inode *inode,
+ dm_right_t right,
+ u_int nelem,
+ dm_inherit_t __user *inheritbufp,
+ u_int __user *nelemp)
+{
+ return(-ENOSYS); /* Return negative error to DMAPI */
+}
+
+
+/* Initialize location pointer for subsequent dm_get_dirattrs,
+ dm_get_bulkattr, and dm_get_bulkall calls. The same initialization must
+ work for inode-based routines (dm_get_dirattrs) and filesystem-based
+ routines (dm_get_bulkattr and dm_get_bulkall). Filesystem-based functions
+ call this routine using the filesystem's root inode.
+*/
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_init_attrloc(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrloc_t __user *locp)
+{
+ dm_attrloc_t loc = 0;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ if (copy_to_user( locp, &loc, sizeof(loc)))
+ return(-EFAULT);
+ return(0);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_mkdir_by_handle(
+ struct inode *inode,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname)
+{
+ return(-ENOSYS); /* Return negative error to DMAPI */
+}
+
+
+/*
+ * Probe and Punch
+ *
+ * Hole punching alignment is based on the underlying device's base
+ * allocation size. Because the DMAPI spec leaves the alignment
+ * undefined, we are free to choose it here: round inwards (offset up
+ * and length down) to the block, extent or page size, whichever is
+ * bigger. Our DMAPI implementation rounds the hole geometry strictly
+ * inwards; if that is not possible, both xfs_dm_probe_hole and
+ * xfs_dm_punch_hole return EINVAL, which differs from the DMAPI spec.
+ * Note that length = 0 is special - it means "punch to EOF", and in
+ * that case we remove everything past offset (including preallocation
+ * beyond EOF).
+ */
+
+STATIC int
+xfs_dm_round_hole(
+ dm_off_t offset,
+ dm_size_t length,
+ dm_size_t align,
+ xfs_fsize_t filesize,
+ dm_off_t *roff,
+ dm_size_t *rlen)
+{
+
+ dm_off_t off = offset;
+ dm_size_t len = length;
+
+ /* Try to round offset up to the nearest boundary */
+ *roff = roundup_64(off, align);
+ if ((*roff >= filesize) || (len && (len < align)))
+ return -EINVAL;
+
+ if ((len == 0) || ((off + len) == filesize)) {
+ /* punch to EOF */
+ *rlen = 0;
+ } else {
+ /* Round length down to the nearest boundary. */
+ ASSERT(len >= align);
+ ASSERT(align > (*roff - off));
+ len -= *roff - off;
+ *rlen = len - do_mod(len, align);
+ if (*rlen == 0)
+ return -EINVAL; /* requested length is too small */
+ }
+#ifdef CONFIG_DMAPI_DEBUG
+	printk("xfs_dm_round_hole: off %lld, len %lld, align %lld, "
+	       "filesize %lld, roff %lld, rlen %lld\n",
+	       (long long)offset, (long long)length, (long long)align,
+	       (long long)filesize, (long long)*roff, (long long)*rlen);
+#endif
+ return 0; /* hole geometry successfully rounded */
+}
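+
+/*
+ * Worked example of the rounding policy (hypothetical numbers): with
+ * align == 4096 and filesize == 20000, a request of off 1000/len 10000
+ * rounds inwards to roff 4096/rlen 4096 (the one whole block inside
+ * [1000, 11000)), while off 1000/len 2000 fails with -EINVAL because
+ * len is non-zero and smaller than align.
+ */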
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_probe_hole(
+ struct inode *inode,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len,
+ dm_off_t __user *roffp,
+ dm_size_t __user *rlenp)
+{
+ dm_off_t roff;
+ dm_size_t rlen;
+ xfs_inode_t *ip = XFS_I(inode);
+ xfs_mount_t *mp;
+ uint lock_flags;
+ xfs_fsize_t realsize;
+ dm_size_t align;
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return -EACCES;
+
+ if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+ return -EINVAL;
+
+ mp = ip->i_mount;
+ lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+ realsize = ip->i_size;
+ xfs_iunlock(ip, lock_flags);
+
+ if ((off + len) > realsize)
+ return -E2BIG;
+
+ align = 1 << mp->m_sb.sb_blocklog;
+
+ error = xfs_dm_round_hole(off, len, align, realsize, &roff, &rlen);
+ if (error)
+ return error;
+
+ if (copy_to_user( roffp, &roff, sizeof(roff)))
+ return -EFAULT;
+ if (copy_to_user( rlenp, &rlen, sizeof(rlen)))
+ return -EFAULT;
+ return(0);
+}
+
+
+STATIC int
+xfs_dm_punch_hole(
+ struct inode *inode,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len)
+{
+ xfs_flock64_t bf;
+ int error = 0;
+ xfs_inode_t *ip = XFS_I(inode);
+ xfs_mount_t *mp;
+ dm_size_t align;
+ xfs_fsize_t realsize;
+ dm_off_t roff;
+ dm_size_t rlen;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return -EACCES;
+
+ /* Make sure there are no leases. */
+ error = break_lease(inode, FMODE_WRITE);
+ if (error)
+ return -EBUSY;
+
+ error = get_write_access(inode);
+ if (error)
+ return -EBUSY;
+
+ mp = ip->i_mount;
+
+ down_rw_sems(inode, DM_SEM_FLAG_WR);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ realsize = ip->i_size;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ align = xfs_get_extsz_hint(ip);
+ if (align == 0)
+ align = 1;
+
+ align <<= mp->m_sb.sb_blocklog;
+
+ if ((off + len) > realsize) {
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ error = -E2BIG;
+ goto up_and_out;
+ }
+
+ if ((off + len) == realsize)
+ len = 0;
+
+ error = xfs_dm_round_hole(off, len, align, realsize, &roff, &rlen);
+ if (error || (off != roff) || (len != rlen)) {
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ error = -EINVAL;
+ goto up_and_out;
+ }
+
+ bf.l_type = 0;
+ bf.l_whence = 0;
+ bf.l_start = (xfs_off_t)off;
+ if (len) {
+ bf.l_len = len;
+ }
+ else {
+ /*
+ * When we are punching to EOF, we have to make sure we punch
+ * the last partial block that contains EOF. Round up
+ * the length to make sure we punch the block and not just
+ * zero it.
+ */
+ bf.l_len = roundup_64((realsize - off), mp->m_sb.sb_blocksize);
+ }
+
+#ifdef CONFIG_DMAPI_DEBUG
+	printk("xfs_dm_punch_hole: off %lld, len %lld, align %lld\n",
+	       (long long)off, (long long)len, (long long)align);
+#endif
+
+ error = xfs_change_file_space(ip, XFS_IOC_UNRESVSP, &bf,
- (xfs_off_t)off, sys_cred,
- XFS_ATTR_DMI|XFS_ATTR_NOLOCK);
++ (xfs_off_t)off, XFS_ATTR_DMI|XFS_ATTR_NOLOCK);
+
+ /*
+ * if punching to end of file, kill any blocks past EOF that
+ * may have been (speculatively) preallocated. No point in
+ * leaving them around if we are migrating the file....
+ */
+ if (!error && (len == 0)) {
+ error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_NOLOCK);
+ }
+
+ /*
+ * negate the error for return here as core XFS functions return
+ * positive error numbers
+ */
+ if (error)
+ error = -error;
+
+ /* Let threads in send_data_event know we punched the file. */
+ ip->i_d.di_dmstate++;
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- xfs_iflags_set(ip, XFS_IMODIFIED);
+
+up_and_out:
+ up_rw_sems(inode, DM_SEM_FLAG_WR);
+ put_write_access(inode);
+
+ return error;
+}
+
+
+STATIC int
+xfs_dm_read_invis_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp,
+ int *rvp)
+{
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_SHARED)
+ return(-EACCES);
+
+ return(-xfs_dm_rdwr(inode, 0, FMODE_READ, off, len, bufp, rvp));
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_release_right(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type) /* DM_FSYS_OBJ or zero */
+{
+#ifdef DEBUG_RIGHTS
+ char buffer[sizeof(dm_handle_t) * 2 + 1];
+
+ if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_release_right: old %d type %d handle %s\n",
+			right, type, buffer);
+	} else {
+		printk("dm_release_right: old %d type %d handle "
+			"<INVALID>\n", right, type);
+ }
+#endif /* DEBUG_RIGHTS */
+ return(0);
+}
+
+
+STATIC int
+xfs_dm_remove_dmattr(
+ struct inode *inode,
+ dm_right_t right,
+ int setdtime,
+ dm_attrname_t __user *attrnamep)
+{
+ dm_dkattrname_t name;
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+ if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0)
+ return(-error); /* Return negative error to DMAPI */
+
+ /* Remove the attribute from the object. */
+
+ error = xfs_attr_remove(XFS_I(inode), name.dan_chars, setdtime ?
+ ATTR_ROOT : (ATTR_ROOT|ATTR_KERNOTIME));
+ DM_EA_XLATE_ERR(error);
+
+ if (error == ENOATTR)
+ error = ENOENT;
+ return(-error); /* Return negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_request_right(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type, /* DM_FSYS_OBJ or zero */
+ u_int flags,
+ dm_right_t newright)
+{
+#ifdef DEBUG_RIGHTS
+ char buffer[sizeof(dm_handle_t) * 2 + 1];
+
+ if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_request_right: old %d new %d type %d flags 0x%x "
+			"handle %s\n", right, newright, type, flags, buffer);
+	} else {
+		printk("dm_request_right: old %d new %d type %d flags 0x%x "
+			"handle <INVALID>\n", right, newright, type, flags);
+ }
+#endif /* DEBUG_RIGHTS */
+ return(0);
+}
+
+
+STATIC int
+xfs_dm_set_dmattr(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ int setdtime,
+ size_t buflen,
+ void __user *bufp)
+{
+ dm_dkattrname_t name;
+ char *value;
+ int alloc_size;
+ int error;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+ if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0)
+ return(-error); /* Return negative error to DMAPI */
+ if (buflen > ATTR_MAX_VALUELEN)
+ return(-E2BIG);
+
+ /* Copy in the attribute's value and store the <name,value> pair in
+ the object. We allocate a buffer of at least one byte even if the
+ caller specified a buflen of zero. (A buflen of zero is considered
+ valid.)
+ */
+
+ alloc_size = (buflen == 0) ? 1 : buflen;
+ value = kmem_alloc(alloc_size, KM_SLEEP);
+ if (copy_from_user( value, bufp, buflen)) {
+ error = EFAULT;
+ } else {
+ error = xfs_attr_set(XFS_I(inode), name.dan_chars, value, buflen,
+ setdtime ? ATTR_ROOT :
+ (ATTR_ROOT|ATTR_KERNOTIME));
+ DM_EA_XLATE_ERR(error);
+ }
+ kmem_free(value);
+ return(-error); /* Return negative error to DMAPI */
+}
+
+STATIC int
+xfs_dm_set_eventlist(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type,
+ dm_eventset_t *eventsetp, /* in kernel space! */
+ u_int maxevent)
+{
+ int error;
+ xfs_inode_t *ip = XFS_I(inode);
+
+ /* Returns negative errors to DMAPI */
+
+ if (type == DM_FSYS_OBJ) {
+ error = xfs_dm_fs_set_eventlist(ip->i_mount, right, eventsetp, maxevent);
+ } else {
+ error = xfs_dm_f_set_eventlist(ip, right, eventsetp, maxevent);
+ }
+ return(-error); /* Return negative error to DMAPI */
+}
+
+
+/*
+ * This turned out not to be XFS-specific, but leave it here with get_fileattr.
+ */
+
+STATIC int
+xfs_dm_set_fileattr(
+ struct inode *inode,
+ dm_right_t right,
+ u_int mask,
+ dm_fileattr_t __user *statp)
+{
+ dm_fileattr_t stat;
+ struct iattr iattr;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+ if (copy_from_user( &stat, statp, sizeof(stat)))
+ return(-EFAULT);
+
+ iattr.ia_valid = 0;
+
+ if (mask & DM_AT_MODE) {
+ iattr.ia_valid |= ATTR_MODE;
+ iattr.ia_mode = stat.fa_mode;
+ }
+ if (mask & DM_AT_UID) {
+ iattr.ia_valid |= ATTR_UID;
+ iattr.ia_uid = stat.fa_uid;
+ }
+ if (mask & DM_AT_GID) {
+ iattr.ia_valid |= ATTR_GID;
+ iattr.ia_gid = stat.fa_gid;
+ }
+ if (mask & DM_AT_ATIME) {
+ iattr.ia_valid |= ATTR_ATIME;
+ iattr.ia_atime.tv_sec = stat.fa_atime;
+ iattr.ia_atime.tv_nsec = 0;
+ inode->i_atime.tv_sec = stat.fa_atime;
+ }
+ if (mask & DM_AT_MTIME) {
+ iattr.ia_valid |= ATTR_MTIME;
+ iattr.ia_mtime.tv_sec = stat.fa_mtime;
+ iattr.ia_mtime.tv_nsec = 0;
+ }
+ if (mask & DM_AT_CTIME) {
+ iattr.ia_valid |= ATTR_CTIME;
+ iattr.ia_ctime.tv_sec = stat.fa_ctime;
+ iattr.ia_ctime.tv_nsec = 0;
+ }
+
+ /*
+ * DM_AT_DTIME only takes effect if DM_AT_CTIME is not specified. We
+ * overload ctime to also act as dtime, i.e. DM_CONFIG_DTIME_OVERLOAD.
+ */
+ if ((mask & DM_AT_DTIME) && !(mask & DM_AT_CTIME)) {
+ iattr.ia_valid |= ATTR_CTIME;
+ iattr.ia_ctime.tv_sec = stat.fa_dtime;
+ iattr.ia_ctime.tv_nsec = 0;
+ }
+ if (mask & DM_AT_SIZE) {
+ iattr.ia_valid |= ATTR_SIZE;
+ iattr.ia_size = stat.fa_size;
+ }
+
- return -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_DMI, NULL);
++ return -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_DMI);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_set_inherit(
+ struct inode *inode,
+ dm_right_t right,
+ dm_attrname_t __user *attrnamep,
+ mode_t mode)
+{
+ return(-ENOSYS); /* Return negative error to DMAPI */
+}
+
+
+STATIC int
+xfs_dm_set_region(
+ struct inode *inode,
+ dm_right_t right,
+ u_int nelem,
+ dm_region_t __user *regbufp,
+ dm_boolean_t __user *exactflagp)
+{
+ xfs_inode_t *ip = XFS_I(inode);
+ xfs_trans_t *tp;
+ xfs_mount_t *mp;
+ dm_region_t region;
+ dm_eventset_t new_mask;
+ dm_eventset_t mr_mask;
+ int error;
+ u_int exactflag;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+ /* If the caller gave us more than one dm_region_t structure, complain.
+	   (The caller has to use dm_get_config() to find out what our limit is.)
+ */
+
+ if (nelem > 1)
+ return(-E2BIG);
+
+ /* If the user provided a dm_region_t structure, then copy it in,
+ validate it, and convert its flags to the corresponding bits in a
+ dm_set_eventlist() event mask. A call with zero regions is
+ equivalent to clearing all region flags.
+ */
+
+ new_mask = 0;
+ if (nelem == 1) {
+		if (copy_from_user( &region, regbufp, sizeof(region)))
+ return(-EFAULT);
+
+ if (region.rg_flags & ~(DM_REGION_READ|DM_REGION_WRITE|DM_REGION_TRUNCATE))
+ return(-EINVAL);
+ if (region.rg_flags & DM_REGION_READ)
+ new_mask |= 1 << DM_EVENT_READ;
+ if (region.rg_flags & DM_REGION_WRITE)
+ new_mask |= 1 << DM_EVENT_WRITE;
+ if (region.rg_flags & DM_REGION_TRUNCATE)
+ new_mask |= 1 << DM_EVENT_TRUNCATE;
+ }
+ mr_mask = (1 << DM_EVENT_READ) | (1 << DM_EVENT_WRITE) | (1 << DM_EVENT_TRUNCATE);
+
+ /* Get the file's existing event mask, clear the old managed region
+ bits, add in the new ones, and update the file's mask.
+ */
+
+ if (new_mask & prohibited_mr_events(inode->i_mapping)) {
+ /* If the change is simply to remove the READ
+ * bit, then that's always okay. Otherwise, it's busy.
+ */
+ dm_eventset_t m1;
+ m1 = ip->i_d.di_dmevmask & ((1 << DM_EVENT_WRITE) | (1 << DM_EVENT_TRUNCATE));
+ if (m1 != new_mask) {
+ return -EBUSY;
+ }
+ }
+
+ mp = ip->i_mount;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return(-error); /* Return negative error to DMAPI */
+ }
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ ip->i_d.di_dmevmask = (ip->i_d.di_dmevmask & ~mr_mask) | new_mask;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ igrab(inode);
+ xfs_trans_commit(tp, 0);
+
+ /* Return the proper value for *exactflagp depending upon whether or not
+ we "changed" the user's managed region. In other words, if the user
+ specified a non-zero value for either rg_offset or rg_size, we
+ round each of those values back to zero.
+ */
+
+ if (nelem && (region.rg_offset || region.rg_size)) {
+ exactflag = DM_FALSE; /* user region was changed */
+ } else {
+ exactflag = DM_TRUE; /* user region was unchanged */
+ }
+ if (copy_to_user( exactflagp, &exactflag, sizeof(exactflag)))
+ return(-EFAULT);
+ return(0);
+}
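+
+/*
+ * Illustrative user-space sketch (assumes the XDSM dm_set_region()
+ * binding and a token holding DM_RIGHT_EXCL; not part of this patch):
+ * mark the whole file read-managed.
+ *
+ *	dm_region_t	rgn = { 0, 0, DM_REGION_READ };
+ *	dm_boolean_t	exact;
+ *
+ *	dm_set_region(sid, hanp, hlen, token, 1, &rgn, &exact);
+ *	(exact comes back DM_TRUE since rg_offset and rg_size are zero)
+ */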
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_symlink_by_handle(
+ struct inode *inode,
+ dm_right_t right,
+ void __user *hanp,
+ size_t hlen,
+ char __user *cname,
+ char __user *path)
+{
+ return(-ENOSYS); /* Return negative errors to DMAPI */
+}
+
+
+/*
+ * xfs_dm_sync_by_handle needs to do the same thing as sys_fsync()
+ */
+STATIC int
+xfs_dm_sync_by_handle(
+ struct inode *inode,
+ dm_right_t right)
+{
+ int err, ret;
+ xfs_inode_t *ip = XFS_I(inode);
+
+ /* Returns negative errors to DMAPI */
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+	/* We need to protect against concurrent writers. */
+ ret = filemap_fdatawrite(inode->i_mapping);
+ down_rw_sems(inode, DM_FLAGS_IMUX);
+ err = -xfs_fsync(ip);
+ if (!ret)
+ ret = err;
+ up_rw_sems(inode, DM_FLAGS_IMUX);
+ err = filemap_fdatawait(inode->i_mapping);
+ if (!ret)
+ ret = err;
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ return ret;
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_upgrade_right(
+ struct inode *inode,
+ dm_right_t right,
+ u_int type) /* DM_FSYS_OBJ or zero */
+{
+#ifdef DEBUG_RIGHTS
+ char buffer[sizeof(dm_handle_t) * 2 + 1];
+
+ if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_upgrade_right: old %d new %d type %d handle %s\n",
+			right, DM_RIGHT_EXCL, type, buffer);
+	} else {
+		printk("dm_upgrade_right: old %d new %d type %d handle "
+			"<INVALID>\n", right, DM_RIGHT_EXCL, type);
+ }
+#endif /* DEBUG_RIGHTS */
+ return(0);
+}
+
+
+STATIC int
+xfs_dm_write_invis_rvp(
+ struct inode *inode,
+ dm_right_t right,
+ int flags,
+ dm_off_t off,
+ dm_size_t len,
+ void __user *bufp,
+ int *rvp)
+{
+ int fflag = 0;
+
+ /* Returns negative errors to DMAPI */
+
+ if (right < DM_RIGHT_EXCL)
+ return(-EACCES);
+
+ if (flags & DM_WRITE_SYNC)
+ fflag |= O_SYNC;
+ return(-xfs_dm_rdwr(inode, fflag, FMODE_WRITE, off, len, bufp, rvp));
+}
+
+
+STATIC void
+xfs_dm_obj_ref_hold(
+ struct inode *inode)
+{
+ igrab(inode);
+}
+
+
+static fsys_function_vector_t xfs_fsys_vector[DM_FSYS_MAX];
+
+
+STATIC int
+xfs_dm_get_dmapiops(
+ struct super_block *sb,
+ void *addr)
+{
+ static int initialized = 0;
+ dm_fcntl_vector_t *vecrq;
+ fsys_function_vector_t *vecp;
+ int i = 0;
+
+ vecrq = (dm_fcntl_vector_t *)addr;
+ vecrq->count =
+ sizeof(xfs_fsys_vector) / sizeof(xfs_fsys_vector[0]);
+ vecrq->vecp = xfs_fsys_vector;
+ if (initialized)
+ return(0);
+ vecrq->code_level = DM_CLVL_XOPEN;
+ vecp = xfs_fsys_vector;
+
+ vecp[i].func_no = DM_FSYS_CLEAR_INHERIT;
+ vecp[i++].u_fc.clear_inherit = xfs_dm_clear_inherit;
+ vecp[i].func_no = DM_FSYS_CREATE_BY_HANDLE;
+ vecp[i++].u_fc.create_by_handle = xfs_dm_create_by_handle;
+ vecp[i].func_no = DM_FSYS_DOWNGRADE_RIGHT;
+ vecp[i++].u_fc.downgrade_right = xfs_dm_downgrade_right;
+ vecp[i].func_no = DM_FSYS_GET_ALLOCINFO_RVP;
+ vecp[i++].u_fc.get_allocinfo_rvp = xfs_dm_get_allocinfo_rvp;
+ vecp[i].func_no = DM_FSYS_GET_BULKALL_RVP;
+ vecp[i++].u_fc.get_bulkall_rvp = xfs_dm_get_bulkall_rvp;
+ vecp[i].func_no = DM_FSYS_GET_BULKATTR_RVP;
+ vecp[i++].u_fc.get_bulkattr_rvp = xfs_dm_get_bulkattr_rvp;
+ vecp[i].func_no = DM_FSYS_GET_CONFIG;
+ vecp[i++].u_fc.get_config = xfs_dm_get_config;
+ vecp[i].func_no = DM_FSYS_GET_CONFIG_EVENTS;
+ vecp[i++].u_fc.get_config_events = xfs_dm_get_config_events;
+ vecp[i].func_no = DM_FSYS_GET_DESTROY_DMATTR;
+ vecp[i++].u_fc.get_destroy_dmattr = xfs_dm_get_destroy_dmattr;
+ vecp[i].func_no = DM_FSYS_GET_DIOINFO;
+ vecp[i++].u_fc.get_dioinfo = xfs_dm_get_dioinfo;
+ vecp[i].func_no = DM_FSYS_GET_DIRATTRS_RVP;
+ vecp[i++].u_fc.get_dirattrs_rvp = xfs_dm_get_dirattrs_rvp;
+ vecp[i].func_no = DM_FSYS_GET_DMATTR;
+ vecp[i++].u_fc.get_dmattr = xfs_dm_get_dmattr;
+ vecp[i].func_no = DM_FSYS_GET_EVENTLIST;
+ vecp[i++].u_fc.get_eventlist = xfs_dm_get_eventlist;
+ vecp[i].func_no = DM_FSYS_GET_FILEATTR;
+ vecp[i++].u_fc.get_fileattr = xfs_dm_get_fileattr;
+ vecp[i].func_no = DM_FSYS_GET_REGION;
+ vecp[i++].u_fc.get_region = xfs_dm_get_region;
+ vecp[i].func_no = DM_FSYS_GETALL_DMATTR;
+ vecp[i++].u_fc.getall_dmattr = xfs_dm_getall_dmattr;
+ vecp[i].func_no = DM_FSYS_GETALL_INHERIT;
+ vecp[i++].u_fc.getall_inherit = xfs_dm_getall_inherit;
+ vecp[i].func_no = DM_FSYS_INIT_ATTRLOC;
+ vecp[i++].u_fc.init_attrloc = xfs_dm_init_attrloc;
+ vecp[i].func_no = DM_FSYS_MKDIR_BY_HANDLE;
+ vecp[i++].u_fc.mkdir_by_handle = xfs_dm_mkdir_by_handle;
+ vecp[i].func_no = DM_FSYS_PROBE_HOLE;
+ vecp[i++].u_fc.probe_hole = xfs_dm_probe_hole;
+ vecp[i].func_no = DM_FSYS_PUNCH_HOLE;
+ vecp[i++].u_fc.punch_hole = xfs_dm_punch_hole;
+ vecp[i].func_no = DM_FSYS_READ_INVIS_RVP;
+ vecp[i++].u_fc.read_invis_rvp = xfs_dm_read_invis_rvp;
+ vecp[i].func_no = DM_FSYS_RELEASE_RIGHT;
+ vecp[i++].u_fc.release_right = xfs_dm_release_right;
+ vecp[i].func_no = DM_FSYS_REMOVE_DMATTR;
+ vecp[i++].u_fc.remove_dmattr = xfs_dm_remove_dmattr;
+ vecp[i].func_no = DM_FSYS_REQUEST_RIGHT;
+ vecp[i++].u_fc.request_right = xfs_dm_request_right;
+ vecp[i].func_no = DM_FSYS_SET_DMATTR;
+ vecp[i++].u_fc.set_dmattr = xfs_dm_set_dmattr;
+ vecp[i].func_no = DM_FSYS_SET_EVENTLIST;
+ vecp[i++].u_fc.set_eventlist = xfs_dm_set_eventlist;
+ vecp[i].func_no = DM_FSYS_SET_FILEATTR;
+ vecp[i++].u_fc.set_fileattr = xfs_dm_set_fileattr;
+ vecp[i].func_no = DM_FSYS_SET_INHERIT;
+ vecp[i++].u_fc.set_inherit = xfs_dm_set_inherit;
+ vecp[i].func_no = DM_FSYS_SET_REGION;
+ vecp[i++].u_fc.set_region = xfs_dm_set_region;
+ vecp[i].func_no = DM_FSYS_SYMLINK_BY_HANDLE;
+ vecp[i++].u_fc.symlink_by_handle = xfs_dm_symlink_by_handle;
+ vecp[i].func_no = DM_FSYS_SYNC_BY_HANDLE;
+ vecp[i++].u_fc.sync_by_handle = xfs_dm_sync_by_handle;
+ vecp[i].func_no = DM_FSYS_UPGRADE_RIGHT;
+ vecp[i++].u_fc.upgrade_right = xfs_dm_upgrade_right;
+ vecp[i].func_no = DM_FSYS_WRITE_INVIS_RVP;
+ vecp[i++].u_fc.write_invis_rvp = xfs_dm_write_invis_rvp;
+ vecp[i].func_no = DM_FSYS_OBJ_REF_HOLD;
+ vecp[i++].u_fc.obj_ref_hold = xfs_dm_obj_ref_hold;
+
+	initialized = 1;
+	return(0);
+}
+
+
+/* xfs_dm_send_mmap_event - send events needed for memory mapping a file.
+ *
+ * This is a workaround called for files that are about to be
+ * mapped. DMAPI events are not being generated at a low enough level
+ * in the kernel for page reads/writes to generate the correct events.
+ * So for memory-mapped files we generate read or write events for the
+ * whole byte range being mapped. If the mmap call can never cause a
+ * write to the file, then only a read event is sent.
+ *
+ * Code elsewhere prevents adding managed regions to a file while it
+ * is still mapped.
+ */
+
+STATIC int
+xfs_dm_send_mmap_event(
+ struct vm_area_struct *vma,
+ unsigned int wantflag)
+{
+ xfs_inode_t *ip;
+ int error = 0;
+ dm_eventtype_t max_event = DM_EVENT_READ;
+ xfs_fsize_t filesize;
+ xfs_off_t length, end_of_area, evsize, offset;
+ int iolock;
+
+ if (!vma->vm_file)
+ return 0;
+
+ ip = XFS_I(vma->vm_file->f_dentry->d_inode);
+
+ if (!S_ISREG(vma->vm_file->f_dentry->d_inode->i_mode) ||
+ !(ip->i_mount->m_flags & XFS_MOUNT_DMAPI))
+ return 0;
+
+ /* If they specifically asked for 'read', then give it to them.
+ * Otherwise, see if it's possible to give them 'write'.
+ */
+	if (wantflag & VM_READ) {
+		max_event = DM_EVENT_READ;
+	} else if (!(vma->vm_flags & VM_DENYWRITE)) {
+		if ((wantflag & VM_WRITE) || (vma->vm_flags & VM_WRITE))
+			max_event = DM_EVENT_WRITE;
+	}
+
+	if ((wantflag & VM_WRITE) && (max_event != DM_EVENT_WRITE))
+		return -EACCES;
+
+ /* Figure out how much of the file is being requested by the user. */
+ offset = 0; /* beginning of file, for now */
+ length = 0; /* whole file, for now */
+
+ filesize = ip->i_new_size;
+ if (filesize < ip->i_size) {
+ filesize = ip->i_size;
+ }
+
+ /* Set first byte number beyond the map area. */
+
+ if (length) {
+ end_of_area = offset + length;
+ if (end_of_area > filesize)
+ end_of_area = filesize;
+ } else {
+ end_of_area = filesize;
+ }
+
+ /* Set the real amount being mapped. */
+ evsize = end_of_area - offset;
+ if (evsize < 0)
+ evsize = 0;
+
+ if (max_event == DM_EVENT_READ)
+ iolock = XFS_IOLOCK_SHARED;
+ else
+ iolock = XFS_IOLOCK_EXCL;
+
+ xfs_ilock(ip, iolock);
+ /* If write possible, try a DMAPI write event */
+ if (max_event == DM_EVENT_WRITE && DM_EVENT_ENABLED(ip, max_event)) {
+ error = xfs_dm_send_data_event(max_event, ip, offset,
+ evsize, 0, &iolock);
+ goto out_unlock;
+ }
+
+ /* Try a read event if max_event was != DM_EVENT_WRITE or if it
+ * was DM_EVENT_WRITE but the WRITE event was not enabled.
+ */
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_READ)) {
+ error = xfs_dm_send_data_event(DM_EVENT_READ, ip, offset,
+ evsize, 0, &iolock);
+ }
+out_unlock:
+ xfs_iunlock(ip, iolock);
+ return -error;
+}
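+
+/*
+ * A minimal sketch (assumed shape, mirroring xfs_vm_fault() further on)
+ * of how ->mmap wires this event in: deliver the event for the whole
+ * mapping before installing the DMAPI-aware vm_ops; error handling
+ * simplified.
+ *
+ *	if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ *		if (XFS_SEND_MMAP(mp, vma, 0))
+ *			return -EACCES;
+ *		vma->vm_ops = &xfs_dmapi_file_vm_ops;
+ *	}
+ */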
+
+
+STATIC int
+xfs_dm_send_destroy_event(
+ xfs_inode_t *ip,
+ dm_right_t vp_right) /* always DM_RIGHT_NULL */
+{
+ /* Returns positive errors to XFS */
- return -dm_send_destroy_event(ip->i_vnode, vp_right);
++ return -dm_send_destroy_event(&ip->i_vnode, vp_right);
+}
+
+
+STATIC int
+xfs_dm_send_namesp_event(
+ dm_eventtype_t event,
+ struct xfs_mount *mp,
+ xfs_inode_t *ip1,
+ dm_right_t vp1_right,
+ xfs_inode_t *ip2,
+ dm_right_t vp2_right,
+ const char *name1,
+ const char *name2,
+ mode_t mode,
+ int retcode,
+ int flags)
+{
+ /* Returns positive errors to XFS */
+ return -dm_send_namesp_event(event, mp ? mp->m_super : NULL,
- ip1->i_vnode, vp1_right,
- ip2 ? ip2->i_vnode : NULL, vp2_right,
++ &ip1->i_vnode, vp1_right,
++ ip2 ? &ip2->i_vnode : NULL, vp2_right,
+ name1, name2,
+ mode, retcode, flags);
+}
+
+STATIC int
+xfs_dm_send_mount_event(
+ struct xfs_mount *mp,
+ dm_right_t root_right,
+ char *mtpt,
+ char *fsname)
+{
+ return dm_send_mount_event(mp->m_super, root_right,
+ NULL, DM_RIGHT_NULL,
+ mp->m_rootip ? VFS_I(mp->m_rootip) : NULL,
+ DM_RIGHT_NULL, mtpt, fsname);
+}
+
+STATIC void
+xfs_dm_send_unmount_event(
+ struct xfs_mount *mp,
+ xfs_inode_t *ip, /* NULL if unmount successful */
+ dm_right_t vfsp_right,
+ mode_t mode,
+ int retcode, /* errno, if unmount failed */
+ int flags)
+{
- dm_send_unmount_event(mp->m_super, ip ? ip->i_vnode : NULL,
++ dm_send_unmount_event(mp->m_super, ip ? &ip->i_vnode : NULL,
+ vfsp_right, mode, retcode, flags);
+}
+
+
+/*
+ * Data migration operations accessed by the rest of XFS.
+ * When DMAPI support is configured in, this vector is used.
+ */
+
+xfs_dmops_t xfs_dmcore_xfs = {
+ .xfs_send_data = xfs_dm_send_data_event,
+ .xfs_send_mmap = xfs_dm_send_mmap_event,
+ .xfs_send_destroy = xfs_dm_send_destroy_event,
+ .xfs_send_namesp = xfs_dm_send_namesp_event,
+ .xfs_send_mount = xfs_dm_send_mount_event,
+ .xfs_send_unmount = xfs_dm_send_unmount_event,
+};
+EXPORT_SYMBOL(xfs_dmcore_xfs);
+
- STATIC const struct file_operations *
- xfs_dm_get_invis_ops(
- struct inode *ip)
- {
- return &xfs_invis_file_operations;
- }
-
+STATIC int
+xfs_dm_fh_to_inode(
+ struct super_block *sb,
+ struct inode **inode,
+ dm_fid_t *dmfid)
+{
+ xfs_mount_t *mp = XFS_M(sb);
+ xfs_inode_t *ip;
+ xfs_ino_t ino;
+ unsigned int igen;
+ int error;
+
+ *inode = NULL;
+
+ if (!dmfid->dm_fid_len) {
+ /* filesystem handle */
- *inode = igrab(mp->m_rootip->i_vnode);
++ *inode = igrab(&mp->m_rootip->i_vnode);
+ if (!*inode)
+ return -ENOENT;
+ return 0;
+ }
+
+ if (dmfid->dm_fid_len != sizeof(*dmfid) - sizeof(dmfid->dm_fid_len))
+ return -EINVAL;
+
+ ino = dmfid->dm_fid_ino;
+ igen = dmfid->dm_fid_gen;
+
+ /* fail requests for ino 0 gracefully. */
+ if (ino == 0)
+ return -ESTALE;
+
+ error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
+ if (error)
+ return -error;
+ if (!ip)
+ return -EIO;
+
+ if (!ip->i_d.di_mode || ip->i_d.di_gen != igen) {
+ xfs_iput_new(ip, XFS_ILOCK_SHARED);
+ return -ENOENT;
+ }
+
- *inode = ip->i_vnode;
++ *inode = &ip->i_vnode;
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ return 0;
+}
+
+STATIC int
+xfs_dm_inode_to_fh(
+ struct inode *inode,
+ dm_fid_t *dmfid,
+ dm_fsid_t *dmfsid)
+{
+ xfs_inode_t *ip = XFS_I(inode);
+
+ /* Returns negative errors to DMAPI */
+
+ if (ip->i_mount->m_fixedfsid == NULL)
+ return -EINVAL;
+
+ dmfid->dm_fid_len = sizeof(dm_fid_t) - sizeof(dmfid->dm_fid_len);
+ dmfid->dm_fid_pad = 0;
+ /*
+	 * use memcpy because the inode number is 64 bits and there's no
+ * assurance that dmfid->dm_fid_ino is properly aligned.
+ */
+ memcpy(&dmfid->dm_fid_ino, &ip->i_ino, sizeof(dmfid->dm_fid_ino));
+ dmfid->dm_fid_gen = ip->i_d.di_gen;
+
+ memcpy(dmfsid, ip->i_mount->m_fixedfsid, sizeof(*dmfsid));
+ return 0;
+}
+
+STATIC void
+xfs_dm_get_fsid(
+ struct super_block *sb,
+ dm_fsid_t *fsid)
+{
+ memcpy(fsid, XFS_M(sb)->m_fixedfsid, sizeof(*fsid));
+}
+
+/*
+ * Filesystem operations accessed by the DMAPI core.
+ */
+static struct filesystem_dmapi_operations xfs_dmapiops = {
+ .get_fsys_vector = xfs_dm_get_dmapiops,
+ .fh_to_inode = xfs_dm_fh_to_inode,
- .get_invis_ops = xfs_dm_get_invis_ops,
+ .inode_to_fh = xfs_dm_inode_to_fh,
+ .get_fsid = xfs_dm_get_fsid,
+};
+
+static int __init
+xfs_dm_init(void)
+{
+ printk(KERN_INFO "SGI XFS Data Management API subsystem\n");
+
+ dmapi_register(&xfs_fs_type, &xfs_dmapiops);
+ return 0;
+}
+
+static void __exit
+xfs_dm_exit(void)
+{
+ dmapi_unregister(&xfs_fs_type);
+}
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("SGI XFS dmapi subsystem");
+MODULE_LICENSE("GPL");
+
+module_init(xfs_dm_init);
+module_exit(xfs_dm_exit);
#include <linux/smp_lock.h>
static struct vm_operations_struct xfs_file_vm_ops;
+#ifdef HAVE_DMAPI
+static struct vm_operations_struct xfs_dmapi_file_vm_ops;
+#endif
- STATIC_INLINE ssize_t
- __xfs_file_read(
+ STATIC ssize_t
+ xfs_file_aio_read(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
return -xfs_fsync(XFS_I(dentry->d_inode));
}
+#ifdef HAVE_DMAPI
+STATIC int
+xfs_vm_fault(
+ struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct xfs_mount *mp = XFS_M(inode->i_sb);
+
+ ASSERT_ALWAYS(mp->m_flags & XFS_MOUNT_DMAPI);
+
+ if (XFS_SEND_MMAP(mp, vma, 0))
+ return VM_FAULT_SIGBUS;
+ return filemap_fault(vma, vmf);
+}
+#endif /* HAVE_DMAPI */
+
- /*
- * Unfortunately we can't just use the clean and simple readdir implementation
- * below, because nfs might call back into ->lookup from the filldir callback
- * and that will deadlock the low-level btree code.
- *
- * Hopefully we'll find a better workaround that allows to use the optimal
- * version at least for local readdirs for 2.6.25.
- */
- #if 0
STATIC int
xfs_file_readdir(
struct file *filp,
return 0;
}
- STATIC long
- xfs_file_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long p)
- {
- int error;
- struct inode *inode = filp->f_path.dentry->d_inode;
-
- error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
- xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
- }
-
- STATIC long
- xfs_file_ioctl_invis(
- struct file *filp,
- unsigned int cmd,
- unsigned long p)
- {
- int error;
- struct inode *inode = filp->f_path.dentry->d_inode;
-
- error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
- xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
- }
-
+#ifdef HAVE_DMAPI
+#ifdef HAVE_VMOP_MPROTECT
+STATIC int
+xfs_vm_mprotect(
+ struct vm_area_struct *vma,
+ unsigned int newflags)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct xfs_mount *mp = XFS_M(inode->i_sb);
+ int error = 0;
+
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ if ((vma->vm_flags & VM_MAYSHARE) &&
+ (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
+ error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
+ }
+ return error;
+}
+#endif /* HAVE_VMOP_MPROTECT */
+#endif /* HAVE_DMAPI */
+
+#ifdef HAVE_FOP_OPEN_EXEC
+/* If the user is attempting to execute a file that is offline then
+ * we have to trigger a DMAPI READ event before the file is marked as busy
+ * otherwise the invisible I/O will not be able to write to the file to bring
+ * it back online.
+ */
+STATIC int
+xfs_file_open_exec(
+ struct inode *inode)
+{
+ struct xfs_mount *mp = XFS_M(inode->i_sb);
+ struct xfs_inode *ip = XFS_I(inode);
+
+ if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
+ DM_EVENT_ENABLED(ip, DM_EVENT_READ))
+ return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
+ return 0;
+}
+#endif /* HAVE_FOP_OPEN_EXEC */
+
/*
* mmap()d file has taken write protection fault and is being made
* writable. We can set the page state up correctly for a writable
struct dentry *dentry,
struct iattr *iattr)
{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
+ int flags = 0;
+#ifdef ATTR_NO_BLOCK
+ if (iattr->ia_valid & ATTR_NO_BLOCK)
+ flags |= O_NONBLOCK;
+#endif
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, flags, NULL);
++ return -xfs_setattr(XFS_I(dentry->d_inode), iattr, flags);
}
/*
--- /dev/null
+/*
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_buf.h"
+#include "xfs_log.h"
- #include "xfs_imap.h"
+#include "xfs_inum.h"
- #include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_iomap.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
- #include "xfs_vfsops.h"
+#include "support/ktrace.h"
+
+EXPORT_SYMBOL(xfs_iunlock);
- EXPORT_SYMBOL(xfs_invis_file_operations);
+EXPORT_SYMBOL(xfs_attr_remove);
+EXPORT_SYMBOL(xfs_iunlock_map_shared);
+EXPORT_SYMBOL(xfs_iget);
+EXPORT_SYMBOL(xfs_bmapi);
+EXPORT_SYMBOL(xfs_internal_inum);
- EXPORT_SYMBOL(sys_cred);
+EXPORT_SYMBOL(xfs_attr_set);
+EXPORT_SYMBOL(xfs_trans_reserve);
+EXPORT_SYMBOL(xfs_trans_ijoin);
+EXPORT_SYMBOL(xfs_free_eofblocks);
+EXPORT_SYMBOL(kmem_free);
+EXPORT_SYMBOL(_xfs_trans_commit);
+EXPORT_SYMBOL(xfs_ilock);
+EXPORT_SYMBOL(xfs_attr_get);
+EXPORT_SYMBOL(xfs_readdir);
+EXPORT_SYMBOL(xfs_setattr);
+EXPORT_SYMBOL(xfs_trans_alloc);
+EXPORT_SYMBOL(xfs_trans_cancel);
+EXPORT_SYMBOL(xfs_fsync);
+EXPORT_SYMBOL(xfs_iput_new);
+EXPORT_SYMBOL(xfs_bulkstat);
+EXPORT_SYMBOL(xfs_ilock_map_shared);
+EXPORT_SYMBOL(xfs_iput);
+EXPORT_SYMBOL(xfs_trans_log_inode);
+EXPORT_SYMBOL(xfs_attr_list);
+EXPORT_SYMBOL(kmem_alloc);
+EXPORT_SYMBOL(xfs_change_file_space);
};
int
- xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
+ xfs_dmops_get(struct xfs_mount *mp)
{
- if (args->flags & XFSMNT_DMAPI) {
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
- cmn_err(CE_WARN,
- "XFS: dmapi support not available in this kernel.");
- return EINVAL;
+ struct xfs_dmops *ops;
+
+ ops = symbol_get(xfs_dmcore_xfs);
+ if (!ops) {
+ request_module("xfs_dmapi");
+ ops = symbol_get(xfs_dmcore_xfs);
+ }
+
+ if (!ops) {
+ cmn_err(CE_WARN, "XFS: no dmapi support available.");
+ return EINVAL;
+ }
+ mp->m_dm_ops = ops;
+ } else {
+ mp->m_dm_ops = &xfs_dmcore_stub;
}
- mp->m_dm_ops = &xfs_dmcore_stub;
return 0;
}
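+
+/*
+ * A matching release-side sketch (assumed counterpart, not shown in this
+ * hunk): drop the module reference taken by symbol_get() above once the
+ * mount goes away.
+ *
+ *	void xfs_dmops_put(struct xfs_mount *mp)
+ *	{
+ *		if (mp->m_dm_ops != &xfs_dmcore_stub)
+ *			symbol_put(xfs_dmcore_xfs);
+ *	}
+ */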
spinlock_t m_sync_lock; /* work item list lock */
int m_sync_seq; /* sync thread generation no. */
wait_queue_head_t m_wait_single_sync_task;
+ __int64_t m_update_flags; /* sb flags we need to update
+ on the next remount,rw */
+ struct vfsmount *m_vfsmount;
} xfs_mount_t;
/*
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
static inline void invalidate_bdev(struct block_device *bdev) {}
+
+ static inline struct super_block *freeze_bdev(struct block_device *sb)
+ {
+ return NULL;
+ }
+
+ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+ {
+ return 0;
+ }
+
+static inline void block_sync_page(struct page *page) { }
+
#endif /* CONFIG_BLOCK */
#endif /* _LINUX_BUFFER_HEAD_H */
--- /dev/null
+ /*
+ * Copyright (C) 2003 Sistina Software Limited.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * Device-Mapper dirty region hash interface.
+ *
+ * This file is released under the GPL.
+ */
+
+ #ifndef DM_REGION_HASH_H
+ #define DM_REGION_HASH_H
+
+ #include <linux/dm-dirty-log.h>
+
+ /*-----------------------------------------------------------------
+ * Region hash
+ *----------------------------------------------------------------*/
-struct dm_region_hash;
-struct dm_region;
++struct dm_region_hash {
++ uint32_t region_size;
++ unsigned region_shift;
++
++ /* holds persistent region state */
++ struct dm_dirty_log *log;
++
++ /* hash table */
++ rwlock_t hash_lock;
++ mempool_t *region_pool;
++ unsigned mask;
++ unsigned nr_buckets;
++ unsigned prime;
++ unsigned shift;
++ struct list_head *buckets;
++
++ unsigned max_recovery; /* Max # of regions to recover in parallel */
++
++ spinlock_t region_lock;
++ atomic_t recovery_in_flight;
++ struct semaphore recovery_count;
++ struct list_head clean_regions;
++ struct list_head quiesced_regions;
++ struct list_head recovered_regions;
++ struct list_head failed_recovered_regions;
++
++ void *context;
++ sector_t target_begin;
++
++ /* Callback function to schedule bios writes */
++ void (*dispatch_bios)(void *context, struct bio_list *bios);
++
++ /* Callback function to wakeup callers worker thread. */
++ void (*wakeup_workers)(void *context);
++
++ /* Callback function to wakeup callers recovery waiters. */
++ void (*wakeup_all_recovery_waiters)(void *context);
++};
++
++struct dm_region {
++ struct dm_region_hash *rh; /* FIXME: can we get rid of this ? */
++ region_t key;
++ int state;
++
++ struct list_head hash_list;
++ struct list_head list;
++
++ atomic_t pending;
++ struct bio_list delayed_bios;
++};
++
+
+ /*
+ * States a region can have.
+ */
+ enum dm_rh_region_states {
+ DM_RH_CLEAN = 0x01, /* No writes in flight. */
+ DM_RH_DIRTY = 0x02, /* Writes in flight. */
+ DM_RH_NOSYNC = 0x04, /* Out of sync. */
+ DM_RH_RECOVERING = 0x08, /* Under resynchronization. */
+ };
+
+ /*
+ * Region hash create/destroy.
+ */
+ struct bio_list;
+ struct dm_region_hash *dm_region_hash_create(
+ void *context, void (*dispatch_bios)(void *context,
+ struct bio_list *bios),
+ void (*wakeup_workers)(void *context),
+ void (*wakeup_all_recovery_waiters)(void *context),
+ sector_t target_begin, unsigned max_recovery,
+ struct dm_dirty_log *log, uint32_t region_size,
+ region_t nr_regions);
+ void dm_region_hash_destroy(struct dm_region_hash *rh);
+
+ struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh);
+
+ /*
- * Conversion functions.
- */
-region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio);
-sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region);
-void *dm_rh_region_context(struct dm_region *reg);
-
-/*
- * Get region size and key (ie. number of the region).
- */
-sector_t dm_rh_get_region_size(struct dm_region_hash *rh);
-region_t dm_rh_get_region_key(struct dm_region *reg);
-
-/*
+ * Get/set/update region state (and dirty log).
+ *
+ */
+ int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block);
+ void dm_rh_set_state(struct dm_region_hash *rh, region_t region,
+ enum dm_rh_region_states state, int may_block);
+
+ /* Non-zero errors_handled leaves the state of the region NOSYNC */
+ void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled);
+
+ /* Flush the region hash and dirty log. */
+ int dm_rh_flush(struct dm_region_hash *rh);
+
+ /* Inc/dec pending count on regions. */
+ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios);
++void dm_rh_inc(struct dm_region_hash *rh, region_t region);
+ void dm_rh_dec(struct dm_region_hash *rh, region_t region);
+
+ /* Delay bios on regions. */
+ void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio);
+
+ void dm_rh_mark_nosync(struct dm_region_hash *rh,
+ struct bio *bio, unsigned done, int error);
+
+ /*
+ * Region recovery control.
+ */
+
+ /* Prepare some regions for recovery by starting to quiesce them. */
-void dm_rh_recovery_prepare(struct dm_region_hash *rh);
++int dm_rh_recovery_prepare(struct dm_region_hash *rh);
+
+ /* Try fetching a quiesced region for recovery. */
+ struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh);
+
+ /* Report recovery end on a region. */
+ void dm_rh_recovery_end(struct dm_region *reg, int error);
+
+ /* Returns number of regions with recovery work outstanding. */
+ int dm_rh_recovery_in_flight(struct dm_region_hash *rh);
+
+ /* Start/stop recovery. */
+ void dm_rh_start_recovery(struct dm_region_hash *rh);
+ void dm_rh_stop_recovery(struct dm_region_hash *rh);
+
++/*
++ * Conversion fns
++ */
++static inline region_t dm_rh_sector_to_region(struct dm_region_hash *rh,
++ sector_t sector)
++{
++ return sector >> rh->region_shift;
++}
++
++static inline sector_t dm_rh_region_to_sector(struct dm_region_hash *rh,
++ region_t region)
++{
++ return region << rh->region_shift;
++}
++
++static inline region_t dm_rh_bio_to_region(struct dm_region_hash *rh,
++ struct bio *bio)
++{
++ return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
++}
++
++static inline void *dm_rh_region_context(struct dm_region *reg)
++{
++ return reg->rh->context;
++}
++
++static inline region_t dm_rh_get_region_key(struct dm_region *reg)
++{
++ return reg->key;
++}
++
++static inline sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
++{
++ return rh->region_size;
++}
+ #endif /* DM_REGION_HASH_H */
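For context, a sketch of how a mirror-style target might instantiate the region hash declared above. Only the dm_region_hash_create() signature comes from this header; the target context, callbacks, and MAX_RECOVERY value are illustrative assumptions, and dm_sector_div_up() is assumed available from device-mapper.h:

	struct mirror_set {			/* illustrative target context */
		struct dm_region_hash *rh;
	};

	static void ms_dispatch_bios(void *context, struct bio_list *bios)
	{
		/* hand the delayed bios back to the target's worker */
	}

	static void ms_wakeup(void *context)
	{
		/* kick the target's workqueue */
	}

	#define MAX_RECOVERY 1	/* recover one region at a time (assumption) */

	static int ms_rh_init(struct mirror_set *ms, struct dm_target *ti,
			      struct dm_dirty_log *log, uint32_t region_size)
	{
		struct dm_region_hash *rh;

		rh = dm_region_hash_create(ms, ms_dispatch_bios, ms_wakeup,
					   ms_wakeup, ti->begin, MAX_RECOVERY,
					   log, region_size,
					   dm_sector_div_up(ti->len, region_size));
		if (IS_ERR(rh))
			return PTR_ERR(rh);
		ms->rh = rh;
		return 0;
	}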
#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
- #define EXT3_MOUNT_NFS4ACL 0x400000 /* NFS version 4 ACLs */
+ #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
+ * error in ordered mode */
++#define EXT3_MOUNT_NFS4ACL 0x800000 /* NFS version 4 ACLs */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+#define HAVE_FOP_OPEN_EXEC
+ int (*open_exec) (struct inode *);
int (*setlease)(struct file *, long, struct file_lock **);
- int (*fsetattr)(struct file *, struct iattr *);
};
struct inode_operations {
#define GENHD_FL_CD 8
#define GENHD_FL_UP 16
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
- #define GENHD_FL_FAIL 64
+ #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
+#define GENHD_FL_NO_PARTITION_SCAN 128
+ #define BLK_SCSI_MAX_CMDS (256)
+ #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+ struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+ };
+
+ struct disk_part_tbl {
+ struct rcu_head rcu_head;
+ int len;
+ struct hd_struct *last_lookup;
+ struct hd_struct *part[];
+ };
+
struct gendisk {
+ /* major, first_minor and minors are input parameters only,
+ * don't use directly. Use disk_devt() and disk_max_parts().
+ */
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
+extern int panic_on_io_nmi;
- extern int tainted;
+extern int unsupported;
extern const char *print_tainted(void);
- extern void add_taint(unsigned);
+ extern void add_taint(unsigned flag);
++extern void add_nonfatal_taint(unsigned flag);
+ extern int test_taint(unsigned flag);
+ extern unsigned long get_taint(void);
extern int root_mountflags;
/* Values used for system_state */
SYSTEM_SUSPEND_DISK,
} system_state;
- #define TAINT_PROPRIETARY_MODULE (1<<0)
- #define TAINT_FORCED_MODULE (1<<1)
- #define TAINT_UNSAFE_SMP (1<<2)
- #define TAINT_FORCED_RMMOD (1<<3)
- #define TAINT_MACHINE_CHECK (1<<4)
- #define TAINT_BAD_PAGE (1<<5)
- #define TAINT_USER (1<<6)
- #define TAINT_DIE (1<<7)
- #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8)
- #define TAINT_WARN (1<<9)
- #define TAINT_CRAP (1<<10)
+ #define TAINT_PROPRIETARY_MODULE 0
+ #define TAINT_FORCED_MODULE 1
+ #define TAINT_UNSAFE_SMP 2
+ #define TAINT_FORCED_RMMOD 3
+ #define TAINT_MACHINE_CHECK 4
+ #define TAINT_BAD_PAGE 5
+ #define TAINT_USER 6
+ #define TAINT_DIE 7
+ #define TAINT_OVERRIDDEN_ACPI_TABLE 8
+ #define TAINT_WARN 9
+ #define TAINT_CRAP 10
+/*
+ * Take the upper bits to hopefully allow them
+ * to stay the same for more than one release.
+ */
- #define TAINT_NO_SUPPORT (1<<30)
- #define TAINT_EXTERNAL_SUPPORT (1<<31)
++#define TAINT_NO_SUPPORT 30
++#define TAINT_EXTERNAL_SUPPORT 31
+
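With the TAINT_* values now bit numbers rather than masks, callers pass the number directly and build a mask only where one is still needed; a brief sketch using the accessors declared above:

	unsigned long mask;

	/* record a non-fatal hardware problem */
	add_taint(TAINT_MACHINE_CHECK);

	if (test_taint(TAINT_MACHINE_CHECK))
		printk(KERN_WARNING "tainted: %s\n", print_tainted());

	/* where a mask is still wanted, derive it from the bit number */
	mask = get_taint() & (1UL << TAINT_NO_SUPPORT);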
extern void dump_stack(void) __cold;
enum {
#error ZONES_SHIFT -- too many zones configured adjust calculation
#endif
+ struct zone_reclaim_stat {
+ /*
+ * The pageout code in vmscan.c keeps track of how many of the
+ * mem/swap backed and file backed pages are referenced.
+ * The higher the rotated/scanned ratio, the more valuable
+ * that cache is.
+ *
+ * The anon LRU stats live in [0], file LRU stats in [1]
+ */
+ unsigned long recent_rotated[2];
+ unsigned long recent_scanned[2];
+ };
+
struct zone {
/* Fields commonly accessed by the page allocator */
- unsigned long pages_min, pages_low, pages_high;
+ unsigned long pages_high; /* we stop kswapd */
+ unsigned long pages_low; /* we wake up kswapd */
+ unsigned long pages_min; /* we enter direct reclaim */
+ unsigned long pages_emerg; /* emergency pool */
/*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
struct module *module_text_address(unsigned long addr);
struct module *__module_text_address(unsigned long addr);
int is_module_address(unsigned long addr);
+const char *supported_printable(int taint);
+ static inline int within_module_core(unsigned long addr, struct module *mod)
+ {
+ return (unsigned long)mod->module_core <= addr &&
+ addr < (unsigned long)mod->module_core + mod->core_size;
+ }
+
+ static inline int within_module_init(unsigned long addr, struct module *mod)
+ {
+ return (unsigned long)mod->module_init <= addr &&
+ addr < (unsigned long)mod->module_init + mod->init_size;
+ }
+
/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
symnum out of range. */
int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
#endif /* !PAGEFLAGS_EXTENDED */
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+PAGEFLAG(MemError, memerror)
+#else
+PAGEFLAG_FALSE(MemError)
+#endif
+
- #if !defined(CONFIG_XEN)
- # define PAGE_FLAGS_XEN 0
- #elif defined(CONFIG_X86)
- # define PAGE_FLAGS_XEN ((1 << PG_pinned) | (1 << PG_foreign))
+ #ifdef CONFIG_UNEVICTABLE_LRU
+ #define __PG_UNEVICTABLE (1 << PG_unevictable)
+ #define __PG_MLOCKED (1 << PG_mlocked)
#else
- # define PAGE_FLAGS_XEN (1 << PG_foreign)
+ #define __PG_UNEVICTABLE 0
+ #define __PG_MLOCKED 0
#endif
- #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
- 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \
- 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- PAGE_FLAGS_XEN)
-
- /*
- * Flags checked in bad_page(). Pages on the free list should not have
- * these flags set. It they are, there is a problem.
- */
- #define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
-
/*
* Flags checked when a page is freed. Pages being freed should not have
 * these flags set. If they are, there is a problem.
*/
- #define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved)
+ #define PAGE_FLAGS_CHECK_AT_FREE \
+ (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
+ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
+ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- __PG_UNEVICTABLE | __PG_MLOCKED)
++ 1 << PG_waiters | __PG_UNEVICTABLE | __PG_MLOCKED)
/*
* Flags checked when a page is prepped for return by the page allocator.
char *to;
} substring_t;
- int match_token(char *, match_table_t table, substring_t args[]);
+ int match_token(char *, const match_table_t table, substring_t args[]);
+int match_string(substring_t *s, const char *str);
int match_int(substring_t *, int *result);
int match_octal(substring_t *, int *result);
int match_hex(substring_t *, int *result);
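The const-qualified table is used as before; a hedged sketch of option parsing with match_token() and match_int() (the option name and Opt_* values are illustrative):

	enum { Opt_depth, Opt_err };

	static const match_table_t tokens = {
		{Opt_depth, "depth=%d"},
		{Opt_err, NULL}
	};

	static int parse_one(char *p, int *depth)
	{
		substring_t args[MAX_OPT_ARGS];

		switch (match_token(p, tokens, args)) {
		case Opt_depth:
			return match_int(&args[0], depth);	/* 0 on success */
		default:
			return -EINVAL;
		}
	}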
--- /dev/null
+/*
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+
+#ifndef __LINUX_PERFMON_KERN_H__
+#define __LINUX_PERFMON_KERN_H__
+/*
+ * This file contains all the definitions of data structures, variables,
+ * and macros that are shared between generic code and arch-specific code.
+ *
+ * For generic only definitions, use perfmon/perfmon_priv.h
+ */
+#ifdef CONFIG_PERFMON
+
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/perfmon.h>
+
+/*
+ * system administrator configuration controls available via
+ * the /sys/kernel/perfmon interface
+ */
+struct pfm_controls {
+ u32 debug; /* debugging control bitmask */
+ gid_t sys_group; /* gid to create a syswide context */
+ gid_t task_group; /* gid to create a per-task context */
+ u32 flags; /* control flags (see below) */
+ size_t arg_mem_max; /* maximum vector argument size */
+ size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */
+};
+extern struct pfm_controls pfm_controls;
+
+/*
+ * control flags
+ */
+#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */
+
+/*
+ * software PMD
+ */
+struct pfm_pmd {
+ u64 value; /* 64-bit value */
+ u64 lval; /* last reset value */
+ u64 ovflsw_thres; /* #ovfls left before switch */
+ u64 long_reset; /* long reset value on overflow */
+ u64 short_reset; /* short reset value on overflow */
+ u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */
+ u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */
+ u64 mask; /* range mask for random value */
+ u64 ovflsw_ref_thres; /* #ovfls before next set */
+ u64 eventid; /* opaque event identifier */
+ u32 flags; /* notify/do not notify */
+};
+
+/*
+ * event_set: encapsulates the full PMU state
+ */
+struct pfm_event_set {
+ struct list_head list; /* ordered chain of sets */
+ u16 id; /* set identification */
+ u16 nused_pmds; /* max number of used PMDs */
+ u16 nused_pmcs; /* max number of used PMCs */
+ u16 pad1; /* padding */
+ u32 flags; /* public flags */
+ u32 priv_flags; /* private flags (see below) */
+ u64 runs; /* # of activations */
+ u32 npend_ovfls; /* number of pending PMD overflow */
+ u32 pad2; /* padding */
+ u64 used_pmds[PFM_PMD_BV]; /* used PMDs */
+ u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */
+ u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */
+ u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */
+ u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */
+ u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */
+ u64 pmcs[PFM_MAX_PMCS]; /* PMC values */
+
+ struct pfm_pmd pmds[PFM_MAX_PMDS];
+
+ ktime_t hrtimer_exp; /* switch timeout reference */
+ ktime_t hrtimer_rem; /* per-thread remainder timeout */
+
+ u64 duration_start; /* start time in ns */
+ u64 duration; /* total active ns */
+};
+
+/*
+ * common private event set flags (priv_flags)
+ *
+ * upper 16 bits: for arch-specific use
+ * lower 16 bits: for common use
+ */
+#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */
+#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */
+#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */
+#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \
+ | PFM_SETFL_PRIV_MOD_PMCS)
+
+/*
+ * context flags
+ */
+struct pfm_context_flags {
+ unsigned int block:1; /* task blocks on user notifications */
+ unsigned int system:1; /* do system wide monitoring */
+ unsigned int no_msg:1; /* no message sent on overflow */
+ unsigned int switch_ovfl:1; /* switch set on counter ovfl */
+ unsigned int switch_time:1; /* switch set on timeout */
+ unsigned int started:1; /* pfm_start() issued */
+ unsigned int work_type:2; /* type of work for pfm_handle_work */
+ unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */
+ unsigned int ia64_v20_compat:1; /* context is IA-64 v2.0 mode */
+ unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */
+ unsigned int reset_count:8; /* number of pending resets */
+ unsigned int is_self:1; /* per-thread and self-monitoring */
+ unsigned int reserved:5; /* for future use */
+};
+
+/*
+ * values for work_type (TIF_PERFMON_WORK must be set)
+ */
+#define PFM_WORK_NONE 0 /* nothing to do */
+#define PFM_WORK_RESET 1 /* reset overflowed counters */
+#define PFM_WORK_BLOCK 2 /* block current thread */
+#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */
+
+/*
+ * overflow description argument passed to sampling format
+ */
+struct pfm_ovfl_arg {
+ u16 ovfl_pmd; /* index of overflowed PMD */
+ u16 active_set; /* set active at the time of the overflow */
+ u32 ovfl_ctrl; /* control flags */
+ u64 pmd_last_reset; /* last reset value of overflowed PMD */
+ u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */
+ u64 pmd_eventid; /* eventid associated with PMD */
+ u16 num_smpl_pmds; /* number of PMDS in smpl_pmd_values */
+};
+/*
+ * depth of message queue
+ *
+ * Depth cannot be bigger than 255 (see reset_count)
+ */
+#define PFM_MSGS_ORDER 3 /* log2(number of messages) */
+#define PFM_MSGS_COUNT (1<<PFM_MSGS_ORDER) /* number of messages */
+#define PFM_MSGQ_MASK (PFM_MSGS_COUNT-1)
+
+/*
+ * perfmon context state
+ */
+#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
+#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
+#define PFM_CTX_MASKED 3 /* context is loaded, monitoring is masked */
+#define PFM_CTX_ZOMBIE 4 /* context lost owner but still attached */
+
+/*
+ * registers description
+ */
+struct pfm_regdesc {
+ u64 pmcs[PFM_PMC_BV]; /* available PMC */
+ u64 pmds[PFM_PMD_BV]; /* available PMD */
+ u64 rw_pmds[PFM_PMD_BV]; /* available RW PMD */
+ u64 intr_pmds[PFM_PMD_BV]; /* PMD generating intr */
+ u64 cnt_pmds[PFM_PMD_BV]; /* PMD counters */
+ u16 max_pmc; /* highest+1 avail PMC */
+ u16 max_pmd; /* highest+1 avail PMD */
+ u16 max_rw_pmd; /* highest+1 avail RW PMD */
+ u16 first_intr_pmd; /* first intr PMD */
+ u16 max_intr_pmd; /* highest+1 intr PMD */
+ u16 num_rw_pmd; /* number of avail RW PMD */
+ u16 num_pmcs; /* number of logical PMCS */
+ u16 num_pmds; /* number of logical PMDS */
+ u16 num_counters; /* number of counting PMD */
+};
+
+/*
+ * context: contains all the state of a session
+ */
+struct pfm_context {
+ spinlock_t lock; /* context protection */
+
+ struct pfm_context_flags flags;
+ u32 state; /* current state */
+ struct task_struct *task; /* attached task */
+
+ struct completion restart_complete;/* block on notification */
+ u64 last_act; /* last activation */
+ u32 last_cpu; /* last CPU used (SMP only) */
+ u32 cpu; /* cpu bound to context */
+
+ struct pfm_smpl_fmt *smpl_fmt; /* sampling format callbacks */
+ void *smpl_addr; /* user smpl buffer base */
+ size_t smpl_size; /* user smpl buffer size */
+ void *smpl_real_addr;/* actual smpl buffer base */
+ size_t smpl_real_size; /* actual smpl buffer size */
+
+ wait_queue_head_t msgq_wait; /* pfm_read() wait queue */
+
+ union pfarg_msg msgq[PFM_MSGS_COUNT];
+ int msgq_head;
+ int msgq_tail;
+
+ struct fasync_struct *async_queue; /* async notification */
+
+ struct pfm_event_set *active_set; /* active set */
+ struct list_head set_list; /* ordered list of sets */
+
+ struct pfm_regdesc regs; /* registers available to context */
+
+ /*
+ * save stack space by allocating temporary variables for
+ * pfm_overflow_handler() in pfm_context
+ */
+ struct pfm_ovfl_arg ovfl_arg;
+ u64 tmp_ovfl_notify[PFM_PMD_BV];
+};
+
+/*
+ * ovfl_ctrl bitmask (used by interrupt handler)
+ */
+#define PFM_OVFL_CTRL_NOTIFY 0x1 /* notify user */
+#define PFM_OVFL_CTRL_RESET 0x2 /* reset overflowed pmds */
+#define PFM_OVFL_CTRL_MASK 0x4 /* mask monitoring */
+#define PFM_OVFL_CTRL_SWITCH 0x8 /* switch sets */
+
+/*
+ * logging
+ */
+#define PFM_ERR(f, x...) printk(KERN_ERR "perfmon: " f "\n", ## x)
+#define PFM_WARN(f, x...) printk(KERN_WARNING "perfmon: " f "\n", ## x)
+#define PFM_LOG(f, x...) printk(KERN_NOTICE "perfmon: " f "\n", ## x)
+#define PFM_INFO(f, x...) printk(KERN_INFO "perfmon: " f "\n", ## x)
+
+/*
+ * debugging
+ *
+ * Printk rate limiting is enforced to avoid getting flooded with too many
+ * error messages on the console (which could render the machine unresponsive).
+ * To get full debug output (turn off ratelimit):
+ * $ echo 0 >/proc/sys/kernel/printk_ratelimit
+ *
+ * debug is a bitmask where bits are defined as follows:
+ * bit 0: enable non-interrupt code debug messages
+ * bit 1: enable interrupt code debug messages
+ */
+#ifdef CONFIG_PERFMON_DEBUG
+#define _PFM_DBG(lm, f, x...) \
+ do { \
+ if (unlikely((pfm_controls.debug & lm) && printk_ratelimit())) { \
+ preempt_disable(); \
+ printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \
+ __func__, __LINE__, \
+ smp_processor_id(), current->pid , ## x); \
+ preempt_enable(); \
+ } \
+ } while (0)
+
+#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x)
+#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x)
+#else
+#define PFM_DBG(f, x...) do {} while (0)
+#define PFM_DBG_ovfl(f, x...) do {} while (0)
+#endif
+
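Usage is plain printf-style; whether a message is emitted is gated by the debug bitmask and the printk ratelimit described above (ctx and cnum are illustrative locals; the bitmask is assumed reachable through the /sys/kernel/perfmon interface mentioned earlier):

	PFM_DBG("loading ctx=%p state=%d", ctx, ctx->state);	/* bit 0 */
	PFM_DBG_ovfl("pmd%u overflowed", cnum);			/* bit 1 */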
+extern struct pfm_pmu_config *pfm_pmu_conf;
+extern int perfmon_disabled;
+
+static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c)
+{
+ return (struct pfm_arch_context *)(c+1);
+}
+
+int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
+ void **req, void **to_free);
+
+int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize,
+ void **arg, struct pfm_smpl_fmt **fmt);
+
+int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req,
+ int count);
+int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
+ int compat);
+int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count);
+
+int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req,
+ struct task_struct *task);
+int __pfm_unload_context(struct pfm_context *ctx, int *can_release);
+
+int __pfm_stop(struct pfm_context *ctx, int *release_info);
+int __pfm_restart(struct pfm_context *ctx, int *unblock);
+int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start);
+
+void pfm_free_context(struct pfm_context *ctx);
+
+void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size);
+
+int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
+ unsigned long *flags, void **resume);
+/*
+ * check_mask bitmask values for pfm_check_task_state()
+ */
+#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */
+#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */
+#define PFM_CMD_UNLOAD 0x04 /* command is unload */
+
+int __pfm_create_context(struct pfarg_ctx *req,
+ struct pfm_smpl_fmt *fmt,
+ void *fmt_arg,
+ int mode,
+ struct pfm_context **new_ctx);
+
+struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id,
+ int alloc);
+
+int pfm_pmu_conf_get(int autoload);
+void pfm_pmu_conf_put(void);
+
+int pfm_session_allcpus_acquire(void);
+void pfm_session_allcpus_release(void);
+
+int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize);
+void pfm_smpl_buf_free(struct pfm_context *ctx);
+
+struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name);
+void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt);
+
+void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs);
+
+void pfm_resume_task(struct task_struct *t, void *data);
+
+#include <linux/perfmon_pmu.h>
+#include <linux/perfmon_fmt.h>
+
+extern const struct file_operations pfm_file_ops;
+/*
+ * upper limit for count in calls that take vector arguments. This is used
+ * to prevent multiplication overflow when we compute the actual storage size
+ */
+#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m)))
+
+#define cast_ulp(_x) ((unsigned long *)_x)
+
+#define PFM_NORMAL 0
+#define PFM_COMPAT 1
+
+void __pfm_exit_thread(void);
+void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next);
+void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next);
+void pfm_handle_work(struct pt_regs *regs);
+void __pfm_init_percpu(void *dummy);
+void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
+
+static inline void pfm_exit_thread(void)
+{
+ if (current->pfm_context)
+ __pfm_exit_thread();
+}
+
+/*
+ * include arch-specific kernel level definitions
+ */
+#include <asm/perfmon_kern.h>
+
+static inline void pfm_copy_thread(struct task_struct *task)
+{
+ /*
+ * context or perfmon TIF state is NEVER inherited
+ * by the child task. This holds for both per-thread
+ * and system-wide monitoring.
+ */
+ task->pfm_context = NULL;
+ clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
+ clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
+ pfm_arch_disarm_handle_work(task);
+}
+
+
+/*
+ * read a single PMD register.
+ *
+ * virtual PMD registers have a special handler.
+ * Depends on definitions in asm/perfmon_kern.h
+ */
+static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum)
+{
+ if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V))
+ return pfm_pmu_conf->pmd_sread(ctx, cnum);
+
+ return pfm_arch_read_pmd(ctx, cnum);
+}
+/*
+ * write a single PMD register.
+ *
+ * virtual PMD registers have a special handler.
+ * Depends on definitions in asm/perfmon_kern.h
+ */
+static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum,
+ u64 value)
+{
+ /*
+ * PMD writes are ignored for read-only registers
+ */
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO)
+ return;
+
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) {
+ pfm_pmu_conf->pmd_swrite(ctx, cnum, value);
+ return;
+ }
+ /*
+ * clear unimplemented bits
+ */
+ value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
+
+ pfm_arch_write_pmd(ctx, cnum, value);
+}
+
+void __pfm_init_percpu(void *dummy);
+
+static inline void pfm_init_percpu(void)
+{
+ __pfm_init_percpu(NULL);
+}
+
+/*
+ * pfm statistics are available via debugfs
+ * under the perfmon subdirectory.
+ *
+ * When adding/removing new stats, make sure you also
+ * update the name table in perfmon_debugfs.c
+ */
+enum pfm_stats_names {
+ PFM_ST_ovfl_intr_all_count = 0,
+ PFM_ST_ovfl_intr_ns,
+ PFM_ST_ovfl_intr_spurious_count,
+ PFM_ST_ovfl_intr_replay_count,
+ PFM_ST_ovfl_intr_regular_count,
+ PFM_ST_handle_work_count,
+ PFM_ST_ovfl_notify_count,
+ PFM_ST_reset_pmds_count,
+ PFM_ST_pfm_restart_count,
+ PFM_ST_fmt_handler_calls,
+ PFM_ST_fmt_handler_ns,
+ PFM_ST_set_switch_count,
+ PFM_ST_set_switch_ns,
+ PFM_ST_set_switch_exp,
+ PFM_ST_ctxswin_count,
+ PFM_ST_ctxswin_ns,
+ PFM_ST_handle_timeout_count,
+ PFM_ST_ovfl_intr_nmi_count,
+ PFM_ST_ctxswout_count,
+ PFM_ST_ctxswout_ns,
+ PFM_ST_LAST /* last entry marker */
+};
+#define PFM_NUM_STATS PFM_ST_LAST
+
+struct pfm_stats {
+ u64 v[PFM_NUM_STATS];
+ struct dentry *dirs[PFM_NUM_STATS];
+ struct dentry *cpu_dir;
+ char cpu_name[8];
+};
+
+#ifdef CONFIG_PERFMON_DEBUG_FS
+#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]
+#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++
+#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y)
+void pfm_reset_stats(int cpu);
+#else
+#define pfm_stats_get(x)
+#define pfm_stats_inc(x)
+#define pfm_stats_add(x, y)
+static inline void pfm_reset_stats(int cpu)
+{}
+#endif
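Because the accessors paste PFM_ST_ onto their argument, callers use the bare stat name; a sketch from an interrupt path (start_ns/end_ns are illustrative):

	pfm_stats_inc(ovfl_intr_all_count);
	pfm_stats_add(ovfl_intr_ns, end_ns - start_ns);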
+
+
+
+DECLARE_PER_CPU(struct pfm_context *, pmu_ctx);
+DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
+DECLARE_PER_CPU(struct task_struct *, pmu_owner);
+
+void pfm_cpu_disable(void);
+
+
+/*
+ * max vector argument elements for local storage (no kmalloc/kfree)
+ * The PFM_ARCH_PM*_ARG should be defined in perfmon_kern.h.
+ * If not, default (conservative) values are used
+ */
+#ifndef PFM_ARCH_PMC_STK_ARG
+#define PFM_ARCH_PMC_STK_ARG 1
+#endif
+
+#ifndef PFM_ARCH_PMD_STK_ARG
+#define PFM_ARCH_PMD_STK_ARG 1
+#endif
+
+#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
+#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
+
+#else /* !CONFIG_PERFMON */
+
-
+/*
+ * perfmon hooks are nops when CONFIG_PERFMON is undefined
+ */
+static inline void pfm_cpu_disable(void)
+{}
+
+static inline void pfm_exit_thread(void)
+{}
+
+static inline void pfm_handle_work(struct pt_regs *regs)
+{}
+
+static inline void pfm_copy_thread(struct task_struct *t)
+{}
+
+static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n)
+{}
+
+static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n)
+{}
+
+static inline void pfm_session_allcpus_release(void)
+{}
+
+static inline int pfm_session_allcpus_acquire(void)
+{
+ return 0;
+}
+
+static inline void pfm_init_percpu(void)
+{}
+
+#endif /* CONFIG_PERFMON */
+
+#endif /* __LINUX_PERFMON_KERN_H__ */
return TYPE_DIRECT;
case V1_DIRENTRY_UNIQUENESS:
return TYPE_DIRENTRY;
-- default:
- reiserfs_warning(NULL, "vs-500", "unknown uniqueness %d",
- uniqueness);
- reiserfs_warning(NULL, "vs-500: unknown uniqueness %d",
- uniqueness);
case V1_ANY_UNIQUENESS:
++ default:
return TYPE_ANY;
}
}
return V1_DIRECT_UNIQUENESS;
case TYPE_DIRENTRY:
return V1_DIRENTRY_UNIQUENESS;
-- default:
- reiserfs_warning(NULL, "vs-501", "unknown type %d", type);
- reiserfs_warning(NULL, "vs-501: unknown type %d", type);
case TYPE_ANY:
++ default:
return V1_ANY_UNIQUENESS;
}
}
--- /dev/null
+/*
+ * Memory reserve management.
+ *
+ * Copyright (C) 2007-2008 Red Hat, Inc.,
+ * Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+
+#ifndef _LINUX_RESERVE_H
+#define _LINUX_RESERVE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+
+struct mem_reserve {
+ struct mem_reserve *parent;
+ struct list_head children;
+ struct list_head siblings;
+
+ const char *name;
+
+ long pages;
+ long limit;
+ long usage;
+ spinlock_t lock; /* protects limit and usage */
+
+ wait_queue_head_t waitqueue;
+};
+
+extern struct mem_reserve mem_reserve_root;
+
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+ struct mem_reserve *parent);
+int mem_reserve_connect(struct mem_reserve *new_child,
+ struct mem_reserve *node);
+void mem_reserve_disconnect(struct mem_reserve *node);
+
+int mem_reserve_pages_set(struct mem_reserve *res, long pages);
+int mem_reserve_pages_add(struct mem_reserve *res, long pages);
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages);
+
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes);
+
+struct kmem_cache;
+
+int mem_reserve_kmem_cache_set(struct mem_reserve *res,
+ struct kmem_cache *s,
+ int objects);
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
+ struct kmem_cache *s, long objs);
+
- void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
++void *___kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+ struct mem_reserve *res, int *emerg);
+
+static inline
- void *__kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
++void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+ struct mem_reserve *res, int *emerg)
+{
+ void *obj;
+
+ obj = __kmalloc_node_track_caller(size,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node, ip);
+ if (!obj)
+ obj = ___kmalloc_reserve(size, flags, node, ip, res, emerg);
+
+ return obj;
+}
+
+/**
+ * kmalloc_reserve() - kmalloc() and charge against @res for @emerg allocations
+ * @size - size of the requested memory region
+ * @gfp - allocation flags to use for this allocation
+ * @node - preferred memory node for this allocation
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+#define kmalloc_reserve(size, gfp, node, res, emerg) \
+ __kmalloc_reserve(size, gfp, node, \
- __builtin_return_address(0), res, emerg)
++ _RET_IP_, res, emerg)
+
+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg);
+
+/**
+ * kfree_reserve() - kfree() and uncharge against @res for @emerg allocations
+ * @obj - memory to free
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
+{
+ if (unlikely(obj && res && emerg))
+ __kfree_reserve(obj, res, emerg);
+ else
+ kfree(obj);
+}
+
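A hedged sketch of the intended pairing: the emergency bit reported by the allocation must be carried to the matching free so the reserve is uncharged exactly once (net_rx_reserve is an illustrative reserve):

	int emerg;
	void *buf;

	buf = kmalloc_reserve(size, GFP_ATOMIC, numa_node_id(),
			      &net_rx_reserve, &emerg);
	if (!buf)
		return -ENOMEM;

	/* ... use buf ... */

	kfree_reserve(buf, &net_rx_reserve, emerg);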
+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+ struct mem_reserve *res, int *emerg);
+
+/**
+ * kmem_cache_alloc_reserve() - kmem_cache_alloc() and charge against @res
+ * @s - kmem_cache to allocate from
+ * @gfp - allocation flags to use for this allocation
+ * @node - preferred memory node for this allocation
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+static inline
+void *kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+ struct mem_reserve *res, int *emerg)
+{
+ void *obj;
+
+ obj = kmem_cache_alloc_node(s,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
+ if (!obj)
+ obj = __kmem_cache_alloc_reserve(s, flags, node, res, emerg);
+
+ return obj;
+}
+
+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+ struct mem_reserve *res, int emerg);
+
+/**
+ * kmem_cache_free_reserve() - kmem_cache_free() and uncharge against @res
+ * @s - kmem_cache to free to
+ * @obj - memory to free
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+ struct mem_reserve *res, int emerg)
+{
+ if (unlikely(obj && res && emerg))
+ __kmem_cache_free_reserve(s, obj, res, emerg);
+ else
+ kmem_cache_free(s, obj);
+}
+
+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
+ struct mem_reserve *res, int *emerg);
+
+/**
+ * alloc_pages_reserve() - alloc_pages() and charge against @res
+ * @node - preferred memory node for this allocation
+ * @gfp - allocation flags to use for this allocation
+ * @order - page order
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+static inline
+struct page *alloc_pages_reserve(int node, gfp_t flags, int order,
+ struct mem_reserve *res, int *emerg)
+{
+ struct page *page;
+
+ page = alloc_pages_node(node,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, order);
+ if (!page)
+ page = __alloc_pages_reserve(node, flags, order, res, emerg);
+
+ return page;
+}
+
+void __free_pages_reserve(struct page *page, int order,
+ struct mem_reserve *res, int emerg);
+
+/**
+ * free_pages_reserve() - __free_pages() and uncharge against @res
+ * @page - page to free
+ * @order - page order
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void free_pages_reserve(struct page *page, int order,
+ struct mem_reserve *res, int emerg)
+{
+ if (unlikely(page && res && emerg))
+ __free_pages_reserve(page, order, res, emerg);
+ else
+ __free_pages(page, order);
+}
+
+#endif /* _LINUX_RESERVE_H */
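Setting up a reserve amounts to initializing a node, connecting it under a parent (ultimately mem_reserve_root), and sizing it. A sketch under the assumption that init and connect are separate steps (names are illustrative):

	static struct mem_reserve net_reserve, net_rx_reserve;

	static int __init net_reserves_init(void)
	{
		int err;

		mem_reserve_init(&net_reserve, "total network reserve", NULL);
		err = mem_reserve_connect(&net_reserve, &mem_reserve_root);
		if (err)
			return err;

		mem_reserve_init(&net_rx_reserve, "network rx reserve", NULL);
		err = mem_reserve_connect(&net_rx_reserve, &net_reserve);
		if (err)
			return err;

		/* guarantee 256 pages of emergency memory for rx */
		return mem_reserve_pages_set(&net_rx_reserve, 256);
	}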
struct futex_pi_state;
struct robust_list_head;
struct bio;
+ struct bts_tracer;
+struct pfm_context;
/*
* List of flags we want to share for kernel threads,
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
+ /*
+ * time slack values; these are used to round up poll() and
+ * select() etc timeout values. These are in nanoseconds.
+ */
+ unsigned long timer_slack_ns;
+ unsigned long default_timer_slack_ns;
+
+ struct list_head *scm_work_list;
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Index of current stored address in ret_stack */
+ int curr_ret_stack;
+ /* Stack of return addresses for return function tracing */
+ struct ftrace_ret_stack *ret_stack;
+ /*
+ * Number of functions that haven't been traced
+ * because of depth overrun.
+ */
+ atomic_t trace_overrun;
+ /* Pause for the tracing */
+ atomic_t tracing_graph_pause;
+ #endif
+ #ifdef CONFIG_TRACING
+ /* state flags for use by tracers */
+ unsigned long trace;
+ #endif
+ u64 instrumentation;
+#ifdef CONFIG_PERFMON
+ struct pfm_context *pfm_context;
+#endif
};
/*
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
+static inline void tsk_restore_flags(struct task_struct *p,
+ unsigned long pflags, unsigned long mask)
+{
+ p->flags &= ~mask;
+ p->flags |= pflags & mask;
+}
+
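The helper supports the save/modify/restore idiom on current->flags, restoring only the touched bits; e.g. for a temporary PF_MEMALLOC section (do_reclaim_work() is an illustrative callee):

	unsigned long pflags = current->flags;
	int ret;

	current->flags |= PF_MEMALLOC;	/* allow dipping into reserves */
	ret = do_reclaim_work();
	tsk_restore_flags(current, pflags, PF_MEMALLOC);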
#ifdef CONFIG_SMP
extern int set_cpus_allowed_ptr(struct task_struct *p,
- const cpumask_t *new_mask);
+ const struct cpumask *new_mask);
#else
static inline int set_cpus_allowed_ptr(struct task_struct *p,
- const cpumask_t *new_mask)
+ const struct cpumask *new_mask)
{
- if (!cpu_isset(0, *new_mask))
+ if (!cpumask_test_cpu(0, new_mask))
return -EINVAL;
return 0;
}
#endif
#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
__u8 do_not_encrypt:1;
+ __u8 requeue:1;
#endif
- /* 0/13/14 bit hole */
+#ifdef CONFIG_NETVM
+ __u8 emergency:1;
+#endif
- #ifdef CONFIG_XEN
- __u8 proto_data_valid:1,
- proto_csum_blank:1;
- #endif
- /* 10-16 bit hole */
++ /* 12-16 bit hole */
#ifdef CONFIG_NET_DMA
dma_cookie_t dma_cookie;
#include <asm/system.h>
+ #ifdef CONFIG_HAS_DMA
+ #include <linux/dma-mapping.h>
+ extern int skb_dma_map(struct device *dev, struct sk_buff *skb,
+ enum dma_data_direction dir);
+ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
+ enum dma_data_direction dir);
+ #endif
+
+#define SKB_ALLOC_FCLONE 0x01
+#define SKB_ALLOC_RX 0x02
+
+static inline bool skb_emergency(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NETVM
+ return unlikely(skb->emergency);
+#else
+ return false;
+#endif
+}
+
extern void kfree_skb(struct sk_buff *skb);
extern void __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *__alloc_skb(unsigned int size,
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
gfp_t priority)
{
- return __alloc_skb(size, priority, 1, -1);
+ return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, -1);
}
+ extern int skb_recycle_check(struct sk_buff *skb, int skb_size);
+
extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
extern struct sk_buff *skb_clone(struct sk_buff *skb,
gfp_t priority);
* request comes from.
*/
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
- extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
-#define kmalloc_track_caller(size, flags) \
- __kmalloc_track_caller(size, flags, _RET_IP_)
#else
-#define kmalloc_track_caller(size, flags) \
+#define __kmalloc_track_caller(size, flags, ip) \
__kmalloc(size, flags)
#endif /* DEBUG_SLAB */
+#define kmalloc_track_caller(size, flags) \
- __kmalloc_track_caller(size, flags, __builtin_return_address(0))
++ __kmalloc_track_caller(size, flags, _RET_IP_)
+
#ifdef CONFIG_NUMA
/*
* kmalloc_node_track_caller is a special version of kmalloc_node that
* allocation request comes from.
*/
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
- extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
+ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
-#define kmalloc_node_track_caller(size, flags, node) \
- __kmalloc_node_track_caller(size, flags, node, \
- _RET_IP_)
#else
-#define kmalloc_node_track_caller(size, flags, node) \
+#define __kmalloc_node_track_caller(size, flags, node, ip) \
__kmalloc_node(size, flags, node)
#endif
#else /* CONFIG_NUMA */
-#define kmalloc_node_track_caller(size, flags, node) \
- kmalloc_track_caller(size, flags)
+#define __kmalloc_node_track_caller(size, flags, node, ip) \
+ __kmalloc_track_caller(size, flags, ip)
- #endif /* DEBUG_SLAB */
+ #endif /* CONFIG_NUMA */
+#define kmalloc_node_track_caller(size, flags, node) \
+ __kmalloc_node_track_caller(size, flags, node, \
- __builtin_return_address(0))
++ _RET_IP_)
+
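With the track-caller entry points now keyed on _RET_IP_ (an unsigned long) rather than a code pointer, a small wrapper that should not swallow its caller's identity in slab debugging looks like this (my_zalloc is illustrative):

	void *my_zalloc(size_t size, gfp_t gfp)
	{
		/* slab debug attributes this allocation to my_zalloc()'s caller */
		void *p = kmalloc_track_caller(size, gfp);

		if (p)
			memset(p, 0, size);
		return p;
	}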
/*
* Shortcuts
*/
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
-- {
-- extern void __you_cannot_kmalloc_that_much(void);
-- __you_cannot_kmalloc_that_much();
-- }
++ return NULL;
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
-- {
-- extern void __you_cannot_kmalloc_that_much(void);
-- __you_cannot_kmalloc_that_much();
-- }
++ return NULL;
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
enum {
SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
- SWP_ACTIVE = (SWP_USED | SWP_WRITEOK),
- SWP_FILE = (1 << 2), /* file swap area */
+ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */
+ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
+ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
++ SWP_FILE = (1 << 5), /* file swap area */
/* add others here before... */
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};
extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
extern sector_t swapdev_block(int, pgoff_t);
extern struct swap_info_struct *get_swap_info_struct(unsigned);
+extern struct swap_info_struct *page_swap_info(struct page *);
- extern int can_share_swap_page(struct page *);
- extern int remove_exclusive_swap_page(struct page *);
+ extern int reuse_swap_page(struct page *);
+ extern int try_to_free_swap(struct page *);
struct backing_dev_info;
/* linux/mm/thrash.c */
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_WAKE_AFFINE \
- | SD_WAKE_IDLE, \
- | SD_WAKE_BALANCE \
- | SD_SHARE_CPUPOWER, \
++ | SD_WAKE_BALANCE, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
--- /dev/null
+#ifndef _LINUX_UNWIND_H
+#define _LINUX_UNWIND_H
+
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
- * Jan Beulich <jbeulich@novell.com>
++ * Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * A simple API for unwinding kernel stacks. This is used for
+ * debugging and error reporting purposes. The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full Dwarf2 unwind API.
+ */
++#ifdef CONFIG_STACK_UNWIND
+
+struct module;
++struct stacktrace_ops;
++struct unwind_frame_info;
+
- #ifdef CONFIG_STACK_UNWIND
+
++typedef int (*unwind_callback_fn)(struct unwind_frame_info *, const struct stacktrace_ops *, void *);
+#include <asm/unwind.h>
++#include <asm/stacktrace.h>
+
+#ifndef ARCH_UNWIND_SECTION_NAME
+#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
+#endif
+
+/*
+ * Initialize unwind support.
+ */
+extern void unwind_init(void);
+extern void unwind_setup(void);
+
+#ifdef CONFIG_MODULES
+
+extern void *unwind_add_table(struct module *,
+ const void *table_start,
+ unsigned long table_size);
+
+extern void unwind_remove_table(void *handle, int init_only);
+
+#endif
+
+extern int unwind_init_frame_info(struct unwind_frame_info *,
+ struct task_struct *,
+ /*const*/ struct pt_regs *);
+
+/*
+ * Prepare to unwind a blocked task.
+ */
+extern int unwind_init_blocked(struct unwind_frame_info *,
+ struct task_struct *);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+extern int unwind_init_running(struct unwind_frame_info *,
- asmlinkage int (*callback)(struct unwind_frame_info *,
- void *arg),
- void *arg);
++ asmlinkage unwind_callback_fn callback,
++ const struct stacktrace_ops *ops,
++ void *data);
+
+/*
+ * Unwind to the previous frame. Returns 0 if successful, negative
+ * number in case of an error.
+ */
+extern int unwind(struct unwind_frame_info *);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs). Returns 0 if successful, negative number in case of
+ * error.
+ */
+extern int unwind_to_user(struct unwind_frame_info *);
-
- #else
-
- struct unwind_frame_info {};
-
- static inline void unwind_init(void) {}
- static inline void unwind_setup(void) {}
-
- #ifdef CONFIG_MODULES
-
- static inline void *unwind_add_table(struct module *mod,
- const void *table_start,
- unsigned long table_size)
- {
- return NULL;
- }
-
- #endif
-
- static inline void unwind_remove_table(void *handle, int init_only)
- {
- }
-
- static inline int unwind_init_frame_info(struct unwind_frame_info *info,
- struct task_struct *tsk,
- const struct pt_regs *regs)
- {
- return -ENOSYS;
- }
-
- static inline int unwind_init_blocked(struct unwind_frame_info *info,
- struct task_struct *tsk)
- {
- return -ENOSYS;
- }
-
- static inline int unwind_init_running(struct unwind_frame_info *info,
- asmlinkage int (*cb)(struct unwind_frame_info *,
- void *arg),
- void *arg)
- {
- return -ENOSYS;
- }
-
- static inline int unwind(struct unwind_frame_info *info)
- {
- return -ENOSYS;
- }
-
- static inline int unwind_to_user(struct unwind_frame_info *info)
- {
- return -ENOSYS;
- }
-
- #endif
-
++#endif /* CONFIG_STACK_UNWIND */
+#endif /* _LINUX_UNWIND_H */
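A hedged sketch of walking a blocked task with this API; UNW_PC() is assumed to be the frame-info accessor provided by the arch's asm/unwind.h:

	static void dump_blocked_stack(struct task_struct *task)
	{
		struct unwind_frame_info info;

		if (unwind_init_blocked(&info, task) != 0)
			return;

		/* walk frames until unwind() reports an error */
		while (unwind(&info) == 0)
			printk(KERN_DEBUG " [<%p>]\n", (void *)UNW_PC(&info));
	}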
struct sock *ndisc_sk;
struct sock *tcp_sk;
struct sock *igmp_sk;
-
+ #ifdef CONFIG_IPV6_MROUTE
+ struct sock *mroute6_sk;
+ struct mfc6_cache **mfc6_cache_array;
+ struct mif_device *vif6_table;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ int mroute_do_assert;
+ int mroute_do_pim;
+ #ifdef CONFIG_IPV6_PIMSM_V2
+ int mroute_reg_vif_num;
+ #endif
+ #endif
+ struct mem_reserve ip6_rt_reserve;
};
#endif
#include <linux/skbuff.h> /* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>
+#include <linux/reserve.h>
#include <linux/filter.h>
+ #include <linux/rculist_nulls.h>
#include <asm/atomic.h>
#include <net/dst.h>
--- /dev/null
+#ifndef _TRACE_FILEMAP_H
+#define _TRACE_FILEMAP_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(wait_on_page_start,
++DECLARE_TRACE(wait_on_page_start,
+ TPPROTO(struct page *page, int bit_nr),
+ TPARGS(page, bit_nr));
- DEFINE_TRACE(wait_on_page_end,
++DECLARE_TRACE(wait_on_page_end,
+ TPPROTO(struct page *page, int bit_nr),
+ TPARGS(page, bit_nr));
+
+#endif
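A probe attaches through the register_trace_<name>() stub that DECLARE_TRACE() generates; a hedged sketch (the probe body is illustrative, and its signature must match TPPROTO exactly):

	static void probe_wait_on_page_start(struct page *page, int bit_nr)
	{
		printk(KERN_DEBUG "waiting on page %p bit %d\n", page, bit_nr);
	}

	static int __init my_probe_init(void)
	{
		return register_trace_wait_on_page_start(probe_wait_on_page_start);
	}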
--- /dev/null
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+
+#include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(fs_buffer_wait_start,
++DECLARE_TRACE(fs_buffer_wait_start,
+ TPPROTO(struct buffer_head *bh),
+ TPARGS(bh));
- DEFINE_TRACE(fs_buffer_wait_end,
++DECLARE_TRACE(fs_buffer_wait_end,
+ TPPROTO(struct buffer_head *bh),
+ TPARGS(bh));
- DEFINE_TRACE(fs_exec,
++DECLARE_TRACE(fs_exec,
+ TPPROTO(char *filename),
+ TPARGS(filename));
- DEFINE_TRACE(fs_ioctl,
++DECLARE_TRACE(fs_ioctl,
+ TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+ TPARGS(fd, cmd, arg));
- DEFINE_TRACE(fs_open,
++DECLARE_TRACE(fs_open,
+ TPPROTO(int fd, char *filename),
+ TPARGS(fd, filename));
- DEFINE_TRACE(fs_close,
++DECLARE_TRACE(fs_close,
+ TPPROTO(unsigned int fd),
+ TPARGS(fd));
- DEFINE_TRACE(fs_lseek,
++DECLARE_TRACE(fs_lseek,
+ TPPROTO(unsigned int fd, long offset, unsigned int origin),
+ TPARGS(fd, offset, origin));
- DEFINE_TRACE(fs_llseek,
++DECLARE_TRACE(fs_llseek,
+ TPPROTO(unsigned int fd, loff_t offset, unsigned int origin),
+ TPARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that __user * may be modified by concurrent userspace
+ * or kernel threads.
+ */
- DEFINE_TRACE(fs_read,
++DECLARE_TRACE(fs_read,
+ TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+ TPARGS(fd, buf, count, ret));
- DEFINE_TRACE(fs_write,
++DECLARE_TRACE(fs_write,
+ TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+ ssize_t ret),
+ TPARGS(fd, buf, count, ret));
- DEFINE_TRACE(fs_pread64,
++DECLARE_TRACE(fs_pread64,
+ TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+ ssize_t ret),
+ TPARGS(fd, buf, count, pos, ret));
- DEFINE_TRACE(fs_pwrite64,
++DECLARE_TRACE(fs_pwrite64,
+ TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+ loff_t pos, ssize_t ret),
+ TPARGS(fd, buf, count, pos, ret));
- DEFINE_TRACE(fs_readv,
++DECLARE_TRACE(fs_readv,
+ TPPROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TPARGS(fd, vec, vlen, ret));
- DEFINE_TRACE(fs_writev,
++DECLARE_TRACE(fs_writev,
+ TPPROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TPARGS(fd, vec, vlen, ret));
- DEFINE_TRACE(fs_select,
- TPPROTO(int fd, s64 timeout),
- TPARGS(fd, timeout));
- DEFINE_TRACE(fs_poll,
++DECLARE_TRACE(fs_select,
++ TPPROTO(int fd, struct timespec *end_time),
++ TPARGS(fd, end_time));
++DECLARE_TRACE(fs_poll,
+ TPPROTO(int fd),
+ TPARGS(fd));
+#endif
--- /dev/null
+#ifndef _TRACE_HUGETLB_H
+#define _TRACE_HUGETLB_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(hugetlb_page_release,
++DECLARE_TRACE(hugetlb_page_release,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(hugetlb_page_grab,
++DECLARE_TRACE(hugetlb_page_grab,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(hugetlb_buddy_pgalloc,
++DECLARE_TRACE(hugetlb_buddy_pgalloc,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(hugetlb_page_alloc,
++DECLARE_TRACE(hugetlb_page_alloc,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(hugetlb_page_free,
++DECLARE_TRACE(hugetlb_page_free,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(hugetlb_pages_reserve,
++DECLARE_TRACE(hugetlb_pages_reserve,
+ TPPROTO(struct inode *inode, long from, long to, int ret),
+ TPARGS(inode, from, to, ret));
- DEFINE_TRACE(hugetlb_pages_unreserve,
++DECLARE_TRACE(hugetlb_pages_unreserve,
+ TPPROTO(struct inode *inode, long offset, long freed),
+ TPARGS(inode, offset, freed));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IPC_H
+#define _TRACE_IPC_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(ipc_msg_create,
++DECLARE_TRACE(ipc_msg_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
- DEFINE_TRACE(ipc_sem_create,
++DECLARE_TRACE(ipc_sem_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
- DEFINE_TRACE(ipc_shm_create,
++DECLARE_TRACE(ipc_shm_create,
+ TPPROTO(long id, int flags),
+ TPARGS(id, flags));
+#endif
--- /dev/null
+#ifndef _TRACE_IPV4_H
+#define _TRACE_IPV4_H
+
+#include <linux/inetdevice.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(ipv4_addr_add,
++DECLARE_TRACE(ipv4_addr_add,
+ TPPROTO(struct in_ifaddr *ifa),
+ TPARGS(ifa));
- DEFINE_TRACE(ipv4_addr_del,
++DECLARE_TRACE(ipv4_addr_del,
+ TPPROTO(struct in_ifaddr *ifa),
+ TPARGS(ifa));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IPV6_H
+#define _TRACE_IPV6_H
+
+#include <net/if_inet6.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(ipv6_addr_add,
++DECLARE_TRACE(ipv6_addr_add,
+ TPPROTO(struct inet6_ifaddr *ifa),
+ TPARGS(ifa));
- DEFINE_TRACE(ipv6_addr_del,
++DECLARE_TRACE(ipv6_addr_del,
+ TPPROTO(struct inet6_ifaddr *ifa),
+ TPARGS(ifa));
+
+#endif
--- /dev/null
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/kdebug.h>
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(irq_entry,
++DECLARE_TRACE(irq_entry,
+ TPPROTO(unsigned int id, struct pt_regs *regs),
+ TPARGS(id, regs));
- DEFINE_TRACE(irq_exit,
++DECLARE_TRACE(irq_exit,
+ TPPROTO(irqreturn_t retval),
+ TPARGS(retval));
- DEFINE_TRACE(irq_softirq_entry,
++DECLARE_TRACE(irq_softirq_entry,
+ TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+ TPARGS(h, softirq_vec));
- DEFINE_TRACE(irq_softirq_exit,
++DECLARE_TRACE(irq_softirq_exit,
+ TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+ TPARGS(h, softirq_vec));
- DEFINE_TRACE(irq_softirq_raise,
++DECLARE_TRACE(irq_softirq_raise,
+ TPPROTO(unsigned int nr),
+ TPARGS(nr));
- DEFINE_TRACE(irq_tasklet_low_entry,
++DECLARE_TRACE(irq_tasklet_low_entry,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
- DEFINE_TRACE(irq_tasklet_low_exit,
++DECLARE_TRACE(irq_tasklet_low_exit,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
- DEFINE_TRACE(irq_tasklet_high_entry,
++DECLARE_TRACE(irq_tasklet_high_entry,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
- DEFINE_TRACE(irq_tasklet_high_exit,
++DECLARE_TRACE(irq_tasklet_high_exit,
+ TPPROTO(struct tasklet_struct *t),
+ TPARGS(t));
+
+#endif
--- /dev/null
+#ifndef _TRACE_KERNEL_H
+#define _TRACE_KERNEL_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(kernel_module_free,
++DECLARE_TRACE(kernel_module_free,
+ TPPROTO(struct module *mod),
+ TPARGS(mod));
- DEFINE_TRACE(kernel_module_load,
++DECLARE_TRACE(kernel_module_load,
+ TPPROTO(struct module *mod),
+ TPARGS(mod));
+
+#endif
--- /dev/null
+#ifndef _TRACE_MEMORY_H
+#define _TRACE_MEMORY_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(memory_handle_fault_entry,
++DECLARE_TRACE(memory_handle_fault_entry,
+ TPPROTO(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access),
+ TPARGS(mm, vma, address, write_access));
- DEFINE_TRACE(memory_handle_fault_exit,
++DECLARE_TRACE(memory_handle_fault_exit,
+ TPPROTO(int res),
+ TPARGS(res));
+
+#endif
--- /dev/null
+#ifndef _TRACE_NET_H
+#define _TRACE_NET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(net_dev_xmit,
++DECLARE_TRACE(net_dev_xmit,
+ TPPROTO(struct sk_buff *skb),
+ TPARGS(skb));
- DEFINE_TRACE(net_dev_receive,
++DECLARE_TRACE(net_dev_receive,
+ TPPROTO(struct sk_buff *skb),
+ TPARGS(skb));
+
+#endif
--- /dev/null
+#ifndef _TRACE_PAGE_ALLOC_H
+#define _TRACE_PAGE_ALLOC_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * page_alloc: page can be NULL.
+ */
- DEFINE_TRACE(page_alloc,
++DECLARE_TRACE(page_alloc,
+ TPPROTO(struct page *page, unsigned int order),
+ TPARGS(page, order));
- DEFINE_TRACE(page_free,
++DECLARE_TRACE(page_free,
+ TPPROTO(struct page *page, unsigned int order),
+ TPARGS(page, order));
+
+#endif
--- /dev/null
+#ifndef _TRACE_SOCKET_H
+#define _TRACE_SOCKET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(socket_sendmsg,
++DECLARE_TRACE(socket_sendmsg,
+ TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int ret),
+ TPARGS(sock, msg, size, ret));
- DEFINE_TRACE(socket_recvmsg,
++DECLARE_TRACE(socket_recvmsg,
+ TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int flags,
+ int ret),
+ TPARGS(sock, msg, size, flags, ret));
- DEFINE_TRACE(socket_create,
++DECLARE_TRACE(socket_create,
+ TPPROTO(struct socket *sock, int fd),
+ TPARGS(sock, fd));
+/*
+ * socket_call
+ *
+ * TODO: This tracepoint should be expanded to cover each element of the
+ * switch in sys_socketcall().
+ */
- DEFINE_TRACE(socket_call,
++DECLARE_TRACE(socket_call,
+ TPPROTO(int call, unsigned long a0),
+ TPARGS(call, a0));
+#endif
--- /dev/null
+#ifndef _TRACE_SWAP_H
+#define _TRACE_SWAP_H
+
+#include <linux/swap.h>
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(swap_in,
++DECLARE_TRACE(swap_in,
+ TPPROTO(struct page *page, swp_entry_t entry),
+ TPARGS(page, entry));
- DEFINE_TRACE(swap_out,
++DECLARE_TRACE(swap_out,
+ TPPROTO(struct page *page),
+ TPARGS(page));
- DEFINE_TRACE(swap_file_open,
++DECLARE_TRACE(swap_file_open,
+ TPPROTO(struct file *file, char *filename),
+ TPARGS(file, filename));
- DEFINE_TRACE(swap_file_close,
++DECLARE_TRACE(swap_file_close,
+ TPPROTO(struct file *file),
+ TPARGS(file));
+
+#endif
--- /dev/null
+#ifndef _TRACE_TIMER_H
+#define _TRACE_TIMER_H
+
+#include <linux/tracepoint.h>
+
- DEFINE_TRACE(timer_itimer_expired,
++DECLARE_TRACE(timer_itimer_expired,
+ TPPROTO(struct signal_struct *sig),
+ TPARGS(sig));
- DEFINE_TRACE(timer_itimer_set,
++DECLARE_TRACE(timer_itimer_set,
+ TPPROTO(int which, struct itimerval *value),
+ TPARGS(which, value));
- DEFINE_TRACE(timer_set,
++DECLARE_TRACE(timer_set,
+ TPPROTO(struct timer_list *timer),
+ TPARGS(timer));
+/*
+ * xtime_lock is taken when the timer_update_time tracepoint is reached.
+ */
- DEFINE_TRACE(timer_update_time,
++DECLARE_TRACE(timer_update_time,
+ TPPROTO(struct timespec *_xtime, struct timespec *_wall_to_monotonic),
+ TPARGS(_xtime, _wall_to_monotonic));
- DEFINE_TRACE(timer_timeout,
++DECLARE_TRACE(timer_timeout,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+#endif
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/syscalls.h>
+ #include <linux/utime.h>
+#ifdef CONFIG_ACPI_CUSTOM_DSDT_INITRD
+#include <acpi/acpi.h>
+#endif
static __initdata char *message;
static void __init error(char *x)
#include <asm/smp.h>
#endif
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#endif /* CONFIG_KDB */
+
- /*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
- #if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
- #warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended.
- #endif
-
static int kernel_init(void *);
extern void init_IRQ(void);
msq->q_ctime);
}
#endif
++DEFINE_TRACE(ipc_msg_create);
sma->sem_ctime);
}
#endif
++DEFINE_TRACE(ipc_sem_create);
shp->shm_ctim);
}
#endif
++DEFINE_TRACE(ipc_shm_create);
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/signal.h>
+
+MODULE_AUTHOR("SGI");
+MODULE_DESCRIPTION("Debug struct task and sigset information");
+MODULE_LICENSE("GPL");
+
+static char *
+kdb_cpus_allowed_string(struct task_struct *tp)
+{
+ static char maskbuf[NR_CPUS * 8];
+ if (cpus_equal(tp->cpus_allowed, cpu_online_map))
+ strcpy(maskbuf, "ALL");
+ else if (cpus_full(tp->cpus_allowed))
+ strcpy(maskbuf, "ALL(NR_CPUS)");
+ else if (cpus_empty(tp->cpus_allowed))
+ strcpy(maskbuf, "NONE");
+ else if (cpus_weight(tp->cpus_allowed) == 1)
+ snprintf(maskbuf, sizeof(maskbuf), "ONLY(%d)", first_cpu(tp->cpus_allowed));
+ else
- cpulist_scnprintf(maskbuf, sizeof(maskbuf), tp->cpus_allowed);
++ cpulist_scnprintf(maskbuf, sizeof(maskbuf), &tp->cpus_allowed);
+ return maskbuf;
+}
+
+static int
+kdbm_task(int argc, const char **argv)
+{
+ unsigned long addr;
+ long offset=0;
+ int nextarg;
+ int e = 0;
+ struct task_struct *tp = NULL, *tp1;
++ const struct cred *cred;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
+ return(e);
+
+ if (!(tp = kmalloc(sizeof(*tp), GFP_ATOMIC))) {
+ kdb_printf("%s: cannot kmalloc tp\n", __FUNCTION__);
+ goto out;
+ }
+ if ((e = kdb_getarea(*tp, addr))) {
+ kdb_printf("%s: invalid task address\n", __FUNCTION__);
+ goto out;
+ }
+
+ tp1 = (struct task_struct *)addr;
+ kdb_printf(
+ "struct task at 0x%lx, pid=%d flags=0x%x state=%ld comm=\"%s\"\n",
+ addr, tp->pid, tp->flags, tp->state, tp->comm);
+
+ kdb_printf(" cpu=%d policy=%u ", kdb_process_cpu(tp), tp->policy);
+ kdb_printf(
+ "prio=%d static_prio=%d cpus_allowed=",
+ tp->prio, tp->static_prio);
+ {
+ /* The cpus allowed string may be longer than kdb_printf() can
+ * handle. Print it in chunks.
+ */
+ char c, *p;
+ p = kdb_cpus_allowed_string(tp);
+ while (1) {
+ if (strlen(p) < 100) {
+ kdb_printf("%s", p);
+ break;
+ }
+ c = p[100];
+ p[100] = '\0';
+ kdb_printf("%s", p);
+ p[100] = c;
+ p += 100;
+ }
+ }
+ kdb_printf(" &thread=0x%p\n", &tp1->thread);
+
+ kdb_printf(" need_resched=%d ",
+ test_tsk_thread_flag(tp, TIF_NEED_RESCHED));
+ kdb_printf(
+ "time_slice=%u",
+ tp->rt.time_slice);
+ kdb_printf(" lock_depth=%d\n", tp->lock_depth);
+
+ kdb_printf(
+ " fs=0x%p files=0x%p mm=0x%p\n",
+ tp->fs, tp->files, tp->mm);
+
++ cred = get_cred((struct cred *) __task_cred(tp));
+ kdb_printf(
+ " uid=%d euid=%d suid=%d fsuid=%d gid=%d egid=%d sgid=%d fsgid=%d\n",
- tp->uid, tp->euid, tp->suid, tp->fsuid, tp->gid, tp->egid, tp->sgid, tp->fsgid);
++ cred->uid, cred->euid, cred->suid, cred->fsuid, cred->gid, cred->egid, cred->sgid, cred->fsgid);
+
- kdb_printf(
- " user=0x%p\n",
- tp->user);
++ kdb_printf( " user=0x%p\n", cred->user);
++ put_cred(cred);
+
+ if (tp->sysvsem.undo_list)
+ kdb_printf(
+ " sysvsem.sem_undo refcnt %d list_proc=0x%p\n",
+ atomic_read(&tp->sysvsem.undo_list->refcnt),
+ &tp->sysvsem.undo_list->list_proc);
+
+ kdb_printf(
+ " signal=0x%p &blocked=0x%p &pending=0x%p\n",
+ tp->signal, &tp1->blocked, &tp1->pending);
+
+ kdb_printf(
+ " utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
+ tp->utime, tp->stime,
+ tp->signal ? tp->signal->cutime : 0L,
+ tp->signal ? tp->signal->cstime : 0L);
+
+ kdb_printf(" thread_info=0x%p\n", task_thread_info(tp));
+ kdb_printf(" ti flags=0x%lx\n", (unsigned long)task_thread_info(tp)->flags);
+
+#ifdef CONFIG_NUMA
+ kdb_printf(
+ " mempolicy=0x%p il_next=%d\n",
+ tp->mempolicy, tp->il_next);
+#endif
+
+out:
+ if (tp)
+ kfree(tp);
+ return e;
+}
+
+static int
+kdbm_sigset(int argc, const char **argv)
+{
+ sigset_t *sp = NULL;
+ unsigned long addr;
+ long offset=0;
+ int nextarg;
+ int e = 0;
+ int i;
+ char fmt[32];
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+#ifndef _NSIG_WORDS
+ kdb_printf("unavailable on this platform, _NSIG_WORDS not defined.\n");
+#else
+ nextarg = 1;
+ if ((e = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL)) != 0)
+ return(e);
+
+ if (!(sp = kmalloc(sizeof(*sp), GFP_ATOMIC))) {
+ kdb_printf("%s: cannot kmalloc sp\n", __FUNCTION__);
+ goto out;
+ }
+ if ((e = kdb_getarea(*sp, addr))) {
+ kdb_printf("%s: invalid sigset address\n", __FUNCTION__);
+ goto out;
+ }
+
+ sprintf(fmt, "[%%d]=0x%%0%dlx ", (int)sizeof(sp->sig[0])*2);
+ kdb_printf("sigset at 0x%p : ", sp);
+ for (i=_NSIG_WORDS-1; i >= 0; i--) {
+ if (i == 0 || sp->sig[i]) {
+ kdb_printf(fmt, i, sp->sig[i]);
+ }
+ }
+ kdb_printf("\n");
+#endif /* _NSIG_WORDS */
+
+out:
+ if (sp)
+ kfree(sp);
+ return e;
+}
+
+static int __init kdbm_task_init(void)
+{
+ kdb_register("task", kdbm_task, "<vaddr>", "Display task_struct", 0);
+ kdb_register("sigset", kdbm_sigset, "<vaddr>", "Display sigset_t", 0);
+
+ return 0;
+}
+
+static void __exit kdbm_task_exit(void)
+{
+ kdb_unregister("task");
+ kdb_unregister("sigset");
+}
+
+module_init(kdbm_task_init)
+module_exit(kdbm_task_exit)
EXPORT_SYMBOL(__cap_init_eff_set);
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-int file_caps_enabled = 1;
+int file_caps_enabled;
- static int __init setup_file_caps(char *str)
+ static int __init file_caps_disable(char *str)
+ {
+ file_caps_enabled = 0;
+ return 1;
+ }
+ __setup("no_file_caps", file_caps_disable);
++
++static int __init file_caps_enable(char *str)
+{
- get_option(&str, &file_caps_enabled);
++ file_caps_enabled = 1;
+ return 1;
+}
- __setup("file_caps=", setup_file_caps);
++__setup("file_caps", file_caps_enable);
#endif
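With this change both switches are plain flag parameters matched by __setup() and take no "=value" any more: booting with no_file_caps makes the (now default) disabled state explicit, while booting with file_caps sets file_caps_enabled back to 1.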
/*
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
- #include <trace/irq.h>
+ #include <linux/rculist.h>
+ #include <linux/hash.h>
++#include <trace/irq.h>
#include "internals.h"
+ /*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+ struct lock_class_key irq_desc_lock_class;
+
/**
* handle_bad_irq - handle spurious and unhandled irqs
* @irq: the interrupt number
{
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
+ struct pt_regs *regs = get_irq_regs();
+
+ trace_irq_entry(irq, regs);
- handle_dynamic_tick(action);
-
if (!(action->flags & IRQF_DISABLED))
local_irq_enable_in_hardirq();
}
#endif
- #ifdef CONFIG_TRACE_IRQFLAGS
-
- /*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
- static struct lock_class_key irq_desc_lock_class;
-
void early_init_irq_lock_class(void)
{
+ struct irq_desc *desc;
int i;
- for (i = 0; i < NR_IRQS; i++)
- lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+ for_each_irq_desc(i, desc) {
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ }
}
+ #ifdef CONFIG_SPARSE_IRQ
+ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+ return desc ? desc->kstat_irqs[cpu] : 0;
+ }
#endif
+ EXPORT_SYMBOL(kstat_irqs_cpu);
+
++DEFINE_TRACE(irq_entry);
++DEFINE_TRACE(irq_exit);
return -EFAULT;
return 0;
}
++
++DEFINE_TRACE(timer_itimer_expired);
++DEFINE_TRACE(timer_itimer_set);
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/console.h>
+ #include <linux/vmalloc.h>
+#include <linux/sysctl.h>
#include <asm/page.h>
#include <asm/uaccess.h>
struct kobject *kernel_kobj;
EXPORT_SYMBOL_GPL(kernel_kobj);
+const char *supported_printable(int taint)
+{
+ if (taint & (1 << TAINT_NO_SUPPORT))
+ return "No";
+ else if (taint & (1 << TAINT_EXTERNAL_SUPPORT))
+ return "Yes, External";
+ else
+ return "Yes";
+}
+
+static ssize_t supported_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
- return sprintf(buf, "%s\n", supported_printable(tainted));
++ return sprintf(buf, "%s\n", supported_printable(get_taint()));
+}
+KERNEL_ATTR_RO(supported);
+
static struct attribute * kernel_attrs[] = {
- #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
+ #if defined(CONFIG_HOTPLUG)
&uevent_seqnum_attr.attr,
&uevent_helper_attr.attr,
#endif
#include <linux/license.h>
#include <asm/sections.h>
#include <linux/tracepoint.h>
+ #include <linux/ftrace.h>
+ #include <linux/async.h>
+#include <trace/kernel.h>
#if 0
#define DEBUGP printk
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
+/* Allow unsupported modules switch. */
+#ifdef UNSUPPORTED_MODULES
+int unsupported = UNSUPPORTED_MODULES;
+#else
+int unsupported = 2; /* don't warn when loading unsupported modules. */
+#endif
+
+static int __init unsupported_setup(char *str)
+{
+ get_option(&str, &unsupported);
+ return 1;
+}
+__setup("unsupported=", unsupported_setup);
+
/* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
static DEFINE_MUTEX(module_mutex);
static LIST_HEAD(modules);
.show = show_initstate,
};
+static void setup_modinfo_supported(struct module *mod, const char *s)
+{
+ if (!s) {
- mod->taints |= TAINT_NO_SUPPORT;
++ mod->taints |= (1 << TAINT_NO_SUPPORT);
+ return;
+ }
+
+ if (strcmp(s, "external") == 0)
- mod->taints |= TAINT_EXTERNAL_SUPPORT;
++ mod->taints |= (1 << TAINT_EXTERNAL_SUPPORT);
+ else if (strcmp(s, "yes"))
- mod->taints |= TAINT_NO_SUPPORT;
++ mod->taints |= (1 << TAINT_NO_SUPPORT);
+}
+
+static ssize_t show_modinfo_supported(struct module_attribute *mattr,
+ struct module *mod, char *buffer)
+{
+ return sprintf(buffer, "%s\n", supported_printable(mod->taints));
+}
+
+static struct module_attribute modinfo_supported = {
+ .attr = { .name = "supported", .mode = 0444 },
+ .show = show_modinfo_supported,
+ .setup = setup_modinfo_supported,
+};
+
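setup_modinfo_supported() above is fed from the module's .modinfo section at load time. A sketch of the producer side, assuming only the stock MODULE_INFO() macro from <linux/module.h>:

/* In the module source.  Omitting the tag entirely is the NULL case
 * above and marks the module TAINT_NO_SUPPORT. */
MODULE_INFO(supported, "external");	/* or "yes" */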
static struct module_attribute *modinfo_attrs[] = {
&modinfo_version,
&modinfo_srcversion,
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
+ /* We don't use add_taint() here because it also disables lockdep. */
- if (mod->taints & TAINT_EXTERNAL_SUPPORT)
- tainted |= TAINT_EXTERNAL_SUPPORT;
- else if (mod->taints == TAINT_NO_SUPPORT) {
++ if (mod->taints & (1 << TAINT_EXTERNAL_SUPPORT))
++ add_nonfatal_taint(TAINT_EXTERNAL_SUPPORT);
++ else if (mod->taints == (1 << TAINT_NO_SUPPORT)) {
+ if (unsupported == 0) {
+ printk(KERN_WARNING "%s: module not supported by "
+ "Novell, refusing to load. To override, echo "
+ "1 > /proc/sys/kernel/unsupported\n", mod->name);
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
- tainted |= TAINT_NO_SUPPORT;
++ add_nonfatal_taint(TAINT_NO_SUPPORT);
+ if (unsupported == 1) {
+ printk(KERN_WARNING "%s: module is not supported by "
+ "Novell. Novell Technical Services may decline "
+ "your support request if it involves a kernel "
+ "fault.\n", mod->name);
+ }
+ }
+
/* Get rid of temporary copy */
vfree(hdr);
+ trace_kernel_module_load(mod);
+
+ stop_machine_destroy();
/* Done! */
return mod;
char *name, char *module_name, int *exported)
{
struct module *mod;
+#ifdef CONFIG_KDB
+ int get_lock = !KDB_IS_RUNNING();
+#else
+#define get_lock 1
+#endif
- preempt_disable();
+ if (get_lock)
+ preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (symnum < mod->num_symtab) {
*value = mod->symtab[symnum].st_value;
*type = mod->symtab[symnum].st_info;
strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
KSYM_NAME_LEN);
strlcpy(module_name, mod->name, MODULE_NAME_LEN);
- *exported = is_exported(name, mod);
+ *exported = is_exported(name, *value, mod);
- preempt_enable();
+ if (get_lock)
+ preempt_enable();
return 0;
}
symnum -= mod->num_symtab;
mod->state == MODULE_STATE_GOING ||
mod->state == MODULE_STATE_COMING) {
buf[bx++] = '(';
- if (mod->taints & TAINT_PROPRIETARY_MODULE)
+ if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
buf[bx++] = 'P';
- if (mod->taints & TAINT_FORCED_MODULE)
+ if (mod->taints & (1 << TAINT_FORCED_MODULE))
buf[bx++] = 'F';
- if (mod->taints & TAINT_CRAP)
+ if (mod->taints & (1 << TAINT_CRAP))
buf[bx++] = 'C';
- if (mod->taints & TAINT_NO_SUPPORT)
++ if (mod->taints & (1 << TAINT_NO_SUPPORT))
+ buf[bx++] = 'N';
- if (mod->taints & TAINT_EXTERNAL_SUPPORT)
++ if (mod->taints & (1 << TAINT_EXTERNAL_SUPPORT))
+ buf[bx++] = 'X';
/*
* TAINT_FORCED_RMMOD: could be added.
* TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
if (last_unloaded_module[0])
printk(" [last unloaded: %s]", last_unloaded_module);
printk("\n");
- printk("Supported: %s\n", supported_printable(tainted));
++ printk("Supported: %s\n", supported_printable(get_taint()));
}
#ifdef CONFIG_MODVERSIONS
return found;
}
#endif
++
++DEFINE_TRACE(kernel_module_free);
++DEFINE_TRACE(kernel_module_load);
#include <linux/debug_locks.h>
#include <linux/random.h>
#include <linux/kallsyms.h>
+ #include <linux/dmi.h>
+#ifdef CONFIG_KDB_KDUMP
+#include <linux/kdb.h>
+#endif
int panic_on_oops;
- int tainted;
+ static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
EXPORT_SYMBOL(panic);
+
+ struct tnt {
+ u8 bit;
+ char true;
+ char false;
+ };
+
+ static const struct tnt tnts[] = {
+ { TAINT_PROPRIETARY_MODULE, 'P', 'G' },
+ { TAINT_FORCED_MODULE, 'F', ' ' },
+ { TAINT_UNSAFE_SMP, 'S', ' ' },
+ { TAINT_FORCED_RMMOD, 'R', ' ' },
+ { TAINT_MACHINE_CHECK, 'M', ' ' },
+ { TAINT_BAD_PAGE, 'B', ' ' },
+ { TAINT_USER, 'U', ' ' },
+ { TAINT_DIE, 'D', ' ' },
+ { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
+ { TAINT_WARN, 'W', ' ' },
+ { TAINT_CRAP, 'C', ' ' },
++ { TAINT_NO_SUPPORT, 'N', ' ' },
++ { TAINT_EXTERNAL_SUPPORT, 'X', ' ' },
+ };
+
/**
* print_tainted - return a string to represent the kernel taint state.
*
return(buf);
}
- void add_taint(unsigned flag)
+ int test_taint(unsigned flag)
{
- debug_locks = 0; /* can't trust the integrity of the kernel anymore */
- tainted |= flag;
+ return test_bit(flag, &tainted_mask);
+ }
+ EXPORT_SYMBOL(test_taint);
+
+ unsigned long get_taint(void)
+ {
+ return tainted_mask;
}
- EXPORT_SYMBOL(add_taint);
- static int __init pause_on_oops_setup(char *str)
++void add_nonfatal_taint(unsigned flag)
+{
- pause_on_oops = simple_strtoul(str, NULL, 0);
- return 1;
++ set_bit(flag, &tainted_mask);
++}
++
+ void add_taint(unsigned flag)
+ {
+ debug_locks = 0; /* can't trust the integrity of the kernel anymore */
+ set_bit(flag, &tainted_mask);
}
- __setup("pause_on_oops=", pause_on_oops_setup);
+ EXPORT_SYMBOL(add_taint);
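Since tainted_mask stores the TAINT_* constants as bit numbers rather than pre-shifted masks, every open-coded test has to shift — which is what the (1 << TAINT_*) churn in the module hunks above is about. A short worked sketch, assuming only the upstream values TAINT_PROPRIETARY_MODULE == 0 and TAINT_FORCED_MODULE == 1:

	add_taint(TAINT_FORCED_MODULE);		/* set_bit(1, &tainted_mask) */
	add_nonfatal_taint(TAINT_NO_SUPPORT);	/* same, but keeps debug_locks */
	/* The value user space reads is still the OR of (1 << bit), so the
	 * forced load alone reads back as 2, exactly as with "int tainted". */
	if (test_taint(TAINT_FORCED_MODULE))	/* test_bit(1, &tainted_mask) */
		printk(KERN_INFO "module was force loaded\n");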
static void spin_msec(int msecs)
{
do {
if (pending & 1) {
+ int prev_count = preempt_count();
+
+ trace_irq_softirq_entry(h, softirq_vec);
h->action(h);
+ trace_irq_softirq_exit(h, softirq_vec);
+
+ if (unlikely(prev_count != preempt_count())) {
+ printk(KERN_ERR "huh, entered softirq %td %p"
+ "with preempt_count %08x,"
+ " exited with %08x?\n", h - softirq_vec,
+ h->action, prev_count, preempt_count());
+ preempt_count() = prev_count;
+ }
+
rcu_bh_qsctr_inc(cpu);
}
h++;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
+
+ /*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ * GCC does not inline them incorrectly. ]
+ */
+
+ int __init __weak early_irq_init(void)
+ {
+ return 0;
+ }
+
+ int __init __weak arch_early_irq_init(void)
+ {
+ return 0;
+ }
+
+ int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+ {
+ return 0;
+ }
++
++DEFINE_TRACE(irq_softirq_entry);
++DEFINE_TRACE(irq_softirq_exit);
++DEFINE_TRACE(irq_softirq_raise);
++DEFINE_TRACE(irq_tasklet_low_entry);
++DEFINE_TRACE(irq_tasklet_low_exit);
++DEFINE_TRACE(irq_tasklet_high_entry);
++DEFINE_TRACE(irq_tasklet_high_exit);
struct sk_buff *rep_skb;
struct taskstats *stats;
size_t size;
- cpumask_t mask;
+ cpumask_var_t mask;
+#ifdef CONFIG_IA64
+ struct taskstats statn;
+#define statf &statn
+#else
+#define statf stats
+#endif
- rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
if (rc < 0)
- return rc;
- if (rc == 0)
- return add_del_listener(info->snd_pid, &mask, REGISTER);
+ goto free_return_rc;
+ if (rc == 0) {
+ rc = add_del_listener(info->snd_pid, mask, REGISTER);
+ goto free_return_rc;
+ }
- rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+ rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
if (rc < 0)
+ goto free_return_rc;
+ if (rc == 0) {
+ rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
+ free_return_rc:
+ free_cpumask_var(mask);
return rc;
- if (rc == 0)
- return add_del_listener(info->snd_pid, &mask, DEREGISTER);
+ }
+ free_cpumask_var(mask);
/*
* Size includes space for nested attributes
}
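The conversion above follows the cpumask_var_t discipline that came in with CONFIG_CPUMASK_OFFSTACK: the mask may be heap-allocated rather than stack-resident, so every exit path has to release it — hence the free_return_rc label. The bare pattern, as a sketch independent of taskstats:

static int cpumask_pattern_sketch(void)
{
	cpumask_var_t mask;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_copy(mask, cpu_online_mask);	/* use like a cpumask_t */
	/* ... work with the mask ... */
	free_cpumask_var(mask);			/* every path must free it */
	return 0;
}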
EXPORT_SYMBOL(msleep_interruptible);
++
++DEFINE_TRACE(timer_set);
++DEFINE_TRACE(timer_update_time);
++DEFINE_TRACE(timer_timeout);
config TRACING
bool
select DEBUG_FS
- select STACKTRACE
+ select RING_BUFFER
+ select STACKTRACE if STACKTRACE_SUPPORT
+ select TRACEPOINTS
+ select NOP_TRACER
+
+ menu "Tracers"
- config FTRACE
+ config FUNCTION_TRACER
bool "Kernel Function Tracer"
- depends on HAVE_FTRACE
+ depends on HAVE_FUNCTION_TRACER
+ depends on DEBUG_KERNEL
- select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
help
--- /dev/null
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
+ * Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * A simple API for unwinding kernel stacks. This is used for
+ * debugging and error reporting purposes. The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full Dwarf2 unwind API.
+ */
+
+#include <linux/unwind.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/sort.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+#include <asm/unaligned.h>
+
+extern const char __start_unwind[], __end_unwind[];
+extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
+
+#define MAX_STACK_DEPTH 8
+
+#define EXTRA_INFO(f) { \
+ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
+ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ + offsetof(struct unwind_frame_info, f) \
+ / FIELD_SIZEOF(struct unwind_frame_info, f), \
+ FIELD_SIZEOF(struct unwind_frame_info, f) \
+ }
+#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
+
+static const struct {
+ unsigned offs:BITS_PER_LONG / 2;
+ unsigned width:BITS_PER_LONG / 2;
+} reg_info[] = {
+ UNW_REGISTER_INFO
+};
+
+#undef PTREGS_INFO
+#undef EXTRA_INFO
+
+#ifndef REG_INVALID
+#define REG_INVALID(r) (reg_info[r].width == 0)
+#endif
+
+#define DW_CFA_nop 0x00
+#define DW_CFA_set_loc 0x01
+#define DW_CFA_advance_loc1 0x02
+#define DW_CFA_advance_loc2 0x03
+#define DW_CFA_advance_loc4 0x04
+#define DW_CFA_offset_extended 0x05
+#define DW_CFA_restore_extended 0x06
+#define DW_CFA_undefined 0x07
+#define DW_CFA_same_value 0x08
+#define DW_CFA_register 0x09
+#define DW_CFA_remember_state 0x0a
+#define DW_CFA_restore_state 0x0b
+#define DW_CFA_def_cfa 0x0c
+#define DW_CFA_def_cfa_register 0x0d
+#define DW_CFA_def_cfa_offset 0x0e
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_CFA_offset_extended_sf 0x11
+#define DW_CFA_def_cfa_sf 0x12
+#define DW_CFA_def_cfa_offset_sf 0x13
+#define DW_CFA_val_offset 0x14
+#define DW_CFA_val_offset_sf 0x15
+#define DW_CFA_val_expression 0x16
+#define DW_CFA_lo_user 0x1c
+#define DW_CFA_GNU_window_save 0x2d
+#define DW_CFA_GNU_args_size 0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user 0x3f
+
+#define DW_EH_PE_FORM 0x07
+#define DW_EH_PE_native 0x00
+#define DW_EH_PE_leb128 0x01
+#define DW_EH_PE_data2 0x02
+#define DW_EH_PE_data4 0x03
+#define DW_EH_PE_data8 0x04
+#define DW_EH_PE_signed 0x08
+#define DW_EH_PE_ADJUST 0x70
+#define DW_EH_PE_abs 0x00
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_indirect 0x80
+#define DW_EH_PE_omit 0xff
+
+typedef unsigned long uleb128_t;
+typedef signed long sleb128_t;
+#define sleb128abs __builtin_labs
+
+static struct unwind_table {
+ struct {
+ unsigned long pc;
+ unsigned long range;
+ } core, init;
+ const void *address;
+ unsigned long size;
+ const unsigned char *header;
+ unsigned long hdrsz;
+ struct unwind_table *link;
+ const char *name;
+} root_table;
+
+struct unwind_item {
+ enum item_location {
+ Nowhere,
+ Memory,
+ Register,
+ Value
+ } where;
+ uleb128_t value;
+};
+
+struct unwind_state {
+ uleb128_t loc, org;
+ const u8 *cieStart, *cieEnd;
+ uleb128_t codeAlign;
+ sleb128_t dataAlign;
+ struct cfa {
+ uleb128_t reg, offs;
+ } cfa;
+ struct unwind_item regs[ARRAY_SIZE(reg_info)];
+ unsigned stackDepth:8;
+ unsigned version:8;
+ const u8 *label;
+ const u8 *stack[MAX_STACK_DEPTH];
+};
+
+static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+
+static unsigned unwind_debug;
+static int __init unwind_debug_setup(char *s)
+{
+ unwind_debug = simple_strtoul(s, NULL, 0);
+ return 1;
+}
+__setup("unwind_debug=", unwind_debug_setup);
+#define dprintk(lvl, fmt, args...) \
+ ((void)(lvl > unwind_debug \
+ || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
+
+static struct unwind_table *find_table(unsigned long pc)
+{
+ struct unwind_table *table;
+
+ for (table = &root_table; table; table = table->link)
+ if ((pc >= table->core.pc
+ && pc < table->core.pc + table->core.range)
+ || (pc >= table->init.pc
+ && pc < table->init.pc + table->init.range))
+ break;
+
+ return table;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType,
+ unsigned long text_base,
+ unsigned long data_base);
+
+static void init_unwind_table(struct unwind_table *table,
+ const char *name,
+ const void *core_start,
+ unsigned long core_size,
+ const void *init_start,
+ unsigned long init_size,
+ const void *table_start,
+ unsigned long table_size,
+ const u8 *header_start,
+ unsigned long header_size)
+{
+ const u8 *ptr = header_start + 4;
+ const u8 *end = header_start + header_size;
+
+ table->core.pc = (unsigned long)core_start;
+ table->core.range = core_size;
+ table->init.pc = (unsigned long)init_start;
+ table->init.range = init_size;
+ table->address = table_start;
+ table->size = table_size;
+ /* See if the linker provided table looks valid. */
+ if (header_size <= 4
+ || header_start[0] != 1
+ || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
+ != table_start
+ || !read_pointer(&ptr, end, header_start[2], 0, 0)
+ || !read_pointer(&ptr, end, header_start[3], 0,
+ (unsigned long)header_start)
+ || !read_pointer(&ptr, end, header_start[3], 0,
+ (unsigned long)header_start))
+ header_start = NULL;
+ table->hdrsz = header_size;
+ smp_wmb();
+ table->header = header_start;
+ table->link = NULL;
+ table->name = name;
+}
+
+void __init unwind_init(void)
+{
+ init_unwind_table(&root_table, "kernel",
+ _text, _end - _text,
+ NULL, 0,
+ __start_unwind, __end_unwind - __start_unwind,
+ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
+}
+
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static signed fde_pointer_type(const u32 *cie);
+
+struct eh_frame_hdr_table_entry {
+ unsigned long start, fde;
+};
+
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+ const struct eh_frame_hdr_table_entry *e1 = p1;
+ const struct eh_frame_hdr_table_entry *e2 = p2;
+
+ return (e1->start > e2->start) - (e1->start < e2->start);
+}
+
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+ struct eh_frame_hdr_table_entry *e1 = p1;
+ struct eh_frame_hdr_table_entry *e2 = p2;
+ unsigned long v;
+
+ v = e1->start;
+ e1->start = e2->start;
+ e2->start = v;
+ v = e1->fde;
+ e1->fde = e2->fde;
+ e2->fde = v;
+}
+
+static void __init setup_unwind_table(struct unwind_table *table,
+ void *(*alloc)(unsigned long))
+{
+ const u8 *ptr;
+ unsigned long tableSize = table->size, hdrSize;
+ unsigned n;
+ const u32 *fde;
+ struct {
+ u8 version;
+ u8 eh_frame_ptr_enc;
+ u8 fde_count_enc;
+ u8 table_enc;
+ unsigned long eh_frame_ptr;
+ unsigned int fde_count;
+ struct eh_frame_hdr_table_entry table[];
+ } __attribute__((__packed__)) *header;
+
+ if (table->header)
+ return;
+
+ if (table->hdrsz)
+ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
+ table->name);
+
+ if (tableSize & (sizeof(*fde) - 1))
+ return;
+
+ for (fde = table->address, n = 0;
+ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = cie_for_fde(fde, table);
+ signed ptrType;
+
+ if (cie == ¬_fde)
+ continue;
+ if (cie == NULL
+ || cie == &bad_cie
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ return;
+ ptr = (const u8 *)(fde + 2);
+ if (!read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0))
+ return;
+ ++n;
+ }
+
+ if (tableSize || !n)
+ return;
+
+ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+ + 2 * n * sizeof(unsigned long);
+ dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
+ header = alloc(hdrSize);
+ if (!header)
+ return;
+ header->version = 1;
+ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
+ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
+ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+ % __alignof(typeof(header->fde_count)));
+ header->fde_count = n;
+
+ BUILD_BUG_ON(offsetof(typeof(*header), table)
+ % __alignof(typeof(*header->table)));
+ for (fde = table->address, tableSize = table->size, n = 0;
+ tableSize;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
+
+ if (!fde[1])
+ continue; /* this is a CIE */
+ ptr = (const u8 *)(fde + 2);
+ header->table[n].start = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ fde_pointer_type(cie), 0, 0);
+ header->table[n].fde = (unsigned long)fde;
+ ++n;
+ }
+ WARN_ON(n != header->fde_count);
+
+ sort(header->table,
+ n,
+ sizeof(*header->table),
+ cmp_eh_frame_hdr_table_entries,
+ swap_eh_frame_hdr_table_entries);
+
+ table->hdrsz = hdrSize;
+ smp_wmb();
+ table->header = (const void *)header;
+}
+
+static void *__init balloc(unsigned long sz)
+{
+ return __alloc_bootmem_nopanic(sz,
+ sizeof(unsigned int),
+ __pa(MAX_DMA_ADDRESS));
+}
+
+void __init unwind_setup(void)
+{
+ setup_unwind_table(&root_table, balloc);
+}
+
+#ifdef CONFIG_MODULES
+
+static struct unwind_table *last_table;
+
+/* Must be called with module_mutex held. */
+void *unwind_add_table(struct module *module,
+ const void *table_start,
+ unsigned long table_size)
+{
+ struct unwind_table *table;
+
+ if (table_size <= 0)
+ return NULL;
+
+ table = kmalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ init_unwind_table(table, module->name,
+ module->module_core, module->core_size,
+ module->module_init, module->init_size,
+ table_start, table_size,
+ NULL, 0);
+
+ if (last_table)
+ last_table->link = table;
+ else
+ root_table.link = table;
+ last_table = table;
+
+ return table;
+}
+
+struct unlink_table_info
+{
+ struct unwind_table *table;
+ int init_only;
+};
+
+static int unlink_table(void *arg)
+{
+ struct unlink_table_info *info = arg;
+ struct unwind_table *table = info->table, *prev;
+
+ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
+ ;
+
+ if (prev->link) {
+ if (info->init_only) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ info->table = NULL;
+ } else {
+ prev->link = table->link;
+ if (!prev->link)
+ last_table = prev;
+ }
+ } else
+ info->table = NULL;
+
+ return 0;
+}
+
+/* Must be called with module_mutex held. */
+void unwind_remove_table(void *handle, int init_only)
+{
+ struct unwind_table *table = handle;
+ struct unlink_table_info info;
+
+ if (!table || table == &root_table)
+ return;
+
+ if (init_only && table == last_table) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ return;
+ }
+
+ info.table = table;
+ info.init_only = init_only;
+ stop_machine(unlink_table, &info, NULL);
+
+ if (info.table)
+ kfree(table);
+}
+
+#endif /* CONFIG_MODULES */
+
+static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ uleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (uleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur++ & 0x80))
+ break;
+ }
+ *pcur = cur;
+
+ return value;
+}
+
+static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ sleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (sleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur & 0x80)) {
+ value |= -(*cur++ & 0x40) << shift;
+ break;
+ }
+ }
+ *pcur = cur;
+
+ return value;
+}
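A worked example of the decode loops above, using the classic DWARF spec value: 624485 is encoded as the three bytes 0xe5 0x8e 0x26, i.e. 0x65 | 0x0e << 7 | 0x26 << 14, with the top bit of each byte flagging a continuation. As a sketch:

static void __maybe_unused uleb128_selftest(void)
{
	static const u8 buf[] = { 0xe5, 0x8e, 0x26 };
	const u8 *cur = buf;

	/* 0x65 + (0x0e << 7) + (0x26 << 14) == 624485 */
	BUG_ON(get_uleb128(&cur, buf + sizeof(buf)) != 624485);
}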
+
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
+{
+ const u32 *cie;
+
+ if (!*fde || (*fde & (sizeof(*fde) - 1)))
+ return &bad_cie;
+ if (!fde[1])
+ return ¬_fde; /* this is a CIE */
+ if ((fde[1] & (sizeof(*fde) - 1))
+ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
+ return NULL; /* this is not a valid FDE */
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+ if (*cie <= sizeof(*cie) + 4
+ || *cie >= fde[1] - sizeof(*fde)
+ || (*cie & (sizeof(*cie) - 1))
+ || cie[1])
+ return NULL; /* this is not a (valid) CIE */
+ return cie;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType,
+ unsigned long text_base,
+ unsigned long data_base)
+{
+ unsigned long value = 0;
+ union {
+ const u8 *p8;
+ const u16 *p16u;
+ const s16 *p16s;
+ const u32 *p32u;
+ const s32 *p32s;
+ const unsigned long *pul;
+ } ptr;
+
+ if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
+ dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
+ return 0;
+ }
+ ptr.p8 = *pLoc;
+ switch (ptrType & DW_EH_PE_FORM) {
+ case DW_EH_PE_data2:
+ if (end < (const void *)(ptr.p16u + 1)) {
+ dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ if (ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p16s++);
+ else
+ value = get_unaligned(ptr.p16u++);
+ break;
+ case DW_EH_PE_data4:
+#ifdef CONFIG_64BIT
+ if (end < (const void *)(ptr.p32u + 1)) {
+ dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ if (ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p32s++);
+ else
+ value = get_unaligned(ptr.p32u++);
+ break;
+ case DW_EH_PE_data8:
+ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
+#else
+ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
+#endif
+ case DW_EH_PE_native:
+ if (end < (const void *)(ptr.pul + 1)) {
+ dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ value = get_unaligned(ptr.pul++);
+ break;
+ case DW_EH_PE_leb128:
+ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
+ value = ptrType & DW_EH_PE_signed
+ ? get_sleb128(&ptr.p8, end)
+ : get_uleb128(&ptr.p8, end);
+ if ((const void *)ptr.p8 > end) {
+ dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ break;
+ default:
+ dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
+ ptrType, ptr.p8, end);
+ return 0;
+ }
+ switch (ptrType & DW_EH_PE_ADJUST) {
+ case DW_EH_PE_abs:
+ break;
+ case DW_EH_PE_pcrel:
+ value += (unsigned long)*pLoc;
+ break;
+ case DW_EH_PE_textrel:
+ if (likely(text_base)) {
+ value += text_base;
+ break;
+ }
+ dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
+ ptrType, *pLoc, end);
+ return 0;
+ case DW_EH_PE_datarel:
+ if (likely(data_base)) {
+ value += data_base;
+ break;
+ }
+ dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
+ ptrType, *pLoc, end);
+ return 0;
+ default:
+ dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
+ ptrType, *pLoc, end);
+ return 0;
+ }
+ if ((ptrType & DW_EH_PE_indirect)
+ && probe_kernel_address(value, value)) {
+ dprintk(1, "Cannot read indirect value %lx (%p,%p).",
+ value, *pLoc, end);
+ return 0;
+ }
+ *pLoc = ptr.p8;
+
+ return value;
+}
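To read the switch above with a concrete case: GCC's common encoding byte 0x1b is DW_EH_PE_pcrel | DW_EH_PE_signed | DW_EH_PE_data4, so read_pointer() fetches a signed 32-bit quantity and adds the address it was read from, while 0x00 (DW_EH_PE_abs | DW_EH_PE_native) is a plain unsigned long read verbatim.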
+
+static signed fde_pointer_type(const u32 *cie)
+{
+ const u8 *ptr = (const u8 *)(cie + 2);
+ unsigned version = *ptr;
+
+ if (version != 1)
+ return -1; /* unsupported */
+ if (*++ptr) {
+ const char *aug;
+ const u8 *end = (const u8 *)(cie + 1) + *cie;
+ uleb128_t len;
+
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr != 'z')
+ return -1;
+ /* check if augmentation string is nul-terminated */
+ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
+ return -1;
+ ++ptr; /* skip terminator */
+ get_uleb128(&ptr, end); /* skip code alignment */
+ get_sleb128(&ptr, end); /* skip data alignment */
+ /* skip return address column */
+ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
+ len = get_uleb128(&ptr, end); /* augmentation length */
+ if (ptr + len < ptr || ptr + len > end)
+ return -1;
+ end = ptr + len;
+ while (*++aug) {
+ if (ptr >= end)
+ return -1;
+ switch (*aug) {
+ case 'L':
+ ++ptr;
+ break;
+ case 'P': {
+ signed ptrType = *ptr++;
+
+ if (!read_pointer(&ptr, end, ptrType, 0, 0)
+ || ptr > end)
+ return -1;
+ }
+ break;
+ case 'R':
+ return *ptr;
+ default:
+ return -1;
+ }
+ }
+ }
+ return DW_EH_PE_native|DW_EH_PE_abs;
+}
+
+static int advance_loc(unsigned long delta, struct unwind_state *state)
+{
+ state->loc += delta * state->codeAlign;
+
+ return delta > 0;
+}
+
+static void set_rule(uleb128_t reg,
+ enum item_location where,
+ uleb128_t value,
+ struct unwind_state *state)
+{
+ if (reg < ARRAY_SIZE(state->regs)) {
+ state->regs[reg].where = where;
+ state->regs[reg].value = value;
+ }
+}
+
+static int processCFI(const u8 *start,
+ const u8 *end,
+ unsigned long targetLoc,
+ signed ptrType,
+ struct unwind_state *state)
+{
+ union {
+ const u8 *p8;
+ const u16 *p16;
+ const u32 *p32;
+ } ptr;
+ int result = 1;
+
+ if (start != state->cieStart) {
+ state->loc = state->org;
+ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
+ if (targetLoc == 0 && state->label == NULL)
+ return result;
+ }
+ for (ptr.p8 = start; result && ptr.p8 < end; ) {
+ switch (*ptr.p8 >> 6) {
+ uleb128_t value;
+
+ case 0:
+ switch (*ptr.p8++) {
+ case DW_CFA_nop:
+ break;
+ case DW_CFA_set_loc:
+ state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
+ if (state->loc == 0)
+ result = 0;
+ break;
+ case DW_CFA_advance_loc1:
+ result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
+ break;
+ case DW_CFA_advance_loc2:
+ result = ptr.p8 + 2 <= end
+ && advance_loc(*ptr.p16++, state);
+ break;
+ case DW_CFA_advance_loc4:
+ result = ptr.p8 + 4 <= end
+ && advance_loc(*ptr.p32++, state);
+ break;
+ case DW_CFA_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_offset_extended_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
+ break;
+ case DW_CFA_register:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Register,
+ get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_remember_state:
+ if (ptr.p8 == state->label) {
+ state->label = NULL;
+ return 1;
+ }
+ if (state->stackDepth >= MAX_STACK_DEPTH) {
+ dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ state->stack[state->stackDepth++] = ptr.p8;
+ break;
+ case DW_CFA_restore_state:
+ if (state->stackDepth) {
+ const uleb128_t loc = state->loc;
+ const u8 *label = state->label;
+
+ state->label = state->stack[state->stackDepth - 1];
+ memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
+ memset(state->regs, 0, sizeof(state->regs));
+ state->stackDepth = 0;
+ result = processCFI(start, end, 0, ptrType, state);
+ state->loc = loc;
+ state->label = label;
+ } else {
+ dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
+ return 0;
+ }
+ break;
+ case DW_CFA_def_cfa:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset:
+ state->cfa.offs = get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_def_cfa_sf:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset_sf:
+ state->cfa.offs = get_sleb128(&ptr.p8, end)
+ * state->dataAlign;
+ break;
+ case DW_CFA_def_cfa_register:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ break;
+ /*todo case DW_CFA_def_cfa_expression: */
+ /*todo case DW_CFA_expression: */
+ /*todo case DW_CFA_val_expression: */
+ case DW_CFA_GNU_args_size:
+ get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_GNU_negative_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Memory,
+ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_GNU_window_save:
+ default:
+ dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
+ result = 0;
+ break;
+ }
+ break;
+ case 1:
+ result = advance_loc(*ptr.p8++ & 0x3f, state);
+ break;
+ case 2:
+ value = *ptr.p8++ & 0x3f;
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case 3:
+ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
+ break;
+ }
+ if (ptr.p8 > end) {
+ dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
+ result = 0;
+ }
+ if (result && targetLoc != 0 && targetLoc < state->loc)
+ return 1;
+ }
+
+ if (result && ptr.p8 < end)
+ dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
+
+ return result
+ && ptr.p8 == end
+ && (targetLoc == 0
+ || (/*todo While in theory this should apply, gcc in practice omits
+ everything past the function prolog, and hence the location
+ never reaches the end of the function.
+ targetLoc < state->loc &&*/ state->label == NULL));
+}
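A concrete reading of the dispatch above: the two high bits of the first byte select the primary opcode and the low six bits carry an inline operand. So 0x44 is DW_CFA_advance_loc by 4 code-alignment units (case 1), the pair 0x85 0x02 says register 5 is saved at CFA + 2 * dataAlign (case 2), and a byte with both high bits clear falls through to the extended-opcode switch (case 0).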
+
+/* Unwind to the previous frame. Returns 0 if successful, negative
+ * number in case of an error. */
+int unwind(struct unwind_frame_info *frame)
+{
+#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
+ const u32 *fde = NULL, *cie = NULL;
+ const u8 *ptr = NULL, *end = NULL;
+ unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
+ unsigned long startLoc = 0, endLoc = 0, cfa;
+ unsigned i;
+ signed ptrType = -1;
+ uleb128_t retAddrReg = 0;
+ const struct unwind_table *table;
+ struct unwind_state state;
+
+ if (UNW_PC(frame) == 0)
+ return -EINVAL;
+ if ((table = find_table(pc)) != NULL
+ && !(table->size & (sizeof(*fde) - 1))) {
+ const u8 *hdr = table->header;
+ unsigned long tableSize;
+
+ smp_rmb();
+ if (hdr && hdr[0] == 1) {
+ switch (hdr[3] & DW_EH_PE_FORM) {
+ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
+ case DW_EH_PE_data2: tableSize = 2; break;
+ case DW_EH_PE_data4: tableSize = 4; break;
+ case DW_EH_PE_data8: tableSize = 8; break;
+ default: tableSize = 0; break;
+ }
+ ptr = hdr + 4;
+ end = hdr + table->hdrsz;
+ if (tableSize
+ && read_pointer(&ptr, end, hdr[1], 0, 0)
+ == (unsigned long)table->address
+ && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
+ && i == (end - ptr) / (2 * tableSize)
+ && !((end - ptr) % (2 * tableSize))) {
+ do {
+ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
+
+ startLoc = read_pointer(&cur,
+ cur + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr);
+ if (pc < startLoc)
+ i /= 2;
+ else {
+ ptr = cur - tableSize;
+ i = (i + 1) / 2;
+ }
+ } while (startLoc && i > 1);
+ if (i == 1
+ && (startLoc = read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr)) != 0
+ && pc >= startLoc)
+ fde = (void *)read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3], 0,
+ (unsigned long)hdr);
+ }
+ }
+ if (hdr && !fde)
+ dprintk(3, "Binary lookup for %lx failed.", pc);
+
+ if (fde != NULL) {
+ cie = cie_for_fde(fde, table);
+ ptr = (const u8 *)(fde + 2);
+ if (cie != NULL
+ && cie != &bad_cie
+ && cie != ¬_fde
+ && (ptrType = fde_pointer_type(cie)) >= 0
+ && read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0) == startLoc) {
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (pc >= endLoc)
+ fde = NULL;
+ } else
+ fde = NULL;
+ if (!fde)
+ dprintk(1, "Binary lookup result for %lx discarded.", pc);
+ }
+ if (fde == NULL) {
+ for (fde = table->address, tableSize = table->size;
+ cie = NULL, tableSize > sizeof(*fde)
+ && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde,
+ fde += 1 + *fde / sizeof(*fde)) {
+ cie = cie_for_fde(fde, table);
+ if (cie == &bad_cie) {
+ cie = NULL;
+ break;
+ }
+ if (cie == NULL
+ || cie == ¬_fde
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ continue;
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (!startLoc)
+ continue;
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType, 0, 0);
+ if (pc >= startLoc && pc < endLoc)
+ break;
+ }
+ if (!fde)
+ dprintk(3, "Linear lookup for %lx failed.", pc);
+ }
+ }
+ if (cie != NULL) {
+ memset(&state, 0, sizeof(state));
+ state.cieEnd = ptr; /* keep here temporarily */
+ ptr = (const u8 *)(cie + 2);
+ end = (const u8 *)(cie + 1) + *cie;
+ frame->call_frame = 1;
+ if ((state.version = *ptr) != 1)
+ cie = NULL; /* unsupported version */
+ else if (*++ptr) {
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr == 'z') {
+ while (++ptr < end && *ptr) {
+ switch (*ptr) {
+ /* check for ignorable (or already handled)
+ * nul-terminated augmentation string */
+ case 'L':
+ case 'P':
+ case 'R':
+ continue;
+ case 'S':
+ frame->call_frame = 0;
+ continue;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ if (ptr >= end || *ptr)
+ cie = NULL;
+ }
+ if (!cie)
+ dprintk(1, "CIE unusable (%p,%p).", ptr, end);
+ ++ptr;
+ }
+ if (cie != NULL) {
+ /* get code alignment factor */
+ state.codeAlign = get_uleb128(&ptr, end);
+ /* get data alignment factor */
+ state.dataAlign = get_sleb128(&ptr, end);
+ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
+ cie = NULL;
+ else if (UNW_PC(frame) % state.codeAlign
+ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+ dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
+ UNW_PC(frame), UNW_SP(frame));
+ return -EPERM;
+ } else {
+ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ ptr += augSize;
+ }
+ if (ptr > end
+ || retAddrReg >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(retAddrReg)
+ || reg_info[retAddrReg].width != sizeof(unsigned long))
+ cie = NULL;
+ }
+ if (!cie)
+ dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
+ }
+ if (cie != NULL) {
+ state.cieStart = ptr;
+ ptr = state.cieEnd;
+ state.cieEnd = end;
+ end = (const u8 *)(fde + 1) + *fde;
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ if ((ptr += augSize) > end)
+ fde = NULL;
+ }
+ if (!fde)
+ dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
+ }
- if (cie == NULL || fde == NULL) {
- unsigned long top, bottom;
++ if (cie == NULL || fde == NULL) {
+#ifdef CONFIG_FRAME_POINTER
++ unsigned long top = TSK_STACK_TOP(frame->task);
++ unsigned long bottom = STACK_BOTTOM(frame->task);
++ unsigned long fp = UNW_FP(frame);
++ unsigned long sp = UNW_SP(frame);
++ unsigned long link;
+
- if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
++ if ((sp | fp) % sizeof(unsigned long))
+ return -EPERM;
- top = TSK_STACK_TOP(frame->task);
- bottom = STACK_BOTTOM(frame->task);
++
+# if FRAME_RETADDR_OFFSET < 0
- if (UNW_SP(frame) < top
- && UNW_FP(frame) <= UNW_SP(frame)
- && bottom < UNW_FP(frame)
++ if (!(sp < top && fp <= sp && bottom < fp))
+# else
- if (UNW_SP(frame) > top
- && UNW_FP(frame) >= UNW_SP(frame)
- && bottom > UNW_FP(frame)
++ if (!(sp > top && fp >= sp && bottom > fp))
+# endif
- && !((UNW_SP(frame) | UNW_FP(frame))
- & (sizeof(unsigned long) - 1))) {
- unsigned long link;
++ return -ENXIO;
++
++ if (probe_kernel_address(fp + FRAME_LINK_OFFSET, link))
++ return -ENXIO;
+
- if (!probe_kernel_address(UNW_FP(frame) + FRAME_LINK_OFFSET,
- link)
+# if FRAME_RETADDR_OFFSET < 0
- && link > bottom && link < UNW_FP(frame)
++ if (!(link > bottom && link < fp))
+# else
- && link > UNW_FP(frame) && link < bottom
++ if (!(link > fp && link < bottom))
+# endif
- && !(link & (sizeof(link) - 1))
- && !probe_kernel_address(UNW_FP(frame) + FRAME_RETADDR_OFFSET,
- UNW_PC(frame))) {
- UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
++ return -ENXIO;
++
++ if (link & (sizeof(unsigned long) - 1))
++ return -ENXIO;
++
++ fp += FRAME_RETADDR_OFFSET;
++ if (probe_kernel_address(fp, UNW_PC(frame)))
++ return -ENXIO;
++
++ /* Ok, we can use it */
+# if FRAME_RETADDR_OFFSET < 0
- -
++ UNW_SP(frame) = fp - sizeof(UNW_PC(frame));
+# else
- +
++ UNW_SP(frame) = fp + sizeof(UNW_PC(frame));
+# endif
- sizeof(UNW_PC(frame));
- UNW_FP(frame) = link;
- return 0;
- }
- }
- #endif
- return -ENXIO;
++ UNW_FP(frame) = link;
++ return 0;
++#endif
++ return -ENXIO;
+ }
+ state.org = startLoc;
+ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
+ /* process instructions */
+ if (!processCFI(ptr, end, pc, ptrType, &state)
+ || state.loc > endLoc
+ || state.regs[retAddrReg].where == Nowhere
+ || state.cfa.reg >= ARRAY_SIZE(reg_info)
+ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+ || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
+ || state.cfa.offs % sizeof(unsigned long)) {
+ dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
+ return -EIO;
+ }
+ /* update frame */
+#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
+ if (frame->call_frame
+ && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
+ frame->call_frame = 0;
+#endif
+ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
+ startLoc = min((unsigned long)UNW_SP(frame), cfa);
+ endLoc = max((unsigned long)UNW_SP(frame), cfa);
+ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
+ startLoc = min(STACK_LIMIT(cfa), cfa);
+ endLoc = max(STACK_LIMIT(cfa), cfa);
+ }
+#ifndef CONFIG_64BIT
+# define CASES CASE(8); CASE(16); CASE(32)
+#else
+# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
+#endif
+ pc = UNW_PC(frame);
+ sp = UNW_SP(frame);
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i)) {
+ if (state.regs[i].where == Nowhere)
+ continue;
+ dprintk(1, "Cannot restore register %u (%d).",
+ i, state.regs[i].where);
+ return -EIO;
+ }
+ switch (state.regs[i].where) {
+ default:
+ break;
+ case Register:
+ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(state.regs[i].value)
+ || reg_info[i].width > reg_info[state.regs[i].value].width) {
+ dprintk(1, "Cannot restore register %u from register %lu.",
+ i, state.regs[i].value);
+ return -EIO;
+ }
+ switch (reg_info[state.regs[i].value].width) {
+#define CASE(n) \
+ case sizeof(u##n): \
+ state.regs[i].value = FRAME_REG(state.regs[i].value, \
+ const u##n); \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported register size %u (%lu).",
+ reg_info[state.regs[i].value].width,
+ state.regs[i].value);
+ return -EIO;
+ }
+ break;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i))
+ continue;
+ switch (state.regs[i].where) {
+ case Nowhere:
+ if (reg_info[i].width != sizeof(UNW_SP(frame))
+ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
+ != &UNW_SP(frame))
+ continue;
+ UNW_SP(frame) = cfa;
+ break;
+ case Register:
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ FRAME_REG(i, u##n) = state.regs[i].value; \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported register size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ break;
+ case Value:
+ if (reg_info[i].width != sizeof(unsigned long)) {
+ dprintk(1, "Unsupported value size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
+ * state.dataAlign;
+ break;
+ case Memory: {
+ unsigned long addr = cfa + state.regs[i].value
+ * state.dataAlign;
+
+ if ((state.regs[i].value * state.dataAlign)
+ % sizeof(unsigned long)
+ || addr < startLoc
+ || addr + sizeof(unsigned long) < addr
+ || addr + sizeof(unsigned long) > endLoc) {
+ dprintk(1, "Bad memory location %lx (%lx).",
+ addr, state.regs[i].value);
+ return -EIO;
+ }
+ switch (reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ if (probe_kernel_address(addr, \
+ FRAME_REG(i, u##n))) \
+ return -EFAULT; \
+ break
+ CASES;
+#undef CASE
+ default:
+ dprintk(1, "Unsupported memory size %u (%u).",
+ reg_info[i].width, i);
+ return -EIO;
+ }
+ }
+ break;
+ }
+ }
+
+ if (UNW_PC(frame) % state.codeAlign
+ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+ dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
+ UNW_PC(frame), UNW_SP(frame));
+ return -EIO;
+ }
+ if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
+ dprintk(1, "No progress (%lx,%lx).", pc, sp);
+ return -EIO;
+ }
+
+ return 0;
+#undef CASES
+#undef FRAME_REG
+}
+EXPORT_SYMBOL_GPL(unwind);
+
+int unwind_init_frame_info(struct unwind_frame_info *info,
+ struct task_struct *tsk,
+ /*const*/ struct pt_regs *regs)
+{
+ info->task = tsk;
+ info->call_frame = 0;
+ arch_unw_init_frame_info(info, regs);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_init_frame_info);
+
+/*
+ * Prepare to unwind a blocked task.
+ */
+int unwind_init_blocked(struct unwind_frame_info *info,
+ struct task_struct *tsk)
+{
+ info->task = tsk;
+ info->call_frame = 0;
+ arch_unw_init_blocked(info);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_init_blocked);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+int unwind_init_running(struct unwind_frame_info *info,
- asmlinkage int (*callback)(struct unwind_frame_info *,
- void *arg),
- void *arg)
++ asmlinkage unwind_callback_fn callback,
++ const struct stacktrace_ops *ops, void *data)
+{
+ info->task = current;
+ info->call_frame = 0;
+
- return arch_unwind_init_running(info, callback, arg);
++ return arch_unwind_init_running(info, callback, ops, data);
+}
+EXPORT_SYMBOL_GPL(unwind_init_running);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs). Returns 0 if successful, negative number in case of
+ * error.
+ */
+int unwind_to_user(struct unwind_frame_info *info)
+{
+ while (!arch_unw_user_mode(info)) {
+ int err = unwind(info);
+
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_to_user);
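A usage sketch of the API exported above, roughly what an arch backtrace path would do; only names from this file plus the arch's UNW_PC() accessor are assumed:

static void __maybe_unused dump_one_task(struct task_struct *tsk)
{
	struct unwind_frame_info info;

	if (unwind_init_blocked(&info, tsk))
		return;
	while (!arch_unw_user_mode(&info)) {
		if (unwind(&info) < 0)
			break;		/* fall back to a raw stack dump */
		printk(KERN_DEBUG " [<%lx>] %pS\n",
		       UNW_PC(&info), (void *)UNW_PC(&info));
	}
}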
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && \
- (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || \
- AVR32 || SUPERH || BLACKFIN || MN10300)
- default y if DEBUG_INFO && UML
+ (CRIS || M68K || M68KNOMMU || FRV || UML || S390 || \
+ AVR32 || SUPERH || BLACKFIN || MN10300) || \
+ ARCH_WANT_FRAME_POINTERS
+ default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
help
- If you say Y here the resulting kernel image will be slightly larger
- and slower, but it might give very useful debugging information on
- some architectures or if you use external debuggers.
- If you don't debug the kernel, you can say N.
+ If you say Y here the resulting kernel image will be slightly
+ larger and slower, but it gives very useful debugging information
+ in case of kernel bugs. (precise oopses/stacktraces/warnings)
+config UNWIND_INFO
+ bool "Compile the kernel with frame unwind information"
+ depends on !IA64 && !PARISC && !ARM
+ depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
+ help
+ If you say Y here the resulting kernel image will be slightly larger
+ but not slower, and it will give very useful debugging information.
+ If you don't debug the kernel, you can say N, but we may not be able
+ to solve problems without frame unwind information or frame pointers.
+
+config STACK_UNWIND
+ bool "Stack unwind support"
+ depends on UNWIND_INFO
+ depends on X86
+ help
+ This enables more precise stack traces, omitting all unrelated
+ occurrences of pointers into kernel code from the dump.
+
config BOOT_PRINTK_DELAY
bool "Delay each boot printk message by N milliseconds"
depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
#include <linux/bug.h>
#include <linux/sched.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#endif
+
extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
+ static inline unsigned long bug_addr(const struct bug_entry *bug)
+ {
+ #ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+ return bug->bug_addr;
+ #else
+ return (unsigned long)bug + bug->bug_addr_disp;
+ #endif
+ }
+
#ifdef CONFIG_MODULES
static LIST_HEAD(module_bug_list);
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o pdflush.o \
- readahead.o swap.o truncate.o vmscan.o \
+ readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o mm_init.o $(mmu-y)
+ page_isolation.o mm_init.o reserve.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
+ #include <linux/mm_inline.h> /* for page_is_file_cache() */
#include "internal.h"
+#include <trace/filemap.h>
/*
* FIXME: remove all knowledge of the buffer layer from the core VM
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
- if (ret == 0)
- lru_cache_add(page);
+ int ret;
+
+ /*
+ * Splice_read and readahead add shmem/tmpfs pages into the page cache
+ * before shmem_readpage has a chance to mark them as SwapBacked: they
+ * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+ * (called in add_to_page_cache) needs to know where they're going too.
+ */
+ if (mapping_cap_swap_backed(mapping))
+ SetPageSwapBacked(page);
+
+ ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+ if (ret == 0) {
+ if (page_is_file_cache(page))
+ lru_cache_add_file(page);
+ else
+ lru_cache_add_active_anon(page);
+ }
return ret;
}
+EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
}
EXPORT_SYMBOL(try_to_release_page);
++
++DEFINE_TRACE(wait_on_page_start);
++DEFINE_TRACE(wait_on_page_end);
#endif /* CONFIG_SYSCTL */
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+/* Like hugetlb_report_meminfo() but using kdb_printf() */
+void
+kdb_hugetlb_report_meminfo(void)
+{
+ struct hstate *h = &default_hstate;
+ kdb_printf(
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %5lu kB\n",
+ h->nr_huge_pages,
+ h->free_huge_pages,
+ h->resv_huge_pages,
+ h->surplus_huge_pages,
+ 1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
+}
+#endif /* CONFIG_KDB */
+
- int hugetlb_report_meminfo(char *buf)
+ void hugetlb_report_meminfo(struct seq_file *m)
{
struct hstate *h = &default_hstate;
- return sprintf(buf,
- "HugePages_Total: %5lu\n"
- "HugePages_Free: %5lu\n"
- "HugePages_Rsvd: %5lu\n"
- "HugePages_Surp: %5lu\n"
- "Hugepagesize: %5lu kB\n",
+ seq_printf(m,
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %8lu kB\n",
h->nr_huge_pages,
h->free_huge_pages,
h->resv_huge_pages,
hugetlb_put_quota(inode->i_mapping, (chg - freed));
hugetlb_acct_memory(h, -(chg - freed));
}
++
++DEFINE_TRACE(hugetlb_page_release);
++DEFINE_TRACE(hugetlb_page_grab);
++DEFINE_TRACE(hugetlb_buddy_pgalloc);
++DEFINE_TRACE(hugetlb_page_alloc);
++DEFINE_TRACE(hugetlb_page_free);
++DEFINE_TRACE(hugetlb_pages_reserve);
++DEFINE_TRACE(hugetlb_pages_unreserve);
.populate = mem_cgroup_populate,
.attach = mem_cgroup_move_task,
.early_init = 0,
+ .disabled = 1,
};
+
+ #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+
+ static int __init disable_swap_account(char *s)
+ {
+ really_do_swap_account = 0;
+ return 1;
+ }
+ __setup("noswapaccount", disable_swap_account);
+ #endif
#include <linux/writeback.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
+ #include <linux/kallsyms.h>
+ #include <linux/swapops.h>
+ #include <linux/elf.h>
++#include <trace/memory.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
- #include <linux/swapops.h>
- #include <linux/elf.h>
- #include <trace/memory.h>
-
#include "internal.h"
+#include <trace/swap.h>
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ trace_swap_in(page, entry);
}
- if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
- ret = VM_FAULT_OOM;
- goto out;
- }
-
mark_page_accessed(page);
+
lock_page(page);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
}
up_read(¤t->mm->mmap_sem);
}
+
+ #ifdef CONFIG_PROVE_LOCKING
+ void might_fault(void)
+ {
+ /*
+ * Some code (nfs/sunrpc) uses socket ops on kernel memory while
+ * holding the mmap_sem, this is safe because kernel memory doesn't
+ * get paged out, therefore we'll never actually fault, and the
+ * below annotations will generate false positives.
+ */
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return;
+
+ might_sleep();
+ /*
+ * it would be nicer only to annotate paths which are not under
+ * pagefault_disable, however that requires a larger audit and
+ * providing helpers like get_user_atomic.
+ */
+ if (!in_atomic() && current->mm)
+ might_lock_read(&current->mm->mmap_sem);
+ }
+ EXPORT_SYMBOL(might_fault);
+ #endif
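For context, here is a sketch of how a uaccess helper would use this annotation (a hypothetical caller, not part of this patch); might_fault() sits before the copy so lockdep records the potential mmap_sem acquisition even when no fault actually occurs:

    /* Sketch: annotating a user-copy path (illustrative helper name). */
    static inline unsigned long
    copy_to_user_sketch(void __user *to, const void *from, unsigned long n)
    {
    	might_fault();	/* may sleep and take current->mm->mmap_sem */
    	return __copy_to_user(to, from, n);
    }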
++
++DEFINE_TRACE(swap_in);
++DEFINE_TRACE(memory_handle_fault_entry);
++DEFINE_TRACE(memory_handle_fault_exit);
return 0;
}
+EXPORT_SYMBOL(migrate_prep);
- static inline void move_to_lru(struct page *page)
- {
- if (PageActive(page)) {
- /*
- * lru_cache_add_active checks that
- * the PG_active bit is off.
- */
- ClearPageActive(page);
- lru_cache_add_active(page);
- } else {
- lru_cache_add(page);
- }
- put_page(page);
- }
-
/*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto the unevictable list.
*
* returns the number of pages put back.
*/
* restored.
*/
list_del(&page->lru);
+ if (PageMemError(page)) {
+ if (rc == 0)
+ /*
+ * A page with a memory error that has
+ * been migrated will not be moved to
+ * the LRU.
+ */
+ goto move_newpage;
+ else
+ /*
+ * The page failed to migrate and will not
+ * be added to the bad page list. Clearing
+ * the error bit will allow another attempt
+ * to migrate if it gets another correctable
+ * error.
+ */
+ ClearPageMemError(page);
+ }
+
- move_to_lru(page);
+ putback_lru_page(page);
}
move_newpage:
if (!error)
vma->vm_end = address;
}
- out_unlock: __maybe_unused
++out_unlock:
anon_vma_unlock(vma);
return error;
}
/*
* Start background writeback (via pdflush) at this percentage
*/
-int dirty_background_ratio = 5;
+int dirty_background_ratio = 10;
/*
+ * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+ * dirty_background_ratio * the amount of dirtyable memory
+ */
+ unsigned long dirty_background_bytes;
+
+ /*
* free highmem will not be subtracted from the total free memory
* for calculating free ratios if vm_highmem_is_dirtyable is true
*/
/*
* The generator of dirty data starts writeback at this percentage
*/
-int vm_dirty_ratio = 10;
+int vm_dirty_ratio = 40;
/*
+ * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
+ * vm_dirty_ratio * the amount of dirtyable memory
+ */
+ unsigned long vm_dirty_bytes;
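As a sketch of the semantics described above (inferred from these comments; available_memory stands in for the dirtyable-memory calculation and is illustrative): a non-zero bytes value takes precedence, otherwise the ratio applies.

    /* Sketch: deriving the effective thresholds in pages (illustrative). */
    unsigned long background_thresh, dirty_thresh;

    if (dirty_background_bytes)
    	background_thresh = dirty_background_bytes / PAGE_SIZE;
    else
    	background_thresh = (dirty_background_ratio * available_memory) / 100;

    if (vm_dirty_bytes)
    	dirty_thresh = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
    else
    	dirty_thresh = (vm_dirty_ratio * available_memory) / 100;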
+
+ /*
* The interval between `kupdate'-style writebacks, in jiffies
*/
int dirty_writeback_interval = 5 * HZ;
{
unsigned long flags;
int i;
- int reserved = 0;
+ int bad = 0;
- #ifdef CONFIG_XEN
- if (PageForeign(page)) {
- PageForeignDestructor(page, order);
- return;
- }
- #endif
+ trace_page_free(page, order);
+
for (i = 0 ; i < (1 << order) ; ++i)
- reserved += free_pages_check(page + i);
- if (reserved)
+ bad += free_pages_check(page + i);
+ if (bad)
return;
if (!PageHighMem(page)) {
struct per_cpu_pages *pcp;
unsigned long flags;
- #ifdef CONFIG_XEN
- if (PageForeign(page)) {
- PageForeignDestructor(page, 0);
- return;
- }
- #endif
+ trace_page_free(page, 0);
+
if (PageAnon(page))
page->mapping = NULL;
if (free_pages_check(page))
"\n",
zone->name,
K(zone_page_state(zone, NR_FREE_PAGES)),
- K(zone->pages_min),
- K(zone->pages_low),
- K(zone->pages_high),
+ K(zone->pages_emerg + zone->pages_min),
+ K(zone->pages_emerg + zone->pages_low),
+ K(zone->pages_emerg + zone->pages_high),
- K(zone_page_state(zone, NR_ACTIVE)),
- K(zone_page_state(zone, NR_INACTIVE)),
+ K(zone_page_state(zone, NR_ACTIVE_ANON)),
+ K(zone_page_state(zone, NR_INACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ACTIVE_FILE)),
+ K(zone_page_state(zone, NR_INACTIVE_FILE)),
+ #ifdef CONFIG_UNEVICTABLE_LRU
+ K(zone_page_state(zone, NR_UNEVICTABLE)),
+ #endif
K(zone->present_pages),
zone->pages_scanned,
(zone_is_all_unreclaimable(zone) ? "yes" : "no")
calculate_totalreserve_pages();
}
+ /**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total target max
+ * memory ratio inactive anon
+ * -------------------------------------
+ * 10MB 1 5MB
+ * 100MB 1 50MB
+ * 1GB 3 250MB
+ * 10GB 10 0.9GB
+ * 100GB 31 3GB
+ * 1TB 101 10GB
+ * 10TB 320 32GB
+ */
+ static void setup_per_zone_inactive_ratio(void)
+ {
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ unsigned int gb, ratio;
+
+ /* Zone size in gigabytes */
+ gb = zone->present_pages >> (30 - PAGE_SHIFT);
+ ratio = int_sqrt(10 * gb);
+ if (!ratio)
+ ratio = 1;
+
+ zone->inactive_ratio = ratio;
+ }
+ }
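Checking one row of the table above against the code: for a 1TB zone, gb = 1024, so ratio = int_sqrt(10 * 1024) = int_sqrt(10240) = 101, and the pageout code then aims to keep roughly 1/(ratio + 1) of the anonymous pages on the inactive list, about 10GB, matching the table.

    /* Sketch: reproducing the 1TB row of the table above. */
    unsigned int gb = 1024;				/* 1TB zone */
    unsigned int ratio = int_sqrt(10 * gb);		/* int_sqrt(10240) == 101 */
    /* target max inactive anon ~= zone size / (ratio + 1) ~= 10GB */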
+
+void setup_per_zone_pages_min(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&min_free_lock, flags);
+ __setup_per_zone_pages_min();
+ spin_unlock_irqrestore(&min_free_lock, flags);
+}
+
+static void __adjust_memalloc_reserve(int pages)
+{
+ var_free_kbytes += pages << (PAGE_SHIFT - 10);
+ BUG_ON(var_free_kbytes < 0);
+ setup_per_zone_pages_min();
+}
+
+static int test_reserve_limits(void)
+{
+ struct zone *zone;
+ int node;
+
+ for_each_zone(zone)
+ wakeup_kswapd(zone, 0);
+
+ for_each_online_node(node) {
+ struct page *page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ return -ENOMEM;
+
+ __free_page(page);
+ }
+
+ return 0;
+}
+
+/**
+ * adjust_memalloc_reserve - adjust the memalloc reserve
+ * @pages: number of pages to add
+ *
+ * It adds a number of pages to the memalloc reserve; if
+ * the number is positive it kicks reclaim into action to
+ * satisfy the higher watermarks.
+ *
+ * Returns -ENOMEM when it fails to satisfy the watermarks.
+ */
+int adjust_memalloc_reserve(int pages)
+{
+ int err = 0;
+
+ mutex_lock(&var_free_mutex);
+ __adjust_memalloc_reserve(pages);
+ if (pages > 0) {
+ err = test_reserve_limits();
+ if (err) {
+ __adjust_memalloc_reserve(-pages);
+ goto unlock;
+ }
+ }
+ printk(KERN_DEBUG "Emergency reserve: %d\n", var_free_kbytes);
+
+unlock:
+ mutex_unlock(&var_free_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(adjust_memalloc_reserve);
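A minimal usage sketch (hypothetical caller): grow the reserve before depending on it and shrink it again on teardown; only the growing direction can fail.

    int err;

    err = adjust_memalloc_reserve(32);	/* +32 pages: may kick reclaim, may fail */
    if (err)
    	return err;
    /* ... rely on the enlarged emergency reserve ... */
    adjust_memalloc_reserve(-32);	/* shrinking cannot fail */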
+
/*
* Initialise min_free_kbytes.
*
min_free_kbytes = 128;
if (min_free_kbytes > 65536)
min_free_kbytes = 65536;
- setup_per_zone_pages_min();
+ __setup_per_zone_pages_min();
setup_per_zone_lowmem_reserve();
+ setup_per_zone_inactive_ratio();
return 0;
}
module_init(init_per_zone_pages_min)
spin_unlock_irqrestore(&zone->lock, flags);
}
#endif
++
++DEFINE_TRACE(page_alloc);
++DEFINE_TRACE(page_free);
{
struct bio *bio;
int ret = 0, rw = WRITE;
+ struct swap_info_struct *sis = page_swap_info(page);
- if (remove_exclusive_swap_page(page)) {
+ if (try_to_free_swap(page)) {
unlock_page(page);
goto out;
}
{
struct bio *bio;
int ret = 0;
+ struct swap_info_struct *sis = page_swap_info(page);
- BUG_ON(!PageLocked(page));
- BUG_ON(PageUptodate(page));
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageUptodate(page));
+
+ if (sis->flags & SWP_FILE) {
+ struct file *swap_file = sis->swap_file;
+ struct address_space *mapping = swap_file->f_mapping;
+
+ ret = mapping->a_ops->swap_in(swap_file, page);
+ if (!ret)
+ count_vm_event(PSWPIN);
+ return ret;
+ }
+
bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
end_swap_bio_read);
if (bio == NULL) {
out:
return ret;
}
++
++DEFINE_TRACE(swap_out);
--- /dev/null
+/*
+ * Memory reserve management.
+ *
+ * Copyright (C) 2007-2008, Red Hat, Inc.,
+ * Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * Manage a set of memory reserves.
+ *
+ * A memory reserve is a reserve for a specified number of objects of a
+ * specified size. Since memory is managed in pages, this reserve demand is then
+ * translated into a page unit.
+ *
+ * So each reserve has a specified object limit, an object usage count and a
+ * number of pages required to back these objects.
+ *
+ * Usage is charged against a reserve, if the charge fails, the resource must
+ * not be allocated/used.
+ *
+ * The reserves are managed in a tree, and the resource demands (pages and
+ * limit) are propagated up the tree. Obviously the object limit will be
+ * meaningless as soon as units start mixing, but the required page reserve
+ * (being of one unit) is still valid at the root.
+ *
+ * It is the page demand of the root node that is used to set the global
+ * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
+ *
+ * As long as a subtree has the same usage unit, an aggregate node can be used
+ * to charge against instead of the leaf nodes. However, be consistent about
+ * which node is charged; resource usage is not propagated up the tree (for
+ * performance reasons).
+ */
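To make that lifecycle concrete, a hedged sketch using only the functions defined below (the reserve name, sizes, and error handling are illustrative):

    static struct mem_reserve my_reserve;

    /* Create the reserve, hook it into the tree, back it with 16 pages. */
    mem_reserve_init(&my_reserve, "my subsystem reserve", &mem_reserve_root);
    if (mem_reserve_pages_set(&my_reserve, 16))
    	return -ENOMEM;	/* could not back the reserve with real pages */

    /* Per allocation: charge first, use only if entitled, uncharge on free. */
    if (mem_reserve_pages_charge(&my_reserve, 1)) {
    	/* ... allocate and use one emergency page ... */
    	mem_reserve_pages_charge(&my_reserve, -1);
    }

    mem_reserve_disconnect(&my_reserve);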
+
+#include <linux/reserve.h>
+#include <linux/mutex.h>
+#include <linux/mmzone.h>
+#include <linux/log2.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static DEFINE_MUTEX(mem_reserve_mutex);
+
+/**
+ * @mem_reserve_root - the global reserve root
+ *
+ * The global reserve is empty, and has no limit unit, it merely
+ * acts as an aggregation point for reserves and an interface to
+ * adjust_memalloc_reserve().
+ */
+struct mem_reserve mem_reserve_root = {
+ .children = LIST_HEAD_INIT(mem_reserve_root.children),
+ .siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
+ .name = "total reserve",
+ .lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
+ .waitqueue = __WAIT_QUEUE_HEAD_INITIALIZER(mem_reserve_root.waitqueue),
+};
+EXPORT_SYMBOL_GPL(mem_reserve_root);
+
+/**
+ * mem_reserve_init() - initialize a memory reserve object
+ * @res - the new reserve object
+ * @name - a name for this reserve
+ * @parent - when non NULL, the parent to connect to.
+ */
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+ struct mem_reserve *parent)
+{
+ memset(res, 0, sizeof(*res));
+ INIT_LIST_HEAD(&res->children);
+ INIT_LIST_HEAD(&res->siblings);
+ res->name = name;
+ spin_lock_init(&res->lock);
+ init_waitqueue_head(&res->waitqueue);
+
+ if (parent)
+ mem_reserve_connect(res, parent);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_init);
+
+/*
+ * propagate the pages and limit changes up the (sub)tree.
+ */
+static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
+{
+ unsigned long flags;
+
+ for ( ; res; res = res->parent) {
+ res->pages += pages;
+
+ if (limit) {
+ spin_lock_irqsave(&res->lock, flags);
+ res->limit += limit;
+ spin_unlock_irqrestore(&res->lock, flags);
+ }
+ }
+}
+
+/**
+ * __mem_reserve_add() - primitive to change the size of a reserve
+ * @res - reserve to change
+ * @pages - page delta
+ * @limit - usage limit delta
+ *
+ * Returns -ENOMEM when a size increase is not possible at the moment.
+ */
+static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
+{
+ int ret = 0;
+ long reserve;
+
+ /*
+ * This looks more complex than it needs to be, because we handle
+ * the case where @res isn't actually connected to mem_reserve_root.
+ *
+ * So, by propagating the new pages up the (sub)tree and computing
+ * the difference in mem_reserve_root.pages we find if this action
+ * affects the actual reserve.
+ *
+ * The (partial) propagation also makes that mem_reserve_connect()
+ * needs only look at the direct child, since each disconnected
+ * sub-tree is fully up-to-date.
+ */
+ reserve = mem_reserve_root.pages;
+ __calc_reserve(res, pages, 0);
+ reserve = mem_reserve_root.pages - reserve;
+
+ if (reserve) {
+ ret = adjust_memalloc_reserve(reserve);
+ if (ret)
+ __calc_reserve(res, -pages, 0);
+ }
+
+ /*
+ * Delay updating the limits until we've acquired the resources to
+ * back it.
+ */
+ if (!ret)
+ __calc_reserve(res, 0, limit);
+
+ return ret;
+}
+
+/**
+ * __mem_reserve_charge() - primitive to charge object usage of a reserve
+ * @res - reserve to charge
+ * @charge - size of the charge
+ *
+ * Returns non-zero on success, zero on failure.
+ */
+static
+int __mem_reserve_charge(struct mem_reserve *res, long charge)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&res->lock, flags);
+ if (charge < 0 || res->usage + charge < res->limit) {
+ res->usage += charge;
+ if (unlikely(res->usage < 0))
+ res->usage = 0;
+ ret = 1;
+ }
+ if (charge < 0)
+ wake_up_all(&res->waitqueue);
+ spin_unlock_irqrestore(&res->lock, flags);
+
+ return ret;
+}
+
+/**
+ * mem_reserve_connect() - connect a reserve to another in a child-parent relation
+ * @new_child - the reserve node to connect (child)
+ * @node - the reserve node to connect to (parent)
+ *
+ * Connecting a node results in an increase of the reserve by the number of
+ * pages in @new_child->pages if @node has a connection to mem_reserve_root.
+ *
+ * Returns -ENOMEM when the new connection would increase the reserve (parent
+ * is connected to mem_reserve_root) and there is no memory to do so.
+ *
+ * On error, the child is _NOT_ connected.
+ */
+int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
+{
+ int ret;
+
+ WARN_ON(!new_child->name);
+
+ mutex_lock(&mem_reserve_mutex);
+ if (new_child->parent) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ new_child->parent = node;
+ list_add(&new_child->siblings, &node->children);
+ ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
+ if (ret) {
+ new_child->parent = NULL;
+ list_del_init(&new_child->siblings);
+ }
+unlock:
+ mutex_unlock(&mem_reserve_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_connect);
+
+/**
+ * mem_reserve_disconnect() - sever a node's connection to the reserve tree
+ * @node - the node to disconnect
+ *
+ * Disconnecting a node results in a reduction of the reserve by @node->pages
+ * if @node had a connection to mem_reserve_root.
+ */
+void mem_reserve_disconnect(struct mem_reserve *node)
+{
+ int ret;
+
+ BUG_ON(!node->parent);
+
+ mutex_lock(&mem_reserve_mutex);
+ if (!node->parent) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+ ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
+ if (!ret) {
+ node->parent = NULL;
+ list_del_init(&node->siblings);
+ }
+unlock:
+ mutex_unlock(&mem_reserve_mutex);
+
+ /*
+ * We cannot fail to shrink the reserves, can we?
+ */
+ WARN_ON(ret);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Simple output of the reserve tree in: /proc/reserve_info
+ * Example:
+ *
+ * localhost ~ # cat /proc/reserve_info
+ * 1:0 "total reserve" 6232K 0/278581
+ * 2:1 "total network reserve" 6232K 0/278581
+ * 3:2 "network TX reserve" 212K 0/53
+ * 4:3 "protocol TX pages" 212K 0/53
+ * 5:2 "network RX reserve" 6020K 0/278528
+ * 6:5 "IPv4 route cache" 5508K 0/16384
+ * 7:5 "SKB data reserve" 512K 0/262144
+ * 8:7 "IPv4 fragment cache" 512K 0/262144
+ */
+
+static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
+ unsigned int parent, unsigned int *id)
+{
+ struct mem_reserve *child;
+ unsigned int my_id = ++*id;
+
+ seq_printf(m, "%d:%d \"%s\" %ldK %ld/%ld\n",
+ my_id, parent, res->name,
+ res->pages << (PAGE_SHIFT - 10),
+ res->usage, res->limit);
+
+ list_for_each_entry(child, &res->children, siblings)
+ mem_reserve_show_item(m, child, my_id, id);
+}
+
+static int mem_reserve_show(struct seq_file *m, void *v)
+{
+ unsigned int ident = 0;
+
+ mutex_lock(&mem_reserve_mutex);
+ mem_reserve_show_item(m, &mem_reserve_root, ident, &ident);
+ mutex_unlock(&mem_reserve_mutex);
+
+ return 0;
+}
+
+static int mem_reserve_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mem_reserve_show, NULL);
+}
+
+static const struct file_operations mem_reserve_operations = {
+ .open = mem_reserve_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static __init int mem_reserve_proc_init(void)
+{
+ proc_create("reserve_info", S_IRUSR, NULL, &mem_reserve_operations);
+ return 0;
+}
+
+module_init(mem_reserve_proc_init);
+
+#endif
+
+/*
+ * alloc_page helpers
+ */
+
+/**
+ * mem_reserve_pages_set() - set reserves size in pages
+ * @res - reserve to set
+ * @pages - size in pages to set it to
+ *
+ * Returns -ENOMEM when it fails to set the reserve. On failure the old size
+ * is preserved.
+ */
+int mem_reserve_pages_set(struct mem_reserve *res, long pages)
+{
+ int ret;
+
+ mutex_lock(&mem_reserve_mutex);
+ pages -= res->pages;
+ ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
+ mutex_unlock(&mem_reserve_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
+
+/**
+ * mem_reserve_pages_add() - change the size in a relative way
+ * @res - reserve to change
+ * @pages - number of pages to add (or subtract when negative)
+ *
+ * Similar to mem_reserve_pages_set, except that the argument is relative
+ * instead of absolute.
+ *
+ * Returns -ENOMEM when it fails to increase.
+ */
+int mem_reserve_pages_add(struct mem_reserve *res, long pages)
+{
+ int ret;
+
+ mutex_lock(&mem_reserve_mutex);
+ ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
+ mutex_unlock(&mem_reserve_mutex);
+
+ return ret;
+}
+
+/**
+ * mem_reserve_pages_charge() - charge page usage to a reserve
+ * @res - reserve to charge
+ * @pages - size to charge
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages)
+{
+ return __mem_reserve_charge(res, pages * PAGE_SIZE);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
+
+/*
+ * kmalloc helpers
+ */
+
+/**
+ * mem_reserve_kmalloc_set() - set this reserve to bytes worth of kmalloc
+ * @res - reserve to change
+ * @bytes - size in bytes to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
+{
+ int ret;
+ long pages;
+
+ mutex_lock(&mem_reserve_mutex);
+ pages = kmalloc_estimate_bytes(GFP_ATOMIC, bytes);
+ pages -= res->pages;
+ bytes -= res->limit;
+ ret = __mem_reserve_add(res, pages, bytes);
+ mutex_unlock(&mem_reserve_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
+
+/**
+ * mem_reserve_kmalloc_charge() - charge bytes to a reserve
+ * @res - reserve to charge
+ * @bytes - bytes to charge
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes)
+{
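+	/*
+	 * kmalloc rounds allocations up to the next power of two; round
+	 * the charge the same way so usage matches what the allocator
+	 * actually hands out.
+	 */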
+ if (bytes < 0)
+ bytes = -roundup_pow_of_two(-bytes);
+ else
+ bytes = roundup_pow_of_two(bytes);
+
+ return __mem_reserve_charge(res, bytes);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
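Putting the two kmalloc helpers together (a hypothetical sketch; kmalloc_res is assumed initialised and connected as in the file-header example):

    /* Size the reserve for up to 256KB of outstanding kmalloc objects. */
    mem_reserve_kmalloc_set(&kmalloc_res, 256 * 1024);

    if (mem_reserve_kmalloc_charge(&kmalloc_res, 128)) {
    	/* ... a 128-byte allocation is covered by the reserve ... */
    	mem_reserve_kmalloc_charge(&kmalloc_res, -128);	/* on free */
    }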
+
+/*
+ * kmem_cache helpers
+ */
+
+/**
+ * mem_reserve_kmem_cache_set() - set reserve to @objects worth of kmem_cache_alloc of @s
+ * @res - reserve to set
+ * @s - kmem_cache to reserve from
+ * @objects - number of objects to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
+ int objects)
+{
+ int ret;
+ long pages, bytes;
+
+ mutex_lock(&mem_reserve_mutex);
+ pages = kmem_alloc_estimate(s, GFP_ATOMIC, objects);
+ pages -= res->pages;
+ bytes = objects * kmem_cache_size(s) - res->limit;
+ ret = __mem_reserve_add(res, pages, bytes);
+ mutex_unlock(&mem_reserve_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
+
+/**
+ * mem_reserve_kmem_cache_charge() - charge (or uncharge) usage of objs
+ * @res - reserve to charge
+ * @s - kmem_cache the charged objects come from
+ * @objs - objects to charge for
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res, struct kmem_cache *s,
+ long objs)
+{
+ return __mem_reserve_charge(res, objs * kmem_cache_size(s));
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);
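The kmem_cache variant charges whole objects rather than bytes; the IPv4 route-cache sysctl handler later in this series sizes its reserve the same way. A sketch (route_res and the object count are illustrative):

    /* Back the reserve with enough pages for 32 objects of this cache. */
    mem_reserve_kmem_cache_set(&route_res, ipv4_dst_ops.kmem_cachep, 32);

    if (mem_reserve_kmem_cache_charge(&route_res, ipv4_dst_ops.kmem_cachep, 1)) {
    	/* one route-cache object is covered; uncharge on free */
    	mem_reserve_kmem_cache_charge(&route_res, ipv4_dst_ops.kmem_cachep, -1);
    }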
+
+/*
+ * Alloc wrappers.
+ *
+ * Actual usage is commented in linux/reserve.h where the interface functions
+ * live. Furthermore, the code is 3 instances of the same paradigm, hence only
+ * the first contains extensive comments.
+ */
+
+/*
+ * kmalloc/kfree
+ */
+
- void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
++void *___kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+ struct mem_reserve *res, int *emerg)
+{
+ void *obj;
+ gfp_t gfp;
+
+ /*
+ * Try a regular allocation, when that fails and we're not entitled
+ * to the reserves, fail.
+ */
+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+ obj = __kmalloc_node_track_caller(size, gfp, node, ip);
+
+ if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+ goto out;
+
+ /*
+ * If we were given a reserve to charge against, try that.
+ */
+ if (res && !mem_reserve_kmalloc_charge(res, size)) {
+ /*
+ * If we failed to charge and we're not allowed to wait for
+ * it to succeed, bail.
+ */
+ if (!(flags & __GFP_WAIT))
+ goto out;
+
+ /*
+ * Wait for a successful charge against the reserve. All
+ * uncharge operations against this reserve will wake us up.
+ */
+ wait_event(res->waitqueue,
+ mem_reserve_kmalloc_charge(res, size));
+
+ /*
+ * After waiting for it, again try a regular allocation.
+ * Pressure could have lifted during our sleep. If this
+ * succeeds, uncharge the reserve.
+ */
+ obj = __kmalloc_node_track_caller(size, gfp, node, ip);
+ if (obj) {
+ mem_reserve_kmalloc_charge(res, -size);
+ goto out;
+ }
+ }
+
+ /*
+ * Regular allocation failed, and we've successfully charged our
+ * requested usage against the reserve. Do the emergency allocation.
+ */
+ obj = __kmalloc_node_track_caller(size, flags, node, ip);
+ WARN_ON(!obj);
+ if (emerg)
+ *emerg = 1;
+
+out:
+ return obj;
+}
+
+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
+{
+ /*
+ * ksize gives the full allocated size vs the requested size we used to
+ * charge; however, since we round up to the nearest power of two, this
+ * should all work nicely.
+ */
+ size_t size = ksize(obj);
+
+ kfree(obj);
+ /*
+ * Free before uncharge, this ensures memory is actually present when
+ * a subsequent charge succeeds.
+ */
+ mem_reserve_kmalloc_charge(res, -size);
+}
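A hedged sketch of the calling convention (the convenience wrappers that invoke these live in linux/reserve.h, which is not part of this hunk; my_reserve is illustrative):

    int emerg = 0;
    void *obj;

    obj = ___kmalloc_reserve(size, GFP_ATOMIC, -1, _RET_IP_,
    			 &my_reserve, &emerg);
    if (!obj)
    	return -ENOMEM;
    /* ... use obj; only the emergency path holds a reserve charge ... */
    if (emerg)
    	__kfree_reserve(obj, &my_reserve, emerg);
    else
    	kfree(obj);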
+
+/*
+ * kmem_cache_alloc/kmem_cache_free
+ */
+
+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+ struct mem_reserve *res, int *emerg)
+{
+ void *obj;
+ gfp_t gfp;
+
+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+ obj = kmem_cache_alloc_node(s, gfp, node);
+
+ if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+ goto out;
+
+ if (res && !mem_reserve_kmem_cache_charge(res, s, 1)) {
+ if (!(flags & __GFP_WAIT))
+ goto out;
+
+ wait_event(res->waitqueue,
+ mem_reserve_kmem_cache_charge(res, s, 1));
+
+ obj = kmem_cache_alloc_node(s, gfp, node);
+ if (obj) {
+ mem_reserve_kmem_cache_charge(res, s, -1);
+ goto out;
+ }
+ }
+
+ obj = kmem_cache_alloc_node(s, flags, node);
+ WARN_ON(!obj);
+ if (emerg)
+ *emerg = 1;
+
+out:
+ return obj;
+}
+
+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+ struct mem_reserve *res, int emerg)
+{
+ kmem_cache_free(s, obj);
+ mem_reserve_kmem_cache_charge(res, s, -1);
+}
+
+/*
+ * alloc_pages/free_pages
+ */
+
+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
+ struct mem_reserve *res, int *emerg)
+{
+ struct page *page;
+ gfp_t gfp;
+ long pages = 1 << order;
+
+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+ page = alloc_pages_node(node, gfp, order);
+
+ if (page || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+ goto out;
+
+ if (res && !mem_reserve_pages_charge(res, pages)) {
+ if (!(flags & __GFP_WAIT))
+ goto out;
+
+ wait_event(res->waitqueue,
+ mem_reserve_pages_charge(res, pages));
+
+ page = alloc_pages_node(node, gfp, order);
+ if (page) {
+ mem_reserve_pages_charge(res, -pages);
+ goto out;
+ }
+ }
+
+ page = alloc_pages_node(node, flags, order);
+ WARN_ON(!page);
+ if (emerg)
+ *emerg = 1;
+
+out:
+ return page;
+}
+
+void __free_pages_reserve(struct page *page, int order,
+ struct mem_reserve *res, int emerg)
+{
+ __free_pages(page, order);
+ mem_reserve_pages_charge(res, -(1 << order));
+}
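And the page-level pair, following the same pattern (sketch; node -1 selects the default node):

    int emerg = 0;
    struct page *page;

    page = __alloc_pages_reserve(-1, GFP_ATOMIC, 0, &my_reserve, &emerg);
    if (page) {
    	/* ... use the page ... */
    	if (emerg)
    		__free_pages_reserve(page, 0, &my_reserve, emerg);
    	else
    		__free_pages(page, 0);
    }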
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
+ #include <linux/fault-inject.h>
+#include "internal.h"
/*
* Lock order:
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
+ #include <linux/page_cgroup.h>
+#include <trace/swap.h>
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
*offset = ++toff;
return nr_pages? ++nr_pages: 0;
}
++
++DEFINE_TRACE(swap_file_open);
++DEFINE_TRACE(swap_file_close);
return nr_active;
}
+ /**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared. If it was found on
+ * the active list, it will have PageActive set. If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ * fundamental difference from isolate_lru_pages (which is called
+ * without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+ int isolate_lru_page(struct page *page)
+ {
+ int ret = -EBUSY;
+
+ if (PageLRU(page)) {
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ if (PageLRU(page) && get_page_unless_zero(page)) {
+ int lru = page_lru(page);
+ ret = 0;
+ ClearPageLRU(page);
+
+ del_page_from_lru_list(zone, page, lru);
+ }
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ return ret;
+ }
++EXPORT_SYMBOL(isolate_lru_page);
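A short sketch of the contract spelled out above (hypothetical caller): hold a reference, isolate, operate, then put the page back; putback_lru_page() drops the reference that isolation took.

    get_page(page);			/* restriction (1): elevated refcount */
    if (isolate_lru_page(page) == 0) {
    	/* page is off the LRU with PageLRU cleared */
    	/* ... migrate or scan the page ... */
    	putback_lru_page(page);		/* re-adds it to the right LRU list */
    }
    put_page(page);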
+
/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
"\n spanned %lu"
"\n present %lu",
zone_page_state(zone, NR_FREE_PAGES),
- zone->pages_min,
- zone->pages_low,
- zone->pages_high,
+ zone->pages_emerg + zone->pages_min,
+ zone->pages_emerg + zone->pages_low,
+ zone->pages_emerg + zone->pages_high,
zone->pages_scanned,
- zone->nr_scan_active, zone->nr_scan_inactive,
+ zone->lru[LRU_ACTIVE_ANON].nr_scan,
+ zone->lru[LRU_INACTIVE_ANON].nr_scan,
+ zone->lru[LRU_ACTIVE_FILE].nr_scan,
+ zone->lru[LRU_INACTIVE_FILE].nr_scan,
zone->spanned_pages,
zone->present_pages);
}
else if (__dev_get_by_name(net, newname))
return -EEXIST;
-- else
- {
++ else {
+ if (strncmp(newname, dev->name, IFNAMSIZ))
- printk(KERN_INFO "%s renamed to %s by %s [%u]\n", dev->name, newname, current->comm, current->pid);
++ printk(KERN_INFO "%s renamed to %s by %s [%u]\n",
++ dev->name, newname, current->comm,
++ current->pid);
strlcpy(dev->name, newname, IFNAMSIZ);
+ }
rollback:
- err = device_rename(&dev->dev, dev->name);
- if (err) {
- memcpy(dev->name, oldname, IFNAMSIZ);
- return err;
+ /* For now only devices in the initial network namespace
+ * are in sysfs.
+ */
+ if (net == &init_net) {
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return ret;
+ }
}
write_lock_bh(&dev_base_lock);
struct net_device *null_or_orig;
int ret = NET_RX_DROP;
__be16 type;
+ unsigned long pflags = current->flags;
+ if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+ return NET_RX_SUCCESS;
+
+ /* Emergency skbs are special; they should
+ * - be delivered to SOCK_MEMALLOC sockets only
+ * - stay away from userspace
+ * - have bounded memory usage
+ *
+ * Use PF_MEMALLOC as a poor man's memory pool - the grouping kind.
+ * This saves us from propagating the allocation context down to all
+ * allocation sites.
+ */
+ if (skb_emergency(skb))
+ current->flags |= PF_MEMALLOC;
+
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ goto out;
if (!skb->tstamp.tv64)
net_timestamp(skb);
}
#endif
- #ifdef CONFIG_XEN
- switch (skb->ip_summed) {
- case CHECKSUM_UNNECESSARY:
- skb->proto_data_valid = 1;
- break;
- case CHECKSUM_PARTIAL:
- /* XXX Implement me. */
- default:
- skb->proto_data_valid = 0;
- break;
- }
- #endif
-
+ if (skb_emergency(skb))
+ goto skip_taps;
+
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
ptype->dev == orig_dev) {
EXPORT_SYMBOL(br_fdb_put_hook);
#endif
- #ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
- #endif
EXPORT_PER_CPU_SYMBOL(softnet_data);
++
++DEFINE_TRACE(net_dev_xmit);
++DEFINE_TRACE(net_dev_receive);
if (hsize > len || !sg)
hsize = len;
- nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC,
- skb_alloc_rx_flag(skb), -1);
- if (unlikely(!nskb))
- goto err;
+ if (!hsize && i >= nfrags) {
+ BUG_ON(fskb->len != len);
+
+ pos += len;
+ nskb = skb_clone(fskb, GFP_ATOMIC);
+ fskb = fskb->next;
+
+ if (unlikely(!nskb))
+ goto err;
+
+ hsize = skb_end_pointer(nskb) - nskb->head;
+ if (skb_cow_head(nskb, doffset + headroom)) {
+ kfree_skb(nskb);
+ goto err;
+ }
+
+ nskb->truesize += skb_end_pointer(nskb) - nskb->head -
+ hsize;
+ skb_release_head_state(nskb);
+ __skb_push(nskb, doffset);
+ } else {
- nskb = alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC);
++ nskb = __alloc_skb(hsize + doffset + headroom,
++ GFP_ATOMIC, skb_alloc_rx_flag(skb),
++ -1);
+
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, headroom);
+ __skb_put(nskb, doffset);
+ }
if (segs)
tail->next = nskb;
skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize);
- while (pos < offset + len) {
- BUG_ON(i >= nfrags);
-
+ while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ skb_get_page(nskb, frag->page);
size = frag->size;
if (pos < offset) {
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
++
++DEFINE_TRACE(ipv4_addr_add);
++DEFINE_TRACE(ipv4_addr_del);
}
#ifdef CONFIG_SYSCTL
+static int proc_dointvec_fragment(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.frags.high_thresh);
+ ctl_table tmp = *table;
+ int new_bytes, ret;
+
+ mutex_lock(&net->ipv4.frags.lock);
+ if (write) {
+ tmp.data = &new_bytes;
+ table = &tmp;
+ }
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&net->ipv4.frags.reserve,
+ new_bytes);
+ if (!ret)
+ net->ipv4.frags.high_thresh = new_bytes;
+ }
+ mutex_unlock(&net->ipv4.frags.lock);
+
+ return ret;
+}
+
+static int sysctl_intvec_fragment(struct ctl_table *table,
- int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.frags.high_thresh);
+ int write = (newval && newlen);
+ ctl_table tmp = *table;
+ int new_bytes, ret;
+
+ mutex_lock(&net->ipv4.frags.lock);
+ if (write) {
+ tmp.data = &new_bytes;
+ table = &tmp;
+ }
+
- ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
++ ret = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&net->ipv4.frags.reserve,
+ new_bytes);
+ if (!ret)
+ net->ipv4.frags.high_thresh = new_bytes;
+ }
+ mutex_unlock(&net->ipv4.frags.lock);
+
+ return ret;
+}
+
static int zero;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
.data = &init_net.ipv4.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_fragment,
- .strategy = &sysctl_intvec_fragment,
- .proc_handler = proc_dointvec
++ .proc_handler = proc_dointvec_fragment,
++ .strategy = sysctl_intvec_fragment,
},
{
.ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
To compile it as a module, choose M here. If unsure, say N.
- config IP_NF_MATCH_ADDRTYPE
- tristate '"addrtype" address type match support'
- depends on IP_NF_IPTABLES
- depends on NETFILTER_ADVANCED
- help
- This option allows you to match what routing thinks of an address,
- eg. UNICAST, LOCAL, BROADCAST, ...
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
-
+config IP_NF_MATCH_IPV4OPTIONS
+ tristate 'IPV4OPTIONS match support'
+ depends on IP_NF_IPTABLES
+ help
+ This option adds an IPV4OPTIONS match.
+ It allows you to filter options like source routing,
+ record route, timestamp and router-alert.
+
+ If you say Y here, try iptables -m ipv4options --help for more information.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say 'N'.
+
+
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
- obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
+obj-$(CONFIG_IP_NF_MATCH_IPV4OPTIONS) += ipt_ipv4options.o
# targets
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
--- /dev/null
+/*
+ This is a module which is used to match ipv4 options.
+ This file is distributed under the terms of the GNU General Public
+ License (GPL). Copies of the GPL can be obtained from:
+ ftp://prep.ai.mit.edu/pub/gnu/GPL
+
+ 11-mars-2001 Fabrice MARIE <fabrice@netfilter.org> : initial development.
+ 12-july-2001 Fabrice MARIE <fabrice@netfilter.org> : added router-alert options matching. Fixed a bug with no-srr
+ 12-august-2001 Imran Patel <ipatel@crosswinds.net> : optimization of the match.
+ 18-november-2001 Fabrice MARIE <fabrice@netfilter.org> : added [!] 'any' option match.
+ 19-february-2004 Harald Welte <laforge@netfilter.org> : merge with 2.6.x
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ipv4options.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fabrice Marie <fabrice@netfilter.org>");
+
+static bool
- match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- bool *hotdrop)
++match(const struct sk_buff *skb, const struct xt_match_param *params)
+{
- const struct ipt_ipv4options_info *info = matchinfo; /* match info for rule */
++ const struct ipt_ipv4options_info *info = params->matchinfo; /* match info for rule */
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct ip_options *opt;
+
+ if (iph->ihl * 4 == sizeof(struct iphdr)) {
+ /* No options, so we match only the "DONTs" and the "IGNOREs" */
+
+ if (((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT))
+ return 0;
+ return 1;
+ } else {
+ if ((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT)
+ /* there are options, and we don't need to care which one */
+ return 1;
+ else {
+ if ((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT)
+ /* there are options but we don't want any ! */
+ return 0;
+ }
+ }
+
+ opt = &(IPCB(skb)->opt);
+
+ /* source routing */
+ if ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) {
+ if (!((opt->srr) && (opt->is_strictroute)))
+ return 0;
+ } else if ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) {
+ if (!((opt->srr) && (!opt->is_strictroute)))
+ return 0;
+ } else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR) {
+ if (opt->srr)
+ return 0;
+ }
+ /* record route */
+ if ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) {
+ if (!opt->rr)
+ return 0;
+ } else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR) {
+ if (opt->rr)
+ return 0;
+ }
+ /* timestamp */
+ if ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) {
+ if (!opt->ts)
+ return 0;
+ } else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) {
+ if (opt->ts)
+ return 0;
+ }
+ /* router-alert option */
+ if ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) {
+ if (!opt->router_alert)
+ return 0;
+ } else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) {
+ if (opt->router_alert)
+ return 0;
+ }
+
+ /* we match ! */
+ return 1;
+}
+
+static bool
- checkentry(const char *tablename,
- const void *ip,
- const struct xt_match *match,
- void *matchinfo,
- unsigned int hook_mask)
++checkentry(const struct xt_mtchk_param *params)
+{
- const struct ipt_ipv4options_info *info = matchinfo; /* match info for rule */
++ const struct ipt_ipv4options_info *info = params->matchinfo; /* match info for rule */
+ /* Check the size */
- if (match->matchsize != IPT_ALIGN(sizeof(struct ipt_ipv4options_info)))
++ if (params->match->matchsize != IPT_ALIGN(sizeof(struct ipt_ipv4options_info)))
+ return 0;
+ /* Now check the coherence of the data ... */
+ if (((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT) &&
+ (((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR) ||
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR) ||
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) ||
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) ||
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT)))
+ return 0; /* opposites */
+ if (((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) &&
+ (((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT)))
+ return 0; /* opposites */
+ if (((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) &&
+ ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR))
+ return 0; /* cannot match loose and strict source routing at the same time */
+ if ((((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) ||
+ ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR)) &&
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR))
+ return 0; /* opposites */
+ if (((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) &&
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR))
+ return 0; /* opposites */
+ if (((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) &&
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP))
+ return 0; /* opposites */
+ if (((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) &&
+ ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT))
+ return 0; /* opposites */
+
+ /* everything looks ok. */
+ return 1;
+}
+
+static struct xt_match ipv4options_match = {
+ .name = "ipv4options",
+ .match = match,
+ .matchsize = sizeof(struct ipt_ipv4options_info),
+ .checkentry = checkentry,
+ .me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+ return xt_register_match(&ipv4options_match);
+}
+
+static void __exit fini(void)
+{
+ xt_unregister_match(&ipv4options_match);
+}
+
+module_init(init);
+module_exit(fini);
return 0;
}
+static struct mutex ipv4_route_lock;
+
+static int proc_dointvec_route(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ ctl_table tmp = *table;
+ int new_size, ret;
+
+ mutex_lock(&ipv4_route_lock);
+ if (write) {
+ tmp.data = &new_size;
+ table = &tmp;
+ }
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv4_route_reserve,
+ ipv4_dst_ops.kmem_cachep, new_size);
+ if (!ret)
+ ip_rt_max_size = new_size;
+ }
+ mutex_unlock(&ipv4_route_lock);
+
+ return ret;
+}
+
+static int sysctl_intvec_route(struct ctl_table *table,
- int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int write = (newval && newlen);
+ ctl_table tmp = *table;
+ int new_size, ret;
+
+ mutex_lock(&ipv4_route_lock);
+ if (write) {
+ tmp.data = &new_size;
+ table = &tmp;
+ }
+
- ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
++ ret = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv4_route_reserve,
+ ipv4_dst_ops.kmem_cachep, new_size);
+ if (!ret)
+ ip_rt_max_size = new_size;
+ }
+ mutex_unlock(&ipv4_route_lock);
+
+ return ret;
+}
+
static const struct seq_operations rt_cache_seq_ops = {
.start = rt_cache_seq_start,
.next = rt_cache_seq_next,
.data = &ip_rt_max_size,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_route,
- .strategy = &sysctl_intvec_route,
- .proc_handler = proc_dointvec,
++ .proc_handler = proc_dointvec_route,
++ .strategy = sysctl_intvec_route,
},
{
/* Deprecated. Use gc_min_interval_ms */
struct sk_buff *skb;
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
+ int mss;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+ skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1,
+ sk_allocation(sk, GFP_ATOMIC));
if (skb == NULL)
return NULL;
unregister_pernet_subsys(&addrconf_net_ops);
}
++
++DEFINE_TRACE(ipv6_addr_add);
++DEFINE_TRACE(ipv6_addr_del);
};
#ifdef CONFIG_SYSCTL
+static int proc_dointvec_fragment(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv6.frags.high_thresh);
+ ctl_table tmp = *table;
+ int new_bytes, ret;
+
+ mutex_lock(&net->ipv6.frags.lock);
+ if (write) {
+ tmp.data = &new_bytes;
+ table = &tmp;
+ }
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&net->ipv6.frags.reserve,
+ new_bytes);
+ if (!ret)
+ net->ipv6.frags.high_thresh = new_bytes;
+ }
+ mutex_unlock(&net->ipv6.frags.lock);
+
+ return ret;
+}
+
+static int sysctl_intvec_fragment(struct ctl_table *table,
- int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv6.frags.high_thresh);
+ int write = (newval && newlen);
+ ctl_table tmp = *table;
+ int new_bytes, ret;
+
+ mutex_lock(&net->ipv6.frags.lock);
+ if (write) {
+ tmp.data = &new_bytes;
+ table = &tmp;
+ }
+
- ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
++ ret = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&net->ipv6.frags.reserve,
+ new_bytes);
+ if (!ret)
+ net->ipv6.frags.high_thresh = new_bytes;
+ }
+ mutex_unlock(&net->ipv6.frags.lock);
+
+ return ret;
+}
+
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
.data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_fragment,
- .strategy = &sysctl_intvec_fragment,
- .proc_handler = proc_dointvec
++ .proc_handler = proc_dointvec_fragment,
++ .strategy = sysctl_intvec_fragment,
},
{
.ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
return -EINVAL;
}
+static int proc_dointvec_route(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv6.sysctl.ip6_rt_max_size);
+ ctl_table tmp = *table;
+ int new_size, ret;
+
+ mutex_lock(&net->ipv6.sysctl.ip6_rt_lock);
+ if (write) {
+ tmp.data = &new_size;
+ table = &tmp;
+ }
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&net->ipv6.ip6_rt_reserve,
+ net->ipv6.ip6_dst_ops->kmem_cachep, new_size);
+ if (!ret)
+ net->ipv6.sysctl.ip6_rt_max_size = new_size;
+ }
+ mutex_unlock(&net->ipv6.sysctl.ip6_rt_lock);
+
+ return ret;
+}
+
+static int sysctl_intvec_route(struct ctl_table *table,
- int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv6.sysctl.ip6_rt_max_size);
+ int write = (newval && newlen);
+ ctl_table tmp = *table;
+ int new_size, ret;
+
+ mutex_lock(&net->ipv6.sysctl.ip6_rt_lock);
+ if (write) {
+ tmp.data = &new_size;
+ table = &tmp;
+ }
+
- ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
++ ret = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&net->ipv6.ip6_rt_reserve,
+ net->ipv6.ip6_dst_ops->kmem_cachep, new_size);
+ if (!ret)
+ net->ipv6.sysctl.ip6_rt_max_size = new_size;
+ }
+ mutex_unlock(&net->ipv6.sysctl.ip6_rt_lock);
+
+ return ret;
+}
+
ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.ip6_rt_max_size,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_route,
- .strategy = &sysctl_intvec_route,
- .proc_handler = proc_dointvec,
++ .proc_handler = proc_dointvec_route,
++ .strategy = sysctl_intvec_route,
},
{
.ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
__be32 *topt;
++ gfp_t gfp_mask = GFP_ATOMIC;
if (ts)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
++ if (skb->sk)
++ gfp_mask = sk_allocation(skb->sk, gfp_mask);
++
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
- sk_allocation(ctl_sk, GFP_ATOMIC));
- GFP_ATOMIC);
++ gfp_mask);
if (buff == NULL)
return;
obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
+obj-$(CONFIG_NF_CONNTRACK_SLP) += nf_conntrack_slp.o
+ # transparent proxy support
+ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
++
++DEFINE_TRACE(socket_sendmsg);
++DEFINE_TRACE(socket_recvmsg);
++DEFINE_TRACE(socket_create);
++DEFINE_TRACE(socket_call);
--- /dev/null
+ config SUNRPC
+ tristate
+
+ config SUNRPC_GSS
+ tristate
+
+ config SUNRPC_XPRT_RDMA
+ tristate
+ depends on SUNRPC && INFINIBAND && EXPERIMENTAL
+ default SUNRPC && INFINIBAND
+ help
+ This option allows the NFS client and server to support
+ an RDMA-enabled transport.
+
+ To compile RPC client RDMA transport support as a module,
+ choose M here: the module will be called xprtrdma.
+
+ If unsure, say N.
+
+ config SUNRPC_REGISTER_V4
+ bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
+ depends on SUNRPC && EXPERIMENTAL
+ default n
+ help
+ Sun added support for registering RPC services at an IPv6
+ address by creating two new versions of the rpcbind protocol
+ (RFC 1833).
+
+ This option enables support in the kernel RPC server for
+ registering kernel RPC services via version 4 of the rpcbind
+ protocol. If you enable this option, you must run a portmapper
+ daemon that supports rpcbind protocol version 4.
+
+ Serving NFS over IPv6 from knfsd (the kernel's NFS server)
+ requires that you enable this option and use a portmapper that
+ supports rpcbind version 4.
+
+ If unsure, say N to get traditional behavior (register kernel
+ RPC services using only rpcbind version 2). Distributions
+ using the legacy Linux portmapper daemon must say N here.
+
++config SUNRPC_SWAP
++ def_bool n
++ depends on SUNRPC
++ select NETVM
++
+ config RPCSEC_GSS_KRB5
+ tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
+ depends on SUNRPC && EXPERIMENTAL
+ select SUNRPC_GSS
+ select CRYPTO
+ select CRYPTO_MD5
+ select CRYPTO_DES
+ select CRYPTO_CBC
+ help
+ Choose Y here to enable Secure RPC using the Kerberos version 5
+ GSS-API mechanism (RFC 1964).
+
+ Secure RPC calls with Kerberos require an auxiliary user-space
+ daemon which may be found in the Linux nfs-utils package
+ available from http://linux-nfs.org/. In addition, user-space
+ Kerberos support should be installed.
+
+ If unsure, say N.
+
+ config RPCSEC_GSS_SPKM3
+ tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
+ depends on SUNRPC && EXPERIMENTAL
+ select SUNRPC_GSS
+ select CRYPTO
+ select CRYPTO_MD5
+ select CRYPTO_DES
+ select CRYPTO_CAST5
+ select CRYPTO_CBC
+ help
+ Choose Y here to enable Secure RPC using the SPKM3 public key
+ GSS-API mechanism (RFC 2025).
+
+ Secure RPC calls with SPKM3 require an auxiliary userspace
+ daemon which may be found in the Linux nfs-utils package
+ available from http://linux-nfs.org/.
+
+ If unsure, say N.
--- /dev/null
+/*
+ * perfmon_file.c: perfmon2 file input/output functions
+ *
+ * This file implements the perfmon2 interface which
+ * provides access to the hardware performance counters
+ * of the host processor.
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * http://perfmon2.sf.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/vfs.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/perfmon_kern.h>
+#include "perfmon_priv.h"
+
+#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */
+
+struct pfm_controls pfm_controls = {
+ .sys_group = PFM_GROUP_PERM_ANY,
+ .task_group = PFM_GROUP_PERM_ANY,
+ .arg_mem_max = PAGE_SIZE,
+ .smpl_buffer_mem_max = ~0,
+};
+EXPORT_SYMBOL(pfm_controls);
+
+static int __init enable_debug(char *str)
+{
+ pfm_controls.debug = 1;
+ PFM_INFO("debug output enabled\n");
+ return 1;
+}
+__setup("perfmon_debug", enable_debug);
+
+static int pfmfs_delete_dentry(struct dentry *dentry)
+{
+ return 1;
+}
+
+static struct dentry_operations pfmfs_dentry_operations = {
+ .d_delete = pfmfs_delete_dentry,
+};
+
+int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ void *kaddr;
+ unsigned long address;
+ struct pfm_context *ctx;
+ size_t size;
+
+ address = (unsigned long)vmf->virtual_address;
+
+ ctx = vma->vm_private_data;
+ if (ctx == NULL) {
+ PFM_DBG("no ctx");
+ return VM_FAULT_SIGBUS;
+ }
+ /*
+ * size available to user (may be different from real_smpl_size)
+ */
+ size = ctx->smpl_size;
+
+ if ((address < vma->vm_start) ||
+ (address >= (vma->vm_start + size)))
+ return VM_FAULT_SIGBUS;
+
+ kaddr = ctx->smpl_addr + (address - vma->vm_start);
+
+ vmf->page = vmalloc_to_page(kaddr);
+ get_page(vmf->page);
+
+ PFM_DBG("[%d] start=%p ref_count=%d",
+ current->pid,
+ kaddr, page_count(vmf->page));
+
+ return 0;
+}
+
+/*
+ * We need to determine whether or not we are closing the last reference
+ * to the file and thus are going to end up in pfm_close(), which eventually
+ * calls pfm_release_buf_space(). In that function, we update the accounting
+ * for locked_vm given that we are actually freeing the sampling buffer. The
+ * issue is that there are multiple paths leading to pfm_release_buf_space(),
+ * from exit(), munmap(), close(). The path coming from munmap() is problematic
+ * because do_munmap() grabs mmap_sem in write-mode, which is also what
+ * pfm_release_buf_space does. To avoid deadlock, we need to determine where
+ * we are calling from and skip the locking. The vm_ops->close() callback
+ * is invoked for each remove_vma() independently of the number of references
+ * left on the file descriptor, therefore a simple reference counter does
+ * not work. We need to determine if this is the last call, and then set a
+ * flag to skip the locking.
+ */
+static void pfm_buf_map_close(struct vm_area_struct *vma)
+{
+ struct file *file;
+ struct pfm_context *ctx;
+
+ file = vma->vm_file;
+ ctx = vma->vm_private_data;
+
+ /*
+ * if file is going to close, then pfm_close() will
+ * be called, do not lock in pfm_release_buf
+ */
+ if (atomic_read(&file->f_count) == 1)
+ ctx->flags.mmap_nlock = 1;
+}
+
+/*
+ * The close callback does not do the locked memory accounting:
+ * that must be done when the actual buffer is freed. Munmap does
+ * not free the pages backing the vma because they may still be in
+ * use by the PMU interrupt handler.
+ */
+struct vm_operations_struct pfm_buf_map_vm_ops = {
+ .fault = pfm_buf_map_pagefault,
+ .close = pfm_buf_map_close
+};
+
+static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma,
+ size_t size)
+{
+ if (ctx->smpl_addr == NULL) {
+ PFM_DBG("no sampling buffer to map");
+ return -EINVAL;
+ }
+
+ if (size > ctx->smpl_size) {
+ PFM_DBG("mmap size=%zu >= actual buf size=%zu",
+ size,
+ ctx->smpl_size);
+ return -EINVAL;
+ }
+
+ vma->vm_ops = &pfm_buf_map_vm_ops;
+ vma->vm_private_data = ctx;
+
+ return 0;
+}
+
+static int pfm_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ size_t size;
+ struct pfm_context *ctx;
+ unsigned long flags;
+ int ret;
+
+ PFM_DBG("pfm_file_ops");
+
+ ctx = file->private_data;
+ size = (vma->vm_end - vma->vm_start);
+
+ if (ctx == NULL)
+ return -EINVAL;
+
+ ret = -EINVAL;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ if (vma->vm_flags & VM_WRITE) {
+ PFM_DBG("cannot map buffer for writing");
+ goto done;
+ }
+
+ PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx",
+ vma->vm_pgoff,
+ size,
+ vma->vm_start);
+
+ ret = pfm_mmap_buffer(ctx, vma, size);
+ if (ret == 0)
+ vma->vm_flags |= VM_RESERVED;
+
+ PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu",
+ ret,
+ vma->vm_flags,
+ vma->vm_start,
+ vma->vm_end-vma->vm_start);
+done:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ return ret;
+}
+
+/*
+ * Extract one message from queue.
+ *
+ * return:
+ * -EAGAIN: when non-blocking and nothing is in the queue.
+ * -ERESTARTSYS: when blocking and signal is pending
+ * Otherwise returns size of message (sizeof(pfarg_msg))
+ */
+ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block)
+{
+ ssize_t ret = 0;
+ unsigned long flags;
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * we must mask interrupts to avoid a race condition
+ * with the PMU interrupt handler.
+ */
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ while (pfm_msgq_is_empty(ctx)) {
+
+ /*
+ * handle non-blocking reads
+ * return -EAGAIN
+ */
+ ret = -EAGAIN;
+ if (non_block)
+ break;
+
+ add_wait_queue(&ctx->msgq_wait, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ schedule();
+
+ /*
+ * during this window, another thread may call
+ * pfm_read() and steal our message
+ */
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ remove_wait_queue(&ctx->msgq_wait, &wait);
+ set_current_state(TASK_RUNNING);
+
+ /*
+ * check for pending signals
+ * return -ERESTARTSYS
+ */
+ ret = -ERESTARTSYS;
+ if (signal_pending(current))
+ break;
+
+ /*
+ * we may have a message
+ */
+ ret = 0;
+ }
+
+ /*
+ * extract message
+ */
+ if (ret == 0) {
+ /*
+ * copy the oldest message into msg_buf.
+ * We cannot call copy_to_user() directly
+ * because interrupts are masked; the copy is
+ * done in the caller.
+ */
+ pfm_get_next_msg(ctx, msg_buf);
+
+ ret = sizeof(*msg_buf);
+
+ PFM_DBG("extracted type=%d", msg_buf->type);
+ }
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ PFM_DBG("blocking=%d ret=%zd", non_block, ret);
+
+ return ret;
+}
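Seen from user space, the return contract above maps onto an ordinary read(2) loop; -ERESTARTSYS surfaces as EINTR. A sketch, assuming the fd and the user-visible union pfarg_msg come from the perfmon user headers (outside this hunk):

    #include <errno.h>
    #include <unistd.h>

    /* user-space sketch: drain one notification message from a perfmon fd */
    int read_one_msg(int pfm_fd, union pfarg_msg *msg)
    {
    	ssize_t n;

    	for (;;) {
    		n = read(pfm_fd, msg, sizeof(*msg));
    		if (n == (ssize_t)sizeof(*msg))
    			return 0;	/* got one full message */
    		if (n < 0 && errno == EINTR)
    			continue;	/* interrupted by a signal: retry */
    		if (n < 0 && errno == EAGAIN)
    			return 1;	/* O_NONBLOCK and queue empty */
    		return -1;		/* short read or hard error */
    	}
    }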
+
+static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size,
+ loff_t *ppos)
+{
+ struct pfm_context *ctx;
+ union pfarg_msg msg_buf;
+ int non_block, ret;
+
+ PFM_DBG_ovfl("buf=%p size=%zu", buf, size);
+
+ ctx = filp->private_data;
+ if (ctx == NULL) {
+ PFM_ERR("no ctx for pfm_read");
+ return -EINVAL;
+ }
+
+ non_block = filp->f_flags & O_NONBLOCK;
+
+#ifdef CONFIG_IA64_PERFMON_COMPAT
+ /*
+ * detect IA-64 v2.0 context read (message size is different)
+ * a nop on all other architectures
+ */
+ if (unlikely(ctx->flags.ia64_v20_compat))
+ return pfm_arch_compat_read(ctx, buf, non_block, size);
+#endif
+ /*
+ * cannot extract partial messages.
+ * check even when there is no message
+ *
+ * cannot extract more than one message per call. Bytes
+ * above sizeof(msg) are ignored.
+ */
+ if (size < sizeof(msg_buf)) {
+ PFM_DBG("message is too small size=%zu must be >=%zu)",
+ size,
+ sizeof(msg_buf));
+ return -EINVAL;
+ }
+
+ ret = __pfm_read(ctx, &msg_buf, non_block);
+ if (ret > 0) {
+ if (copy_to_user(buf, &msg_buf, sizeof(msg_buf)))
+ ret = -EFAULT;
+ }
+ PFM_DBG_ovfl("ret=%d", ret);
+ return ret;
+}
+
+static ssize_t pfm_write(struct file *file, const char __user *ubuf,
+ size_t size, loff_t *ppos)
+{
+ PFM_DBG("pfm_write called");
+ return -EINVAL;
+}
+
+static unsigned int pfm_poll(struct file *filp, poll_table *wait)
+{
+ struct pfm_context *ctx;
+ unsigned long flags;
+ unsigned int mask = 0;
+
+ PFM_DBG("pfm_file_ops");
+
+ if (filp->f_op != &pfm_file_ops) {
+ PFM_ERR("pfm_poll bad magic");
+ return 0;
+ }
+
+ ctx = filp->private_data;
+ if (ctx == NULL) {
+ PFM_ERR("pfm_poll no ctx");
+ return 0;
+ }
+
+ PFM_DBG("before poll_wait");
+
+ poll_wait(filp, &ctx->msgq_wait, wait);
+
+ /*
+ * pfm_msgq_is_empty() is non-atomic
+ *
+ * filp is protected by fget() at the upper level, so the
+ * context cannot be closed by another thread.
+ *
+ * There may be a race with a PMU interrupt adding
+ * messages to the queue. But we are only interested in
+ * whether the queue is non-empty, so adding more messages
+ * should not really be a problem.
+ *
+ * There may be a race with another thread issuing
+ * a read() and stealing messages from the queue, thus
+ * we may return the wrong answer. This could potentially
+ * lead to a blocking read, because nothing is
+ * available in the queue.
+ */
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ if (!pfm_msgq_is_empty(ctx))
+ mask = POLLIN | POLLRDNORM;
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ PFM_DBG("after poll_wait mask=0x%x", mask);
+
+ return mask;
+}
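Because of the benign races described above, POLLIN is a hint rather than a guarantee. A robust consumer therefore pairs poll(2) with an O_NONBLOCK read and retries on EAGAIN; a sketch:

    #include <poll.h>

    /* user-space sketch: wait up to 'ms' milliseconds for a perfmon message */
    int wait_for_msg(int pfm_fd, int ms)
    {
    	struct pollfd pfd = { .fd = pfm_fd, .events = POLLIN };
    	int n = poll(&pfd, 1, ms);

    	if (n <= 0)
    		return n;	/* 0 = timeout, <0 = error */
    	return (pfd.revents & POLLIN) ? 1 : 0;
    }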
+
+static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ PFM_DBG("pfm_ioctl called");
+ return -EINVAL;
+}
+
+/*
+ * interrupt cannot be masked when entering this function
+ */
+static inline int __pfm_fasync(int fd, struct file *filp,
+ struct pfm_context *ctx, int on)
+{
+ int ret;
+
+ PFM_DBG("in fd=%d on=%d async_q=%p",
+ fd,
+ on,
+ ctx->async_queue);
+
+ ret = fasync_helper(fd, filp, on, &ctx->async_queue);
+
+ PFM_DBG("out fd=%d on=%d async_q=%p ret=%d",
+ fd,
+ on,
+ ctx->async_queue, ret);
+
+ return ret;
+}
+
+static int pfm_fasync(int fd, struct file *filp, int on)
+{
+ struct pfm_context *ctx;
+ int ret;
+
+ PFM_DBG("pfm_file_ops");
+
+ ctx = filp->private_data;
+ if (ctx == NULL) {
+ PFM_ERR("pfm_fasync no ctx");
+ return -EBADF;
+ }
+
+ /*
+ * we cannot mask interrupts during this call because it
+ * may go to sleep if memory is not readily available.
+ *
+ * We are protected from the context disappearing by the
+ * get_fd()/put_fd() done in caller. Serialization of this function
+ * is ensured by caller.
+ */
+ ret = __pfm_fasync(fd, filp, ctx, on);
+
+ PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d",
+ fd,
+ on,
+ ctx->async_queue, ret);
+
+ return ret;
+}
+
+#ifdef CONFIG_SMP
+static void __pfm_close_remote_cpu(void *info)
+{
+ struct pfm_context *ctx = info;
+ int can_release;
+
+ BUG_ON(ctx != __get_cpu_var(pmu_ctx));
+
+ /*
+ * we are in the IPI interrupt handler, which always has higher
+ * priority than the PMU interrupt, therefore we do not need to
+ * mask interrupts. Context locking is not needed because we
+ * are in close(), with no more user references.
+ *
+ * can_release is ignored, release done on calling CPU
+ */
+ __pfm_unload_context(ctx, &can_release);
+
+ /*
+ * we cannot free context here because we are in_interrupt().
+ * we free on the calling CPU
+ */
+}
+
+static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx)
+{
+ BUG_ON(irqs_disabled());
+ return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * called either on explicit close() or from exit_files().
+ * Only the LAST user of the file gets to this point, i.e., it is
+ * called only ONCE.
+ *
+ * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
+ * (fput()), i.e., the last task to access the file. Nobody else can
+ * access the file at this point.
+ *
+ * When called from exit_files(), the VMA has been freed because exit_mm()
+ * is executed before exit_files().
+ *
+ * When called from exit_files(), the current task is not yet a ZOMBIE,
+ * but we still flush the PMU state to the context.
+ */
+int __pfm_close(struct pfm_context *ctx, struct file *filp)
+{
+ unsigned long flags;
+ int state;
+ int can_free = 1, can_unload = 1;
+ int is_system, can_release = 0;
+ u32 cpu;
+
+ /*
+ * no risk of ctx or filp disappearing so we can operate outside
+ * of spin_lock(). fasync_helper() runs with interrupts masked,
+ * thus there is no risk with the PMU interrupt handler
+ *
+ * In case of zombie, we will not have the async struct anymore
+ * thus kill_fasync() will not do anything
+ *
+ * fd is not used when removing the entry so we pass -1
+ */
+ if (filp->f_flags & FASYNC)
+ __pfm_fasync(-1, filp, ctx, 0);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ state = ctx->state;
+ is_system = ctx->flags.system;
+ cpu = ctx->cpu;
+
+ PFM_DBG("state=%d", state);
+
+ /*
+ * check if unload is needed
+ */
+ if (state == PFM_CTX_UNLOADED)
+ goto doit;
+
+#ifdef CONFIG_SMP
+ /*
+ * we need to release the resource on the ORIGINAL cpu.
+ * we need to release the context lock to avoid deadlocks
+ * on the original CPU, especially in the context switch
+ * routines. It is safe to unlock because we are in close(),
+ * in other words, there is no more access from user level.
+ * we can also unmask interrupts on this CPU because the
+ * context is running on the original CPU. Context will be
+ * unloaded and the session will be released on the original
+ * CPU. Upon return, the caller is guaranteed that the context
+ * is gone from original CPU.
+ */
+ if (is_system && cpu != smp_processor_id()) {
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ pfm_close_remote_cpu(cpu, ctx);
+ can_release = 1;
+ goto free_it;
+ }
+
+ if (!is_system && ctx->task != current) {
+ /*
+ * switch context to zombie state
+ */
+ ctx->state = PFM_CTX_ZOMBIE;
+
+ PFM_DBG("zombie ctx for [%d]", ctx->task->pid);
+ /*
+ * must check if other thread is using block overflow
+ * notification mode. If so make sure it will not block
+ * because there will not be any pfm_restart() issued.
+ * When the thread notices the ZOMBIE state, it will clean
+ * up what is left of the context
+ */
+ if (state == PFM_CTX_MASKED && ctx->flags.block) {
+ /*
+ * force task to wake up from MASKED state
+ */
+ PFM_DBG("waking up [%d]", ctx->task->pid);
+
+ complete(&ctx->restart_complete);
+ }
+ /*
+ * PMU session will be released by the monitored task when it notices
+ * ZOMBIE state as part of pfm_unload_context()
+ */
+ can_unload = can_free = 0;
+ }
+#endif
+ if (can_unload)
+ __pfm_unload_context(ctx, &can_release);
+doit:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+#ifdef CONFIG_SMP
+free_it:
+#endif
+ if (can_release)
+ pfm_session_release(is_system, cpu);
+
+ if (can_free)
+ pfm_free_context(ctx);
+
+ return 0;
+}
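The complete(&ctx->restart_complete) issued above pairs with a wait in the monitored task's blocking-notification path. A rough sketch of that counterpart, assuming the behavior the comments describe (the real wait and unload code is not in this hunk):

    /* sketch: monitored task parked in blocking overflow-notification mode */
    static void pfm_wait_restart(struct pfm_context *ctx)
    {
    	/* woken by pfm_restart(), or by __pfm_close() on a zombie ctx */
    	wait_for_completion(&ctx->restart_complete);

    	if (ctx->state == PFM_CTX_ZOMBIE) {
    		/* controlling task is gone: clean up what is left of
    		 * the context, as described for pfm_unload_context() */
    	}
    }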
+
+static int pfm_close(struct inode *inode, struct file *filp)
+{
+ struct pfm_context *ctx;
+
+ PFM_DBG("called filp=%p", filp);
+
+ ctx = filp->private_data;
+ if (ctx == NULL) {
+ PFM_ERR("no ctx");
+ return -EBADF;
+ }
+ return __pfm_close(ctx, filp);
+}
+
+static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
+{
+ PFM_DBG("pfm_file_ops");
+
+ return -ENXIO;
+}
+
+
+const struct file_operations pfm_file_ops = {
+ .llseek = no_llseek,
+ .read = pfm_read,
+ .write = pfm_write,
+ .poll = pfm_poll,
+ .ioctl = pfm_ioctl,
+ .open = pfm_no_open, /* special open to disallow open via /proc */
+ .fasync = pfm_fasync,
+ .release = pfm_close,
+ .mmap = pfm_mmap
+};
+
+static int pfmfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data, struct vfsmount *mnt)
+{
+ return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
+}
+
+static struct file_system_type pfm_fs_type = {
+ .name = "pfmfs",
+ .get_sb = pfmfs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+/*
+ * pfmfs should _never_ be mounted by userland - too much of a security hassle,
+ * no real gain from having the whole whorehouse mounted. So we don't need
+ * any operations on the root directory. However, we need a non-trivial
+ * d_name - pfm: will go nicely and kill the special-casing in procfs.
+ */
+static struct vfsmount *pfmfs_mnt;
+
+int __init pfm_init_fs(void)
+{
+ int err = register_filesystem(&pfm_fs_type);
+ if (!err) {
+ pfmfs_mnt = kern_mount(&pfm_fs_type);
+ err = PTR_ERR(pfmfs_mnt);
+ if (IS_ERR(pfmfs_mnt))
+ unregister_filesystem(&pfm_fs_type);
+ else
+ err = 0;
+ }
+ return err;
+}
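Since pfmfs is never mounted, the only user-visible trace of it is the synthetic name attached to the descriptor. A quick user-space check, assuming a process that already holds a perfmon fd (the "pfm:[ino]" form follows from the d_name chosen here and in pfm_alloc_fd() below):

    #include <stdio.h>
    #include <unistd.h>

    /* user-space sketch: print the pfmfs-backed name of an fd */
    void show_fd_name(int fd)
    {
    	char path[64], target[64];
    	ssize_t n;

    	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
    	n = readlink(path, target, sizeof(target) - 1);
    	if (n > 0) {
    		target[n] = '\0';
    		printf("fd %d -> %s\n", fd, target); /* e.g. pfm:[1234] */
    	}
    }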
+
+int pfm_alloc_fd(struct file **cfile)
+{
+ int fd, ret = 0;
+ struct file *file = NULL;
+ struct inode * inode;
+ char name[32];
+ struct qstr this;
+
+ fd = get_unused_fd();
+ if (fd < 0)
+ return -ENFILE;
+
+ ret = -ENFILE;
+
+ file = get_empty_filp();
+ if (!file)
+ goto out;
+
+ /*
+ * allocate a new inode
+ */
+ inode = new_inode(pfmfs_mnt->mnt_sb);
+ if (!inode)
+ goto out;
+
+ PFM_DBG("new inode ino=%ld @%p", inode->i_ino, inode);
+
+ inode->i_sb = pfmfs_mnt->mnt_sb;
+ inode->i_mode = S_IFCHR|S_IRUGO;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
++ inode->i_uid = current_fsuid();
++ inode->i_gid = current_fsgid();
+
+ sprintf(name, "[%lu]", inode->i_ino);
+ this.name = name;
+ this.hash = inode->i_ino;
+ this.len = strlen(name);
+
+ ret = -ENOMEM;
+
+ /*
+ * allocate a new dcache entry
+ */
+ file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+ if (!file->f_dentry)
+ goto out;
+
+ file->f_dentry->d_op = &pfmfs_dentry_operations;
+
+ d_add(file->f_dentry, inode);
+ file->f_vfsmnt = mntget(pfmfs_mnt);
+ file->f_mapping = inode->i_mapping;
+
+ file->f_op = &pfm_file_ops;
+ file->f_mode = FMODE_READ;
+ file->f_flags = O_RDONLY;
+ file->f_pos = 0;
+
+ *cfile = file;
+
+ return fd;
+out:
+ if (file)
+ put_filp(file);
+ put_unused_fd(fd);
+ return ret;
+}
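Note that pfm_alloc_fd() reserves the fd and builds the filp but deliberately does not publish the descriptor; the caller finishes the job once the context is attached. A hedged sketch of that pairing (the caller name is hypothetical):

    /* sketch: how a caller would wire up and publish the new fd */
    static int pfm_setup_fd(struct pfm_context *ctx)
    {
    	struct file *filp;
    	int fd;

    	fd = pfm_alloc_fd(&filp);
    	if (fd < 0)
    		return fd;

    	filp->private_data = ctx;	/* what pfm_read()/pfm_close() expect */
    	fd_install(fd, filp);		/* descriptor becomes visible */
    	return fd;
    }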
--- /dev/null
+/*
+ * perfmon.c: perfmon2 global initialization functions
+ *
+ * This file implements the perfmon2 interface which
+ * provides access to the hardware performance counters
+ * of the host processor.
+ *
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * http://perfmon2.sf.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/perfmon_kern.h>
+#include "perfmon_priv.h"
+
+/*
+ * external variables
+ */
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
+DEFINE_PER_CPU(struct pfm_context *, pmu_ctx);
+DEFINE_PER_CPU(u64, pmu_activation_number);
+DEFINE_PER_CPU(struct pfm_stats, pfm_stats);
+DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer);
+
+
+int perfmon_disabled = 0; /* >0 if perfmon is disabled */
+
+/*
+ * called from cpu_init() and pfm_pmu_register()
+ */
+void __pfm_init_percpu(void *dummy)
+{
+ struct hrtimer *h;
+
+ h = &__get_cpu_var(pfm_hrtimer);
+
+ pfm_arch_init_percpu();
+
+ /*
+ * initialize per-cpu high res timer
+ */
+ hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- #ifdef CONFIG_HIGH_RES_TIMERS
- /*
- * avoid potential deadlock on the runqueue lock
- * during context switch when multiplexing. Situation
- * arises on architectures which run switch_to() with
- * the runqueue lock held, e.g., x86. On others, e.g.,
- * IA-64, the problem does not exist.
- * Setting the callback mode to HRTIMER_CB_IRQSAFE_UNOCKED
- * such that the callback routine is only called on hardirq
- * context not on softirq, thus the context switch will not
- * end up trying to wakeup the softirqd
- */
- h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
- #endif
+ h->function = pfm_handle_switch_timeout;
+}
+
+/*
+ * global initialization routine, executed only once
+ */
+int __init pfm_init(void)
+{
+ PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
+
+ if (pfm_init_ctx())
+ goto error_disable;
+
+
+ if (pfm_init_sets())
+ goto error_disable;
+
+ if (pfm_init_fs())
+ goto error_disable;
+
+ if (pfm_init_sysfs())
+ goto error_disable;
+
+ if (pfm_init_control())
+ goto error_disable;
+
+ /* not critical, so no error checking */
+ pfm_init_debugfs();
+
+ /*
+ * one time, arch-specific global initialization
+ */
+ if (pfm_arch_init())
+ goto error_disable;
+
+ if (pfm_init_hotplug())
+ goto error_disable;
+ return 0;
+
+error_disable:
+ PFM_ERR("perfmon is disabled due to initialization error");
+ perfmon_disabled = 1;
+ return -1;
+}
+
+/*
+ * must use subsys_initcall() to ensure that the perfmon2 core
+ * is initialized before any PMU description module when they are
+ * compiled in.
+ */
+subsys_initcall(pfm_init);
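Concretely, subsys_initcall() places pfm_init() at initcall level 4, ahead of the level-6 device_initcall()/module_init() a built-in PMU description would use. A minimal sketch of such a description's entry point (pfm_mypmu_config is hypothetical; pfm_pmu_register() is the registration hook mentioned in the per-cpu init comment above):

    /* sketch: built-in PMU description initializing after the core */
    static int __init pfm_mypmu_init_module(void)
    {
    	/* perfmon2 core is already up: level 4 runs before level 6 */
    	return pfm_pmu_register(&pfm_mypmu_config);
    }
    module_init(pfm_mypmu_init_module);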
--- /dev/null
+/*
+ * perfmon_sysfs.c: perfmon2 sysfs interface
+ *
+ * This file implements the perfmon2 interface which
+ * provides access to the hardware performance counters
+ * of the host processor.
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * http://perfmon2.sf.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/module.h> /* for EXPORT_SYMBOL */
+#include <linux/perfmon_kern.h>
+#include "perfmon_priv.h"
+
+struct pfm_attribute {
+ struct attribute attr;
+ ssize_t (*show)(void *, struct pfm_attribute *attr, char *);
+ ssize_t (*store)(void *, const char *, size_t);
+};
+#define to_attr(n) container_of(n, struct pfm_attribute, attr)
+
+#define PFM_RO_ATTR(_name, _show) \
+ struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
+
+#define PFM_RW_ATTR(_name, _show, _store) \
+ struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store)
+
+#define PFM_ROS_ATTR(_name, _show) \
+ struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
+
+#define is_attr_name(a, n) (!strcmp((a)->attr.name, n))
+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
+
+static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj;
+static struct kobject *pfm_pmu_kobj;
+
+static ssize_t pfm_regs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct pfm_regmap_desc *reg = to_reg(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->show ? attribute->show(reg, attribute, buf) : -EIO;
+}
+
+static ssize_t pfm_fmt_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->show ? attribute->show(fmt, attribute, buf) : -EIO;
+}
+
+static struct sysfs_ops pfm_regs_sysfs_ops = {
+ .show = pfm_regs_attr_show
+};
+
+static struct sysfs_ops pfm_fmt_sysfs_ops = {
+ .show = pfm_fmt_attr_show
+};
+
+static struct kobj_type pfm_regs_ktype = {
+ .sysfs_ops = &pfm_regs_sysfs_ops,
+};
+
+static struct kobj_type pfm_fmt_ktype = {
+ .sysfs_ops = &pfm_fmt_sysfs_ops,
+};
+
+static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ if (is_attr_name(attr, "version"))
+ return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
+
+ if (is_attr_name(attr, "task_sessions_count"))
+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 0);
+
+ if (is_attr_name(attr, "debug"))
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug);
+
+ if (is_attr_name(attr, "task_group"))
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group);
+
+ if (is_attr_name(attr, "mode"))
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags);
+
+ if (is_attr_name(attr, "arg_mem_max"))
+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max);
+ if (is_attr_name(attr, "sys_sessions_count"))
+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 1);
+
+ if (is_attr_name(attr, "smpl_buffer_mem_max"))
+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max);
+
+ if (is_attr_name(attr, "smpl_buffer_mem_cur"))
+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 2);
+
+ if (is_attr_name(attr, "sys_group"))
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group);
+
+ /* XXX: could be set to write-only */
+ if (is_attr_name(attr, "reset_stats")) {
+ buf[0] = '0';
+ buf[1] = '\0';
+ return strnlen(buf, PAGE_SIZE);
+ }
+ return 0;
+}
+
+static ssize_t pfm_controls_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int i;
+ size_t d;
+
+ if (sscanf(buf, "%zu", &d) != 1)
+ goto skip;
+
+ if (is_attr_name(attr, "debug"))
+ pfm_controls.debug = d;
+
+ if (is_attr_name(attr, "task_group"))
+ pfm_controls.task_group = d;
+
+ if (is_attr_name(attr, "sys_group"))
+ pfm_controls.sys_group = d;
+
+ if (is_attr_name(attr, "mode"))
+ pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0;
+
+ if (is_attr_name(attr, "arg_mem_max")) {
+ /*
+ * we impose a page as the minimum.
+ *
+ * This limit may be smaller than the stack buffer
+ * available and that is fine.
+ */
+ if (d >= PAGE_SIZE)
+ pfm_controls.arg_mem_max = d;
+ }
+ if (is_attr_name(attr, "reset_stats")) {
+ for_each_online_cpu(i) {
+ pfm_reset_stats(i);
+ }
+ }
+
+ if (is_attr_name(attr, "smpl_buffer_mem_max")) {
+ if (d >= PAGE_SIZE)
+ pfm_controls.smpl_buffer_mem_max = d;
+ }
+skip:
+ return count;
+}
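Because the store handler parses one decimal value per write, the controls can be driven by a plain write to the attribute file. A user-space sketch (paths follow the attribute names registered just below):

    #include <fcntl.h>
    #include <unistd.h>

    /* user-space sketch: toggle perfmon debug output via sysfs */
    int set_pfm_debug(int on)
    {
    	int fd = open("/sys/kernel/perfmon/debug", O_WRONLY);

    	if (fd < 0)
    		return -1;
    	if (write(fd, on ? "1" : "0", 1) != 1) {
    		close(fd);
    		return -1;
    	}
    	return close(fd);
    }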
+
+/*
+ * /sys/kernel/perfmon attributes
+ */
+static PFM_RO_ATTR(version, pfm_controls_show);
+static PFM_RO_ATTR(task_sessions_count, pfm_controls_show);
+static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show);
+static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show);
+
+static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store);
+
+static struct attribute *pfm_kernel_attrs[] = {
+ &attr_version.attr,
+ &attr_task_sessions_count.attr,
+ &attr_sys_sessions_count.attr,
+ &attr_smpl_buffer_mem_cur.attr,
+ &attr_debug.attr,
+ &attr_reset_stats.attr,
+ &attr_sys_group.attr,
+ &attr_task_group.attr,
+ &attr_mode.attr,
+ &attr_smpl_buffer_mem_max.attr,
+ &attr_arg_mem_max.attr,
+ NULL
+};
+
+static struct attribute_group pfm_kernel_attr_group = {
+ .attrs = pfm_kernel_attrs,
+};
+
+/*
+ * per-reg attributes
+ */
+static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf)
+{
+ struct pfm_regmap_desc *reg;
+ int w;
+
+ reg = data;
+
+ if (is_attr_name(attr, "name"))
+ return snprintf(buf, PAGE_SIZE, "%s\n", reg->desc);
+
+ if (is_attr_name(attr, "dfl_val"))
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)reg->dfl_val);
+
+ if (is_attr_name(attr, "width")) {
+ w = (reg->type & PFM_REG_C64) ?
+ pfm_pmu_conf->counter_width : 64;
+ return snprintf(buf, PAGE_SIZE, "%d\n", w);
+ }
+
+ if (is_attr_name(attr, "rsvd_msk"))
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)reg->rsvd_msk);
+
+ if (is_attr_name(attr, "addr"))
+ return snprintf(buf, PAGE_SIZE, "0x%lx\n", reg->hw_addr);
+
+ return 0;
+}
+
+static PFM_ROS_ATTR(name, pfm_reg_show);
+static PFM_ROS_ATTR(dfl_val, pfm_reg_show);
+static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show);
+static PFM_ROS_ATTR(width, pfm_reg_show);
+static PFM_ROS_ATTR(addr, pfm_reg_show);
+
+static struct attribute *pfm_reg_attrs[] = {
+ &attr_name.attr,
+ &attr_dfl_val.attr,
+ &attr_rsvd_msk.attr,
+ &attr_width.attr,
+ &attr_addr.attr,
+ NULL
+};
+
+static struct attribute_group pfm_reg_attr_group = {
+ .attrs = pfm_reg_attrs,
+};
+
+static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ if (is_attr_name(attr, "model"))
+ return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name);
+ return 0;
+}
+static PFM_RO_ATTR(model, pfm_pmu_show);
+
+static struct attribute *pfm_pmu_desc_attrs[] = {
+ &attr_model.attr,
+ NULL
+};
+
+static struct attribute_group pfm_pmu_desc_attr_group = {
+ .attrs = pfm_pmu_desc_attrs,
+};
+
+static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu)
+{
+ struct pfm_regmap_desc *reg;
+ unsigned int i, k;
+ int ret;
+
+ reg = pmu->pmc_desc;
+ for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
+
+ if (!(reg->type & PFM_REG_I))
+ continue;
+
+ ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
+ pfm_pmu_kobj, "pmc%u", i);
+ if (ret)
+ goto undo_pmcs;
+
+ ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
+ if (ret) {
+ kobject_del(&reg->kobj);
+ goto undo_pmcs;
+ }
+ }
+
+ reg = pmu->pmd_desc;
+ for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
+
+ if (!(reg->type & PFM_REG_I))
+ continue;
+
+ ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
+ pfm_pmu_kobj, "pmd%u", i);
+ if (ret)
+ goto undo_pmds;
+
+ ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
+ if (ret) {
+ kobject_del(&reg->kobj);
+ goto undo_pmds;
+ }
+ }
+ return 0;
+undo_pmds:
+ reg = pmu->pmd_desc;
+ for (k = 0; k < i; k++, reg++) {
+ if (!(reg->type & PFM_REG_I))
+ continue;
+ sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+ kobject_del(&reg->kobj);
+ }
+ i = pmu->num_pmc_entries;
+ /* fall through */
+undo_pmcs:
+ reg = pmu->pmc_desc;
+ for (k = 0; k < i; k++, reg++) {
+ if (!(reg->type & PFM_REG_I))
+ continue;
+ sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+ kobject_del(&reg->kobj);
+ }
+ return ret;
+}
+
+static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu)
+{
+ struct pfm_regmap_desc *reg;
+ unsigned int i;
+
+ reg = pmu->pmc_desc;
+ for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
+
+ if (!(reg->type & PFM_REG_I))
+ continue;
+
+ sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+ kobject_del(&reg->kobj);
+ }
+
+ reg = pmu->pmd_desc;
+ for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
+
+ if (!(reg->type & PFM_REG_I))
+ continue;
+
+ sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+ kobject_del(&reg->kobj);
+ }
+ return 0;
+}
+
+/*
+ * when a PMU description module is inserted, we create
+ * a pmu_desc subdir in sysfs and we populate it with
+ * PMU specific information, such as register mappings
+ */
+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu)
+{
+ int ret;
+
+ pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj);
+ if (!pfm_pmu_kobj)
+ return -ENOMEM;
+
+ ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+ if (ret) {
+ /* will release pfm_pmu_kobj */
+ kobject_put(pfm_pmu_kobj);
+ return ret;
+ }
+
+ ret = pfm_sysfs_add_pmu_regs(pmu);
+ if (ret) {
+ sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+ /* will release pfm_pmu_kobj */
+ kobject_put(pfm_pmu_kobj);
+ } else
+ kobject_uevent(pfm_pmu_kobj, KOBJ_ADD);
+
+ return ret;
+}
+
+/*
+ * when a PMU description module is removed, we also remove
+ * all its information from sysfs, i.e., the pmu_desc subdir
+ * disappears
+ */
+int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu)
+{
+ pfm_sysfs_del_pmu_regs(pmu);
+ sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+ kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE);
+ kobject_put(pfm_pmu_kobj);
+ pfm_pmu_kobj = NULL;
+ return 0;
+}
+
+static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf)
+{
+ struct pfm_smpl_fmt *fmt = data;
+
+ if (is_attr_name(attr, "version"))
+ return snprintf(buf, PAGE_SIZE, "%u.%u\n",
+ (fmt->fmt_version >> 16) & 0xffff,
+ fmt->fmt_version & 0xffff);
+ return 0;
+}
+
+/*
+ * do not use predefined macros because of name conflict
+ * with /sys/kernel/perfmon/version
+ */
+struct pfm_attribute attr_fmt_version = {
+ .attr = { .name = "version", .mode = 0444 },
+ .show = pfm_fmt_show,
+};
+
+static struct attribute *pfm_fmt_attrs[] = {
+ &attr_fmt_version.attr,
+ NULL
+};
+
+static struct attribute_group pfm_fmt_attr_group = {
+ .attrs = pfm_fmt_attrs,
+};
+
+/*
+ * when a sampling format module is inserted, we populate
+ * sysfs with some information
+ */
+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
+{
+ int ret;
+
+ ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype,
+ pfm_fmt_kobj, fmt->fmt_name);
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group);
+ if (ret)
+ kobject_del(&fmt->kobj);
+ else
+ kobject_uevent(&fmt->kobj, KOBJ_ADD);
+
+ return ret;
+}
+
+/*
+ * when a sampling format module is removed, its information
+ * must also be removed from sysfs
+ */
+void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
+{
+ sysfs_remove_group(&fmt->kobj, &pfm_fmt_attr_group);
+ kobject_uevent(&fmt->kobj, KOBJ_REMOVE);
+ kobject_del(&fmt->kobj);
+}
+
+int __init pfm_init_sysfs(void)
+{
+ int ret;
+
+ pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj);
+ if (!pfm_kernel_kobj) {
+ PFM_ERR("cannot add kernel object: /sys/kernel/perfmon");
+ return -ENOMEM;
+ }
+
+ ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
+ if (ret) {
+ kobject_put(pfm_kernel_kobj);
+ return ret;
+ }
+
+ pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj);
+ if (!pfm_fmt_kobj) {
+ PFM_ERR("cannot add fmt object: /sys/kernel/perfmon/formats");
+ ret = -ENOMEM;
+ goto error_fmt;
+ }
+ if (pfm_pmu_conf)
+ pfm_sysfs_add_pmu(pfm_pmu_conf);
+
+ pfm_sysfs_builtin_fmt_add();
+
+ return 0;
+
+error_fmt:
+ sysfs_remove_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
+ kobject_put(pfm_kernel_kobj);
+ return ret;
+}
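Taken together, the hierarchy this file builds should look roughly as follows; the register and format entries depend on which PMU description and sampling-format modules are loaded (the sketch assumes a single PMC/PMD pair):

    /sys/kernel/perfmon/
        version, debug, mode, ...                (pfm_kernel_attrs)
        formats/
            <fmt_name>/version
        pmu_desc/
            model
            pmc0/{name,dfl_val,rsvd_msk,width,addr}
            pmd0/{name,dfl_val,rsvd_msk,width,addr}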
$(obj)/%.i: $(src)/%.c FORCE
$(call if_changed_dep,cc_i_c)
+ cmd_gensymtypes = \
+ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \
+ $(GENKSYMS) -T $@ -a $(ARCH) \
+ $(if $(KBUILD_PRESERVE),-p) \
++ $(if $(KBUILD_OVERRIDE),-o) \
+ $(if $(1),-r $(firstword $(wildcard $(@:.symtypes=.symref) /dev/null)))
+
quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
- cmd_cc_symtypes_c = \
- set -e; \
- $(CPP) -D__GENKSYMS__ $(c_flags) $< \
- | $(GENKSYMS) -T $@ \
- -r $(firstword $(wildcard \
- $(@:.symtypes=.symref) /dev/null)) \
- $(if $(KBUILD_PRESERVE),-p) \
- $(if $(KBUILD_OVERRIDE),-o) \
- -a $(ARCH) \
- >/dev/null; \
- test -s $@ || rm -f $@
+ cmd_cc_symtypes_c = \
+ set -e; \
+ $(call cmd_gensymtypes, true) >/dev/null; \
+ test -s $@ || rm -f $@
$(obj)/%.symtypes : $(src)/%.c FORCE
$(call cmd,cc_symtypes_c)
buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n");
}
++void add_supported_flag(struct buffer *b, struct module *mod)
++{
++ const char *how = supported(mod);
++ if (how)
++ buf_printf(b, "\nMODULE_INFO(supported, \"%s\");\n", how);
++}
++
/**
* Record CRCs for unresolved symbols
**/
--- /dev/null
+/*
+ * Driver for the PSC of the Freescale MPC52xx configured as AC97 interface
+ *
+ *
+ * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/ac97_codec.h>
+
++#include <linux/of_platform.h>
+#include <asm/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <asm/mpc52xx_psc.h>
+
+#include <sysdev/bestcomm/bestcomm.h>
+#include <sysdev/bestcomm/gen_bd.h>
+
+
+#define DRV_NAME "mpc52xx-psc-ac97"
+
+
+/* ======================================================================== */
+/* Structs / Defines */
+/* ======================================================================== */
+
+/* Private structure */
+struct mpc52xx_ac97_priv {
+ struct device *dev;
+ resource_size_t mem_start;
+ resource_size_t mem_len;
+ int irq;
+ struct mpc52xx_psc __iomem *psc;
+ struct mpc52xx_psc_fifo __iomem *fifo;
+
+ struct bcom_task *tsk_tx;
+ spinlock_t dma_lock;
+
+ struct snd_card *card;
+ struct snd_pcm *pcm;
+ struct snd_ac97 *ac97;
+
+ struct snd_pcm_substream *substream_playback;
+
+ int period_byte_size;
+ u32 period_start, period_end, period_next_p;
+};
+
+/* Register bit definition (AC97 mode specific) */
+#define PSC_AC97_SLOT_BIT(n) (1 << (12 - (n)))
+#define PSC_AC97_SLOTS_XMIT_SHIFT 16
+#define PSC_AC97_SLOTS_RECV_SHIFT 0
+
+/* Bestcomm options */
+#define AC97_TX_NUM_BD 32
+#define AC97_RX_NUM_BD 32
+
+static int mpc52xx_ac97_tx_fill(struct mpc52xx_ac97_priv *priv)
+{
+ struct snd_pcm_runtime *rt;
+
+ u32 dma_data_ptr;
+
+ rt = priv->substream_playback->runtime;
+
+ dma_data_ptr = virt_to_phys(rt->dma_area);
+
+ priv->period_byte_size = frames_to_bytes(rt, rt->period_size);
+ priv->period_start = dma_data_ptr;
+ priv->period_end = dma_data_ptr + priv->period_byte_size * rt->periods;
+ priv->period_next_p = dma_data_ptr;
+
+ spin_lock(&priv->dma_lock);
+ while (!bcom_queue_full(priv->tsk_tx)) {
+ struct bcom_gen_bd *bd;
+
+ /* Submit a new one */
+ bd = (struct bcom_gen_bd *) bcom_prepare_next_buffer(priv->tsk_tx);
+ bd->status = priv->period_byte_size;
+ bd->buf_pa = priv->period_next_p;
+ bcom_submit_next_buffer(priv->tsk_tx, NULL);
+
+ /* Next pointer */
+ priv->period_next_p += priv->period_byte_size;
+ if (priv->period_next_p >= priv->period_end)
+ priv->period_next_p = priv->period_start;
+ }
+ spin_unlock(&priv->dma_lock);
+
+ return 0;
+}
+
+
+/* ======================================================================== */
+/* ISR routine */
+/* ======================================================================== */
+
+static irqreturn_t mpc52xx_ac97_tx_irq(int irq, void *dev_id)
+{
+ struct mpc52xx_ac97_priv *priv = dev_id;
+ struct snd_pcm_runtime *rt;
+ struct bcom_gen_bd *bd;
+
+ rt = priv->substream_playback->runtime;
+
+ if (!bcom_buffer_done(priv->tsk_tx)) {
+ dev_dbg(priv->dev, "tx mismatch? Check correct output PSC\n");
+ bcom_disable(priv->tsk_tx);
+ }
+
+ spin_lock(&priv->dma_lock);
+ while (bcom_buffer_done(priv->tsk_tx)) {
+ /* Get the buffer back */
+ bcom_retrieve_buffer(priv->tsk_tx, NULL, NULL);
+
+ /* Submit a new one */
+ bd = (struct bcom_gen_bd *) bcom_prepare_next_buffer(priv->tsk_tx);
+ bd->status = priv->period_byte_size;
+ bd->buf_pa = priv->period_next_p;
+ bcom_submit_next_buffer(priv->tsk_tx, NULL);
+ bcom_enable(priv->tsk_tx);
+
+ /* Next pointer */
+ priv->period_next_p += priv->period_byte_size;
+ if (priv->period_next_p >= priv->period_end)
+ priv->period_next_p = priv->period_start;
+ }
+ spin_unlock(&priv->dma_lock);
+
+ snd_pcm_period_elapsed(priv->substream_playback);
+
+ return IRQ_HANDLED;
+}
+
+
+static irqreturn_t mpc52xx_ac97_irq(int irq, void *dev_id)
+{
+ struct mpc52xx_ac97_priv *priv = dev_id;
+
+ static int icnt = 0;
+
+#if 1
+ /* Anti Crash during dev ;) */
+ if ((icnt++) > 5000)
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0);
+#endif
+
+ /* Print status */
+ dev_dbg(priv->dev, "isr: %04x", in_be16(&priv->psc->mpc52xx_psc_imr));
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_ERR_STAT);
+
+ return IRQ_HANDLED;
+}
+
+/* ======================================================================== */
+/* PCM interface */
+/* ======================================================================== */
+
+/* HW desc */
+
+static struct snd_pcm_hardware mpc52xx_ac97_hw = {
+ .info = SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_MMAP_VALID,
+ .formats = SNDRV_PCM_FMTBIT_S32_BE,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .rate_min = 8000,
+ .rate_max = 48000,
+ .channels_min = 1,
+ .channels_max = 2, /* Support for more ? */
+ .buffer_bytes_max = 1024*1024,
+ .period_bytes_min = 512,
+ .period_bytes_max = 16*1024,
+ .periods_min = 8,
+ .periods_max = 1024,
+ .fifo_size = 512,
+};
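To make these constraints concrete: the only supported sample format is S32_BE, so a stereo frame is 8 bytes. The 512-byte minimum period is then 64 frames, about 1.3 ms at 48 kHz, and the 16 KiB maximum is 2048 frames, roughly 42.7 ms; this brackets how often the Bestcomm TX interrupt below has to fire.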
+
+
+/* Playback */
+
+static int mpc52xx_ac97_playback_open(struct snd_pcm_substream *substream)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+
+ dev_dbg(priv->dev, "mpc52xx_ac97_playback_open(%p)\n", substream);
+
+ substream->runtime->hw = mpc52xx_ac97_hw;
+
+ priv->substream_playback = substream;
+
+ return 0; /* FIXME */
+}
+
+static int mpc52xx_ac97_playback_close(struct snd_pcm_substream *substream)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+ dev_dbg(priv->dev, "mpc52xx_ac97_playback_close(%p)\n", substream);
+ priv->substream_playback = NULL;
+ return 0; /* FIXME */
+}
+
+static int mpc52xx_ac97_playback_prepare(struct snd_pcm_substream *substream)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+
+ dev_dbg(priv->dev, "mpc52xx_ac97_playback_prepare(%p)\n", substream);
+
+ /* FIXME, need a spinlock to protect access */
+ if (substream->runtime->channels == 1)
+ out_be32(&priv->psc->ac97_slots, 0x01000000);
+ else
+ out_be32(&priv->psc->ac97_slots, 0x03000000);
+
+ snd_ac97_set_rate(priv->ac97, AC97_PCM_FRONT_DAC_RATE,
+ substream->runtime->rate);
+
+ return 0; /* FIXME */
+}
+
+
+/* Capture */
+
+static int mpc52xx_ac97_capture_open(struct snd_pcm_substream *substream)
+{
+/* struct mpc52xx_ac97_priv *priv = substream->pcm->private_data; */
+ return 0; /* FIXME */
+}
+
+static int mpc52xx_ac97_capture_close(struct snd_pcm_substream *substream)
+{
+/* struct mpc52xx_ac97_priv *priv = substream->pcm->private_data; */
+ return 0; /* FIXME */
+}
+
+static int
+mpc52xx_ac97_capture_prepare(struct snd_pcm_substream *substream)
+{
+/* struct mpc52xx_ac97_priv *priv = substream->pcm->private_data; */
+ return 0; /* FIXME */
+}
+
+
+/* Common */
+
+static int mpc52xx_ac97_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+ int rv;
+
+ dev_dbg(priv->dev, "mpc52xx_ac97_hw_params(%p)\n", substream);
+
+ rv = snd_pcm_lib_malloc_pages(substream,
+ params_buffer_bytes(params));
+ if (rv < 0) {
+ printk(KERN_ERR "hw params failes\n"); /* FIXME */
+ return rv;
+ }
+
+ dev_dbg(priv->dev, "%d %d %d\n", params_buffer_bytes(params),
+ params_period_bytes(params), params_periods(params));
+
+ return 0;
+}
+
+static int mpc52xx_ac97_hw_free(struct snd_pcm_substream *substream)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+
+ dev_dbg(priv->dev, "mpc52xx_ac97_hw_free(%p)\n", substream);
+
+ return snd_pcm_lib_free_pages(substream);
+}
+
+static int mpc52xx_ac97_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+ int rv = 0;
+
+ dev_dbg(priv->dev, "mpc52xx_ac97_trigger(%p,%d)\n", substream, cmd);
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ /* Enable TX task */
+ bcom_gen_bd_tx_reset(priv->tsk_tx);
+ mpc52xx_ac97_tx_fill(priv);
+ bcom_enable(priv->tsk_tx);
+/*
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0x0800); // 0x0100
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0x0100); // 0x0100
+*/
+ /* FIXME: Shouldn't we check for overrun too ? */
+ /* also, shouldn't we just activate TX here ? */
+
+ break;
+
+ case SNDRV_PCM_TRIGGER_STOP:
+ /* Disable TX task */
+ bcom_disable(priv->tsk_tx);
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0x0000); // 0x0100
+
+ break;
+
+ default:
+ rv = -EINVAL;
+ }
+
+ /* FIXME */
+ return rv;
+}
+
+static snd_pcm_uframes_t mpc52xx_ac97_pointer(struct snd_pcm_substream *substream)
+{
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct mpc52xx_ac97_priv *priv = substream->pcm->private_data;
+ u32 count;
+
+ count = priv->tsk_tx->bd[priv->tsk_tx->outdex].data[0] - priv->period_start;
+
+ return bytes_to_frames(runtime, count);
+}
+
+
+/* Ops */
+
+static struct snd_pcm_ops mpc52xx_ac97_playback_ops = {
+ .open = mpc52xx_ac97_playback_open,
+ .close = mpc52xx_ac97_playback_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = mpc52xx_ac97_hw_params,
+ .hw_free = mpc52xx_ac97_hw_free,
+ .prepare = mpc52xx_ac97_playback_prepare,
+ .trigger = mpc52xx_ac97_trigger,
+ .pointer = mpc52xx_ac97_pointer,
+};
+
+static struct snd_pcm_ops mpc52xx_ac97_capture_ops = {
+ .open = mpc52xx_ac97_capture_open,
+ .close = mpc52xx_ac97_capture_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = mpc52xx_ac97_hw_params,
+ .hw_free = mpc52xx_ac97_hw_free,
+ .prepare = mpc52xx_ac97_capture_prepare,
+ .trigger = mpc52xx_ac97_trigger,
+ .pointer = mpc52xx_ac97_pointer,
+};
+
+
+/* ======================================================================== */
+/* AC97 Bus interface */
+/* ======================================================================== */
+
+static unsigned short mpc52xx_ac97_bus_read(struct snd_ac97 *ac97,
+ unsigned short reg)
+{
+ struct mpc52xx_ac97_priv *priv = ac97->private_data;
+ int timeout;
+ unsigned int val;
+
+ /* Wait for it to be ready */
+ timeout = 1000;
+ while ((--timeout) && (in_be16(&priv->psc->mpc52xx_psc_status) &
+ MPC52xx_PSC_SR_CMDSEND) )
+ udelay(10);
+
+ if (!timeout) {
+ printk(KERN_ERR DRV_NAME ": timeout on ac97 bus (rdy)\n");
+ return 0xffff;
+ }
+
+ /* Do the read */
+ out_be32(&priv->psc->ac97_cmd, (1<<31) | ((reg & 0x7f) << 24));
+
+ /* Wait for the answer */
+ timeout = 1000;
+ while ((--timeout) && !(in_be16(&priv->psc->mpc52xx_psc_status) &
+ MPC52xx_PSC_SR_DATA_VAL) )
+ udelay(10);
+
+ if (!timeout) {
+ printk(KERN_ERR DRV_NAME ": timeout on ac97 read (val)\n");
+ return 0xffff;
+ }
+
+ /* Get the data */
+ val = in_be32(&priv->psc->ac97_data);
+ if ( ((val>>24) & 0x7f) != reg ) {
+ printk(KERN_ERR DRV_NAME ": reg echo error on ac97 read\n");
+ return 0xffff;
+ }
+ val = (val >> 8) & 0xffff;
+
+ return (unsigned short) val;
+}
+
+static void mpc52xx_ac97_bus_write(struct snd_ac97 *ac97,
+ unsigned short reg, unsigned short val)
+{
+ struct mpc52xx_ac97_priv *priv = ac97->private_data;
+ int timeout;
+
+ /* Wait for it to be ready */
+ timeout = 1000;
+ while ((--timeout) && (in_be16(&priv->psc->mpc52xx_psc_status) &
+ MPC52xx_PSC_SR_CMDSEND) )
+ udelay(10);
+
+ if (!timeout) {
+ printk(KERN_ERR DRV_NAME ": timeout on ac97 write\n");
+ return;
+ }
+
+ /* Write data */
+ out_be32(&priv->psc->ac97_cmd, ((reg & 0x7f) << 24) | (val << 8));
+}
+
+static void mpc52xx_ac97_bus_reset(struct snd_ac97 *ac97)
+{
+ struct mpc52xx_ac97_priv *priv = ac97->private_data;
+
+ dev_dbg(priv->dev, "ac97 codec reset\n");
+
+ /* Do a cold reset */
+ /*
+ * Note: This could interfere with some external AC97 mixers, as it
+ * could switch them into test mode, when SYNC or SDATA_OUT are not
+ * low while RES is low!
+ */
+ out_8(&priv->psc->op1, 0x02);
+ udelay(10);
+ out_8(&priv->psc->op0, 0x02);
+ udelay(50);
+
+ /* PSC recover from cold reset (cfr user manual, not sure if useful) */
+ out_be32(&priv->psc->sicr, in_be32(&priv->psc->sicr));
+}
+
+
+static struct snd_ac97_bus_ops mpc52xx_ac97_bus_ops = {
+ .read = mpc52xx_ac97_bus_read,
+ .write = mpc52xx_ac97_bus_write,
+ .reset = mpc52xx_ac97_bus_reset,
+};
+
+
+/* ======================================================================== */
+/* Sound driver setup */
+/* ======================================================================== */
+
+static int mpc52xx_ac97_setup_pcm(struct mpc52xx_ac97_priv *priv)
+{
+ int rv;
+
+ rv = snd_pcm_new(priv->card, DRV_NAME "-pcm", 0, 1, 1, &priv->pcm);
+ if (rv) {
+ dev_dbg(priv->dev, "%s: snd_pcm_new failed\n", DRV_NAME);
+ return rv;
+ }
+
+ rv = snd_pcm_lib_preallocate_pages_for_all(priv->pcm,
+ SNDRV_DMA_TYPE_CONTINUOUS, snd_dma_continuous_data(GFP_KERNEL),
+ 128*1024, 128*1024);
+ if (rv) {
+ dev_dbg(priv->dev,
+ "%s: snd_pcm_lib_preallocate_pages_for_all failed\n",
+ DRV_NAME);
+ return rv;
+ }
+
+ snd_pcm_set_ops(priv->pcm, SNDRV_PCM_STREAM_PLAYBACK,
+ &mpc52xx_ac97_playback_ops);
+ snd_pcm_set_ops(priv->pcm, SNDRV_PCM_STREAM_CAPTURE,
+ &mpc52xx_ac97_capture_ops);
+
+ priv->pcm->private_data = priv;
+ priv->pcm->info_flags = 0;
+
+ strcpy(priv->pcm->name, "Freescale MPC52xx PSC-AC97 PCM");
+
+ return 0;
+}
+
+static int mpc52xx_ac97_setup_mixer(struct mpc52xx_ac97_priv *priv)
+{
+ struct snd_ac97_bus *ac97_bus;
+ struct snd_ac97_template ac97_template;
+ int rv;
+
+ rv = snd_ac97_bus(priv->card, 0, &mpc52xx_ac97_bus_ops, NULL, &ac97_bus);
+ if (rv) {
+ printk(KERN_ERR DRV_NAME ": snd_ac97_bus failed\n");
+ return rv;
+ }
+
+ memset(&ac97_template, 0, sizeof(struct snd_ac97_template));
+ ac97_template.private_data = priv;
+
+ rv = snd_ac97_mixer(ac97_bus, &ac97_template, &priv->ac97);
+ if (rv) {
+ printk(KERN_ERR DRV_NAME ": snd_ac97_mixer failed\n");
+ return rv;
+ }
+
+ return 0;
+}
+
+static int mpc52xx_ac97_hwinit(struct mpc52xx_ac97_priv *priv)
+{
+ /* Reset everything first, for safety */
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_RX);
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_TX);
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_ERR_STAT);
+
+ /* Do a cold reset of codec */
+ /*
+ * Note: This could interfere with some external AC97 mixers, as it
+ * could switch them into test mode, when SYNC or SDATA_OUT are not
+ * low while RES is low!
+ */
+ out_8(&priv->psc->op1, 0x02);
+ udelay(10);
+ out_8(&priv->psc->op0, 0x02);
+ udelay(50);
+
+ /* Configure AC97 enhanced mode */
+ out_be32(&priv->psc->sicr, 0x03010000);
+
+ /* No slots active */
+ out_be32(&priv->psc->ac97_slots, 0x00000000);
+
+ /* No IRQ */
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0x0000);
+
+ /* FIFO levels */
+ out_8(&priv->fifo->rfcntl, 0x07);
+ out_8(&priv->fifo->tfcntl, 0x07);
+ out_be16(&priv->fifo->rfalarm, 0x80);
+ out_be16(&priv->fifo->tfalarm, 0x80);
+
+ /* Go */
+ out_8(&priv->psc->command, MPC52xx_PSC_TX_ENABLE);
+ out_8(&priv->psc->command, MPC52xx_PSC_RX_ENABLE);
+
+ return 0;
+}
+
+static int mpc52xx_ac97_hwshutdown(struct mpc52xx_ac97_priv *priv)
+{
+ /* No IRQ */
+ out_be16(&priv->psc->mpc52xx_psc_imr, 0x0000);
+
+ /* Disable TX & RX */
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_RX);
+ out_8(&priv->psc->command, MPC52xx_PSC_RST_TX);
+
+ /* FIXME : Reset or put codec in low power ? */
+
+ return 0;
+}
+
+/* ======================================================================== */
+/* OF Platform Driver */
+/* ======================================================================== */
+
+static int __devinit
+mpc52xx_ac97_probe(struct of_device *op, const struct of_device_id *match)
+{
+ struct device_node *dn = op->node;
+ struct mpc52xx_ac97_priv *priv;
+ struct snd_card *card;
+ struct resource res;
+ int tx_initiator;
+ int rv;
+ const unsigned int *devno;
+
+ dev_dbg(&op->dev, "probing MPC52xx PSC AC97 driver\n");
+
+ /* Get card structure */
+ rv = -ENOMEM;
+ card = snd_card_new(SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ THIS_MODULE, sizeof(struct mpc52xx_ac97_priv));
+ if (!card)
+ goto err_early;
+
+ priv = card->private_data;
+
+ /* Init our private structure */
+ priv->card = card;
+ priv->dev = &op->dev;
+
+ /* Get resources (mem,irq,...) */
+ rv = of_address_to_resource(dn, 0, &res);
+ if (rv)
+ goto err_early;
+
+ priv->mem_start = res.start;
+ priv->mem_len = res.end - res.start + 1;
+
+ if (!request_mem_region(priv->mem_start, priv->mem_len, DRV_NAME)) {
+ dev_err(&op->dev, "%s: request_mem_region failed\n", DRV_NAME);
+ rv = -EBUSY;
+ goto err_early;
+ }
+
+ priv->psc = ioremap(priv->mem_start, priv->mem_len);
+ if (!priv->psc) {
+ dev_err(&op->dev, "%s: ioremap failed\n", DRV_NAME);
+ rv = -ENOMEM;
+ goto err_iomap;
+ }
+ /* the fifo starts right after psc ends */
+ priv->fifo = (struct mpc52xx_psc_fifo __iomem *)&priv->psc[1]; /* FIXME */
+
+ priv->irq = irq_of_parse_and_map(dn, 0);
+ if (priv->irq == NO_IRQ) {
+ dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n",
+ DRV_NAME);
+ rv = -EBUSY;
+ goto err_irqmap;
+ }
+
+ /* Setup Bestcomm tasks */
+ spin_lock_init(&priv->dma_lock);
+
+ /*
+ * PSC1 or PSC2 can be configured for AC97 usage. Select the right
+ * channel, to let the BCOMM unit do its job correctly.
+ */
+ devno = of_get_property(dn, "cell-index", NULL);
+ if (!devno) {
+ dev_dbg(priv->dev, "No cell-index property for AC97 PSC!\n");
+ rv = -ENODEV;
+ goto err_irq;
+ }
+ switch (*devno) {
+ case 0: /* PSC1 */
+ tx_initiator = 14;
+ break;
+ case 1: /* PSC2 */
+ tx_initiator = 12;
+ break;
+ default:
+ dev_dbg(priv->dev, "Unknown PSC unit for AC97 usage!\n");
+ rv = -ENODEV;
+ goto err_irq;
+ }
+
+ priv->tsk_tx = bcom_gen_bd_tx_init(AC97_TX_NUM_BD,
+ priv->mem_start + sizeof(struct mpc52xx_psc) +
+ offsetof(struct mpc52xx_psc_fifo, tfdata),
+ tx_initiator,
+ 2); /* ipr : FIXME */
+ if (!priv->tsk_tx) {
+ dev_err(&op->dev, "%s: bcom_gen_bd_tx_init failed\n",
+ DRV_NAME);
+ rv = -ENOMEM;
+ goto err_bcomm;
+ }
+
+ /* Low level HW Init */
+ mpc52xx_ac97_hwinit(priv);
+
+ /* Request IRQ now that we're 'stable' */
+ rv = request_irq(priv->irq, mpc52xx_ac97_irq, 0, DRV_NAME, priv);
+ if (rv < 0) {
+ dev_err(&op->dev, "%s: request_irq failed\n", DRV_NAME);
+ goto err_irqreq;
+ }
+
+ rv = request_irq(bcom_get_task_irq(priv->tsk_tx),
+ mpc52xx_ac97_tx_irq, 0, DRV_NAME "_tx", priv);
+ if (rv < 0) {
+ dev_err(&op->dev, "%s: request_irq failed\n", DRV_NAME);
+ goto err_txirqreq;
+ }
+
+ /* Prepare sound stuff */
+ rv = mpc52xx_ac97_setup_mixer(priv);
+ if (rv)
+ goto err_late;
+
+ rv = mpc52xx_ac97_setup_pcm(priv);
+ if (rv)
+ goto err_late;
+
+ /* Finally register the card */
+ snprintf(card->shortname, sizeof(card->shortname), "%s", DRV_NAME);
+ snprintf(card->longname, sizeof(card->longname),
+ "Freescale MPC52xx PSC-AC97 (%s)", card->mixername);
+
+ rv = snd_card_register(card);
+ if (rv) {
+ dev_err(&op->dev, "%s: snd_card_register failed\n", DRV_NAME);
+ goto err_late;
+ }
+
+ dev_set_drvdata(&op->dev, priv);
+
+ return 0;
+
+err_late:
+ free_irq(bcom_get_task_irq(priv->tsk_tx), priv);
+err_txirqreq:
+ free_irq(priv->irq, priv);
+err_irqreq:
+ bcom_gen_bd_tx_release(priv->tsk_tx);
+err_bcomm:
+ mpc52xx_ac97_hwshutdown(priv);
+err_irq:
+ irq_dispose_mapping(priv->irq);
+err_irqmap:
+ iounmap(priv->psc);
+err_iomap:
+ release_mem_region(priv->mem_start, priv->mem_len);
+err_early:
+ if (card)
+ snd_card_free(card);
+ return rv;
+}
+
+static int mpc52xx_ac97_remove(struct of_device *op)
+{
+ struct mpc52xx_ac97_priv *priv;
+
+ dev_dbg(&op->dev, "removing MPC52xx PSC AC97 driver\n");
+
+ priv = dev_get_drvdata(&op->dev);
+ if (priv) {
+ /* Sound subsys shutdown */
+ snd_card_free(priv->card);
+
+ /* Low level HW shutdown */
+ mpc52xx_ac97_hwshutdown(priv);
+
+ /* Release bestcomm tasks */
+ free_irq(bcom_get_task_irq(priv->tsk_tx), priv);
+ bcom_gen_bd_tx_release(priv->tsk_tx);
+
+ /* Release resources */
+ iounmap(priv->psc);
+ free_irq(priv->irq, priv);
+ irq_dispose_mapping(priv->irq);
+ release_mem_region(priv->mem_start, priv->mem_len);
+ }
+
+ dev_set_drvdata(&op->dev, NULL);
+
+ return 0;
+}
+
+
+static struct of_device_id mpc52xx_ac97_of_match[] = {
+ {
+ .type = "sound",
+ .compatible = "mpc5200b-psc-ac97", /* B only for now */
+ }, { }
+};
+MODULE_DEVICE_TABLE(of, mpc52xx_ac97_of_match);
+static struct of_platform_driver mpc52xx_ac97_of_driver = {
+ .owner = THIS_MODULE,
+ .name = DRV_NAME,
+ .match_table = mpc52xx_ac97_of_match,
+ .probe = mpc52xx_ac97_probe,
+ .remove = mpc52xx_ac97_remove,
+ .driver = {
+ .name = DRV_NAME,
+ },
+};
+
+/* ======================================================================== */
+/* Module */
+/* ======================================================================== */
+
+static int __init mpc52xx_ac97_init(void)
+{
+ int rv;
+
+ printk(KERN_INFO "Sound: MPC52xx PSC AC97 driver\n");
+
+ rv = of_register_platform_driver(&mpc52xx_ac97_of_driver);
+ if (rv) {
+ printk(KERN_ERR DRV_NAME ": "
+ "of_register_platform_driver failed (%i)\n", rv);
+ return rv;
+ }
+
+ return 0;
+}
+
+static void __exit mpc52xx_ac97_exit(void)
+{
+ of_unregister_platform_driver(&mpc52xx_ac97_of_driver);
+}
+
+module_init(mpc52xx_ac97_init);
+module_exit(mpc52xx_ac97_exit);
+
+MODULE_AUTHOR("Sylvain Munaut <tnt@246tNt.com>");
+MODULE_DESCRIPTION(DRV_NAME ": Freescale MPC52xx PSC AC97 driver");
+MODULE_LICENSE("GPL");
+
}
#endif
- static int __init cs4231_init(void)
+ static int __devinit cs4231_probe(struct of_device *op, const struct of_device_id *match)
{
- #ifdef SBUS_SUPPORT
- struct sbus_bus *sbus;
- struct sbus_dev *sdev;
- #endif
#ifdef EBUS_SUPPORT
- struct linux_ebus *ebus;
- struct linux_ebus_device *edev;
+ if (!strcmp(op->node->parent->name, "ebus"))
+ return cs4231_ebus_probe(op, match);
#endif
- int found;
-
- found = 0;
-
#ifdef SBUS_SUPPORT
- for_all_sbusdev(sdev, sbus) {
- if (!strcmp(sdev->prom_name, "SUNW,CS4231")) {
- if (cs4231_sbus_attach(sdev) == 0)
- found++;
- }
- }
+ if (!strcmp(op->node->parent->name, "sbus") ||
+ !strcmp(op->node->parent->name, "sbi"))
+ return cs4231_sbus_probe(op, match);
#endif
- #ifdef EBUS_SUPPORT
- for_each_ebus(ebus) {
- for_each_ebusdev(edev, ebus) {
- int match = 0;
-
- if (!strcmp(edev->prom_node->name, "SUNW,CS4231")) {
- match = 1;
- } else if (!strcmp(edev->prom_node->name, "audio")) {
- const char *compat;
-
- compat = of_get_property(edev->prom_node,
- "compatible", NULL);
- if (compat && !strcmp(compat, "SUNW,CS4231"))
- match = 1;
- }
+ return -ENODEV;
+ }
- if (match &&
- cs4231_ebus_attach(edev) == 0)
- found++;
- }
- }
- #endif
+ static int __devexit cs4231_remove(struct of_device *op)
+ {
+ struct snd_cs4231 *chip = dev_get_drvdata(&op->dev);
+ snd_card_free(chip->card);
- return (found > 0) ? 0 : -EIO;
+ return 0;
}
- static void __exit cs4231_exit(void)
- {
- struct snd_cs4231 *p = cs4231_list;
+ static const struct of_device_id cs4231_match[] = {
+ {
+ .name = "SUNW,CS4231",
+ },
+ {
+ .name = "audio",
+ .compatible = "SUNW,CS4231",
+ },
+ {},
+ };
- while (p != NULL) {
- struct snd_cs4231 *next = p->next;
+ MODULE_DEVICE_TABLE(of, cs4231_match);
- snd_card_free(p->card);
+ static struct of_platform_driver cs4231_driver = {
++ .owner = THIS_MODULE,
+ .name = "audio",
+ .match_table = cs4231_match,
+ .probe = cs4231_probe,
+ .remove = __devexit_p(cs4231_remove),
+ };
- p = next;
- }
+ static int __init cs4231_init(void)
+ {
+ return of_register_driver(&cs4231_driver, &of_bus_type);
+ }
- cs4231_list = NULL;
+ static void __exit cs4231_exit(void)
+ {
+ of_unregister_driver(&cs4231_driver);
}
module_init(cs4231_init);