- Update to 3.2-rc1.

author Jeff Mahoney <jeffm@suse.com>

Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)

committer Jeff Mahoney <jeffm@suse.com>

Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)
author Jeff Mahoney <jeffm@suse.com>
Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)
committer Jeff Mahoney <jeffm@suse.com>
Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)
diff --cc Documentation/kernel-parameters.txt
Simple merge
diff --cc Documentation/sysctl/kernel.txt
Simple merge
diff --cc Makefile
Simple merge
diff --cc arch/ia64/Kconfig
Simple merge
diff --cc arch/ia64/kernel/acpi.c
Simple merge
diff --cc arch/powerpc/kernel/exceptions-64s.S
Simple merge
diff --cc arch/powerpc/kernel/legacy_serial.c
Simple merge
diff --cc arch/powerpc/kernel/prom_init.c
Simple merge
diff --cc arch/powerpc/platforms/pseries/setup.c
Simple merge
diff --cc arch/powerpc/xmon/xmon.c
Simple merge
diff --cc arch/s390/Kconfig
Simple merge
diff --cc arch/x86/Kconfig
Simple merge
diff --cc arch/x86/Makefile
Simple merge
diff --cc arch/x86/kernel/acpi/boot.c

index 6c86282,4558f0d..d246e74
--- 1/arch/x86/kernel/acpi/boot.c
--- 2/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@@ -1361,23 -1343,7 +1343,22 @@@ static int __init dmi_ignore_irq0_timer
         }
         return 0;
   }
- #endif
   
+ +/*
+ + * Callback taking the matched DMI entry: unless acpi=force is in effect,
+ + * set acpi_rsdt_forced and log which system triggered it; otherwise only
+ + * log that the blacklist entry was overruled.  Always returns 0.
+ + */
+ +static int __init force_acpi_rsdt(const struct dmi_system_id *d)
+ +{
+ +      if (acpi_force) {
+ +              printk(KERN_NOTICE
+ +                     "Warning: acpi=force overrules DMI blacklist: "
+ +                     "acpi=rsdt\n");
+ +              return 0;
+ +      }
+ +
+ +      printk(KERN_NOTICE "%s detected: force use of acpi=rsdt\n",
+ +             d->ident);
+ +      acpi_rsdt_forced = 1;
+ +      return 0;
+ +}
+ +
   /*
    * If your system is blacklisted here, but you find that acpi=force
    * works for you, please contact linux-acpi@vger.kernel.org
diff --cc arch/x86/kernel/apic/bigsmp_32.c
Simple merge
diff --cc arch/x86/kernel/apic/probe_32.c
Simple merge
diff --cc arch/x86/kernel/apm_32.c
Simple merge
diff --cc arch/x86/kernel/cpu/perf_event.c
Simple merge
diff --cc arch/x86/kernel/dumpstack_64.c
Simple merge
diff --cc arch/x86/kernel/e820.c
Simple merge
diff --cc arch/x86/kernel/entry_32.S
Simple merge
diff --cc arch/x86/kernel/entry_64.S

index 8e32890,faf8d5e..3f6de7a
--- 1/arch/x86/kernel/entry_64.S
--- 2/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@@ -1217,41 -1212,7 +1217,41 @@@ ENTRY(call_softirq
         CFI_ENDPROC
   END(call_softirq)
   
+ +#ifdef CONFIG_STACK_UNWIND
+ +/*
+ + * Record the caller's register state in the frame at (%rdi), then jump to
+ + * the function whose address arrived in %rsi.
+ + *
+ + * The two xchgq instructions rotate the arguments so that the target sees
+ + * %rdi unchanged, %rsi = incoming %rdx and %rdx = incoming %rcx, while
+ + * %rcx ends up holding the jump target.
+ + * R15 .. SS are presumably pt_regs field offsets -- TODO confirm.
+ + */
+ +ENTRY(arch_unwind_init_running)
+ +      CFI_STARTPROC
+ +      /* %r12-%r15, %rbp and %rbx are stored with their current values. */
+ +      movq    %r15, R15(%rdi)
+ +      movq    %r14, R14(%rdi)
+ +      xchgq   %rsi, %rdx
+ +      movq    %r13, R13(%rdi)
+ +      movq    %r12, R12(%rdi)
+ +      /* Writing %eax clears all of %rax; it is the zero stored below. */
+ +      xorl    %eax, %eax
+ +      movq    %rbp, RBP(%rdi)
+ +      movq    %rbx, RBX(%rdi)
+ +      /* Nothing has been pushed yet, so (%rsp) is our return address. */
+ +      movq    (%rsp), %r9
+ +      xchgq   %rdx, %rcx
+ +      /* The remaining general-purpose slots and ORIG_RAX are zeroed. */
+ +      movq    %rax, R11(%rdi)
+ +      movq    %rax, R10(%rdi)
+ +      movq    %rax, R9(%rdi)
+ +      movq    %rax, R8(%rdi)
+ +      movq    %rax, RAX(%rdi)
+ +      movq    %rax, RCX(%rdi)
+ +      movq    %rax, RDX(%rdi)
+ +      movq    %rax, RSI(%rdi)
+ +      movq    %rax, RDI(%rdi)
+ +      movq    %rax, ORIG_RAX(%rdi)
+ +      /* Saved RIP is the return address; saved RSP is the slot above it. */
+ +      movq    %r9, RIP(%rdi)
+ +      leaq    8(%rsp), %r9
+ +      movq    $__KERNEL_CS, CS(%rdi)
+ +      movq    %rax, EFLAGS(%rdi)
+ +      movq    %r9, RSP(%rdi)
+ +      movq    $__KERNEL_DS, SS(%rdi)
+ +      /* Tail-jump: the target returns directly to our caller. */
+ +      jmpq    *%rcx
+ +      CFI_ENDPROC
+ +END(arch_unwind_init_running)
+ +#endif
+ +
- #ifdef CONFIG_PARAVIRT_XEN
+ #ifdef CONFIG_XEN
   zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
   
   /*
diff --cc arch/x86/kernel/hpet.c
Simple merge
diff --cc arch/x86/kernel/reboot.c
Simple merge
diff --cc arch/x86/kernel/vmlinux.lds.S
Simple merge
diff --cc arch/x86/kvm/svm.c
Simple merge
diff --cc arch/x86/kvm/x86.c

index d8a9fc7,c38efd7..49b593d
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -1557,7 -1572,6 +1572,7 @@@ int kvm_set_msr_common(struct kvm_vcpu 
         case MSR_VM_HSAVE_PA:
         case MSR_AMD64_PATCH_LOADER:
                 break;
-       case 0xe2:
++      case MSR_NHM_SNB_PKG_CST_CFG_CTL: /* 0xe2 */
         case 0x200 ... 0x2ff:
                 return set_msr_mtrr(vcpu, msr, data);
         case MSR_IA32_APICBASE:
@@@ -1859,9 -1878,11 +1879,12 @@@ int kvm_get_msr_common(struct kvm_vcpu 
         case MSR_K8_INT_PENDING_MSG:
         case MSR_AMD64_NB_CFG:
         case MSR_FAM10H_MMIO_CONF_BASE:
-       case 0xe2:
++      case MSR_NHM_SNB_PKG_CST_CFG_CTL: /* 0xe2 */
                 data = 0;
                 break;
+       case MSR_IA32_UCODE_REV:
+               data = 0x100000000ULL;
+               break;
         case MSR_MTRRcap:
                 data = 0x500 | KVM_NR_VAR_MTRR;
                 break;
diff --cc drivers/acpi/Kconfig
Simple merge
diff --cc drivers/acpi/ec_sys.c

index 47d4790,6c47ae9..e53e00c
--- 1/drivers/acpi/ec_sys.c
--- 2/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@@ -11,7 -11,7 +11,8 @@@
   #include <linux/kernel.h>
   #include <linux/acpi.h>
   #include <linux/debugfs.h>
+ #include <linux/module.h>
+ +#include <linux/uaccess.h>
   #include "internal.h"
   
   MODULE_AUTHOR("Thomas Renninger <trenn@suse.de>");
diff --cc drivers/acpi/osl.c
Simple merge
diff --cc drivers/char/Kconfig
Simple merge
diff --cc drivers/connector/cn_proc.c
Simple merge
diff --cc drivers/cpufreq/cpufreq_ondemand.c
Simple merge
diff --cc drivers/hid/hid-apple.c
Simple merge
diff --cc drivers/hid/hid-core.c
Simple merge
diff --cc drivers/hid/hid-ids.h
Simple merge
diff --cc drivers/idle/intel_idle.c
Simple merge
diff --cc drivers/input/mouse/synaptics.c

index 80bcfcd,c080b82..0f724ba
--- 1/drivers/input/mouse/synaptics.c
--- 2/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@@ -1036,14 -1363,13 +1471,16 @@@ int synaptics_init(struct psmouse *psmo
   
         priv->pkt_type = SYN_MODEL_NEWABS(priv->model_id) ? SYN_NEWABS : SYN_OLDABS;
   
-       printk(KERN_INFO "Synaptics Touchpad, model: %ld, fw: %ld.%ld, id: %#lx, caps: %#lx/%#lx/%#lx\n",
-               SYN_ID_MODEL(priv->identity),
-               SYN_ID_MAJOR(priv->identity), SYN_ID_MINOR(priv->identity),
-               priv->model_id, priv->capabilities, priv->ext_cap, priv->ext_cap_0c);
+       psmouse_info(psmouse,
+                    "Touchpad model: %ld, fw: %ld.%ld, id: %#lx, caps: %#lx/%#lx/%#lx\n",
+                    SYN_ID_MODEL(priv->identity),
+                    SYN_ID_MAJOR(priv->identity), SYN_ID_MINOR(priv->identity),
+                    priv->model_id,
+                    priv->capabilities, priv->ext_cap, priv->ext_cap_0c);
   
+ +      if (synaptics_init_led(psmouse) < 0)
+ +              goto init_fail;
+ +
         set_input_params(psmouse->dev, priv);
   
         /*
diff --cc drivers/input/mouse/synaptics.h

index 7c1c0bf,622aea8..4fded31
--- 1/drivers/input/mouse/synaptics.h
--- 2/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@@ -127,10 -139,11 +139,13 @@@ struct synaptics_hw_state 
         unsigned int down:1;
         unsigned char ext_buttons;
         signed char scroll;
+ 
+       /* As reported in last AGM-CONTACT packets */
+       struct synaptics_mt_state mt_state;
   };
   
+ +struct synaptics_led;
+ +
   struct synaptics_data {
         /* Data read from the touchpad */
         unsigned long int model_id;             /* Model-ID */
@@@ -148,8 -161,15 +163,16 @@@
   
         struct serio *pt_port;                  /* Pass-through serio port */
   
-       struct synaptics_hw_state mt;           /* current gesture packet */
+       struct synaptics_mt_state mt_state;     /* Current mt finger state */
+       bool mt_state_lost;                     /* mt_state may be incorrect */
+ 
+       /*
+        * Last received Advanced Gesture Mode (AGM) packet. An AGM packet
+        * contains position data for a second contact, at half resolution.
+        */
+       struct synaptics_hw_state agm;
+       bool agm_pending;                       /* new AGM packet received */
+ +      struct synaptics_led *led;
   };
   
   void synaptics_module_init(void);
diff --cc drivers/input/touchscreen/Kconfig
Simple merge
diff --cc drivers/input/touchscreen/Makefile
Simple merge
diff --cc drivers/isdn/mISDN/socket.c
Simple merge
diff --cc drivers/md/Kconfig
Simple merge
diff --cc drivers/md/Makefile

index 504e0be,046860c..f49eaae
--- 1/drivers/md/Makefile
--- 2/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@@ -38,7 -41,7 +41,8 @@@ obj-$(CONFIG_DM_MIRROR)               += dm-mirror.
   obj-$(CONFIG_DM_LOG_USERSPACE)        += dm-log-userspace.o
   obj-$(CONFIG_DM_ZERO)         += dm-zero.o
   obj-$(CONFIG_DM_RAID) += dm-raid.o
+ obj-$(CONFIG_DM_THIN_PROVISIONING)    += dm-thin-pool.o
+ +obj-$(CONFIG_DM_RAID45)               += dm-raid45.o dm-log.o dm-memcache.o
   
   ifeq ($(CONFIG_DM_UEVENT),y)
   dm-mod-objs                   += dm-uevent.o
diff --cc drivers/md/dm-least-pending.c

index 4bce57c,0000000..f4e98b7

mode 100644,000000..100644
--- 1/drivers/md/dm-least-pending.c
--- /dev/null
+++ b/drivers/md/dm-least-pending.c
@@@ -1,258 -1,0 +1,259 @@@
+ +/*
+ + * (C) Copyright 2008 Hewlett-Packard Development Company, L.P
+ + *
+ + * This file is released under the GPL.
+ + */
+ +
+ +#include "dm-path-selector.h"
+ +
+ +#include <linux/slab.h>
++#include <linux/module.h>
+ +
+ +#define DM_MSG_PREFIX "multipath least-pending"
+ +
+ +/*-----------------------------------------------------------------
+ +* Path-handling code, paths are held in lists
+ +*---------------------------------------------------------------*/
+ +/* Per-path bookkeeping; a pointer to it is stored in dm_path->pscontext. */
+ +struct path_info {
+ +       struct list_head list;          /* Link in a selector's valid or invalid list */
+ +       struct dm_path *path;           /* Path this entry describes */
+ +       unsigned repeat_count;          /* Handed back by lpp_select_path() */
+ +       atomic_t io_count;              /* +1 in lpp_select_path(), -1 in lpp_end_io() */
+ +};
+ +
+ +/* Unlink and free every path_info queued on @paths, leaving it empty. */
+ +static void free_paths(struct list_head *paths)
+ +{
+ +      while (!list_empty(paths)) {
+ +              struct path_info *victim =
+ +                      list_entry(paths->next, struct path_info, list);
+ +
+ +              list_del(&victim->list);
+ +              kfree(victim);
+ +      }
+ +}
+ +
+ +/*-----------------------------------------------------------------
+ + * Least-pending selector
+ + *---------------------------------------------------------------*/
+ +
+ +#define LPP_MIN_IO     1
+ +
+ +/* Selector context (ps->context): every known path sits on one of two lists. */
+ +struct selector {
+ +       struct list_head valid_paths;   /* Paths lpp_select_path() may pick */
+ +       struct list_head invalid_paths; /* Paths moved here by lpp_fail_path() */
+ +};
+ +
+ +/* Allocate a selector with both path lists empty; NULL if out of memory. */
+ +static struct selector *alloc_selector(void)
+ +{
+ +      struct selector *sel = kmalloc(sizeof(*sel), GFP_KERNEL);
+ +
+ +      if (!sel)
+ +              return NULL;
+ +
+ +      INIT_LIST_HEAD(&sel->valid_paths);
+ +      INIT_LIST_HEAD(&sel->invalid_paths);
+ +      return sel;
+ +}
+ +
+ +/*
+ + * Path selector constructor: attach a fresh selector to ps->context.
+ + * argc/argv are not used.  Returns 0, or -ENOMEM when allocation fails
+ + * (ps->context is then left untouched).
+ + */
+ +static int lpp_create(struct path_selector *ps, unsigned argc, char **argv)
+ +{
+ +      struct selector *sel = alloc_selector();
+ +
+ +      if (sel) {
+ +              ps->context = sel;
+ +              return 0;
+ +      }
+ +
+ +      return -ENOMEM;
+ +}
+ +
+ +/* Path selector destructor: free all path entries, then the selector. */
+ +static void lpp_destroy(struct path_selector *ps)
+ +{
+ +      struct selector *sel = ps->context;
+ +
+ +      /* Entries live on exactly one of the two lists; drain both. */
+ +      free_paths(&sel->valid_paths);
+ +      free_paths(&sel->invalid_paths);
+ +
+ +      kfree(sel);
+ +      ps->context = NULL;
+ +}
+ +
+ +/*
+ + * Emit status text into @result (at most @maxlen bytes) and return the
+ + * number of bytes written.  With a NULL @path the selector-level fields
+ + * are emitted, otherwise the per-path ones.
+ + */
+ +static int lpp_status(struct path_selector *ps, struct dm_path *path,
+ +                      status_type_t type, char *result, unsigned int maxlen)
+ +{
+ +      struct path_info *pi;
+ +      int sz = 0;     /* name is required by DMEMIT() */
+ +
+ +      if (!path) {
+ +              if (type == STATUSTYPE_INFO)
+ +                      DMEMIT("1 ");
+ +              else if (type == STATUSTYPE_TABLE)
+ +                      DMEMIT("0 ");
+ +
+ +              return sz;
+ +      }
+ +
+ +      pi = path->pscontext;
+ +
+ +      /* Only the INFO variant reports anything per path. */
+ +      if (type == STATUSTYPE_INFO)
+ +              DMEMIT("%u:%u ", pi->repeat_count,
+ +                               atomic_read(&pi->io_count));
+ +
+ +      return sz;
+ +}
+ +
+ +/*
+ + * Register @path with the selector during table load.
+ + *
+ + * At most one argument is accepted: the repeat count, i.e. the number of
+ + * I/Os before switching path (defaults to LPP_MIN_IO).  On failure *error
+ + * is set and -EINVAL or -ENOMEM is returned; 0 on success.
+ + */
+ +static int lpp_add_path(struct path_selector *ps, struct dm_path *path,
+ +                      int argc, char **argv, char **error)
+ +{
+ +      struct selector *sel = ps->context;
+ +      struct path_info *entry;
+ +      unsigned repeat_count = LPP_MIN_IO;
+ +
+ +      if (argc > 1) {
+ +              *error = "least-pending ps: incorrect number of arguments";
+ +              return -EINVAL;
+ +      }
+ +
+ +      if (argc == 1) {
+ +              if (sscanf(argv[0], "%u", &repeat_count) != 1) {
+ +                      *error = "least-pending ps: invalid repeat count";
+ +                      return -EINVAL;
+ +              }
+ +      }
+ +
+ +      entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ +      if (!entry) {
+ +              *error = "least-pending ps: Error allocating path context";
+ +              return -ENOMEM;
+ +      }
+ +
+ +      atomic_set(&entry->io_count, 0);
+ +      entry->repeat_count = repeat_count;
+ +      entry->path = path;
+ +
+ +      /* Cross-link path and entry, then make the path selectable. */
+ +      path->pscontext = entry;
+ +      list_add(&entry->list, &sel->valid_paths);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Mark a path failed: reset its in-flight counter and move it to the
+ + * invalid list.  Paths without selector context are ignored.
+ + */
+ +static void lpp_fail_path(struct path_selector *ps, struct dm_path *p)
+ +{
+ +      struct selector *sel = ps->context;
+ +      struct path_info *entry = p->pscontext;
+ +
+ +      if (entry) {
+ +              atomic_set(&entry->io_count, 0);
+ +              list_move(&entry->list, &sel->invalid_paths);
+ +      }
+ +}
+ +
+ +/*
+ + * Move a path back onto the valid list.  Returns 0 on success, 1 when the
+ + * path has no selector context.
+ + */
+ +static int lpp_reinstate_path(struct path_selector *ps, struct dm_path *p)
+ +{
+ +      struct selector *sel = ps->context;
+ +      struct path_info *entry = p->pscontext;
+ +
+ +      if (entry) {
+ +              list_move(&entry->list, &sel->valid_paths);
+ +              return 0;
+ +      }
+ +
+ +      return 1;
+ +}
+ +
+ +/*
+ + * Pick the valid path with the fewest I/Os currently in flight.
+ + *
+ + * The chosen path's io_count is incremented here and decremented again in
+ + * lpp_end_io().  *repeat_count receives the path's configured repeat
+ + * count.  nr_bytes is not used.  Returns NULL when no valid path exists.
+ + */
+ +static struct dm_path *lpp_select_path(struct path_selector *ps,
+ +                                      unsigned *repeat_count,
+ +                                      size_t nr_bytes)
+ +{
+ +      struct selector *s = ps->context;
+ +      struct path_info *pi, *least_io_path = NULL;
+ +
+ +      /* Nothing is removed while walking, so the plain iterator suffices. */
+ +      list_for_each_entry(pi, &s->valid_paths, list) {
+ +              /*
+ +               * Keep the candidate with the LOWEST count; ties keep the
+ +               * earlier entry.
+ +               */
+ +              if (!least_io_path ||
+ +                  atomic_read(&pi->io_count) <
+ +                  atomic_read(&least_io_path->io_count))
+ +                      least_io_path = pi;
+ +
+ +              /* An idle path cannot be beaten; stop searching. */
+ +              if (!atomic_read(&least_io_path->io_count))
+ +                      break;
+ +      }
+ +
+ +      /* Empty valid list: the loop body never ran. */
+ +      if (!least_io_path)
+ +              return NULL;
+ +
+ +      atomic_inc(&least_io_path->io_count);
+ +      *repeat_count = least_io_path->repeat_count;
+ +
+ +      return least_io_path->path;
+ +}
+ +
+ +/*
+ + * I/O completion hook: drop the in-flight count taken in
+ + * lpp_select_path().  nr_bytes is not used.  Returns 0, or 1 when the
+ + * path has no selector context.
+ + */
+ +static int lpp_end_io(struct path_selector *ps, struct dm_path *path,
+ +                    size_t nr_bytes)
+ +{
+ +      struct path_info *entry = path->pscontext;
+ +
+ +      if (entry) {
+ +              atomic_dec(&entry->io_count);
+ +              return 0;
+ +      }
+ +
+ +      return 1;
+ +}
+ +
+ +/*
+ + * Path selector operations registered under the name "least-pending".
+ + *
+ + * NOTE(review): table_args is 1 and info_args is 0, yet lpp_status() emits
+ + * no per-path field for STATUSTYPE_TABLE and one "%u:%u" field for
+ + * STATUSTYPE_INFO -- confirm these counts are what userspace expects.
+ + */
+ +static struct path_selector_type lpp_ps = {
+ +       .name = "least-pending",
+ +       .module = THIS_MODULE,
+ +       .table_args = 1,
+ +       .info_args = 0,
+ +       .create = lpp_create,
+ +       .destroy = lpp_destroy,
+ +       .status = lpp_status,
+ +       .add_path = lpp_add_path,
+ +       .fail_path = lpp_fail_path,
+ +       .reinstate_path = lpp_reinstate_path,
+ +       .select_path = lpp_select_path,
+ +       .end_io = lpp_end_io,
+ +};
+ +
+ +/*
+ + * Module init: register the least-pending path selector.
+ + * Returns the result of dm_register_path_selector(); the "loaded" banner
+ + * is only printed when registration actually succeeded.
+ + */
+ +static int __init dm_lpp_init(void)
+ +{
+ +      int r = dm_register_path_selector(&lpp_ps);
+ +
+ +      if (r < 0) {
+ +              DMERR("register failed %d", r);
+ +              return r;
+ +      }
+ +
+ +      DMINFO("version 1.0.0 loaded");
+ +
+ +      return r;
+ +}
+ +
+ +/* Module exit: unregister the selector, logging a failure if any. */
+ +static void __exit dm_lpp_exit(void)
+ +{
+ +      int rc;
+ +
+ +      rc = dm_unregister_path_selector(&lpp_ps);
+ +      if (rc >= 0)
+ +              return;
+ +
+ +      DMERR("unregister failed %d", rc);
+ +}
+ +
+ +module_init(dm_lpp_init);
+ +module_exit(dm_lpp_exit);
+ +
+ +MODULE_DESCRIPTION(DM_NAME " least-pending multipath path selector");
+ +MODULE_AUTHOR("Sakshi Chaitanya Veni <vsakshi@hp.com>");
+ +MODULE_LICENSE("GPL");
+ +
diff --cc drivers/md/dm-memcache.c

index abfcd5f,0000000..2d7d914

mode 100644,000000..100644
--- 1/drivers/md/dm-memcache.c
--- /dev/null
+++ b/drivers/md/dm-memcache.c
@@@ -1,302 -1,0 +1,303 @@@
+ +/*
+ + * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
+ + *
+ + * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
+ + *
+ + * Device-mapper memory object handling:
+ + *
+ + * o allocate/free total_pages in a per client page pool.
+ + *
+ + * o allocate/free memory objects with chunks (1..n) of
+ + *   pages_per_chunk pages hanging off.
+ + *
+ + * This file is released under the GPL.
+ + */
+ +
+ +#define       DM_MEM_CACHE_VERSION    "0.2"
+ +
+ +#include "dm.h"
+ +#include "dm-memcache.h"
+ +#include <linux/dm-io.h>
+ +#include <linux/slab.h>
++#include <linux/module.h>
+ +
+ +/* Per-client page pool and the mempool of memory objects drawing on it. */
+ +struct dm_mem_cache_client {
+ +      spinlock_t lock;                /* Taken around free_list and the counters */
+ +      mempool_t *objs_pool;           /* Pool of dm_mem_cache_object arrays */
+ +      struct page_list *free_list;    /* Singly linked list of unused pages */
+ +      unsigned objects;               /* # of objects the cache is sized for */
+ +      unsigned chunks;                /* # of chunks per object */
+ +      unsigned pages_per_chunk;       /* # of pages per chunk */
+ +      unsigned free_pages;            /* Pages not reserved by an object */
+ +      unsigned total_pages;           /* All pages owned by this client */
+ +};
+ +
+ +/*
+ + * Release every page on @list together with its page_list element.
+ + * An element without a page is a bug.
+ + */
+ +static void free_cache_pages(struct page_list *list)
+ +{
+ +      struct page_list *next;
+ +
+ +      for (; list; list = next) {
+ +              /* Fetch the successor before the element is freed. */
+ +              next = list->next;
+ +
+ +              BUG_ON(!list->page);
+ +              __free_page(list->page);
+ +              kfree(list);
+ +      }
+ +}
+ +
+ +/*
+ + * Allocate @pages pages, each wrapped in a page_list element, and return
+ + * them as a singly linked list.  On any allocation failure everything
+ + * allocated so far is released and NULL is returned.
+ + */
+ +static struct page_list *alloc_cache_pages(unsigned pages)
+ +{
+ +      struct page_list *head = NULL;
+ +
+ +      for (; pages; pages--) {
+ +              struct page_list *elem;
+ +              struct page *page = alloc_page(GFP_NOIO);
+ +
+ +              if (!page)
+ +                      goto err;
+ +
+ +              elem = kmalloc(sizeof(*elem), GFP_NOIO);
+ +              if (!elem) {
+ +                      /* The page is not on the list yet; free it here. */
+ +                      __free_page(page);
+ +                      goto err;
+ +              }
+ +
+ +              /* Push onto the front of the list built so far. */
+ +              elem->page = page;
+ +              elem->next = head;
+ +              head = elem;
+ +      }
+ +
+ +      return head;
+ +
+ +err:
+ +      free_cache_pages(head);
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Move page_list elements from the client's free list onto each chunk of
+ + * the memory object: pages_per_chunk elements for every one of cl->chunks
+ + * chunks.
+ + *
+ + * The free list running dry is a bug; dm_mem_cache_alloc() reserves the
+ + * pages in cl->free_pages before calling this.
+ + */
+ +static void alloc_chunks(struct dm_mem_cache_client *cl,
+ +                       struct dm_mem_cache_object *obj)
+ +{
+ +      unsigned chunks = cl->chunks;
+ +      unsigned long flags;
+ +
+ +      /* local_irq_save() already disables interrupts on this CPU. */
+ +      local_irq_save(flags);
+ +      while (chunks--) {
+ +              unsigned p = cl->pages_per_chunk;
+ +
+ +              obj[chunks].pl = NULL;
+ +
+ +              while (p--) {
+ +                      struct page_list *pl;
+ +
+ +                      /* Take next element from free list */
+ +                      spin_lock(&cl->lock);
+ +                      pl = cl->free_list;
+ +                      BUG_ON(!pl);
+ +                      cl->free_list = pl->next;
+ +                      spin_unlock(&cl->lock);
+ +
+ +                      /* Push it onto this chunk's private list. */
+ +                      pl->next = obj[chunks].pl;
+ +                      obj[chunks].pl = pl;
+ +              }
+ +      }
+ +
+ +      local_irq_restore(flags);
+ +}
+ +
+ +/*
+ + * Return the page_list elements of every chunk of the memory object to
+ + * the client's free list, bumping cl->free_pages once per element.
+ + */
+ +static void free_chunks(struct dm_mem_cache_client *cl,
+ +                      struct dm_mem_cache_object *obj)
+ +{
+ +      unsigned chunks = cl->chunks;
+ +      unsigned long flags;
+ +      struct page_list *next, *pl;
+ +
+ +      /* local_irq_save() already disables interrupts on this CPU. */
+ +      local_irq_save(flags);
+ +      while (chunks--) {
+ +              for (pl = obj[chunks].pl; pl; pl = next) {
+ +                      /* Remember the successor before relinking pl. */
+ +                      next = pl->next;
+ +
+ +                      spin_lock(&cl->lock);
+ +                      pl->next = cl->free_list;
+ +                      cl->free_list = pl;
+ +                      cl->free_pages++;
+ +                      spin_unlock(&cl->lock);
+ +              }
+ +      }
+ +
+ +      local_irq_restore(flags);
+ +}
+ +
+ +/*
+ + * Create/destroy dm memory cache client resources.
+ + */
+ +/*
+ + * Allocate a client holding objects * chunks * pages_per_chunk pages and
+ + * a mempool of @objects object arrays with @chunks entries each.
+ + * A page total of zero is a bug.  Returns the client or ERR_PTR(-ENOMEM).
+ + */
+ +struct dm_mem_cache_client *
+ +dm_mem_cache_client_create(unsigned objects, unsigned chunks,
+ +                         unsigned pages_per_chunk)
+ +{
+ +      unsigned total_pages = objects * chunks * pages_per_chunk;
+ +      struct dm_mem_cache_client *cl;
+ +
+ +      BUG_ON(!total_pages);
+ +      cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+ +      if (!cl)
+ +              return ERR_PTR(-ENOMEM);
+ +
+ +      cl->objs_pool = mempool_create_kmalloc_pool(objects,
+ +                              chunks * sizeof(struct dm_mem_cache_object));
+ +      if (cl->objs_pool) {
+ +              cl->free_list = alloc_cache_pages(total_pages);
+ +              if (cl->free_list) {
+ +                      spin_lock_init(&cl->lock);
+ +                      cl->objects = objects;
+ +                      cl->chunks = chunks;
+ +                      cl->pages_per_chunk = pages_per_chunk;
+ +                      cl->free_pages = cl->total_pages = total_pages;
+ +                      return cl;
+ +              }
+ +
+ +              /* No pages: undo the object pool. */
+ +              mempool_destroy(cl->objs_pool);
+ +      }
+ +
+ +      kfree(cl);
+ +      return ERR_PTR(-ENOMEM);
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_client_create);
+ +
+ +/*
+ + * Tear down a client.  All pages must have been returned first: a
+ + * free_pages/total_pages mismatch is a bug.
+ + */
+ +void dm_mem_cache_client_destroy(struct dm_mem_cache_client *cl)
+ +{
+ +      struct page_list *pages;
+ +      mempool_t *pool;
+ +
+ +      BUG_ON(cl->free_pages != cl->total_pages);
+ +
+ +      pages = cl->free_list;
+ +      pool = cl->objs_pool;
+ +
+ +      free_cache_pages(pages);
+ +      mempool_destroy(pool);
+ +      kfree(cl);
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_client_destroy);
+ +
+ +/*
+ + * Grow a client's cache by the pages needed for @objects more objects.
+ + *
+ + * Don't call from interrupt context!
+ + *
+ + * Returns 0, or -ENOMEM when the new pages cannot be allocated.  The
+ + * result of resizing the object pool is not checked.
+ + */
+ +int dm_mem_cache_grow(struct dm_mem_cache_client *cl, unsigned objects)
+ +{
+ +      unsigned pages = objects * cl->chunks * cl->pages_per_chunk;
+ +      struct page_list *head, *tail;
+ +
+ +      BUG_ON(!pages);
+ +      head = alloc_cache_pages(pages);
+ +      if (!head)
+ +              return -ENOMEM;
+ +
+ +      /* Find the end of the new list so it can be spliced in front. */
+ +      for (tail = head; tail->next; tail = tail->next)
+ +              ;
+ +
+ +      spin_lock_irq(&cl->lock);
+ +      tail->next = cl->free_list;
+ +      cl->free_list = head;
+ +      cl->free_pages += pages;
+ +      cl->total_pages += pages;
+ +      cl->objects += objects;
+ +      spin_unlock_irq(&cl->lock);
+ +
+ +      mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
+ +      return 0;
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_grow);
+ +
+ +/*
+ + * Shrink a client's cache by the pages backing @objects objects.
+ + *
+ + * The first 'pages' elements are cut off the free list and released.
+ + * This only succeeds while the free list holds more than 'pages'
+ + * elements; otherwise -ENOMEM is returned and nothing changes.
+ + */
+ +int dm_mem_cache_shrink(struct dm_mem_cache_client *cl, unsigned objects)
+ +{
+ +      int r;
+ +      unsigned pages = objects * cl->chunks * cl->pages_per_chunk, p = pages;
+ +      unsigned long flags;
+ +      struct page_list *last = NULL, *pl, *pos;
+ +
+ +      BUG_ON(!pages);
+ +
+ +      spin_lock_irqsave(&cl->lock, flags);
+ +      pl = pos = cl->free_list;
+ +      /*
+ +       * pos is NULL when every page is currently handed out; test it
+ +       * before looking at pos->next.
+ +       */
+ +      while (p-- && pos && pos->next) {
+ +              last = pos;
+ +              pos = pos->next;
+ +      }
+ +
+ +      /*
+ +       * p wrapped past zero only if all 'pages' steps were taken, in
+ +       * which case ++p yields 0; any other value means the list was
+ +       * too short.
+ +       */
+ +      if (++p)
+ +              r = -ENOMEM;
+ +      else {
+ +              r = 0;
+ +              cl->free_list = pos;
+ +              cl->free_pages -= pages;
+ +              cl->total_pages -= pages;
+ +              cl->objects -= objects;
+ +              last->next = NULL;
+ +      }
+ +      spin_unlock_irqrestore(&cl->lock, flags);
+ +
+ +      if (!r) {
+ +              /* pl now heads the detached run of 'pages' elements. */
+ +              free_cache_pages(pl);
+ +              mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
+ +      }
+ +
+ +      return r;
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_shrink);
+ +
+ +/*
+ + * Allocate/free a memory object
+ + *
+ + * Can be called from interrupt context
+ + */
+ +/*
+ + * Get an object from the client's pool and attach chunks * pages_per_chunk
+ + * pages to it.  Returns the object, or ERR_PTR(-ENOMEM) when the pool or
+ + * the page reserve cannot satisfy the request.
+ + *
+ + * NOTE(review): the pool allocation uses GFP_NOIO, which presumably may
+ + * sleep -- confirm before relying on interrupt-context use.
+ + */
+ +struct dm_mem_cache_object *dm_mem_cache_alloc(struct dm_mem_cache_client *cl)
+ +{
+ +      unsigned pages = cl->chunks * cl->pages_per_chunk;
+ +      unsigned long flags;
+ +      int reserved;
+ +      struct dm_mem_cache_object *obj;
+ +
+ +      obj = mempool_alloc(cl->objs_pool, GFP_NOIO);
+ +      if (!obj)
+ +              return ERR_PTR(-ENOMEM);
+ +
+ +      /* Reserve the pages up front; alloc_chunks() relies on this. */
+ +      spin_lock_irqsave(&cl->lock, flags);
+ +      reserved = pages <= cl->free_pages;
+ +      if (reserved)
+ +              cl->free_pages -= pages;
+ +      spin_unlock_irqrestore(&cl->lock, flags);
+ +
+ +      if (!reserved) {
+ +              mempool_free(obj, cl->objs_pool);
+ +              return ERR_PTR(-ENOMEM);
+ +      }
+ +
+ +      alloc_chunks(cl, obj);
+ +      return obj;
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_alloc);
+ +
+ +/*
+ + * Give an object's pages back to the client's free list, then return the
+ + * object itself to the pool.  The order matters: free_chunks() still
+ + * reads @obj.
+ + */
+ +void dm_mem_cache_free(struct dm_mem_cache_client *cl,
+ +                     struct dm_mem_cache_object *obj)
+ +{
+ +      free_chunks(cl, obj);
+ +      mempool_free(obj, cl->objs_pool);
+ +}
+ +EXPORT_SYMBOL(dm_mem_cache_free);
+ +
+ +MODULE_DESCRIPTION(DM_NAME " dm memory cache");
+ +MODULE_AUTHOR("Heinz Mauelshagen <heinzm@redhat.com>");
+ +MODULE_LICENSE("GPL");
diff --cc drivers/md/dm-raid45.c

index 5312a16,0000000..fecc9b7

mode 100644,000000..100644
--- 1/drivers/md/dm-raid45.c
--- /dev/null
+++ b/drivers/md/dm-raid45.c
@@@ -1,4691 -1,0 +1,4692 @@@
+ +/*
+ + * Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
+ + *
+ + * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
+ + *
+ + * This file is released under the GPL.
+ + *
+ + *
+ + * Linux 2.6 Device Mapper RAID4 and RAID5 target.
+ + *
+ + * Tested-by: Intel; Marcin.Labun@intel.com, krzysztof.wojcik@intel.com
+ + *
+ + *
+ + * Supports the following ATARAID vendor solutions (and SNIA DDF):
+ + *
+ + *    Adaptec HostRAID ASR
+ + *    SNIA DDF1
+ + *    Highpoint 37x
+ + *    Highpoint 45x
+ + *    Intel IMSM
+ + *    Jmicron ATARAID
+ + *    LSI Logic MegaRAID
+ + *    NVidia RAID
+ + *    Promise FastTrack
+ + *    Silicon Image Medley
+ + *    VIA Software RAID
+ + *
+ + * via the dmraid application.
+ + *
+ + *
+ + * Features:
+ + *
+ + *    o RAID4 with dedicated and selectable parity device
+ + *    o RAID5 with rotating parity (left+right, symmetric+asymmetric)
+ + *    o recovery of out of sync device for initial
+ + *      RAID set creation or after dead drive replacement
+ + *    o run time optimization of xor algorithm used to calculate parity
+ + *
+ + *
+ + * Thanks to MD for:
+ + *    o the raid address calculation algorithm
+ + *    o the base of the biovec <-> page list copier.
+ + *
+ + *
+ + * Uses region hash to keep track of how many writes are in flight to
+ + * regions in order to use dirty log to keep state of regions to recover:
+ + *
+ + *    o clean regions (those which are synchronized
+ + *    and don't have write io in flight)
+ + *    o dirty regions (those with write io in flight)
+ + *
+ + *
+ + * On startup, any dirty regions are migrated to the
+ + * 'nosync' state and are subject to recovery by the daemon.
+ + *
+ + * See raid_ctr() for table definition.
+ + *
+ + * ANALYZEME: recovery bandwidth
+ + */
+ +
+ +static const char *version = "v0.2597k";
+ +
+ +#include "dm.h"
+ +#include "dm-memcache.h"
+ +#include "dm-raid45.h"
+ +
+ +#include <linux/kernel.h>
+ +#include <linux/vmalloc.h>
+ +#include <linux/raid/xor.h>
+ +#include <linux/slab.h>
++#include <linux/module.h>
+ +
+ +#include <linux/bio.h>
+ +#include <linux/dm-io.h>
+ +#include <linux/dm-dirty-log.h>
+ +#include <linux/dm-region-hash.h>
+ +
+ +
+ +/*
+ + * Configurable parameters
+ + */
+ +
+ +/* Minimum/maximum and default # of selectable stripes. */
+ +#define       STRIPES_MIN             8
+ +#define       STRIPES_MAX             16384
+ +#define       STRIPES_DEFAULT         80
+ +
+ +/* Maximum and default chunk size in sectors if not set in constructor. */
+ +#define       CHUNK_SIZE_MIN          8
+ +#define       CHUNK_SIZE_MAX          16384
+ +#define       CHUNK_SIZE_DEFAULT      64
+ +
+ +/* Default io size in sectors if not set in constructor. */
+ +#define       IO_SIZE_MIN             CHUNK_SIZE_MIN
+ +#define       IO_SIZE_DEFAULT         IO_SIZE_MIN
+ +
+ +/* Recover io size default in sectors. */
+ +#define       RECOVER_IO_SIZE_MIN             64
+ +#define       RECOVER_IO_SIZE_DEFAULT         256
+ +
+ +/* Default, minimum and maximum percentage of recover io bandwidth. */
+ +#define       BANDWIDTH_DEFAULT       10
+ +#define       BANDWIDTH_MIN           1
+ +#define       BANDWIDTH_MAX           100
+ +
+ +/* # of parallel recovered regions */
+ +#define RECOVERY_STRIPES_MIN  1
+ +#define RECOVERY_STRIPES_MAX  64
+ +#define RECOVERY_STRIPES_DEFAULT      RECOVERY_STRIPES_MIN
+ +/*
+ + * END Configurable parameters
+ + */
+ +
+ +#define       TARGET  "dm-raid45"
+ +#define       DAEMON  "kraid45d"
+ +#define       DM_MSG_PREFIX   TARGET
+ +
+ +#define       SECTORS_PER_PAGE        (PAGE_SIZE >> SECTOR_SHIFT)
+ +
+ +/* Amount/size for __xor(). */
+ +#define       XOR_SIZE        PAGE_SIZE
+ +
+ +/* Ticks to run xor_speed() test for. */
+ +#define       XOR_SPEED_TICKS 5
+ +
+ +/*
+ + * Check value in range [min, max] (both inclusive).  Arguments are
+ + * parenthesized so that expressions can be passed safely; note that i is
+ + * evaluated twice.
+ + */
+ +#define       range_ok(i, min, max)   ((i) >= (min) && (i) <= (max))
+ +
+ +/*
+ + * Structure access macros.  Every argument that takes part in pointer
+ + * arithmetic or member access is parenthesized so that expressions can be
+ + * passed without precedence surprises.
+ + */
+ +/* Derive raid_set from stripe_cache pointer. */
+ +#define       RS(x)   container_of(x, struct raid_set, sc)
+ +
+ +/* Page reference. */
+ +#define PAGE(stripe, p)  ((stripe)->obj[p].pl->page)
+ +
+ +/* Stripe chunk reference. */
+ +#define CHUNK(stripe, p) ((stripe)->chunk + (p))
+ +
+ +/* Bio list reference. */
+ +#define       BL(stripe, p, rw)       ((stripe)->chunk[p].bl + (rw))
+ +#define       BL_CHUNK(chunk, rw)     ((chunk)->bl + (rw))
+ +
+ +/* Page list reference. */
+ +#define       PL(stripe, p)           ((stripe)->obj[p].pl)
+ +/* END: structure access macros. */
+ +
+ +/* Factor out to dm-bio-list.h */
+ +/* Insert @bio at the head of @bl; it also becomes the tail of an empty list. */
+ +static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
+ +{
+ +      struct bio *old_head = bl->head;
+ +
+ +      if (!bl->tail)
+ +              bl->tail = bio;
+ +
+ +      bio->bi_next = old_head;
+ +      bl->head = bio;
+ +}
+ +
+ +/* Factor out to dm.h */
+ +/*
+ + * Set ti->error to str and return ret from the calling function; expects
+ + * a variable named 'ti' in the caller's scope.  TI_ERR() returns -EINVAL.
+ + *
+ + * NOTE(review): the ';' after while (0) makes "TI_ERR(...);" expand to two
+ + * statements, which breaks use in an unbraced if/else -- check all callers
+ + * before removing it.
+ + */
+ +#define TI_ERR_RET(str, ret) \
+ +      do { ti->error = str; return ret; } while (0);
+ +#define TI_ERR(str)     TI_ERR_RET(str, -EINVAL)
+ +
+ +/*
+ + * Define inline accessor functions for one bit of v->io.flags.
+ + *
+ + * BITOPS(name, what, var, flag) generates, for "struct var":
+ + *    TestClear<name><what>() - test_and_clear_bit()
+ + *    TestSet<name><what>()   - test_and_set_bit()
+ + *    Clear<name><what>()     - clear_bit()
+ + *    Set<name><what>()       - set_bit()
+ + *    <name><what>()          - test_bit()
+ + * with 'flag' as the bit number.
+ + */
+ +#define       BITOPS(name, what, var, flag) \
+ +static inline int TestClear ## name ## what(struct var *v) \
+ +{ return test_and_clear_bit(flag, &v->io.flags); } \
+ +static inline int TestSet ## name ## what(struct var *v) \
+ +{ return test_and_set_bit(flag, &v->io.flags); } \
+ +static inline void Clear ## name ## what(struct var *v) \
+ +{ clear_bit(flag, &v->io.flags); } \
+ +static inline void Set ## name ## what(struct var *v) \
+ +{ set_bit(flag, &v->io.flags); } \
+ +static inline int name ## what(struct var *v) \
+ +{ return test_bit(flag, &v->io.flags); }
+ +
+ +/*-----------------------------------------------------------------
+ + * Stripe cache
+ + *
+ + * Cache for all reads and writes to raid sets (operational or degraded)
+ + *
+ + * We need to run all data to and from a RAID set through this cache,
+ + * because parity chunks need to get calculated from data chunks
+ + * or, in the degraded/resynchronization case, missing chunks need
+ + * to be reconstructed using the other chunks of the stripe.
+ + *---------------------------------------------------------------*/
+ +/* Unique kmem cache name suffix # counter. */
+ +static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1); /* kmem cache # counter. */
+ +
+ +/* A chunk within a stripe (holds bios hanging off). */
+ +/* IO status flags for chunks of a stripe. */
+ +/* Values are bit numbers in stripe_chunk.io.flags (see BITOPS() users). */
+ +enum chunk_flags {
+ +      CHUNK_DIRTY,            /* Pages of chunk dirty; need writing. */
+ +      CHUNK_ERROR,            /* IO error on any chunk page. */
+ +      CHUNK_IO,               /* Allow/prohibit IO on chunk pages. */
+ +      CHUNK_LOCKED,           /* Chunk pages locked during IO. */
+ +      CHUNK_MUST_IO,          /* Chunk must io. */
+ +      CHUNK_UNLOCK,           /* Enforce chunk unlock. */
+ +      CHUNK_UPTODATE,         /* Chunk pages are uptodate. */
+ +};
+ +
+ +enum bl_type {
+ +      WRITE_QUEUED = WRITE + 1,
+ +      WRITE_MERGED,
+ +      NR_BL_TYPES,    /* Must be last one! */
+ +};
+ +struct stripe_chunk {
+ +      atomic_t cnt;           /* Reference count. */
+ +      struct stripe *stripe;  /* Backpointer to stripe for endio(). */
+ +      /* Bio lists for reads, writes, and writes merged. */
+ +      struct bio_list bl[NR_BL_TYPES];
+ +      struct {
+ +              unsigned long flags; /* IO status flags. */
+ +      } io;
+ +};
+ +
+ +/*
+ + * Define chunk bit operations.
+ + *
+ + * Each line generates TestClearChunk<X>(), TestSetChunk<X>(),
+ + * ClearChunk<X>(), SetChunk<X>() and Chunk<X>() operating on the
+ + * named chunk_flags bit of a struct stripe_chunk.
+ + */
+ +BITOPS(Chunk, Dirty,   stripe_chunk, CHUNK_DIRTY)
+ +BITOPS(Chunk, Error,   stripe_chunk, CHUNK_ERROR)
+ +BITOPS(Chunk, Io,      stripe_chunk, CHUNK_IO)
+ +BITOPS(Chunk, Locked,  stripe_chunk, CHUNK_LOCKED)
+ +BITOPS(Chunk, MustIo,  stripe_chunk, CHUNK_MUST_IO)
+ +BITOPS(Chunk, Unlock,  stripe_chunk, CHUNK_UNLOCK)
+ +BITOPS(Chunk, Uptodate,        stripe_chunk, CHUNK_UPTODATE)
+ +
+ +/*
+ + * Stripe linked list indexes. Keep order, because the stripe
+ + * and the stripe cache rely on the first 3!
+ + *
+ + * The first three index both stripe_cache.lists[] (sized by
+ + * SC_NR_LISTS) and stripe.lists[] (sized by STRIPE_NR_LISTS).
+ + * LIST_HASH and LIST_RECOVER alias the same fourth stripe.lists[] slot.
+ + */
+ +enum list_types {
+ +      LIST_FLUSH,     /* Stripes to flush for io. */
+ +      LIST_ENDIO,     /* Stripes to endio. */
+ +      LIST_LRU,       /* Least recently used stripes. */
+ +      SC_NR_LISTS,    /* # of lists in stripe cache. */
+ +      LIST_HASH = SC_NR_LISTS,        /* Hashed stripes. */
+ +      LIST_RECOVER = LIST_HASH, /* For recovery type stripes only. */
+ +      STRIPE_NR_LISTS,/* To size array in struct stripe. */
+ +};
+ +
+ +/* Addressing of region recovery (referenced by struct stripe). */
+ +struct recover_addr {
+ +      struct dm_region *reg;  /* Actual region to recover. */
+ +      sector_t pos;   /* Position within region to recover. */
+ +      sector_t end;   /* End of region to recover. */
+ +};
+ +
+ +/*
+ + * A stripe: the io object to handle all reads and writes to a RAID set.
+ + *
+ + * The structure ends in a variable size chunk array, hence it has to
+ + * be allocated with room for the chunks behind it.
+ + */
+ +struct stripe {
+ +      atomic_t cnt;                   /* Reference count. */
+ +      struct stripe_cache *sc;        /* Backpointer to stripe cache. */
+ +
+ +      /*
+ +       * 4 linked lists:
+ +       *   o io list to flush io
+ +       *   o endio list
+ +       *   o LRU list to put stripes w/o reference count on
+ +       *   o stripe cache hash
+ +       *
+ +       * Indexed by enum list_types.
+ +       */
+ +      struct list_head lists[STRIPE_NR_LISTS];
+ +
+ +      sector_t key;    /* Hash key. */
+ +      region_t region; /* Region stripe is mapped to. */
+ +
+ +      struct {
+ +              unsigned long flags;    /* Stripe state flags (see below). */
+ +
+ +              /*
+ +               * Pending ios in flight:
+ +               *
+ +               * used to control move of stripe to endio list
+ +               */
+ +              atomic_t pending;
+ +
+ +              /* Sectors to read and write for multi page stripe sets. */
+ +              unsigned size;
+ +      } io;
+ +
+ +      /* Address region recovery. */
+ +      struct recover_addr *recover;
+ +
+ +      /* Lock on stripe (Future: for clustering). */
+ +      void *lock;
+ +
+ +      struct {
+ +              unsigned short parity;  /* Parity chunk index. */
+ +              short recover;          /* Recovery chunk index. */
+ +      } idx;
+ +
+ +      /*
+ +       * This stripe's memory cache object (dm-mem-cache);
+ +       * i.e. the io chunk pages.
+ +       */
+ +      struct dm_mem_cache_object *obj;
+ +
+ +      /*
+ +       * Array of stripe chunks (dynamically allocated).
+ +       * NOTE(review): presumably one chunk per RAID device -- confirm
+ +       * against the stripe allocation code.
+ +       */
+ +      struct stripe_chunk chunk[0];
+ +};
+ +
+ +/* States stripes can be in (bit numbers in the stripe's io.flags field). */
+ +enum stripe_states {
+ +      STRIPE_ERROR,           /* io error on stripe. */
+ +      STRIPE_MERGED,          /* Writes got merged to be written. */
+ +      STRIPE_RBW,             /* Read-before-write stripe. */
+ +      STRIPE_RECONSTRUCT,     /* Reconstruct of a missing chunk required. */
+ +      STRIPE_RECONSTRUCTED,   /* Reconstructed of a missing chunk. */
+ +      STRIPE_RECOVER,         /* Stripe used for RAID set recovery. */
+ +};
+ +
+ +/*
+ + * Define stripe bit operations.
+ + *
+ + * Each line generates TestClearStripe<X>(), TestSetStripe<X>(),
+ + * ClearStripe<X>(), SetStripe<X>() and Stripe<X>() for struct stripe.
+ + */
+ +BITOPS(Stripe, Error,       stripe, STRIPE_ERROR)
+ +BITOPS(Stripe, Merged,        stripe, STRIPE_MERGED)
+ +BITOPS(Stripe, RBW,         stripe, STRIPE_RBW)
+ +BITOPS(Stripe, Reconstruct,   stripe, STRIPE_RECONSTRUCT)
+ +BITOPS(Stripe, Reconstructed, stripe, STRIPE_RECONSTRUCTED)
+ +BITOPS(Stripe, Recover,             stripe, STRIPE_RECOVER)
+ +
+ +/* A stripe hash; set up by hash_init() and consumed by hash_fn(). */
+ +struct stripe_hash {
+ +      struct list_head *hash; /* Array of bucket list heads (vmalloc()ed). */
+ +      unsigned buckets;       /* # of buckets in the array. */
+ +      unsigned mask;          /* buckets - 1, applied to the hash value. */
+ +      unsigned prime;         /* Multiplier applied to the key. */
+ +      unsigned shift;         /* Right shift applied to key * prime. */
+ +};
+ +
+ +/* Indexes into stripe_cache.locks[]. */
+ +enum sc_lock_types {
+ +      LOCK_ENDIO,     /* Protect endio list. */
+ +      NR_LOCKS,       /* To size array in struct stripe_cache. */
+ +};
+ +
+ +/* A stripe cache. */
+ +struct stripe_cache {
+ +      /* Stripe hash. */
+ +      struct stripe_hash hash;
+ +
+ +      spinlock_t locks[NR_LOCKS];     /* Locks to protect lists. */
+ +
+ +      /* Stripes with io to flush, stripes to endio and LRU lists. */
+ +      struct list_head lists[SC_NR_LISTS];
+ +
+ +      /* Slab cache to allocate stripes from. */
+ +      struct {
+ +              struct kmem_cache *cache;       /* Cache itself. */
+ +              char name[32];  /* Unique name. */
+ +      } kc;
+ +
+ +      struct dm_io_client *dm_io_client; /* dm-io client resource context. */
+ +
+ +      /* dm-mem-cache client resource context. */
+ +      struct dm_mem_cache_client *mem_cache_client;
+ +
+ +      int stripes_parm;           /* # stripes parameter from constructor. */
+ +      atomic_t stripes;           /* actual # of stripes in cache. */
+ +      atomic_t stripes_to_set;    /* # of stripes to resize cache to. */
+ +      atomic_t stripes_last;      /* last # of stripes in cache. */
+ +      atomic_t active_stripes;    /* actual # of active stripes in cache. */
+ +
+ +      /* REMOVEME: */
+ +      /* NOTE(review): presumably the maximum # of active stripes seen. */
+ +      atomic_t active_stripes_max; /* max # of active stripes in cache. */
+ +};
+ +
+ +/* Flag specs for raid_dev (bit numbers in raid_dev io.flags). */
+ +enum raid_dev_flags {
+ +      DEV_FAILED,     /* Device failed. */
+ +      DEV_IO_QUEUED,  /* Io got queued to device. */
+ +};
+ +
+ +/* The raid device in a set. */
+ +struct raid_dev {
+ +      struct dm_dev *dev;     /* Underlying device; NULL for an empty slot. */
+ +      sector_t start;         /* Offset to map to. */
+ +      struct {        /* Using struct to be able to BITOPS(). */
+ +              unsigned long flags;    /* raid_dev_flags. */
+ +      } io;
+ +};
+ +
+ +/* Define RAID device bit operations (DevFailed(), SetDevIoQueued(), ...). */
+ +BITOPS(Dev, Failed,   raid_dev, DEV_FAILED)
+ +BITOPS(Dev, IoQueued, raid_dev, DEV_IO_QUEUED)
+ +
+ +/* Flags spec for raid_set (bit numbers in raid_set io.flags). */
+ +enum raid_set_flags {
+ +      RS_CHECK_OVERWRITE,     /* Check for chunk overwrites. */
+ +      RS_DEAD,                /* RAID set inoperational. */
+ +      RS_DEAD_ENDIO_MESSAGE,  /* RAID set dead endio one-off message. */
+ +      RS_DEGRADED,            /* Io errors on RAID device. */
+ +      RS_DEVEL_STATS,         /* REMOVEME: display status information. */
+ +      RS_ENFORCE_PARITY_CREATION,/* Enforce parity creation. */
+ +      RS_PROHIBIT_WRITES,     /* Prohibit writes on device failure. */
+ +      RS_RECOVER,             /* Do recovery. */
+ +      RS_RECOVERY_BANDWIDTH,  /* Allow recovery bandwidth (delayed bios). */
+ +      RS_SC_BUSY,             /* Stripe cache busy -> send an event. */
+ +      RS_SUSPEND,             /* Suspend RAID set. */
+ +};
+ +
+ +/*
+ + * REMOVEME: devel stats counters.
+ + *
+ + * Indexes into raid_set.stats[]; S_NR_STATS sizes that array.
+ + */
+ +enum stats_types {
+ +      S_BIOS_READ,
+ +      S_BIOS_ADDED_READ,
+ +      S_BIOS_ENDIO_READ,
+ +      S_BIOS_WRITE,
+ +      S_BIOS_ADDED_WRITE,
+ +      S_BIOS_ENDIO_WRITE,
+ +      S_CAN_MERGE,
+ +      S_CANT_MERGE,
+ +      S_CONGESTED,
+ +      S_DM_IO_READ,
+ +      S_DM_IO_WRITE,
+ +      S_BANDWIDTH,
+ +      S_BARRIER,
+ +      S_BIO_COPY_PL_NEXT,
+ +      S_DEGRADED,
+ +      S_DELAYED_BIOS,
+ +      S_FLUSHS,
+ +      S_HITS_1ST,
+ +      S_IOS_POST,
+ +      S_INSCACHE,
+ +      S_MAX_LOOKUP,
+ +      S_CHUNK_LOCKED,
+ +      S_NO_BANDWIDTH,
+ +      S_NOT_CONGESTED,
+ +      S_NO_RW,
+ +      S_NOSYNC,
+ +      S_OVERWRITE,
+ +      S_PROHIBITCHUNKIO,
+ +      S_RECONSTRUCT_EI,
+ +      S_RECONSTRUCT_DEV,
+ +      S_RECONSTRUCT_SET,
+ +      S_RECONSTRUCTED,
+ +      S_REQUEUE,
+ +      S_STRIPE_ERROR,
+ +      S_SUM_DELAYED_BIOS,
+ +      S_XORS,
+ +      S_NR_STATS,     /* # of stats counters. Must be last! */
+ +};
+ +
+ +/* Status type -> string mappings. */
+ +struct stats_map {
+ +      const enum stats_types type;    /* Counter the string belongs to. */
+ +      const char *str;                /* Text associated with the counter. */
+ +};
+ +
+ +/*
+ + * One entry per stats counter.
+ + *
+ + * The table is *not* sorted by enum stats_types value, so it can't be
+ + * indexed by counter type; the strings carry separators and newlines.
+ + * NOTE(review): presumably the table order is the status output order
+ + * -- confirm against the status code.
+ + */
+ +static struct stats_map stats_map[] = {
+ +      { S_BIOS_READ, "r=" },
+ +      { S_BIOS_ADDED_READ, "/" },
+ +      { S_BIOS_ENDIO_READ, "/" },
+ +      { S_BIOS_WRITE, " w=" },
+ +      { S_BIOS_ADDED_WRITE, "/" },
+ +      { S_BIOS_ENDIO_WRITE, "/" },
+ +      { S_DM_IO_READ, " rc=" },
+ +      { S_DM_IO_WRITE, " wc=" },
+ +      { S_BANDWIDTH, "\nbw=" },
+ +      { S_NO_BANDWIDTH, " no_bw=" },
+ +      { S_BARRIER, "\nbarrier=" },
+ +      { S_BIO_COPY_PL_NEXT, "\nbio_cp_next=" },
+ +      { S_CAN_MERGE, "\nmerge=" },
+ +      { S_CANT_MERGE, "/no_merge=" },
+ +      { S_CHUNK_LOCKED, "\nchunk_locked=" },
+ +      { S_CONGESTED, "\ncgst=" },
+ +      { S_NOT_CONGESTED, "/not_cgst=" },
+ +      { S_DEGRADED, "\ndegraded=" },
+ +      { S_DELAYED_BIOS, "\ndel_bios=" },
+ +      { S_SUM_DELAYED_BIOS, "/sum_del_bios=" },
+ +      { S_FLUSHS, "\nflushs=" },
+ +      { S_HITS_1ST, "\nhits_1st=" },
+ +      { S_IOS_POST, " ios_post=" },
+ +      { S_INSCACHE, " inscache=" },
+ +      { S_MAX_LOOKUP, " maxlookup=" },
+ +      { S_NO_RW, "\nno_rw=" },
+ +      { S_NOSYNC, " nosync=" },
+ +      { S_OVERWRITE, " ovr=" },
+ +      { S_PROHIBITCHUNKIO, " prhbt_io=" },
+ +      { S_RECONSTRUCT_EI, "\nrec_ei=" },
+ +      { S_RECONSTRUCT_DEV, " rec_dev=" },
+ +      { S_RECONSTRUCT_SET, " rec_set=" },
+ +      { S_RECONSTRUCTED, " rec=" },
+ +      { S_REQUEUE, " requeue=" },
+ +      { S_STRIPE_ERROR, " stripe_err=" },
+ +      { S_XORS, " xors=" },
+ +};
+ +
+ +/*
+ + * A RAID set.
+ + *
+ + * Embeds the stripe cache and ends in a variable size array of
+ + * RAID devices, hence it has to be allocated with room for them.
+ + */
+ +/* Alias: the region hash client type is struct dm_region_hash. */
+ +#define       dm_rh_client    dm_region_hash
+ +/* Indexes into recover.io_count[]. */
+ +enum count_type { IO_WORK = 0, IO_RECOVER, IO_NR_COUNT };
+ +/* Signature of an xor function working on 'count' data pointers. */
+ +typedef void (*xor_function_t)(unsigned count, unsigned long **data);
+ +struct raid_set {
+ +      struct dm_target *ti;   /* Target pointer. */
+ +
+ +      struct {
+ +              unsigned long flags;    /* State flags. */
+ +              struct mutex in_lock;   /* Protects central input list below. */
+ +              struct mutex xor_lock;  /* Protects xor algorithm set. */
+ +              struct bio_list in;     /* Pending ios (central input list). */
+ +              struct bio_list work;   /* ios work set. */
+ +              wait_queue_head_t suspendq;     /* suspend synchronization. */
+ +              atomic_t in_process;    /* counter of queued bios (suspendq). */
+ +              atomic_t in_process_max;/* counter of queued bios max. */
+ +
+ +              /* io work. */
+ +              struct workqueue_struct *wq;
+ +              struct delayed_work dws_do_raid;        /* For main worker. */
+ +              struct work_struct ws_do_table_event;   /* For event worker. */
+ +      } io;
+ +
+ +      /* Stripe locking abstraction. */
+ +      struct dm_raid45_locking_type *locking;
+ +
+ +      struct stripe_cache sc; /* Stripe cache for this set. */
+ +
+ +      /* Xor optimization. */
+ +      struct {
+ +              struct xor_func *f;
+ +              unsigned chunks;
+ +              unsigned speed;
+ +      } xor;
+ +
+ +      /* Recovery parameters. */
+ +      struct recover {
+ +              struct dm_dirty_log *dl;        /* Dirty log. */
+ +              struct dm_rh_client *rh;        /* Region hash. */
+ +
+ +              struct dm_io_client *dm_io_client; /* recovery dm-io client. */
+ +              /* dm-mem-cache client resource context for recovery stripes. */
+ +              struct dm_mem_cache_client *mem_cache_client;
+ +
+ +              struct list_head stripes;       /* List of recovery stripes. */
+ +
+ +              region_t nr_regions;
+ +              region_t nr_regions_to_recover;
+ +              region_t nr_regions_recovered;
+ +              unsigned long start_jiffies;
+ +              unsigned long end_jiffies;
+ +
+ +              unsigned bandwidth;      /* Recovery bandwidth [%]. */
+ +              unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
+ +              unsigned bandwidth_parm; /*  " constructor parm. */
+ +              unsigned io_size;        /* recovery io size <= region size. */
+ +              unsigned io_size_parm;   /* recovery io size ctr parameter. */
+ +              unsigned recovery;       /* Recovery allowed/prohibited. */
+ +              unsigned recovery_stripes; /* # of parallel recovery stripes. */
+ +
+ +              /* recovery io throttling. */
+ +              atomic_t io_count[IO_NR_COUNT]; /* counter recover/regular io.*/
+ +              unsigned long last_jiffies;
+ +      } recover;
+ +
+ +      /* RAID set parameters. */
+ +      struct {
+ +              struct raid_type *raid_type;    /* RAID type (eg, RAID4). */
+ +              unsigned raid_parms;    /* # variable raid parameters. */
+ +
+ +              unsigned chunk_size;    /* Sectors per chunk. */
+ +              unsigned chunk_size_parm;
+ +              unsigned chunk_shift;   /* rsector chunk size shift. */
+ +
+ +              unsigned io_size;       /* Sectors per io. */
+ +              unsigned io_size_parm;
+ +              unsigned io_mask;       /* Mask for bio_copy_page_list(). */
+ +              unsigned io_inv_mask;   /* Mask for raid_address(). */
+ +
+ +              sector_t sectors_per_dev;       /* Sectors per device. */
+ +
+ +              atomic_t failed_devs;           /* Amount of devices failed. */
+ +
+ +              /* Index of device to initialize. */
+ +              int dev_to_init;
+ +              int dev_to_init_parm;
+ +
+ +              /* Raid devices dynamically allocated. */
+ +              unsigned raid_devs;     /* # of RAID devices below. */
+ +              unsigned data_devs;     /* # of RAID data devices. */
+ +
+ +              int ei;         /* index of failed RAID device. */
+ +
+ +              /* Index of dedicated parity device (i.e. RAID4). */
+ +              int pi;
+ +              int pi_parm;    /* constructor parm for status output. */
+ +      } set;
+ +
+ +      /* REMOVEME: devel stats counters (indexed by enum stats_types). */
+ +      atomic_t stats[S_NR_STATS];
+ +
+ +      /* Dynamically allocated temporary pointers for xor(). */
+ +      unsigned long **data;
+ +
+ +      /* Dynamically allocated RAID devices (set.raid_devs of them). Alignment? */
+ +      struct raid_dev dev[0];
+ +};
+ +
+ +/*
+ + * Define RAID set bit operations.
+ + *
+ + * Each line generates TestClearRS<X>(), TestSetRS<X>(), ClearRS<X>(),
+ + * SetRS<X>() and RS<X>() for struct raid_set. Mind that the
+ + * "Bandwidth" accessors operate on RS_RECOVERY_BANDWIDTH.
+ + *
+ + * BITOPS() is undefined afterwards; no further accessors get generated.
+ + */
+ +BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
+ +BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
+ +BITOPS(RS, Dead, raid_set, RS_DEAD)
+ +BITOPS(RS, DeadEndioMessage, raid_set, RS_DEAD_ENDIO_MESSAGE)
+ +BITOPS(RS, Degraded, raid_set, RS_DEGRADED)
+ +BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
+ +BITOPS(RS, EnforceParityCreation, raid_set, RS_ENFORCE_PARITY_CREATION)
+ +BITOPS(RS, ProhibitWrites, raid_set, RS_PROHIBIT_WRITES)
+ +BITOPS(RS, Recover, raid_set, RS_RECOVER)
+ +BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
+ +BITOPS(RS, Suspend, raid_set, RS_SUSPEND)
+ +#undef BITOPS
+ +
+ +/*-----------------------------------------------------------------
+ + * Raid-4/5 set structures.
+ + *---------------------------------------------------------------*/
+ +/* RAID level definitions (stored in raid_type.level). */
+ +enum raid_level {
+ +      raid4,
+ +      raid5,
+ +};
+ +
+ +/*
+ + * Symmetric/Asymmetric, Left/Right parity rotating algorithms
+ + * (stored in raid_type.algorithm); 'none' is what the raid4 type uses.
+ + */
+ +enum raid_algorithm {
+ +      none,
+ +      left_asym,
+ +      right_asym,
+ +      left_sym,
+ +      right_sym,
+ +};
+ +
+ +/* Static properties of one supported RAID type. */
+ +struct raid_type {
+ +      const char *name;               /* RAID algorithm. */
+ +      const char *descr;              /* Descriptor text for logging. */
+ +      const unsigned parity_devs;     /* # of parity devices. */
+ +      const unsigned minimal_devs;    /* minimal # of devices in set. */
+ +      const enum raid_level level;            /* RAID level. */
+ +      const enum raid_algorithm algorithm;    /* RAID algorithm. */
+ +};
+ +
+ +/*
+ + * Supported raid types and properties.
+ + *
+ + * Columns: name, description, # of parity devices, minimal # of
+ + * devices, level and algorithm. All types listed use 1 parity device
+ + * and need at least 3 devices.
+ + */
+ +static struct raid_type raid_types[] = {
+ +      {"raid4",    "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
+ +      {"raid5_la", "RAID5 (left asymmetric)",       1, 3, raid5, left_asym},
+ +      {"raid5_ra", "RAID5 (right asymmetric)",      1, 3, raid5, right_asym},
+ +      {"raid5_ls", "RAID5 (left symmetric)",        1, 3, raid5, left_sym},
+ +      {"raid5_rs", "RAID5 (right symmetric)",       1, 3, raid5, right_sym},
+ +};
+ +
+ +/*
+ + * Address as calculated by raid_address():
+ + * the stripe hash key plus the data and parity device indexes.
+ + */
+ +struct raid_address {
+ +      sector_t key;           /* Hash key (address of stripe % chunk_size). */
+ +      unsigned di, pi;        /* Data and parity disks index. */
+ +};
+ +
+ +/* REMOVEME: zero all devel statistics counters of a RAID set. */
+ +static void stats_reset(struct raid_set *rs)
+ +{
+ +      unsigned i;
+ +
+ +      for (i = 0; i < S_NR_STATS; i++)
+ +              atomic_set(&rs->stats[i], 0);
+ +}
+ +
+ +/*----------------------------------------------------------------
+ + * RAID set management routines.
+ + *--------------------------------------------------------------*/
+ +/*
+ + * Begin small helper functions.
+ + */
+ +/*
+ + * Dummy wakeup callback: does nothing and ignores its context.
+ + *
+ + * No need to be called from region hash indirectly at dm_rh_dec().
+ + */
+ +static void wake_dummy(void *context) {}
+ +
+ +/* Return the current # of io references held on the RAID set. */
+ +static int io_ref(struct raid_set *rs)
+ +{
+ +      atomic_t *in_process = &rs->io.in_process;
+ +
+ +      return atomic_read(in_process);
+ +}
+ +
+ +/* Take an io reference and keep track of the maximum seen. */
+ +static void io_get(struct raid_set *rs)
+ +{
+ +      atomic_t *max = &rs->io.in_process_max;
+ +      int now = atomic_inc_return(&rs->io.in_process);
+ +
+ +      /* REMOVEME: max. */
+ +      if (now > atomic_read(max))
+ +              atomic_set(max, now);
+ +}
+ +
+ +/*
+ + * Drop an io reference; the drop of the last one wakes io waiters.
+ + * A reference count below zero is a bug.
+ + */
+ +static void io_put(struct raid_set *rs)
+ +{
+ +      /* Intel: rebuild data corrupter? */
+ +      if (!atomic_dec_and_test(&rs->io.in_process)) {
+ +              BUG_ON(io_ref(rs) < 0);
+ +              return;
+ +      }
+ +
+ +      wake_up(&rs->io.suspendq);
+ +}
+ +
+ +/* Sleep on the suspend queue until no io reference is left. */
+ +static void wait_ios(struct raid_set *rs)
+ +{
+ +      wait_event(rs->io.suspendq, io_ref(rs) == 0);
+ +}
+ +
+ +/* Queue the main worker on the set's workqueue after 'delay'. */
+ +static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
+ +{
+ +      struct delayed_work *dws = &rs->io.dws_do_raid;
+ +
+ +      queue_delayed_work(rs->io.wq, dws, delay);
+ +}
+ +
+ +/*
+ + * Queue the main worker without delay (called from region hash too).
+ + *
+ + * 'context' is the RAID set.
+ + */
+ +static void wake_do_raid(void *context)
+ +{
+ +      struct raid_set *rs = context;
+ +      struct work_struct *work = &rs->io.dws_do_raid.work;
+ +
+ +      queue_work(rs->io.wq, work);
+ +}
+ +
+ +/* Calculate device sector offset: bio sector / # of data devices. */
+ +static sector_t _sector(struct raid_set *rs, struct bio *bio)
+ +{
+ +      sector_t offset = bio->bi_sector;
+ +
+ +      /* sector_div() leaves the quotient in its first argument. */
+ +      sector_div(offset, rs->set.data_devs);
+ +      return offset;
+ +}
+ +
+ +/* Return the # of stripes currently active in the stripe cache. */
+ +static int sc_active(struct stripe_cache *sc)
+ +{
+ +      atomic_t *active = &sc->active_stripes;
+ +
+ +      return atomic_read(active);
+ +}
+ +
+ +/*
+ + * Stripe cache busy indicator: true if more stripes are active
+ + * than the # of stripes in the cache less half of STRIPES_MIN.
+ + */
+ +static int sc_busy(struct raid_set *rs)
+ +{
+ +      struct stripe_cache *sc = &rs->sc;
+ +
+ +      return sc_active(sc) > atomic_read(&sc->stripes) - (STRIPES_MIN / 2);
+ +}
+ +
+ +/*
+ + * Set chunk states.
+ + *
+ + * ERROR flags chunk and stripe in error and leaves all other flags
+ + * alone; CLEAN/DIRTY adjust the dirty flag and mark the chunk
+ + * uptodate, allowed for io and free of errors.
+ + */
+ +enum chunk_dirty_type { CLEAN, DIRTY, ERROR };
+ +static void chunk_set(struct stripe_chunk *chunk, enum chunk_dirty_type type)
+ +{
+ +      if (type == ERROR) {
+ +              SetChunkError(chunk);
+ +              SetStripeError(chunk->stripe);
+ +              return;
+ +      }
+ +
+ +      switch (type) {
+ +      case CLEAN:
+ +              ClearChunkDirty(chunk);
+ +              break;
+ +      case DIRTY:
+ +              SetChunkDirty(chunk);
+ +              break;
+ +      default:
+ +              BUG();
+ +      }
+ +
+ +      SetChunkUptodate(chunk);
+ +      SetChunkIo(chunk);
+ +      ClearChunkError(chunk);
+ +}
+ +
+ +/*
+ + * Return 1 if the region a sector belongs to is in any of the
+ + * states given in the 'state' mask, 0 otherwise.
+ + */
+ +static int region_state(struct raid_set *rs, sector_t sector,
+ +                      enum dm_rh_region_states state)
+ +{
+ +      struct dm_rh_client *rh = rs->recover.rh;
+ +      region_t region = dm_rh_sector_to_region(rh, sector);
+ +      int current_state = dm_rh_get_state(rh, region, 1);
+ +
+ +      return (current_state & state) != 0;
+ +}
+ +
+ +/*
+ + * Return true in case a chunk should be read/written
+ + *
+ + * Conditions to read/write:
+ + *    o chunk not uptodate
+ + *    o chunk dirty
+ + *
+ + * Conditions to avoid io:
+ + *    o io already ongoing on chunk
+ + *    o io explicitly prohibited
+ + *
+ + * Mind the side effect: the "must io" flag gets set on a positive
+ + * first run and consumed (cleared) by the following call.
+ + */
+ +static int chunk_io(struct stripe_chunk *chunk)
+ +{
+ +      /* 2nd run optimization (flag set below on first run). */
+ +      if (TestClearChunkMustIo(chunk))
+ +              return 1;
+ +
+ +      /* Avoid io if prohibited or a locked chunk. */
+ +      if (!ChunkIo(chunk) || ChunkLocked(chunk))
+ +              return 0;
+ +
+ +      /* Uptodate and clean -> nothing to read or write. */
+ +      if (ChunkUptodate(chunk) && !ChunkDirty(chunk))
+ +              return 0;
+ +
+ +      SetChunkMustIo(chunk); /* 2nd run optimization. */
+ +      return 1;
+ +}
+ +
+ +/*
+ + * Call a function on each chunk needing io unless device failed.
+ + *
+ + * Returns the # of chunks f_io() got called for.
+ + */
+ +static unsigned for_each_io_dev(struct stripe *stripe,
+ +                              void (*f_io)(struct stripe *stripe, unsigned p))
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned calls = 0, p;
+ +
+ +      for (p = 0; p < rs->set.raid_devs; p++) {
+ +              /* chunk_io() has to be asked first (it updates flags). */
+ +              if (!chunk_io(CHUNK(stripe, p)) || DevFailed(rs->dev + p))
+ +                      continue;
+ +
+ +              f_io(stripe, p);
+ +              calls++;
+ +      }
+ +
+ +      return calls;
+ +}
+ +
+ +/*
+ + * Index of device to calculate parity on.
+ + *
+ + * Either the parity device index *or* the selected
+ + * device to init after a spare replacement.
+ + *
+ + * *sync is set to true if the stripe's region is neither nosync
+ + * nor recovering. Returns -1 for recovery stripes of sets
+ + * other than RAID4 without a device to initialize.
+ + */
+ +static int dev_for_parity(struct stripe *stripe, int *sync)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      int unsynced = region_state(rs, stripe->key,
+ +                                  DM_RH_NOSYNC | DM_RH_RECOVERING);
+ +
+ +      *sync = !unsynced;
+ +
+ +      /* Reconstruct a particular device ?. */
+ +      if (unsynced && rs->set.dev_to_init > -1)
+ +              return rs->set.dev_to_init;
+ +
+ +      /* Dedicated parity device. */
+ +      if (rs->set.raid_type->level == raid4)
+ +              return rs->set.pi;
+ +
+ +      return StripeRecover(stripe) ? -1 : stripe->idx.parity;
+ +}
+ +
+ +/*
+ + * RAID set congested function.
+ + *
+ + * The set counts as congested if the stripe cache is busy, the set
+ + * is suspended or prohibits writes; otherwise the component devices
+ + * are asked, stopping at the first congested one.
+ + */
+ +static int rs_congested(void *congested_data, int bdi_bits)
+ +{
+ +      struct raid_set *rs = congested_data;
+ +      int r = 0;
+ +
+ +      if (sc_busy(rs) || RSSuspend(rs) || RSProhibitWrites(rs)) {
+ +              r = 1;
+ +      } else {
+ +              unsigned p = rs->set.raid_devs;
+ +
+ +              /* If any of our component devices are overloaded. */
+ +              while (!r && p--) {
+ +                      struct request_queue *q =
+ +                              bdev_get_queue(rs->dev[p].dev->bdev);
+ +
+ +                      r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+ +              }
+ +      }
+ +
+ +      /* REMOVEME: statistics. */
+ +      atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
+ +      return r;
+ +}
+ +
+ +/*
+ + * RAID device degrade check.
+ + *
+ + * Flags device 'p' failed. On its first failure the device gets
+ + * accounted, the dead and degraded states of the set are logged
+ + * once each, writes get prohibited and the table event work is
+ + * scheduled. 'stripe' may be NULL (logged as a faked io error).
+ + */
+ +static void rs_check_degrade_dev(struct raid_set *rs,
+ +                               struct stripe *stripe, unsigned p)
+ +{
+ +      char name[BDEVNAME_SIZE];
+ +
+ +      /* Device got flagged failed before -> nothing more to do. */
+ +      if (TestSetDevFailed(rs->dev + p))
+ +              return;
+ +
+ +      /* Throw an event in case of member device errors. */
+ +      if ((atomic_inc_return(&rs->set.failed_devs) >
+ +           rs->set.raid_type->parity_devs) &&
+ +           !TestSetRSDead(rs)) {
+ +              unsigned d;
+ +
+ +              /* Display RAID set dead message once. */
+ +              DMERR("FATAL: too many devices failed -> RAID set broken");
+ +              for (d = 0; d < rs->set.raid_devs; d++) {
+ +                      if (!DevFailed(rs->dev + d))
+ +                              continue;
+ +
+ +                      DMERR("device /dev/%s failed",
+ +                            bdevname(rs->dev[d].dev->bdev, name));
+ +              }
+ +      }
+ +
+ +      /* Only log the first member error. */
+ +      if (!TestSetRSDegraded(rs)) {
+ +              unsigned long long region = stripe ? stripe->key : 0;
+ +
+ +              /* Store index for recovery. */
+ +              rs->set.ei = p;
+ +              DMERR("CRITICAL: %sio error on device /dev/%s "
+ +                    "in region=%llu; DEGRADING RAID set\n",
+ +                    stripe ? "" : "FAKED ",
+ +                    bdevname(rs->dev[p].dev->bdev, name),
+ +                    region);
+ +              DMERR("further device error messages suppressed");
+ +      }
+ +
+ +      /* Prohibit further writes to allow for userspace to update metadata. */
+ +      SetRSProhibitWrites(rs);
+ +      schedule_work(&rs->io.ws_do_table_event);
+ +}
+ +
+ +/*
+ + * RAID set degrade check: run the device degrade check for
+ + * every chunk of the stripe flagged in error.
+ + */
+ +static void rs_check_degrade(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned p;
+ +
+ +      /* Walk the devices from the last one down to the first. */
+ +      for (p = rs->set.raid_devs; p--; ) {
+ +              if (!ChunkError(CHUNK(stripe, p)))
+ +                      continue;
+ +
+ +              rs_check_degrade_dev(rs, stripe, p);
+ +      }
+ +}
+ +
+ +/*
+ + * Lookup a RAID device by its device number (bd_dev).
+ + *
+ + * Returns the index of the slot in rs->dev[] referring to the same
+ + * block device as 'dev_lookup' or -ENODEV if there is none.
+ + */
+ +static int raid_dev_lookup(struct raid_set *rs, struct raid_dev *dev_lookup)
+ +{
+ +      unsigned p;
+ +
+ +      /*
+ +       * Must be an incremental loop, because the device array
+ +       * can have empty slots still on calls from raid_ctr()
+ +       */
+ +      for (p = 0; p < rs->set.raid_devs; p++) {
+ +              struct raid_dev *dev = rs->dev + p;
+ +
+ +              /*
+ +               * First empty slot ends the search. Checked only after
+ +               * the bounds check above in order to never read behind
+ +               * the end of a completely populated array.
+ +               */
+ +              if (!dev->dev)
+ +                      break;
+ +
+ +              if (dev_lookup->dev->bdev->bd_dev == dev->dev->bdev->bd_dev)
+ +                      return p;
+ +      }
+ +
+ +      return -ENODEV;
+ +}
+ +/*
+ + * End small helper functions.
+ + */
+ +
+ +/*
+ + * Stripe hash functions
+ + */
+ +/* Initialize/destroy stripe hash. */
+ +/*
+ + * Initialize a stripe hash for a cache of 'stripes' stripes.
+ + *
+ + * Allocates roundup_pow_of_two(stripes / 2) bucket list heads and
+ + * derives mask, shift and prime multiplier for hash_fn() from it.
+ + *
+ + * Returns 0 on success or -ENOMEM if the buckets can't be allocated.
+ + */
+ +static int hash_init(struct stripe_hash *hash, unsigned stripes)
+ +{
+ +      unsigned buckets = roundup_pow_of_two(stripes >> 1);
+ +      /*
+ +       * NOTE(review): 27 is not a prime (29 or 23 presumably meant);
+ +       * left as is, because it only influences the distribution.
+ +       */
+ +      static const unsigned hash_primes[] = {
+ +              /* Table of primes for hash_fn/table size optimization. */
+ +              1, 2, 3, 7, 13, 27, 53, 97, 193, 389, 769,
+ +              1543, 3079, 6151, 12289, 24593, 49157, 98317,
+ +      };
+ +
+ +      /* Allocate stripe hash buckets. */
+ +      hash->hash = vmalloc(buckets * sizeof(*hash->hash));
+ +      if (!hash->hash)
+ +              return -ENOMEM;
+ +
+ +      hash->buckets = buckets;
+ +      hash->mask = buckets - 1;
+ +      hash->shift = ffs(buckets);
+ +
+ +      /*
+ +       * Clamp to the last table entry. Has to be '>=', because shift
+ +       * indexes hash_primes[] below and ARRAY_SIZE() itself already
+ +       * is one behind the end of the table.
+ +       */
+ +      if (hash->shift >= ARRAY_SIZE(hash_primes))
+ +              hash->shift = ARRAY_SIZE(hash_primes) - 1;
+ +
+ +      /* Less than 2 buckets is not supported. */
+ +      BUG_ON(hash->shift < 2);
+ +      hash->prime = hash_primes[hash->shift];
+ +
+ +      /* Initialize buckets. */
+ +      while (buckets--)
+ +              INIT_LIST_HEAD(hash->hash + buckets);
+ +      return 0;
+ +}
+ +
+ +/* Free the buckets of a stripe hash (safe to call more than once). */
+ +static void hash_exit(struct stripe_hash *hash)
+ +{
+ +      if (!hash->hash)
+ +              return;
+ +
+ +      vfree(hash->hash);
+ +      hash->hash = NULL;
+ +}
+ +
+ +/* Map a key to a bucket index: multiply, shift and mask. */
+ +static unsigned hash_fn(struct stripe_hash *hash, sector_t key)
+ +{
+ +      sector_t h = key * hash->prime;
+ +
+ +      h >>= hash->shift;
+ +      return (unsigned) (h & hash->mask);
+ +}
+ +
+ +/* Return the bucket list head a key hashes to. */
+ +static struct list_head *hash_bucket(struct stripe_hash *hash, sector_t key)
+ +{
+ +      unsigned idx = hash_fn(hash, key);
+ +
+ +      return &hash->hash[idx];
+ +}
+ +
+ +/* Insert a stripe at the head of the bucket its key hashes to. */
+ +static void stripe_insert(struct stripe_hash *hash, struct stripe *stripe)
+ +{
+ +      struct list_head *bucket = hash_bucket(hash, stripe->key);
+ +
+ +      list_add(&stripe->lists[LIST_HASH], bucket);
+ +}
+ +
+ +/*
+ + * Lookup a stripe by key in the stripe hash.
+ + *
+ + * Returns the stripe or NULL if the key isn't hashed.
+ + */
+ +static struct stripe *stripe_lookup(struct stripe_cache *sc, sector_t key)
+ +{
+ +      struct list_head *bucket = hash_bucket(&sc->hash, key);
+ +      struct stripe *stripe;
+ +      unsigned look = 0;
+ +
+ +      list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
+ +              look++;
+ +
+ +              if (stripe->key != key)
+ +                      continue;
+ +
+ +              /* REMOVEME: statistics (longest successful search). */
+ +              if (look > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
+ +                      atomic_set(RS(sc)->stats + S_MAX_LOOKUP, look);
+ +
+ +              return stripe;
+ +      }
+ +
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Resize the stripe cache hash on size changes.
+ + *
+ + * Nothing happens unless the # of stripes differs from the # seen
+ + * on the last resize. Returns 0 or the error of hash_init(); on
+ + * error the existing hash stays in place untouched.
+ + */
+ +static int sc_hash_resize(struct stripe_cache *sc)
+ +{
+ +      /* Resize indicated ? */
+ +      if (atomic_read(&sc->stripes) != atomic_read(&sc->stripes_last)) {
+ +              int r;
+ +              struct stripe_hash hash;
+ +
+ +              /* Set up the new hash on the side first. */
+ +              r = hash_init(&hash, atomic_read(&sc->stripes));
+ +              if (r)
+ +                      return r;
+ +
+ +              if (sc->hash.hash) {
+ +                      unsigned b = sc->hash.buckets;
+ +                      struct list_head *pos, *tmp;
+ +
+ +                      /*
+ +                       * Walk old buckets and insert into new.
+ +                       *
+ +                       * Entries aren't unlinked from the old bucket
+ +                       * before stripe_insert() list_add()s them; the
+ +                       * _safe iterator fetched the successor already
+ +                       * and the old buckets get freed below.
+ +                       */
+ +                      while (b--) {
+ +                              list_for_each_safe(pos, tmp, sc->hash.hash + b)
+ +                                  stripe_insert(&hash,
+ +                                                list_entry(pos, struct stripe,
+ +                                                           lists[LIST_HASH]));
+ +                      }
+ +
+ +              }
+ +
+ +              /* Replace the old hash with the new one. */
+ +              hash_exit(&sc->hash);
+ +              memcpy(&sc->hash, &hash, sizeof(sc->hash));
+ +              atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
+ +      }
+ +
+ +      return 0;
+ +}
+ +/* End stripe hash functions. */
+ +
+ +/* List add, delete, push and pop functions. */
+ +/*
+ + * Delete a list element from its list unless it's unlinked already.
+ + *
+ + * NOTE(review): expands to a bare if statement with a trailing ';'
+ + * and evaluates 'lh' twice, so it isn't safe in front of an else.
+ + */
+ +#define       DEL_LIST(lh) \
+ +      if (!list_empty(lh)) \
+ +              list_del_init(lh);
+ +
+ +/* Take a stripe off its hash bucket (no-op if it isn't hashed). */
+ +static void stripe_hash_del(struct stripe *stripe)
+ +{
+ +      struct list_head *lh = stripe->lists + LIST_HASH;
+ +
+ +      if (!list_empty(lh))
+ +              list_del_init(lh);
+ +}
+ +
+ +/* Return the current reference count of a stripe. */
+ +static inline int stripe_ref(struct stripe *stripe)
+ +{
+ +      atomic_t *cnt = &stripe->cnt;
+ +
+ +      return atomic_read(cnt);
+ +}
+ +
+ +/*
+ + * Add stripe to the tail of the flush list, unless it is
+ + * flagged for reconstruction or sits on the list already.
+ + */
+ +static void stripe_flush_add(struct stripe *stripe)
+ +{
+ +      struct list_head *entry = &stripe->lists[LIST_FLUSH];
+ +
+ +      if (StripeReconstruct(stripe))
+ +              return;
+ +
+ +      if (!list_empty(entry))
+ +              return;
+ +
+ +      list_add_tail(entry, &stripe->sc->lists[LIST_FLUSH]);
+ +}
+ +
+ +/*
+ + * Add stripe to LRU (inactive) list.
+ + *
+ + * Recovery stripes never go onto the LRU list; a stripe being
+ + * on the list already stays where it is.
+ + *
+ + * Need lock, because of concurrent access from message interface.
+ + * NOTE(review): no lock is taken in here -- confirm callers do.
+ + */
+ +static void stripe_lru_add(struct stripe *stripe)
+ +{
+ +      struct list_head *entry;
+ +
+ +      if (StripeRecover(stripe))
+ +              return;
+ +
+ +      entry = &stripe->lists[LIST_LRU];
+ +      if (!list_empty(entry))
+ +              return;
+ +
+ +      list_add_tail(entry, &stripe->sc->lists[LIST_LRU]);
+ +}
+ +
+ +/*
+ + * Pop the first stripe off stripe cache list 'list'.
+ + *
+ + * Relies on two variables in the scope of the user:
+ + *   o 'sc'     - the stripe cache to pop from
+ + *   o 'stripe' - set to the popped stripe or NULL if the list is empty
+ + *
+ + * NOTE(review): the expansion ends in ';' already and at least one
+ + * user below relies on that, so it can't be dropped on its own.
+ + */
+ +#define POP_LIST(list) \
+ +      do { \
+ +              if (list_empty(sc->lists + (list))) \
+ +                      stripe = NULL; \
+ +              else { \
+ +                      stripe = list_first_entry(sc->lists + (list), \
+ +                                                struct stripe, \
+ +                                                lists[(list)]); \
+ +                      list_del_init(stripe->lists + (list)); \
+ +              } \
+ +      } while (0);
+ +
+ +/*
+ + * Pop an available stripe off the LRU list.
+ + *
+ + * Returns NULL if the list is empty.
+ + */
+ +static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
+ +{
+ +      struct list_head *head = sc->lists + LIST_LRU;
+ +      struct stripe *stripe;
+ +
+ +      if (list_empty(head))
+ +              return NULL;
+ +
+ +      stripe = list_first_entry(head, struct stripe, lists[LIST_LRU]);
+ +      list_del_init(stripe->lists + LIST_LRU);
+ +      return stripe;
+ +}
+ +
+ +/*
+ + * Pop an available stripe off the io (i.e. flush) list.
+ + *
+ + * Returns the first stripe on the cache's LIST_FLUSH list after
+ + * unlinking it from that list, or NULL if the list is empty.
+ + */
+ +static struct stripe *stripe_io_pop(struct stripe_cache *sc)
+ +{
+ +      struct stripe *stripe;
+ +
+ +      POP_LIST(LIST_FLUSH); /* Uses the locals "sc" and "stripe". */
+ +      return stripe;
+ +}
+ +
+ +/*
+ + * Hand a stripe over to do_endios() via the cache's endio list.
+ + *
+ + * The list is modified under the endio spinlock with the interrupt state
+ + * saved and restored around it; a stripe which is already on the list is
+ + * not added a second time.  The RAID daemon is woken in any case.
+ + */
+ +static void stripe_endio_push(struct stripe *stripe)
+ +{
+ +      struct stripe_cache *sc = stripe->sc;
+ +      struct list_head *entry = stripe->lists + LIST_ENDIO;
+ +      spinlock_t *endio_lock = sc->locks + LOCK_ENDIO;
+ +      unsigned long irq_flags;
+ +
+ +      /* do_endios() works on this list in parallel. */
+ +      spin_lock_irqsave(endio_lock, irq_flags);
+ +      if (list_empty(entry))
+ +              list_add_tail(entry, sc->lists + LIST_ENDIO);
+ +      spin_unlock_irqrestore(endio_lock, irq_flags);
+ +
+ +      /* Wake myself. */
+ +      wake_do_raid(RS(sc));
+ +}
+ +
+ +/*
+ + * Safely pop the first stripe off the endio list.
+ + *
+ + * Returns the stripe after unlinking it, or NULL if the list is empty.
+ + * The list is accessed under the endio spinlock with interrupts disabled.
+ + */
+ +static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
+ +{
+ +      struct stripe *stripe;
+ +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
+ +
+ +      /* This runs in parallel with endio(). */
+ +      spin_lock_irq(lock);
+ +      /* Terminate the invocation explicitly, like the other pop helpers. */
+ +      POP_LIST(LIST_ENDIO);
+ +      spin_unlock_irq(lock);
+ +      return stripe;
+ +}
+ +#undef POP_LIST
+ +
+ +/*
+ + * Stripe cache locking functions
+ + */
+ +/*
+ + * Dummy lock function for single host RAID4+5.
+ + *
+ + * Ignores @key and @type and returns the function's own address, i.e. a
+ + * non-NULL handle, which stripe_lock() treats as a successfully taken lock.
+ + */
+ +static void *no_lock(sector_t key, enum dm_lock_type type)
+ +{
+ +      return &no_lock;
+ +}
+ +
+ +/*
+ + * Dummy unlock function for single host RAID4+5.
+ + *
+ + * Does nothing; @lock_handle is ignored.
+ + */
+ +static void no_unlock(void *lock_handle)
+ +{
+ +}
+ +
+ +/*
+ + * No locking (for single host RAID 4+5).
+ + *
+ + * Locking type whose lock/unlock methods are the dummy functions above.
+ + */
+ +static struct dm_raid45_locking_type locking_none = {
+ +      .lock = no_lock,
+ +      .unlock = no_unlock,
+ +};
+ +
+ +/*
+ + * Lock a stripe (for clustering).
+ + *
+ + * A READ takes a shared lock, anything else an exclusive one.  The handle
+ + * returned by the set's locking type is remembered in the stripe.
+ + *
+ + * Returns 0 on success or -EPERM if no lock handle was obtained.
+ + */
+ +static int
+ +stripe_lock(struct stripe *stripe, int rw, sector_t key)
+ +{
+ +      enum dm_lock_type type;
+ +
+ +      if (rw == READ)
+ +              type = DM_RAID45_SHARED;
+ +      else
+ +              type = DM_RAID45_EX;
+ +
+ +      stripe->lock = RS(stripe->sc)->locking->lock(key, type);
+ +      if (!stripe->lock)
+ +              return -EPERM;
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Unlock a stripe (for clustering).
+ + *
+ + * Passes the remembered lock handle to the set's
+ + * unlock method and forgets it afterwards.
+ + */
+ +static void stripe_unlock(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +
+ +      rs->locking->unlock(stripe->lock);
+ +      stripe->lock = NULL;
+ +}
+ +
+ +/* Report the number of ios pending on @stripe (0 if none). */
+ +static int stripe_io_ref(struct stripe *stripe)
+ +{
+ +      const int pending = atomic_read(&stripe->io.pending);
+ +
+ +      return pending;
+ +}
+ +
+ +/*
+ + * Account for one more io pending on a stripe.
+ + *
+ + * The first pending io also bumps the cache's active stripes counter.
+ + */
+ +static void stripe_io_get(struct stripe *stripe)
+ +{
+ +      if (atomic_inc_return(&stripe->io.pending) != 1) {
+ +              /* Ios were pending already: sanity check the counter. */
+ +              BUG_ON(stripe_io_ref(stripe) < 0);
+ +              return;
+ +      }
+ +
+ +      /* REMOVEME: statistics */
+ +      atomic_inc(&stripe->sc->active_stripes);
+ +}
+ +
+ +/*
+ + * Account for one io less pending on a stripe.
+ + *
+ + * When the last pending io goes away, a regular stripe is pushed onto the
+ + * endio list (which wakes the daemon), whereas for a recovery stripe the
+ + * daemon is only woken.  The active stripes counter is dropped as well.
+ + */
+ +static void stripe_io_put(struct stripe *stripe)
+ +{
+ +      if (!atomic_dec_and_test(&stripe->io.pending)) {
+ +              /* Still ios pending: sanity check the counter. */
+ +              BUG_ON(stripe_io_ref(stripe) < 0);
+ +              return;
+ +      }
+ +
+ +      if (likely(!StripeRecover(stripe)))
+ +              /* Add regular stripe to endio list and wake daemon. */
+ +              stripe_endio_push(stripe);
+ +      else
+ +              /* Don't put recovery stripe on endio list. */
+ +              wake_do_raid(RS(stripe->sc));
+ +
+ +      /* REMOVEME: statistics */
+ +      atomic_dec(&stripe->sc->active_stripes);
+ +}
+ +
+ +/* Take stripe reference out. */
+ +static int stripe_get(struct stripe *stripe)
+ +{
+ +      int r;
+ +      struct list_head *lh = stripe->lists + LIST_LRU;
+ +
+ +      /* Delete stripe from LRU (inactive) list if on. */
+ +      DEL_LIST(lh);
+ +      BUG_ON(stripe_ref(stripe) < 0);
+ +
+ +      /* Lock stripe on first reference */
+ +      r = (atomic_inc_return(&stripe->cnt) == 1) ?
+ +          stripe_lock(stripe, WRITE, stripe->key) : 0;
+ +
+ +      return r;
+ +}
+ +#undef DEL_LIST
+ +
+ +/* Report how many references are currently held on @chunk. */
+ +static int chunk_ref(struct stripe_chunk *chunk)
+ +{
+ +      const int refs = atomic_read(&chunk->cnt);
+ +
+ +      return refs;
+ +}
+ +
+ +/* Reference a chunk once more; returns the new reference count. */
+ +static int chunk_get(struct stripe_chunk *chunk)
+ +{
+ +      const int refs = atomic_inc_return(&chunk->cnt);
+ +
+ +      return refs;
+ +}
+ +
+ +/* Give up one reference on a chunk; the count must not go negative. */
+ +static void chunk_put(struct stripe_chunk *chunk)
+ +{
+ +      const int remaining = atomic_dec_return(&chunk->cnt);
+ +
+ +      BUG_ON(remaining < 0);
+ +}
+ +
+ +/*
+ + * Drop a reference on a stripe.
+ + *
+ + * When the last reference is gone, no io may be pending
+ + * on the stripe any more and the stripe gets unlocked.
+ + */
+ +static void stripe_put(struct stripe *stripe)
+ +{
+ +      if (!atomic_dec_and_test(&stripe->cnt)) {
+ +              /* Still referenced: sanity check the counter. */
+ +              BUG_ON(stripe_ref(stripe) < 0);
+ +              return;
+ +      }
+ +
+ +      BUG_ON(stripe_io_ref(stripe));
+ +      stripe_unlock(stripe);
+ +}
+ +
+ +/*
+ + * Helper needed by for_each_io_dev().
+ + *
+ + * Takes a global io reference on the RAID set, a stripe reference
+ + * and a stripe io reference.  @p is not used in this function.
+ + */
+ +static void stripe_get_references(struct stripe *stripe, unsigned p)
+ +{
+ +
+ +      /*
+ +       * Another one to reference the stripe in
+ +       * order to protect vs. LRU list moves.
+ +       */
+ +      io_get(RS(stripe->sc)); /* Global io references. */
+ +      stripe_get(stripe);
+ +      stripe_io_get(stripe);  /* One for each chunk io. */
+ +}
+ +
+ +/*
+ + * Helper for endio() to put all taken references.
+ + *
+ + * Drops the stripe io reference, the stripe reference
+ + * and the global io reference on the RAID set, in that order.
+ + */
+ +static void stripe_put_references(struct stripe *stripe)
+ +{
+ +      stripe_io_put(stripe);  /* One for each chunk io. */
+ +      stripe_put(stripe);
+ +      io_put(RS(stripe->sc));
+ +}
+ +
+ +/*
+ + * Stripe cache functions.
+ + */
+ +/*
+ + * Invalidate one chunk (i.e. its pages) of a stripe.
+ + *
+ + * I only keep state for the whole chunk, hence
+ + * clearing the chunk's io flags is all it takes.
+ + */
+ +static inline void stripe_chunk_invalidate(struct stripe_chunk *chunk)
+ +{
+ +      chunk->io.flags = 0;
+ +}
+ +
+ +/* Invalidate each chunk of a stripe, highest chunk index first. */
+ +static void
+ +stripe_chunks_invalidate(struct stripe *stripe)
+ +{
+ +      unsigned p;
+ +
+ +      for (p = RS(stripe->sc)->set.raid_devs; p--; )
+ +              stripe_chunk_invalidate(CHUNK(stripe, p));
+ +}
+ +
+ +/*
+ + * Prepare stripe for (re)use.
+ + *
+ + * Clears the stripe's io flags, sets the parity and recover
+ + * chunk indexes to -1 and invalidates all chunks of the stripe.
+ + */
+ +static void stripe_invalidate(struct stripe *stripe)
+ +{
+ +      stripe->io.flags = 0;
+ +      stripe->idx.parity = stripe->idx.recover = -1;
+ +      stripe_chunks_invalidate(stripe);
+ +}
+ +
+ +/*
+ + * Permit io on every chunk of a stripe.
+ + *
+ + * Without the flag set here, io on a chunk is prohibited.  Whether io
+ + * is actually submitted for a permitted chunk depends on its !uptodate
+ + * or dirty state.
+ + */
+ +static void stripe_allow_io(struct stripe *stripe)
+ +{
+ +      unsigned p;
+ +
+ +      for (p = RS(stripe->sc)->set.raid_devs; p--; )
+ +              SetChunkIo(CHUNK(stripe, p));
+ +}
+ +
+ +/*
+ + * Initialize a stripe.
+ + *
+ + * Resets the reference counts and bio lists of all chunks, ties the
+ + * stripe to stripe cache @sc, initializes its list entries, io size and
+ + * counters and finally invalidates it.
+ + */
+ +static void stripe_init(struct stripe_cache *sc, struct stripe *stripe)
+ +{
+ +      unsigned i, p;
+ +
+ +      /* Work all io chunks. */
+ +      for (p = RS(sc)->set.raid_devs; p--; ) {
+ +              struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +
+ +              atomic_set(&chunk->cnt, 0);
+ +              chunk->stripe = stripe;
+ +
+ +              for (i = ARRAY_SIZE(chunk->bl); i--; )
+ +                      bio_list_init(chunk->bl + i);
+ +      }
+ +
+ +      stripe->sc = sc;
+ +
+ +      for (i = ARRAY_SIZE(stripe->lists); i--; )
+ +              INIT_LIST_HEAD(stripe->lists + i);
+ +
+ +      stripe->io.size = RS(sc)->set.io_size;
+ +      atomic_set(&stripe->cnt, 0);
+ +      atomic_set(&stripe->io.pending, 0);
+ +      stripe_invalidate(stripe);
+ +}
+ +
+ +/* Number of pages needed to hold @sectors sectors (rounded up). */
+ +static inline unsigned chunk_pages(unsigned sectors)
+ +{
+ +      const unsigned pages = dm_div_up(sectors, SECTORS_PER_PAGE);
+ +
+ +      return pages;
+ +}
+ +
+ +/* Number of pages per stripe: pages per chunk times RAID devices. */
+ +static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
+ +{
+ +      const unsigned per_chunk = chunk_pages(io_size);
+ +
+ +      return per_chunk * rs->set.raid_devs;
+ +}
+ +
+ +/*
+ + * Zero part of the page list of chunk @p (recovery).
+ + *
+ + * Clears whole pages, starting with the page holding sector @start
+ + * and covering @count sectors, or fewer if the page list ends early.
+ + */
+ +static void stripe_zero_pl_part(struct stripe *stripe, int p,
+ +                              unsigned start, unsigned count)
+ +{
+ +      unsigned first = start / SECTORS_PER_PAGE, left = chunk_pages(count);
+ +      /* Page list element the zeroing starts at. */
+ +      struct page_list *pl = pl_elem(PL(stripe, p), first);
+ +
+ +      BUG_ON(!pl);
+ +      for (; pl && left; pl = pl->next, left--) {
+ +              BUG_ON(!pl->page);
+ +              memset(page_address(pl->page), 0, PAGE_SIZE);
+ +      }
+ +}
+ +
+ +/* Zero the complete chunk @p of a stripe; a negative @p is ignored. */
+ +static void stripe_zero_chunk(struct stripe *stripe, int p)
+ +{
+ +      if (p < 0)
+ +              return;
+ +
+ +      stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
+ +}
+ +
+ +/*
+ + * Size of a stripe structure including the
+ + * stripe chunk structures for all RAID devices.
+ + */
+ +static size_t stripe_size(struct raid_set *rs)
+ +{
+ +      const size_t chunks = rs->set.raid_devs * sizeof(struct stripe_chunk);
+ +
+ +      return sizeof(struct stripe) + chunks;
+ +}
+ +
+ +/*
+ + * Allocate a stripe together with its memory object.
+ + *
+ + * With SC_GROW, memory cache client @mc is grown by one object first (and
+ + * shrunk again if allocating the object fails); with SC_KEEP it is used
+ + * as is.  Returns the initialized stripe or NULL on any failure.
+ + */
+ +/* XXX adjust to cope with stripe cache and recovery stripe caches. */
+ +enum grow { SC_GROW, SC_KEEP };
+ +static struct stripe *stripe_alloc(struct stripe_cache *sc,
+ +                                 struct dm_mem_cache_client *mc,
+ +                                 enum grow grow)
+ +{
+ +      struct stripe *stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
+ +
+ +      if (!stripe)
+ +              return NULL;
+ +
+ +      /* Grow the dm-mem-cache by one object. */
+ +      if (grow == SC_GROW && dm_mem_cache_grow(mc, 1))
+ +              goto err_free;
+ +
+ +      stripe->obj = dm_mem_cache_alloc(mc);
+ +      if (IS_ERR(stripe->obj))
+ +              goto err_shrink;
+ +
+ +      stripe_init(sc, stripe);
+ +      return stripe;
+ +
+ +err_shrink:
+ +      if (grow == SC_GROW)
+ +              dm_mem_cache_shrink(mc, 1);
+ +err_free:
+ +      kmem_cache_free(sc->kc.cache, stripe);
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Free a stripes memory object, shrink the
+ + * memory cache and free the stripe itself.
+ + *
+ + * @mc is the memory cache client the stripe's object was allocated from.
+ + * The client is shrunk by one object unconditionally.
+ + */
+ +static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
+ +{
+ +      dm_mem_cache_free(mc, stripe->obj);
+ +      dm_mem_cache_shrink(mc, 1);
+ +      kmem_cache_free(stripe->sc->kc.cache, stripe);
+ +}
+ +
+ +/*
+ + * Free the recovery stripe(s) and the recovery clients.
+ + *
+ + * Safe to call after a partially failed sc_init(): a memory cache client
+ + * or dm-io client pointer which is NULL or an ERR_PTR() value is not
+ + * handed to the respective destroy function.  Both pointers are reset
+ + * to NULL, so calling this again is harmless.
+ + */
+ +static void stripe_recover_free(struct raid_set *rs)
+ +{
+ +      struct recover *rec = &rs->recover;
+ +      struct dm_mem_cache_client *mc;
+ +
+ +      mc = rec->mem_cache_client;
+ +      rec->mem_cache_client = NULL;
+ +      if (mc && !IS_ERR(mc)) {
+ +              struct stripe *stripe;
+ +
+ +              /* Recovery stripes can only exist with a valid client. */
+ +              while (!list_empty(&rec->stripes)) {
+ +                      stripe = list_first_entry(&rec->stripes, struct stripe,
+ +                                                lists[LIST_RECOVER]);
+ +                      list_del(stripe->lists + LIST_RECOVER);
+ +                      kfree(stripe->recover);
+ +                      stripe_free(stripe, mc);
+ +              }
+ +
+ +              dm_mem_cache_client_destroy(mc);
+ +      }
+ +
+ +      /* The dm-io client is created after the memory cache client. */
+ +      if (rec->dm_io_client && !IS_ERR(rec->dm_io_client))
+ +              dm_io_client_destroy(rec->dm_io_client);
+ +
+ +      rec->dm_io_client = NULL;
+ +}
+ +
+ +/*
+ + * Grow the stripe cache by @stripes stripes.
+ + *
+ + * Each new stripe goes onto the LRU list.  Returns -ENOMEM as soon as an
+ + * allocation fails (stripes added before that stay in the cache), else
+ + * the result of resizing the stripe hash.
+ + */
+ +static int sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
+ +{
+ +      struct stripe *stripe;
+ +
+ +      /* Try to allocate this many (additional) stripes. */
+ +      for (; stripes; stripes--) {
+ +              stripe = stripe_alloc(sc, sc->mem_cache_client, grow);
+ +              if (unlikely(!stripe))
+ +                      return -ENOMEM;
+ +
+ +              stripe_lru_add(stripe);
+ +              atomic_inc(&sc->stripes);
+ +      }
+ +
+ +      return sc_hash_resize(sc);
+ +}
+ +
+ +/*
+ + * Shrink the stripe cache by @stripes stripes.
+ + *
+ + * Only stripes on the LRU list can be freed.  Returns -ENOENT if the
+ + * LRU list ran empty before @stripes stripes were freed, 0 if the cache
+ + * is empty afterwards, else the result of resizing the stripe hash.
+ + */
+ +static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
+ +{
+ +      int r = 0;
+ +      struct stripe *stripe;
+ +
+ +      /* Try to get unused stripe from LRU list. */
+ +      for (; stripes; stripes--) {
+ +              stripe = stripe_lru_pop(sc);
+ +              if (!stripe) {
+ +                      r = -ENOENT;
+ +                      break;
+ +              }
+ +
+ +              /* An LRU stripe may never have ios pending! */
+ +              BUG_ON(stripe_io_ref(stripe));
+ +              BUG_ON(stripe_ref(stripe));
+ +              atomic_dec(&sc->stripes);
+ +              /* Remove from hash if on before deletion. */
+ +              stripe_hash_del(stripe);
+ +              stripe_free(stripe, sc->mem_cache_client);
+ +      }
+ +
+ +      /* Check if stats are still sane. */
+ +      if (atomic_read(&sc->active_stripes_max) > atomic_read(&sc->stripes))
+ +              atomic_set(&sc->active_stripes_max, 0);
+ +
+ +      if (r)
+ +              return r;
+ +
+ +      if (!atomic_read(&sc->stripes))
+ +              return 0;
+ +
+ +      return sc_hash_resize(sc);
+ +}
+ +
+ +/*
+ + * Create stripe cache and recovery.
+ + *
+ + * Sets up the lists, locks and counters of the stripe cache, creates the
+ + * kmem cache for stripe structures, the memory cache and dm-io clients
+ + * for io and recovery stripes, allocates the recovery stripes and finally
+ + * populates the cache with @stripes stripes.
+ + *
+ + * Returns 0 on success or a negative errno.  Nothing is torn down here
+ + * on failure.
+ + * NOTE(review): presumably the caller cleans up via sc_exit() - confirm.
+ + */
+ +static int sc_init(struct raid_set *rs, unsigned stripes)
+ +{
+ +      int r; /* Signed: carries negative errnos from sc_grow(). */
+ +      unsigned i, rstripes;
+ +      struct stripe_cache *sc = &rs->sc;
+ +      struct stripe *stripe;
+ +      struct recover *rec = &rs->recover;
+ +      struct mapped_device *md;
+ +      struct gendisk *disk;
+ +
+ +
+ +      /* Initialize lists and locks. */
+ +      i = ARRAY_SIZE(sc->lists);
+ +      while (i--)
+ +              INIT_LIST_HEAD(sc->lists + i);
+ +
+ +      INIT_LIST_HEAD(&rec->stripes);
+ +
+ +      /* Initialize endio and LRU list locks. */
+ +      i = NR_LOCKS;
+ +      while (i--)
+ +              spin_lock_init(sc->locks + i);
+ +
+ +      /* Initialize atomic variables. */
+ +      atomic_set(&sc->stripes, 0);
+ +      atomic_set(&sc->stripes_to_set, 0);
+ +      atomic_set(&sc->active_stripes, 0);
+ +      atomic_set(&sc->active_stripes_max, 0); /* REMOVEME: statistics. */
+ +
+ +      /*
+ +       * We need a runtime unique # to suffix the kmem cache name
+ +       * because we'll have one for each active RAID set.
+ +       */
+ +      md = dm_table_get_md(rs->ti->table);
+ +      disk = dm_disk(md);
+ +      snprintf(sc->kc.name, sizeof(sc->kc.name), "%s-%d.%d", TARGET,
+ +               disk->first_minor, atomic_inc_return(&_stripe_sc_nr));
+ +      sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
+ +                                       0, 0, NULL);
+ +      if (!sc->kc.cache)
+ +              return -ENOMEM;
+ +
+ +      /* Create memory cache client context for RAID stripe cache. */
+ +      sc->mem_cache_client =
+ +              dm_mem_cache_client_create(stripes, rs->set.raid_devs,
+ +                                         chunk_pages(rs->set.io_size));
+ +      if (IS_ERR(sc->mem_cache_client))
+ +              return PTR_ERR(sc->mem_cache_client);
+ +
+ +      /* Create memory cache client context for RAID recovery stripe(s). */
+ +      rstripes = rec->recovery_stripes;
+ +      rec->mem_cache_client =
+ +              dm_mem_cache_client_create(rstripes, rs->set.raid_devs,
+ +                                         chunk_pages(rec->io_size));
+ +      if (IS_ERR(rec->mem_cache_client))
+ +              return PTR_ERR(rec->mem_cache_client);
+ +
+ +      /* Create dm-io client context for IO stripes. */
+ +      sc->dm_io_client = dm_io_client_create();
+ +      if (IS_ERR(sc->dm_io_client))
+ +              return PTR_ERR(sc->dm_io_client);
+ +
+ +      /* FIXME: intermingled with stripe cache initialization. */
+ +      /* Create dm-io client context for recovery stripes. */
+ +      rec->dm_io_client = dm_io_client_create();
+ +      if (IS_ERR(rec->dm_io_client))
+ +              return PTR_ERR(rec->dm_io_client);
+ +
+ +      /* Allocate stripes for set recovery. */
+ +      while (rstripes--) {
+ +              stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
+ +              if (!stripe)
+ +                      return -ENOMEM;
+ +
+ +              stripe->recover = kzalloc(sizeof(*stripe->recover), GFP_KERNEL);
+ +              if (!stripe->recover) {
+ +                      stripe_free(stripe, rec->mem_cache_client);
+ +                      return -ENOMEM;
+ +              }
+ +
+ +              SetStripeRecover(stripe);
+ +              stripe->io.size = rec->io_size;
+ +              list_add_tail(stripe->lists + LIST_RECOVER, &rec->stripes);
+ +              /* Don't add recovery stripes to LRU list! */
+ +      }
+ +
+ +      /*
+ +       * Allocate the stripe objects from the
+ +       * cache and add them to the LRU list.
+ +       */
+ +      r = sc_grow(sc, stripes, SC_KEEP);
+ +      if (!r)
+ +              atomic_set(&sc->stripes_last, stripes);
+ +
+ +      return r;
+ +}
+ +
+ +/*
+ + * Tear down the stripe cache.
+ + *
+ + * A no-op if the kmem cache for stripes does not exist.  Otherwise the
+ + * recovery stripes and all cached stripes are freed (all of the latter
+ + * have to be on the LRU list), followed by the kmem cache, the valid
+ + * clients and the stripe hash.
+ + */
+ +static void sc_exit(struct stripe_cache *sc)
+ +{
+ +      struct raid_set *rs = RS(sc);
+ +      int r;
+ +
+ +      if (!sc->kc.cache)
+ +              return;
+ +
+ +      stripe_recover_free(rs);
+ +
+ +      /* Every stripe must be freeable at this point. */
+ +      r = sc_shrink(sc, atomic_read(&sc->stripes));
+ +      BUG_ON(r);
+ +
+ +      kmem_cache_destroy(sc->kc.cache);
+ +      sc->kc.cache = NULL;
+ +
+ +      /* Client creation may have failed or never happened. */
+ +      if (sc->mem_cache_client && !IS_ERR(sc->mem_cache_client))
+ +              dm_mem_cache_client_destroy(sc->mem_cache_client);
+ +
+ +      if (sc->dm_io_client && !IS_ERR(sc->dm_io_client))
+ +              dm_io_client_destroy(sc->dm_io_client);
+ +
+ +      hash_exit(&sc->hash);
+ +}
+ +
+ +/*
+ + * Calculate RAID address
+ + *
+ + * Delivers tuple with the index of the data disk holding the chunk
+ + * in the set, the parity disks index and the start of the stripe
+ + * within the address space of the set (used as the stripe cache hash key).
+ + *
+ + * The result is stored in @addr (members di, pi and key) and
+ + * @addr is returned.
+ + */
+ +/* thx MD. */
+ +static struct raid_address *raid_address(struct raid_set *rs, sector_t sector,
+ +                                       struct raid_address *addr)
+ +{
+ +      sector_t stripe, tmp;
+ +
+ +      /*
+ +       * chunk_number = sector / chunk_size
+ +       * stripe_number = chunk_number / data_devs
+ +       * di = stripe % data_devs;
+ +       *
+ +       * sector_div() is relied upon to leave the quotient in its first
+ +       * argument and to return the remainder.
+ +       */
+ +      stripe = sector >> rs->set.chunk_shift;
+ +      addr->di = sector_div(stripe, rs->set.data_devs);
+ +
+ +      switch (rs->set.raid_type->level) {
+ +      case raid4:
+ +              addr->pi = rs->set.pi; /* Fixed parity device. */
+ +              goto check_shift_di; /* Jumps into the raid5 switch below. */
+ +      case raid5:
+ +              tmp = stripe; /* Copy, because sector_div() modifies it. */
+ +              addr->pi = sector_div(tmp, rs->set.raid_devs);
+ +
+ +              switch (rs->set.raid_type->algorithm) {
+ +              case left_asym:         /* Left asymmetric. */
+ +                      addr->pi = rs->set.data_devs - addr->pi; /* Fall through. */
+ +              case right_asym:        /* Right asymmetric. */
+ +check_shift_di:
+ +                      if (addr->di >= addr->pi)
+ +                              addr->di++; /* Skip over the parity device. */
+ +                      break;
+ +              case left_sym:          /* Left symmetric. */
+ +                      addr->pi = rs->set.data_devs - addr->pi; /* Fall through. */
+ +              case right_sym:         /* Right symmetric. */
+ +                      addr->di = (addr->pi + addr->di + 1) %
+ +                                 rs->set.raid_devs;
+ +                      break;
+ +              case none: /* Ain't happen: RAID4 algorithm placeholder. */
+ +                      BUG();
+ +              }
+ +      }
+ +
+ +      /*
+ +       * Start offset of the stripes chunk on any single device of the RAID
+ +       * set, adjusted in case io size differs from chunk size.
+ +       */
+ +      addr->key = (stripe << rs->set.chunk_shift) +
+ +                  (sector & rs->set.io_inv_mask);
+ +      return addr;
+ +}
+ +
+ +/*
+ + * Copy data across between stripe pages and bio vectors.
+ + *
+ + * Pay attention to data alignment in stripe and bio pages.
+ + *
+ + * With @rw == READ, data is copied from the page list @pl into the
+ + * segments of @bio, otherwise from the segments of @bio into the page
+ + * list.  A bio segment may straddle a page boundary of the page list,
+ + * in which case it is copied in pieces.
+ + */
+ +static void bio_copy_page_list(int rw, struct stripe *stripe,
+ +                             struct page_list *pl, struct bio *bio)
+ +{
+ +      unsigned i, page_offset;
+ +      void *page_addr;
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct bio_vec *bv;
+ +
+ +      /* Get start page in page list for this sector. */
+ +      i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
+ +      pl = pl_elem(pl, i);
+ +      BUG_ON(!pl);
+ +      BUG_ON(!pl->page);
+ +
+ +      page_addr = page_address(pl->page);
+ +      page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
+ +
+ +      /* Walk all segments and copy data across between bio_vecs and pages. */
+ +      bio_for_each_segment(bv, bio, i) {
+ +              int len = bv->bv_len, size;
+ +              unsigned bio_offset = 0;
+ +              void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
+ +redo:
+ +              /* Copy no further than the end of the current stripe page. */
+ +              size = (page_offset + len > PAGE_SIZE) ?
+ +                     PAGE_SIZE - page_offset : len;
+ +
+ +              if (rw == READ)
+ +                      memcpy(bio_addr + bio_offset,
+ +                             page_addr + page_offset, size);
+ +              else
+ +                      memcpy(page_addr + page_offset,
+ +                             bio_addr + bio_offset, size);
+ +
+ +              page_offset += size;
+ +              if (page_offset == PAGE_SIZE) {
+ +                      /*
+ +                       * We reached the end of the chunk page ->
+ +                       * need to refer to the next one to copy more data.
+ +                       */
+ +                      len -= size;
+ +                      if (len) {
+ +                              /* Get next page. */
+ +                              pl = pl->next;
+ +                              BUG_ON(!pl);
+ +                              BUG_ON(!pl->page);
+ +                              page_addr = page_address(pl->page);
+ +                              page_offset = 0;
+ +                              bio_offset += size;
+ +                              /* REMOVEME: statistics. */
+ +                              atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
+ +                              goto redo; /* Copy the rest of the segment. */
+ +                      }
+ +              }
+ +
+ +              __bio_kunmap_atomic(bio_addr, KM_USER0);
+ +      }
+ +}
+ +
+ +/*
+ + * Xor optimization macros.
+ + */
+ +/*
+ + * Xor data pointer declaration and initialization macros.
+ + *
+ + * DECLARE_<n> declares the local pointers d0 .. d<n-1> and initializes
+ + * them from data[0] .. data[<n-1>]; each one builds upon its predecessor.
+ + */
+ +#define DECLARE_2     unsigned long *d0 = data[0], *d1 = data[1]
+ +#define DECLARE_3     DECLARE_2, *d2 = data[2]
+ +#define DECLARE_4     DECLARE_3, *d3 = data[3]
+ +#define DECLARE_5     DECLARE_4, *d4 = data[4]
+ +#define DECLARE_6     DECLARE_5, *d5 = data[5]
+ +#define DECLARE_7     DECLARE_6, *d6 = data[6]
+ +#define DECLARE_8     DECLARE_7, *d7 = data[7]
+ +
+ +/*
+ + * Xor unroll macros.
+ + *
+ + * D<n>(i) xors word i of d1 .. d<n-1> into word i of d0.
+ + * X_<m>(macro, offset) expands to m invocations of macro for the
+ + * consecutive arguments offset .. offset + m - 1.
+ + */
+ +#define D2(n) d0[n] = d0[n] ^ d1[n]
+ +#define D3(n) D2(n) ^ d2[n]
+ +#define D4(n) D3(n) ^ d3[n]
+ +#define D5(n) D4(n) ^ d4[n]
+ +#define D6(n) D5(n) ^ d5[n]
+ +#define D7(n) D6(n) ^ d6[n]
+ +#define D8(n) D7(n) ^ d7[n]
+ +
+ +#define       X_2(macro, offset)      macro(offset); macro(offset + 1);
+ +#define       X_4(macro, offset)      X_2(macro, offset); X_2(macro, offset + 2);
+ +#define       X_8(macro, offset)      X_4(macro, offset); X_4(macro, offset + 4);
+ +#define       X_16(macro, offset)     X_8(macro, offset); X_8(macro, offset + 8);
+ +#define       X_32(macro, offset)     X_16(macro, offset); X_16(macro, offset + 16);
+ +#define       X_64(macro, offset)     X_32(macro, offset); X_32(macro, offset + 32);
+ +
+ +/*
+ + * Define a _xor<chunks>_<xors_per_run>() function, e.g. _xor2_8().
+ + *
+ + * The generated function xors XOR_SIZE bytes of data[1] .. data[chunks - 1]
+ + * into data[0], handling xors_per_run words in each loop iteration.
+ + */
+ +#define       _XOR(chunks, xors_per_run) \
+ +static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
+ +{ \
+ +      unsigned end = XOR_SIZE / sizeof(data[0]), i; \
+ +      DECLARE_ ## chunks; \
+ +\
+ +      for (i = 0; i < end; i += xors_per_run) { \
+ +              X_ ## xors_per_run(D ## chunks, i); \
+ +      } \
+ +}
+ +
+ +/* Define xor functions for 2 - 8 chunks for a given number of xors per run. */
+ +#define       MAKE_XOR_PER_RUN(xors_per_run) \
+ +      _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
+ +      _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
+ +      _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
+ +      _XOR(8, xors_per_run);
+ +
+ +MAKE_XOR_PER_RUN(8)   /* Define _xor*_8() functions. */
+ +MAKE_XOR_PER_RUN(16)  /* Define _xor*_16() functions. */
+ +MAKE_XOR_PER_RUN(32)  /* Define _xor*_32() functions. */
+ +MAKE_XOR_PER_RUN(64)  /* Define _xor*_64() functions. */
+ +
+ +#define MAKE_XOR(xors_per_run) \
+ +struct { \
+ +      void (*f)(unsigned long **); \
+ +} static xor_funcs ## xors_per_run[] = { \
+ +      { NULL }, /* NULL pointers to optimize indexing in xor(). */ \
+ +      { NULL }, \
+ +      { _xor2_ ## xors_per_run }, \
+ +      { _xor3_ ## xors_per_run }, \
+ +      { _xor4_ ## xors_per_run }, \
+ +      { _xor5_ ## xors_per_run }, \
+ +      { _xor6_ ## xors_per_run }, \
+ +      { _xor7_ ## xors_per_run }, \
+ +      { _xor8_ ## xors_per_run }, \
+ +}; \
+ +\
+ +static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
+ +{ \
+ +      /* Call respective function for amount of chunks (n is not checked). */ \
+ +      xor_funcs ## xors_per_run[n].f(data); \
+ +}
+ +
+ +/*
+ + * Define xor_8() - xor_64() functions, each one together with its
+ + * function table indexed by the number of chunks (valid for 2 - 8).
+ + */
+ +MAKE_XOR(8)
+ +MAKE_XOR(16)
+ +MAKE_XOR(32)
+ +MAKE_XOR(64)
+ +/*
+ + * END xor optimization macros.
+ + */
+ +
+ +/*
+ + * Maximum number of chunks, which can be xor'ed in one go.
+ + *
+ + * I.e. the highest valid index into the xor function tables.
+ + */
+ +#define       XOR_CHUNKS_MAX  (ARRAY_SIZE(xor_funcs8) - 1)
+ +
+ +/*
+ + * Adapter which allows the xor_blocks() crypto library function
+ + * to be used as an xor function.
+ + *
+ + * data[0] is the destination, data[1] .. data[n - 1] are the sources.
+ + */
+ +static void xor_blocks_wrapper(unsigned n, unsigned long **data)
+ +{
+ +      void *dest, **srcs;
+ +
+ +      BUG_ON(!(n >= 2 && n <= MAX_XOR_BLOCKS + 1));
+ +
+ +      dest = (void *) data[0];
+ +      srcs = (void **) data + 1;
+ +      xor_blocks(n - 1, XOR_SIZE, dest, srcs);
+ +}
+ +
+ +/* An xor function together with its name. */
+ +struct xor_func {
+ +      xor_function_t f;
+ +      const char *name;
+ +};
+ +
+ +/* All xor functions available. */
+ +static struct xor_func xor_funcs[] = {
+ +      { xor_64,  "xor_64" },
+ +      { xor_32,  "xor_32" },
+ +      { xor_16,  "xor_16" },
+ +      { xor_8,   "xor_8"  },
+ +      { xor_blocks_wrapper, "xor_blocks" },
+ +};
+ +
+ +/*
+ + * Decide whether a chunk takes part in an xor run.
+ + *
+ + * Only an uptodate chunk can qualify, and it does so if
+ + *
+ + * o writes are queued to it
+ + * o writes got merged into it
+ + * o its stripe is to be reconstructed
+ + * o its stripe is a recovery stripe
+ + *
+ + * Having queued and merged writes at the same time is a bug.
+ + */
+ +static inline int chunk_must_xor(struct stripe_chunk *chunk)
+ +{
+ +      int queued, merged;
+ +
+ +      if (!ChunkUptodate(chunk))
+ +              return 0;
+ +
+ +      queued = !bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED));
+ +      merged = !bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED));
+ +      BUG_ON(queued && merged);
+ +
+ +      if (queued || merged)
+ +              return 1;
+ +
+ +      if (StripeReconstruct(chunk->stripe))
+ +              return 1;
+ +
+ +      return StripeRecover(chunk->stripe) ? 1 : 0;
+ +}
+ +
+ +/*
+ + * Xor one page worth of a stripe.
+ + *
+ + * Works on the page holding @sector in each chunk's page list.  The
+ + * pages of all chunks which must be xored (see chunk_must_xor()) are
+ + * xored into the page of chunk @pi, in groups of at most xor.chunks
+ + * pages including the destination page.
+ + */
+ +static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned long **d = rs->data;
+ +      xor_function_t xor_f = rs->xor.f->f;
+ +      unsigned max_chunks = rs->xor.chunks,
+ +               o = sector / SECTORS_PER_PAGE, /* Offset into the page_list. */
+ +               n = 1, p;
+ +
+ +      BUG_ON(sector > stripe->io.size);
+ +
+ +      /* The destination page always occupies the first slot. */
+ +      d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
+ +
+ +      for (p = rs->set.raid_devs; p--; ) {
+ +              /* Collect the pages to xor in. */
+ +              if (p != pi && chunk_must_xor(CHUNK(stripe, p)))
+ +                      d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
+ +
+ +              if (n != max_chunks)
+ +                      continue;
+ +
+ +              /* Group is full -> xor it and start over. */
+ +              mutex_lock(&rs->io.xor_lock);
+ +              xor_f(n, d);
+ +              mutex_unlock(&rs->io.xor_lock);
+ +              n = 1;
+ +      }
+ +
+ +      /* Xor the incomplete last group, if any. */
+ +      if (n > 1) {
+ +              mutex_lock(&rs->io.xor_lock);
+ +              xor_f(n, d);
+ +              mutex_unlock(&rs->io.xor_lock);
+ +      }
+ +}
+ +
+ +/*
+ + * Common xor loop through all stripe page lists.
+ + *
+ + * Xors @count sectors starting at sector offset @off of the stripe into
+ + * chunk @pi, one page at a time, and marks chunk @pi clean afterwards.
+ + *
+ + * @count is a length rather than an end offset, so the loop has to stop
+ + * at @off + @count; comparing against @count alone skips all the work
+ + * for any @off >= @count.
+ + */
+ +static void common_xor(struct stripe *stripe, sector_t count,
+ +                     unsigned off, unsigned pi)
+ +{
+ +      unsigned sector;
+ +      sector_t end = off + count;
+ +
+ +      BUG_ON(!count);
+ +      for (sector = off; sector < end; sector += SECTORS_PER_PAGE)
+ +              xor(stripe, pi, sector);
+ +
+ +      /* Set parity page uptodate and clean. */
+ +      chunk_set(CHUNK(stripe, pi), CLEAN);
+ +      atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
+ +}
+ +
+ +/*
+ + * Calculate the parity of an intact stripe.
+ + *
+ + * The stripe is worked in steps of the chunk size, or of the stripe's
+ + * io size if that is smaller.  A stripe whose io size differs from the
+ + * set's one (i.e. a recovery stripe) has no precalculated parity index,
+ + * which is why the RAID address is calculated for each step and the
+ + * respective part of the parity chunk is zeroed before xoring.
+ + */
+ +static void parity_xor(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      const int size_differs = stripe->io.size != rs->set.io_size;
+ +      unsigned io_size = stripe->io.size, step = rs->set.chunk_size;
+ +      sector_t off;
+ +
+ +      if (step > io_size)
+ +              step = io_size;
+ +
+ +      /* This can be the recover stripe with a larger io size. */
+ +      for (off = 0; off < io_size; off += step) {
+ +              if (unlikely(size_differs)) {
+ +                      struct raid_address addr;
+ +
+ +                      /* Find the parity device for this step. */
+ +                      raid_address(rs, (stripe->key + off) *
+ +                                       rs->set.data_devs, &addr);
+ +                      stripe->idx.parity = addr.pi;
+ +                      stripe_zero_pl_part(stripe, addr.pi, off, step);
+ +              }
+ +
+ +              common_xor(stripe, step, off, stripe->idx.parity);
+ +              chunk_set(CHUNK(stripe, stripe->idx.parity), DIRTY);
+ +      }
+ +}
+ +
+ +/*
+ + * Reconstruct the missing chunk of a stripe.
+ + *
+ + * The chunk to reconstruct is selected by the stripe's recover index;
+ + * all other chunks have to be uptodate.  The chunk is zeroed and the
+ + * remaining chunks are xored into it.
+ + */
+ +static void stripe_reconstruct(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      int p, pr = stripe->idx.recover, stat;
+ +
+ +      BUG_ON(pr < 0);
+ +
+ +      /* Check if all but the chunk to be reconstructed are uptodate. */
+ +      for (p = rs->set.raid_devs; p--; )
+ +              BUG_ON(p != pr && !ChunkUptodate(CHUNK(stripe, p)));
+ +
+ +      /* REMOVEME: statistics. */
+ +      if (RSDegraded(rs))
+ +              stat = S_RECONSTRUCT_EI;
+ +      else
+ +              stat = S_RECONSTRUCT_DEV;
+ +
+ +      atomic_inc(rs->stats + stat);
+ +
+ +      /* Zero chunk to be reconstructed. */
+ +      stripe_zero_chunk(stripe, pr);
+ +      common_xor(stripe, stripe->io.size, 0, pr);
+ +}
+ +
+ +/*
+ + * Recovery io throttling
+ + */
+ +/*
+ + * Reset the io counters once more than HZ jiffies have passed since
+ + * the last reset, or if jiffies is smaller than the stored value.
+ + *
+ + * Returns 1 if the counters got reset, else 0.
+ + */
+ +static int recover_io_reset(struct raid_set *rs)
+ +{
+ +      const unsigned long now = jiffies;
+ +      const unsigned long last = rs->recover.last_jiffies;
+ +
+ +      /* Pay attention to jiffies overflows. */
+ +      if (now <= last + HZ && now >= last)
+ +              return 0;
+ +
+ +      atomic_set(rs->recover.io_count + IO_WORK, 0);
+ +      atomic_set(rs->recover.io_count + IO_RECOVER, 0);
+ +      rs->recover.last_jiffies = now;
+ +      return 1;
+ +}
+ +
+ +/* Count an io as either recovery io or regular work io. */
+ +static void recover_io_count(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      int which;
+ +
+ +      if (StripeRecover(stripe))
+ +              which = IO_RECOVER;
+ +      else
+ +              which = IO_WORK;
+ +
+ +      atomic_inc(rs->recover.io_count + which);
+ +}
+ +
+ +/*
+ + * Try getting a stripe either from the hash or from the LRU list.
+ + *
+ + * A stripe found in the hash for @addr->key is referenced and returned.
+ + * Otherwise an unused stripe is taken off the LRU list, invalidated,
+ + * set up for @addr, referenced and inserted into the hash.
+ + *
+ + * Returns the referenced stripe, or NULL if there is neither a hashed
+ + * nor an LRU stripe or if locking the stripe failed.  In the latter case
+ + * the stripe is handed back to the LRU list, because stripe_get() took it
+ + * off that list and it would be lost to the cache otherwise.
+ + */
+ +static struct stripe *stripe_find(struct raid_set *rs,
+ +                                struct raid_address *addr)
+ +{
+ +      int r;
+ +      struct stripe_cache *sc = &rs->sc;
+ +      struct stripe *stripe;
+ +
+ +      /* Try stripe from hash. */
+ +      stripe = stripe_lookup(sc, addr->key);
+ +      if (stripe) {
+ +              r = stripe_get(stripe);
+ +              if (r)
+ +                      goto get_lock_failed;
+ +
+ +              atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
+ +      } else {
+ +              /* Not in hash -> try to get an LRU stripe. */
+ +              stripe = stripe_lru_pop(sc);
+ +              if (stripe) {
+ +                      /*
+ +                       * An LRU stripe may not be referenced
+ +                       * and may never have ios pending!
+ +                       */
+ +                      BUG_ON(stripe_ref(stripe));
+ +                      BUG_ON(stripe_io_ref(stripe));
+ +
+ +                      /* Remove from hash if on before reuse. */
+ +                      stripe_hash_del(stripe);
+ +
+ +                      /* Invalidate before reinserting with changed key. */
+ +                      stripe_invalidate(stripe);
+ +
+ +                      stripe->key = addr->key;
+ +                      stripe->region = dm_rh_sector_to_region(rs->recover.rh,
+ +                                                              addr->key);
+ +                      stripe->idx.parity = addr->pi;
+ +                      r = stripe_get(stripe);
+ +                      if (r)
+ +                              goto get_lock_failed;
+ +
+ +                      /* Insert stripe into the stripe hash. */
+ +                      stripe_insert(&sc->hash, stripe);
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_INSCACHE);
+ +              }
+ +      }
+ +
+ +      return stripe;
+ +
+ +get_lock_failed:
+ +      stripe_put(stripe);
+ +
+ +      /* Unreferenced again -> keep the stripe available for reuse. */
+ +      if (!stripe_ref(stripe))
+ +              stripe_lru_add(stripe);
+ +
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Process end io
+ + *
+ + * I need to do it here because I can't in interrupt
+ + */
+ +/*
+ + * End io all bios on a bio list.
+ + *
+ + * @stripe: stripe the bios are queued to
+ + * @bl:     bio list to drain
+ + * @p:      index of the chunk the list belongs to
+ + * @error:  completion status handed to bio_endio() for every bio
+ + *
+ + * For each bio popped off @bl: a write drops the pending count on the
+ + * stripe's region, a read without error gets its data copied from the
+ + * chunk's page list.  The bio is then completed and one chunk, one
+ + * stripe and one io reference are dropped.
+ + */
+ +static void bio_list_endio(struct stripe *stripe, struct bio_list *bl,
+ +                         int p, int error)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct bio *bio;
+ +      struct page_list *pl = PL(stripe, p);
+ +      struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +
+ +      /* Update region counters. */
+ +      while ((bio = bio_list_pop(bl))) {
+ +              /*
+ +               * Sample the direction before completing the bio:
+ +               * bio_endio() may release the bio, so it must not
+ +               * be dereferenced afterwards (not even for statistics).
+ +               */
+ +              int write = (bio_data_dir(bio) == WRITE);
+ +
+ +              if (write)
+ +                      /* Drop io pending count for any writes. */
+ +                      dm_rh_dec(rs->recover.rh, stripe->region);
+ +              else if (!error)
+ +                      /* Copy data across. */
+ +                      bio_copy_page_list(READ, stripe, pl, bio);
+ +
+ +              bio_endio(bio, error);
+ +
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(rs->stats + (write ? S_BIOS_ENDIO_WRITE :
+ +                                              S_BIOS_ENDIO_READ));
+ +
+ +              chunk_put(chunk);
+ +              stripe_put(stripe);
+ +              io_put(rs);     /* Wake any suspend waiters on last bio. */
+ +      }
+ +}
+ +
+ +/*
+ + * End io all reads/writes on a stripe copying
+ + * read data across from stripe to bios and
+ + * decrementing region counters for writes.
+ + *
+ + * Processing of ios depending on state:
+ + * o no chunk error -> endio ok
+ + * o degraded:
+ + *   - chunk error and read -> ignore to be requeued
+ + *   - chunk error and write -> endio ok
+ + * o dead (more than parity_devs failed) and chunk_error-> endio failed
+ + *
+ + * @rw selects (via BL_CHUNK()) which bio list of each chunk is
+ + * processed; chunks whose list is empty are skipped.  No chunk of
+ + * the stripe may be locked when this is called.
+ + */
+ +static void stripe_endio(int rw, struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned p = rs->set.raid_devs;
+ +      int write = (rw != READ);
+ +
+ +      while (p--) {
+ +              struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +              struct bio_list *bl;
+ +
+ +              BUG_ON(ChunkLocked(chunk));
+ +
+ +              bl = BL_CHUNK(chunk, rw);
+ +              if (bio_list_empty(bl))
+ +                      continue;
+ +
+ +              if (unlikely(ChunkError(chunk) || !ChunkUptodate(chunk))) {
+ +                      /* RAID set dead. */
+ +                      if (unlikely(RSDead(rs)))
+ +                              bio_list_endio(stripe, bl, p, -EIO);
+ +                      /* RAID set degraded: end writes, leave reads queued. */
+ +                      else if (write)
+ +                              bio_list_endio(stripe, bl, p, 0);
+ +              } else {
+ +                      BUG_ON(!RSDegraded(rs) && ChunkDirty(chunk));
+ +                      bio_list_endio(stripe, bl, p, 0);
+ +              }
+ +      }
+ +}
+ +
+ +/*
+ + * Fail all ios hanging off all bio lists of a stripe.
+ + *
+ + * Every non-empty bio list of every chunk is ended with -EIO.
+ + * Afterwards the stripe must hold neither io nor plain references.
+ + */
+ +static void stripe_fail_io(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned p = rs->set.raid_devs;
+ +
+ +      while (p--) {
+ +              struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +              int i = ARRAY_SIZE(chunk->bl);
+ +
+ +              /* Fail all bios on all bio lists of the stripe. */
+ +              while (i--) {
+ +                      struct bio_list *bl = chunk->bl + i;
+ +
+ +                      if (!bio_list_empty(bl))
+ +                              bio_list_endio(stripe, bl, p, -EIO);
+ +              }
+ +      }
+ +
+ +      /*
+ +       * All references must be gone once every bio got failed.
+ +       *
+ +       * NOTE(review): this comment used to read "Put stripe on LRU
+ +       * list", but no LRU insertion is visible in this function;
+ +       * presumably the final stripe_put() in bio_list_endio() does it.
+ +       */
+ +      BUG_ON(stripe_io_ref(stripe));
+ +      BUG_ON(stripe_ref(stripe));
+ +}
+ +
+ +/*
+ + * Unlock all required chunks.
+ + *
+ + * For every chunk that has its unlock flag set, that flag
+ + * is cleared and the chunk's locked flag is cleared with it.
+ + */
+ +static void stripe_chunks_unlock(struct stripe *stripe)
+ +{
+ +      unsigned p = RS(stripe->sc)->set.raid_devs;
+ +      struct stripe_chunk *chunk;
+ +
+ +      while (p--) {
+ +              chunk = CHUNK(stripe, p);
+ +
+ +              if (TestClearChunkUnlock(chunk))
+ +                      ClearChunkLocked(chunk);
+ +      }
+ +}
+ +
+ +/*
+ + * Queue reads and writes to a stripe by hanging
+ + * their bios off the stripe's read/write lists.
+ + *
+ + * If no stripe can be found for the bio's sector,
+ + * the bio is added to the @reject list instead.
+ + *
+ + * Returns 1 if a write got queued (the region's pending write count
+ + * has been incremented), 0 for a queued read or a rejected bio.
+ + */
+ +static int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
+ +                          struct bio_list *reject)
+ +{
+ +      struct raid_address addr;
+ +      struct stripe *stripe;
+ +
+ +      stripe = stripe_find(rs, raid_address(rs, bio->bi_sector, &addr));
+ +      if (stripe) {
+ +              int r = 0, rw = bio_data_dir(bio);
+ +
+ +              /* Distinguish reads and writes. */
+ +              bio_list_add(BL(stripe, addr.di, rw), bio);
+ +
+ +              if (rw == READ)
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_BIOS_ADDED_READ);
+ +              else {
+ +                      /* Increment pending write count on region. */
+ +                      dm_rh_inc(rs->recover.rh, stripe->region);
+ +                      r = 1;
+ +
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_BIOS_ADDED_WRITE);
+ +              }
+ +
+ +              /*
+ +               * Put on io (flush) list in case of
+ +               * initial bio queued to chunk.
+ +               */
+ +              if (chunk_get(CHUNK(stripe, addr.di)) == 1)
+ +                      stripe_flush_add(stripe);
+ +
+ +              return r;
+ +      }
+ +
+ +      /* Got no stripe from cache or failed to lock it -> reject bio. */
+ +      bio_list_add(reject, bio);
+ +      atomic_inc(rs->stats + S_IOS_POST); /* REMOVEME: statistics. */
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Handle all stripes by handing them to the daemon, because we can't
+ + * map their chunk pages to copy the data in interrupt context.
+ + *
+ + * We don't want to handle them here either, while interrupts are disabled.
+ + */
+ +
+ +/*
+ + * Read/write endio function for dm-io (interrupt context).
+ + *
+ + * @error:   non-zero if the io on the chunk failed
+ + * @context: the struct stripe_chunk the io was submitted for
+ + *
+ + * Flags the chunk ERROR or CLEAN, arranges for it to be unlocked
+ + * and drops the references on the chunk's stripe.
+ + */
+ +static void endio(unsigned long error, void *context)
+ +{
+ +      struct stripe_chunk *chunk = context;
+ +
+ +      if (unlikely(error)) {
+ +              chunk_set(chunk, ERROR);
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(RS(chunk->stripe->sc)->stats + S_STRIPE_ERROR);
+ +      } else
+ +              chunk_set(chunk, CLEAN);
+ +
+ +      /*
+ +       * For recovery stripes, I need to reset the locked flag
+ +       * here, because those aren't processed in do_endios().
+ +       * All other chunks are only flagged for a later unlock.
+ +       */
+ +      if (unlikely(StripeRecover(chunk->stripe)))
+ +              ClearChunkLocked(chunk);
+ +      else
+ +              SetChunkUnlock(chunk);
+ +
+ +      /* Indirectly puts stripe on cache's endio list via stripe_io_put(). */
+ +      stripe_put_references(chunk->stripe);
+ +}
+ +
+ +/*
+ + * Read/Write a chunk asynchronously.
+ + *
+ + * @stripe: stripe the chunk belongs to
+ + * @p:      index of the chunk and of its device in rs->dev
+ + *
+ + * A dirty chunk is written, any other chunk is read.  The io starts
+ + * at the stripe's key plus the device's start offset, is clipped
+ + * at sectors_per_dev and completes through endio().  The chunk is
+ + * locked and the device flagged io-queued before submission.
+ + */
+ +static void stripe_chunk_rw(struct stripe *stripe, unsigned p)
+ +{
+ +      struct stripe_cache *sc = stripe->sc;
+ +      struct raid_set *rs = RS(sc);
+ +      struct dm_mem_cache_object *obj = stripe->obj + p;
+ +      struct page_list *pl = obj->pl;
+ +      struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +      struct raid_dev *dev = rs->dev + p;
+ +      struct dm_io_region io = {
+ +              .bdev = dev->dev->bdev,
+ +              .sector = stripe->key,
+ +              .count = stripe->io.size,
+ +      };
+ +      struct dm_io_request control = {
+ +              .bi_rw = ChunkDirty(chunk) ? WRITE : READ,
+ +              .mem = {
+ +                      .type = DM_IO_PAGE_LIST,
+ +                      .ptr.pl = pl,
+ +                      .offset = 0,
+ +              },
+ +              .notify = {
+ +                      .fn = endio,
+ +                      .context = chunk,
+ +              },
+ +              .client = StripeRecover(stripe) ? rs->recover.dm_io_client :
+ +                                                sc->dm_io_client,
+ +      };
+ +
+ +      /* Chunk must be unlocked and either dirty+uptodate or neither. */
+ +      BUG_ON(ChunkLocked(chunk));
+ +      BUG_ON(!ChunkUptodate(chunk) && ChunkDirty(chunk));
+ +      BUG_ON(ChunkUptodate(chunk) && !ChunkDirty(chunk));
+ +
+ +      /*
+ +       * Don't rw past end of device, which can happen, because
+ +       * typically sectors_per_dev isn't divisible by io_size.
+ +       */
+ +      if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
+ +              io.count = rs->set.sectors_per_dev - io.sector;
+ +
+ +      BUG_ON(!io.count);
+ +      io.sector += dev->start;        /* Add <offset>. */
+ +      if (RSRecover(rs))
+ +              recover_io_count(stripe);       /* Recovery io accounting. */
+ +
+ +      /* REMOVEME: statistics. */
+ +      atomic_inc(rs->stats + (ChunkDirty(chunk) ? S_DM_IO_WRITE :
+ +                                                  S_DM_IO_READ));
+ +      SetChunkLocked(chunk);
+ +      SetDevIoQueued(dev);
+ +      BUG_ON(dm_io(&control, 1, &io, NULL)); /* Submission must not fail. */
+ +}
+ +
+ +/*
+ + * Write dirty or read not uptodate page lists of a stripe.
+ + *
+ + * Returns the result of for_each_io_dev(stripe, stripe_get_references):
+ + * non-zero if io got submitted, 0 if no chunk needed io.
+ + */
+ +static int stripe_chunks_rw(struct stripe *stripe)
+ +{
+ +      int r;
+ +      struct raid_set *rs = RS(stripe->sc);
+ +
+ +      /*
+ +       * Increment the pending count on the stripe
+ +       * first, so that we don't race in endio().
+ +       *
+ +       * An inc (IO) is needed for any chunk unless !ChunkIo(chunk):
+ +       *
+ +       * o not uptodate
+ +       * o dirtied by writes merged
+ +       * o dirtied by parity calculations
+ +       */
+ +      r = for_each_io_dev(stripe, stripe_get_references);
+ +      if (r) {
+ +              /* Io needed: chunks are either not uptodate or dirty. */
+ +              int max;        /* REMOVEME: */
+ +              struct stripe_cache *sc = &rs->sc;
+ +
+ +              /* Submit actual io. */
+ +              for_each_io_dev(stripe, stripe_chunk_rw);
+ +
+ +              /* REMOVEME: statistics (track maximum of active stripes). */
+ +              max = sc_active(sc);
+ +              if (atomic_read(&sc->active_stripes_max) < max)
+ +                      atomic_set(&sc->active_stripes_max, max);
+ +
+ +              atomic_inc(rs->stats + S_FLUSHS);
+ +              /* END REMOVEME: statistics */
+ +      }
+ +
+ +      return r;
+ +}
+ +
+ +/*
+ + * Merge in all writes hence dirtying respective chunks.
+ + *
+ + * For every chunk with bios on its WRITE_QUEUED list, the bios are
+ + * copied into the chunk's page list, the list is moved onto the
+ + * WRITE_MERGED list and the chunk is set DIRTY.
+ + */
+ +static void stripe_merge_writes(struct stripe *stripe)
+ +{
+ +      unsigned p = RS(stripe->sc)->set.raid_devs;
+ +
+ +      while (p--) {
+ +              struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +              struct bio_list *write = BL_CHUNK(chunk, WRITE_QUEUED);
+ +
+ +              if (!bio_list_empty(write)) {
+ +                      struct bio *bio;
+ +                      struct page_list *pl = stripe->obj[p].pl;
+ +
+ +                      /*
+ +                       * We can play with the lists without holding a lock,
+ +                       * because it is just us accessing them anyway.
+ +                       */
+ +                      bio_list_for_each(bio, write)
+ +                              bio_copy_page_list(WRITE, stripe, pl, bio);
+ +
+ +                      bio_list_merge(BL_CHUNK(chunk, WRITE_MERGED), write);
+ +                      bio_list_init(write);
+ +                      chunk_set(chunk, DIRTY);
+ +              }
+ +      }
+ +}
+ +
+ +/*
+ + * Queue all writes to get merged.
+ + *
+ + * For every chunk with bios on its WRITE list, the list is moved
+ + * onto the WRITE_QUEUED list and io is allowed on the chunk.
+ + *
+ + * Returns 1 if any writes got queued, 0 otherwise.
+ + */
+ +static int stripe_queue_writes(struct stripe *stripe)
+ +{
+ +      int r = 0;
+ +      unsigned p = RS(stripe->sc)->set.raid_devs;
+ +
+ +      while (p--) {
+ +              struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +              struct bio_list *write = BL_CHUNK(chunk, WRITE);
+ +
+ +              if (!bio_list_empty(write)) {
+ +                      bio_list_merge(BL_CHUNK(chunk, WRITE_QUEUED), write);
+ +                      bio_list_init(write);
+ +                      SetChunkIo(chunk);
+ +                      r = 1;
+ +              }
+ +      }
+ +
+ +      return r;
+ +}
+ +
+ +
+ +/*
+ + * Check, if a chunk gets completely overwritten.
+ + *
+ + * Sums up the sectors of all bios on the chunk's WRITE_QUEUED list.
+ + *
+ + * Returns non-zero if the sum equals the set's io_size, 0 otherwise;
+ + * a sum larger than io_size is a bug.
+ + */
+ +static int stripe_check_chunk_overwrite(struct stripe *stripe, unsigned p)
+ +{
+ +      unsigned sectors = 0;
+ +      struct bio *bio;
+ +      struct bio_list *bl = BL(stripe, p, WRITE_QUEUED);
+ +
+ +      bio_list_for_each(bio, bl)
+ +              sectors += bio_sectors(bio);
+ +
+ +      BUG_ON(sectors > RS(stripe->sc)->set.io_size);
+ +      return sectors == RS(stripe->sc)->set.io_size;
+ +}
+ +
+ +/*
+ + * Avoid io on broken/reconstructed drive in order to
+ + * reconstruct data on endio.
+ + *
+ + * (*1*) We set StripeReconstruct() in here, so that _do_endios()
+ + *     will trigger a reconstruct call before resetting it.
+ + *
+ + * @pr is the index of the chunk to be reconstructed; it is
+ + * invalidated and recorded in stripe->idx.recover.
+ + *
+ + * Always returns -EPERM.
+ + */
+ +static int stripe_chunk_set_io_flags(struct stripe *stripe, int pr)
+ +{
+ +      struct stripe_chunk *chunk = CHUNK(stripe, pr);
+ +
+ +      /*
+ +       * Allow io on all chunks but the indexed one,
+ +       * because we're either degraded or prohibit it
+ +       * on the one for later reconstruction.
+ +       */
+ +      /* Includes ClearChunkIo(), ClearChunkUptodate(). */
+ +      stripe_chunk_invalidate(chunk);
+ +      stripe->idx.recover = pr;
+ +      SetStripeReconstruct(stripe);
+ +
+ +      /* REMOVEME: statistics. */
+ +      atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ +      return -EPERM;
+ +}
+ +
+ +/*
+ + * Chunk locked/uptodate and device failed tests.
+ + *
+ + * Returns the chunk at index @p if it is not locked, not in error,
+ + * its device has not failed and it is not uptodate; NULL otherwise.
+ + * *@chunks_uptodate is incremented for an unlocked, error-free
+ + * chunk that is uptodate.
+ + */
+ +static struct stripe_chunk *
+ +stripe_chunk_check(struct stripe *stripe, unsigned p, unsigned *chunks_uptodate)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct stripe_chunk *chunk = CHUNK(stripe, p);
+ +
+ +      /* Can't access active chunks. */
+ +      if (ChunkLocked(chunk)) {
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(rs->stats + S_CHUNK_LOCKED);
+ +              return NULL;
+ +      }
+ +
+ +      /* Can't access broken device. */
+ +      if (ChunkError(chunk) || DevFailed(rs->dev + p))
+ +              return NULL;
+ +
+ +      /* Uptodate chunks need no io: just count them. */
+ +      if (ChunkUptodate(chunk)) {
+ +              (*chunks_uptodate)++;
+ +              return NULL;
+ +      }
+ +
+ +      return chunk;
+ +}
+ +
+ +/*
+ + * Degraded/reconstruction mode.
+ + *
+ + * Check stripe state to figure which chunks don't need IO.
+ + *
+ + * Returns 0 for fully operational (also for a dead set or an already
+ + * reconstructed stripe), -EBUSY if reconstruction is already flagged
+ + * on the stripe, -EPERM for degraded/resynchronizing.
+ + */
+ +static int stripe_check_reconstruct(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +
+ +      if (RSDead(rs)) {
+ +              ClearStripeReconstruct(stripe);
+ +              ClearStripeReconstructed(stripe);
+ +              stripe_allow_io(stripe);
+ +              return 0;
+ +      }
+ +
+ +      /* Avoid further reconstruction setting, when already set. */
+ +      if (StripeReconstruct(stripe)) {
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(rs->stats + S_RECONSTRUCT_SET);
+ +              return -EBUSY;
+ +      }
+ +
+ +      /* Initially allow io on all chunks. */
+ +      stripe_allow_io(stripe);
+ +
+ +      /* Return if stripe is already reconstructed. */
+ +      if (StripeReconstructed(stripe)) {
+ +              atomic_inc(rs->stats + S_RECONSTRUCTED);
+ +              return 0;
+ +      }
+ +
+ +      /*
+ +       * Degraded/reconstruction mode (device failed) ->
+ +       * avoid io on the failed device.
+ +       */
+ +      if (unlikely(RSDegraded(rs))) {
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(rs->stats + S_DEGRADED);
+ +              /* Allow IO on all devices but the dead one. */
+ +              BUG_ON(rs->set.ei < 0);
+ +              return stripe_chunk_set_io_flags(stripe, rs->set.ei);
+ +      } else {
+ +              int sync, pi = dev_for_parity(stripe, &sync);
+ +
+ +              /*
+ +               * Reconstruction mode (ie. a particular (replaced) device or
+ +               * some (rotating) parity chunk is being resynchronized) ->
+ +               *   o make sure all needed chunks are read in
+ +               *   o cope with 3/4 disk array special case where it
+ +               *     doesn't make a difference to read in parity
+ +               *     to xor data in/out
+ +               */
+ +              if (RSEnforceParityCreation(rs) || !sync) {
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_NOSYNC);
+ +                      /* Allow IO on all devs but the one to reconstruct. */
+ +                      return stripe_chunk_set_io_flags(stripe, pi);
+ +              }
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Check, if stripe is ready to merge writes.
+ + * I.e. if all chunks present to allow to merge bios.
+ + *
+ + * We prohibit io on:
+ + *
+ + * o chunks on failed devices
+ + * o chunks without bios
+ + *
+ + * Chunks which get completely written over are counted, so
+ + * that a full overwrite of all data chunks can be detected.
+ + *
+ + * With @nosync set, chunks in need of io are left as they are.
+ + *
+ + * Returns 0 if the writes can be merged (parity has been prepared
+ + * for the merge), -EPERM if there are not enough chunks to merge.
+ + */
+ +static int stripe_merge_possible(struct stripe *stripe, int nosync)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned chunks_overwrite = 0, chunks_prohibited = 0,
+ +               chunks_uptodate = 0, p = rs->set.raid_devs;
+ +
+ +      /* Walk all chunks. */
+ +      while (p--) {
+ +              struct stripe_chunk *chunk;
+ +
+ +              /* Prohibit io on broken devices. */
+ +              if (DevFailed(rs->dev + p)) {
+ +                      chunk = CHUNK(stripe, p);
+ +                      goto prohibit_io;
+ +              }
+ +
+ +              /* We can't optimize any further if no chunk. */
+ +              chunk = stripe_chunk_check(stripe, p, &chunks_uptodate);
+ +              if (!chunk || nosync)
+ +                      continue;
+ +
+ +              /*
+ +               * We have a chunk, which is not uptodate.
+ +               *
+ +               * If this is not parity and we don't have
+ +               * reads queued, we can optimize further.
+ +               */
+ +              if (p != stripe->idx.parity &&
+ +                  bio_list_empty(BL_CHUNK(chunk, READ)) &&
+ +                  bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED))) {
+ +                      if (bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)))
+ +                              goto prohibit_io;
+ +                      else if (RSCheckOverwrite(rs) &&
+ +                               stripe_check_chunk_overwrite(stripe, p))
+ +                              /* Completely overwritten chunk. */
+ +                              chunks_overwrite++;
+ +              }
+ +
+ +              /* Allow io for chunks with bios and overwritten ones. */
+ +              SetChunkIo(chunk);
+ +              continue;
+ +
+ +prohibit_io:
+ +              /* No io for broken devices or for chunks w/o bios. */
+ +              ClearChunkIo(chunk);
+ +              chunks_prohibited++;
+ +              /* REMOVEME: statistics. */
+ +              atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ +      }
+ +
+ +      /* All data chunks will get written over. */
+ +      if (chunks_overwrite == rs->set.data_devs)
+ +              atomic_inc(rs->stats + S_OVERWRITE); /* REMOVEME: statistics.*/
+ +      else if (chunks_uptodate + chunks_prohibited < rs->set.raid_devs) {
+ +              /* We don't have enough chunks to merge. */
+ +              atomic_inc(rs->stats + S_CANT_MERGE); /* REMOVEME: statistics.*/
+ +              return -EPERM;
+ +      }
+ +
+ +      /*
+ +       * If we have all chunks up to date or overwrite them, we
+ +       * just zero the parity chunk and let stripe_rw() recreate it.
+ +       */
+ +      if (chunks_uptodate == rs->set.raid_devs ||
+ +          chunks_overwrite == rs->set.data_devs) {
+ +              stripe_zero_chunk(stripe, stripe->idx.parity);
+ +              BUG_ON(StripeReconstruct(stripe));
+ +              SetStripeReconstruct(stripe);   /* Enforce xor in caller. */
+ +      } else {
+ +              /*
+ +               * With fewer chunks, we xor parity out.
+ +               *
+ +               * (*4*) We rely on !StripeReconstruct() in chunk_must_xor(),
+ +               *       so that only chunks with queued or merged writes
+ +               *       are being xored.
+ +               */
+ +              parity_xor(stripe);
+ +      }
+ +
+ +      /*
+ +       * We do have enough chunks to merge.
+ +       * All chunks are uptodate or get written over.
+ +       */
+ +      atomic_inc(rs->stats + S_CAN_MERGE); /* REMOVEME: statistics. */
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Avoid reading chunks in case we're fully operational.
+ + *
+ + * We prohibit io on any chunks without bios but the parity chunk.
+ + *
+ + * Only chunks returned by stripe_chunk_check() are touched, i.e.
+ + * unlocked, error-free, not uptodate chunks on working devices.
+ + */
+ +static void stripe_avoid_reads(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      unsigned dummy = 0, p = rs->set.raid_devs;
+ +
+ +      /* Walk all chunks. */
+ +      while (p--) {
+ +              struct stripe_chunk *chunk =
+ +                      stripe_chunk_check(stripe, p, &dummy);
+ +
+ +              if (!chunk)
+ +                      continue;
+ +
+ +              /* If parity or any bios pending -> allow io. */
+ +              if (chunk_ref(chunk) || p == stripe->idx.parity)
+ +                      SetChunkIo(chunk);
+ +              else {
+ +                      ClearChunkIo(chunk);
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ +              }
+ +      }
+ +}
+ +
+ +/*
+ + * Read/write a stripe.
+ + *
+ + * All stripe read/write activity goes through this function
+ + * unless recovery, which has to call stripe_chunk_rw() directly.
+ + *
+ + * Make sure we don't try already merged stripes in order
+ + * to avoid data corruption.
+ + *
+ + * Check the state of the RAID set and if degraded (or
+ + * resynchronizing for reads), read in all other chunks but
+ + * the one on the dead/resynchronizing device in order to be
+ + * able to reconstruct the missing one in _do_endios().
+ + *
+ + * Can be called on active stripes in order
+ + * to dispatch new io on inactive chunks.
+ + *
+ + * States to cover:
+ + *   o stripe to read and/or write
+ + *   o stripe with error to reconstruct
+ + *
+ + * Returns 0 if the stripe is waiting for reconstruction or if no io
+ + * got submitted (the stripe is then pushed to the endio list),
+ + * else the non-zero result of stripe_chunks_rw().
+ + */
+ +static int stripe_rw(struct stripe *stripe)
+ +{
+ +      int nosync, r;
+ +      struct raid_set *rs = RS(stripe->sc);
+ +
+ +      /*
+ +       * Check, if a chunk needs to be reconstructed
+ +       * because of a degraded set or a region out of sync.
+ +       */
+ +      nosync = stripe_check_reconstruct(stripe);
+ +      switch (nosync) {
+ +      case -EBUSY:
+ +              return 0; /* Wait for stripe reconstruction to finish. */
+ +      case -EPERM:
+ +              goto io;
+ +      }
+ +
+ +      /*
+ +       * If we don't have merged writes pending, we can schedule
+ +       * queued writes to be merged next without corrupting data.
+ +       */
+ +      if (!StripeMerged(stripe)) {
+ +              r = stripe_queue_writes(stripe);
+ +              if (r)
+ +                      /* Writes got queued -> flag RBW. */
+ +                      SetStripeRBW(stripe);
+ +      }
+ +
+ +      /*
+ +       * Merge all writes hanging off uptodate/overwritten
+ +       * chunks of the stripe.
+ +       */
+ +      if (StripeRBW(stripe)) {
+ +              r = stripe_merge_possible(stripe, nosync);
+ +              if (!r) { /* Merge possible. */
+ +                      struct stripe_chunk *chunk;
+ +
+ +                      /*
+ +                       * I rely on valid parity in order
+ +                       * to xor a fraction of chunks out
+ +                       * of parity and back in.
+ +                       */
+ +                      stripe_merge_writes(stripe);    /* Merge writes in. */
+ +                      parity_xor(stripe);             /* Update parity. */
+ +                      ClearStripeReconstruct(stripe); /* Reset xor enforce. */
+ +                      SetStripeMerged(stripe);        /* Writes merged. */
+ +                      ClearStripeRBW(stripe);         /* Disable RBW. */
+ +
+ +                      /*
+ +                       * REMOVEME: sanity check on parity chunk
+ +                       *           states after writes got merged.
+ +                       */
+ +                      chunk = CHUNK(stripe, stripe->idx.parity);
+ +                      BUG_ON(ChunkLocked(chunk));
+ +                      BUG_ON(!ChunkUptodate(chunk));
+ +                      BUG_ON(!ChunkDirty(chunk));
+ +                      BUG_ON(!ChunkIo(chunk));
+ +              }
+ +      } else if (!nosync && !StripeMerged(stripe))
+ +              /* Read avoidance if not degraded/resynchronizing/merged. */
+ +              stripe_avoid_reads(stripe);
+ +
+ +io:
+ +      /* Now submit any reads/writes for non-uptodate or dirty chunks. */
+ +      r = stripe_chunks_rw(stripe);
+ +      if (!r) {
+ +              /*
+ +               * No io submitted because of chunk io
+ +               * prohibited or locked chunks/failed devices
+ +               * -> push to end io list for processing.
+ +               */
+ +              stripe_endio_push(stripe);
+ +              atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
+ +      }
+ +
+ +      return r;
+ +}
+ +
+ +/*
+ + * Recovery functions
+ + */
+ +/*
+ + * Read a stripe off a raid set for recovery.
+ + *
+ + * @pi: index of the chunk to be reconstructed, or < 0 if none
+ + *
+ + * The stripe may not have io pending.  Returns the result
+ + * of stripe_chunks_rw().
+ + */
+ +static int stripe_recover_read(struct stripe *stripe, int pi)
+ +{
+ +      BUG_ON(stripe_io_ref(stripe));
+ +
+ +      /* Invalidate all chunks so that they get read in. */
+ +      stripe_chunks_invalidate(stripe);
+ +      stripe_allow_io(stripe); /* Allow io on all recovery chunks. */
+ +
+ +      /*
+ +       * If we are reconstructing a particular device, we can avoid
+ +       * reading the respective chunk in, because we're going to
+ +       * reconstruct it anyway.
+ +       *
+ +       * We can't do that for resynchronization of rotating parity,
+ +       * because the recovery stripe chunk size is typically larger
+ +       * than the sets chunk size.
+ +       */
+ +      if (pi > -1)
+ +              ClearChunkIo(CHUNK(stripe, pi));
+ +
+ +      return stripe_chunks_rw(stripe);
+ +}
+ +
+ +/*
+ + * Write a stripe to a raid set for recovery.
+ + *
+ + * @pi: index of the chunk to be reconstructed, or < 0 to
+ + *      create parity via parity_xor() instead
+ + *
+ + * The stripe may not have io pending.  Returns the result
+ + * of stripe_chunks_rw().
+ + */
+ +static int stripe_recover_write(struct stripe *stripe, int pi)
+ +{
+ +      BUG_ON(stripe_io_ref(stripe));
+ +
+ +      /*
+ +       * If this is a reconstruct of a particular device, then
+ +       * reconstruct the respective chunk, else create parity chunk.
+ +       */
+ +      if (pi > -1) {
+ +              stripe_zero_chunk(stripe, pi);
+ +              common_xor(stripe, stripe->io.size, 0, pi);
+ +              chunk_set(CHUNK(stripe, pi), DIRTY);
+ +      } else
+ +              parity_xor(stripe);
+ +
+ +      return stripe_chunks_rw(stripe);
+ +}
+ +
+ +/*
+ + * Read/write a recovery stripe.
+ + *
+ + * The RBW and Merged stripe flags drive a two-step cycle: with RBW
+ + * set, the stripe is rekeyed to the current recovery position and
+ + * read, and Merged is set; with Merged set, the stripe is written.
+ + *
+ + * Returns the result of the read/write submitted (which must be
+ + * non-zero), or 0 if neither flag was set.
+ + */
+ +static int stripe_recover_rw(struct stripe *stripe)
+ +{
+ +      int r = 0, sync = 0;
+ +
+ +      /* Read/write flip-flop. */
+ +      if (TestClearStripeRBW(stripe)) {
+ +              SetStripeMerged(stripe);
+ +              stripe->key = stripe->recover->pos;
+ +              r = stripe_recover_read(stripe, dev_for_parity(stripe, &sync));
+ +              BUG_ON(!r);
+ +      } else if (TestClearStripeMerged(stripe)) {
+ +              r = stripe_recover_write(stripe, dev_for_parity(stripe, &sync));
+ +              BUG_ON(!r);
+ +      }
+ +
+ +      BUG_ON(sync); /* dev_for_parity() must not have flagged sync. */
+ +      return r;
+ +}
+ +
+ +/*
+ + * Recover bandwidth available?
+ + *
+ + * Recovery is allowed if the io counters just got reset, if
+ + * RSBandwidth() is set, if no work io got counted, or if the
+ + * recovery io count (scaled by recovery vs. set io size) does not
+ + * exceed the work io count divided by rs->recover.bandwidth_work.
+ + *
+ + * NOTE(review): assumes rs->recover.bandwidth_work and
+ + * rs->set.io_size are non-zero (both are divisors) - confirm.
+ + *
+ + * Returns 1 if recovery may proceed, 0 otherwise.
+ + */
+ +static int recover_bandwidth(struct raid_set *rs)
+ +{
+ +      int r, work;
+ +
+ +      /* On reset or when bios delayed -> allow recovery. */
+ +      r = recover_io_reset(rs);
+ +      if (r || RSBandwidth(rs))
+ +              goto out;
+ +
+ +      work = atomic_read(rs->recover.io_count + IO_WORK);
+ +      if (work) {
+ +              /* Pay attention to larger recover stripe size. */
+ +              int recover = atomic_read(rs->recover.io_count + IO_RECOVER) *
+ +                                        rs->recover.io_size / rs->set.io_size;
+ +
+ +              /*
+ +               * Don't use more than given bandwidth
+ +               * of the work io for recovery.
+ +               */
+ +              if (recover > work / rs->recover.bandwidth_work) {
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_NO_BANDWIDTH);
+ +                      return 0;
+ +              }
+ +      }
+ +
+ +out:
+ +      atomic_inc(rs->stats + S_BANDWIDTH);    /* REMOVEME: statistics. */
+ +      return 1;
+ +}
+ +
+ +/*
+ + * Try to get a region to recover.
+ + *
+ + * Returns:
+ + *   1        a region is already in the works on this stripe
+ + *   0        a new region got started; its position and end are
+ + *            recorded and one io reference is taken for it
+ + *   -EPERM   the set is being suspended
+ + *   -ENOENT  the dirty log's sync count has reached nr_regions
+ + *   -EAGAIN  no recovery bandwidth, or no region could be started
+ + */
+ +static int stripe_recover_get_region(struct stripe *stripe)
+ +{
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct recover *rec = &rs->recover;
+ +      struct recover_addr *addr = stripe->recover;
+ +      struct dm_dirty_log *dl = rec->dl;
+ +      struct dm_rh_client *rh = rec->rh;
+ +
+ +      BUG_ON(!dl);
+ +      BUG_ON(!rh);
+ +
+ +      /* Report a region we already have first, to finish it during suspension. */
+ +      if (addr->reg)
+ +              return 1;
+ +
+ +      if (RSSuspend(rs))
+ +              return -EPERM;
+ +
+ +      if (dl->type->get_sync_count(dl) >= rec->nr_regions)
+ +              return -ENOENT;
+ +
+ +      /* If we don't have enough bandwidth, we don't proceed recovering. */
+ +      if (!recover_bandwidth(rs))
+ +              return -EAGAIN;
+ +
+ +      /* Start quiescing a region. */
+ +      dm_rh_recovery_prepare(rh);
+ +      addr->reg = dm_rh_recovery_start(rh);
+ +      if (!addr->reg)
+ +              return -EAGAIN;
+ +
+ +      addr->pos = dm_rh_region_to_sector(rh, dm_rh_get_region_key(addr->reg));
+ +      addr->end = addr->pos + dm_rh_get_region_size(rh);
+ +
+ +      /*
+ +       * Take one global io reference out for the
+ +       * whole region, which is going to be released
+ +       * when the region is completely done with.
+ +       */
+ +      io_get(rs);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Update region hash state.
+ + *
+ + * Ends recovery of the stripe's current region with @success,
+ + * counts successfully recovered regions, forgets the region and
+ + * drops the io reference held for it.  Logs an error and returns
+ + * if the stripe has no region.
+ + */
+ +enum recover_type { REC_FAILURE = 0, REC_SUCCESS = 1 };
+ +static void recover_rh_update(struct stripe *stripe, enum recover_type success)
+ +{
+ +      struct recover_addr *addr = stripe->recover;
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct recover *rec = &rs->recover;
+ +
+ +      if (!addr->reg) {
+ +              DMERR("%s- Called w/o region", __func__);
+ +              return;
+ +      }
+ +
+ +      dm_rh_recovery_end(addr->reg, success);
+ +      if (success)
+ +              rec->nr_regions_recovered++;
+ +
+ +      addr->reg = NULL;
+ +
+ +      /*
+ +       * Completely done with this region ->
+ +       * release the 1st io reference.
+ +       */
+ +      io_put(rs);
+ +}
+ +
+ +/* Set start of recovery state: stamp the start time, clear the end time. */
+ +static void set_start_recovery(struct raid_set *rs)
+ +{
+ +      struct recover *rec = &rs->recover;
+ +
+ +      rec->start_jiffies = jiffies;
+ +      rec->end_jiffies = 0;
+ +}
+ +
+ +/*
+ + * Set end of recovery state.
+ + *
+ + * Clears the recover flag, resets the device to initialize and
+ + * stamps the end time of the recovery run.
+ + */
+ +static void set_end_recovery(struct raid_set *rs)
+ +{
+ +      ClearRSRecover(rs);
+ +/*
+ + * Caution: do not reset this any more -> 'i' stays in the status output
+ + * and userspace might rely on it disappearing!!!!
+ + */
+ +      rs->set.dev_to_init = -1;
+ +
+ +      /* Check for jiffies overrun: clamp to all ones if end < start. */
+ +      rs->recover.end_jiffies = jiffies;
+ +      if (rs->recover.end_jiffies < rs->recover.start_jiffies)
+ +              rs->recover.end_jiffies = ~0;
+ +}
+ +
+ +/*
+ + * Handle recovery on one recovery stripe.
+ + *
+ + * Returns 1 as long as recovery on this stripe has to go on (io in
+ + * flight, region in the works, waiting for bandwidth or suspended)
+ + * and 0 once it ended, either because no regions are left to recover
+ + * or because of an error. do_recovery() sums up these results over
+ + * all recovery stripes.
+ + */
+ +static int _do_recovery(struct stripe *stripe)
+ +{
+ +      int r;
+ +      struct raid_set *rs = RS(stripe->sc);
+ +      struct recover_addr *addr = stripe->recover;
+ +
+ +      /* If recovery is active -> return. */
+ +      if (stripe_io_ref(stripe))
+ +              return 1;
+ +
+ +      /* IO error is fatal for recovery -> stop it. */
+ +      if (unlikely(StripeError(stripe)))
+ +              goto err;
+ +
+ +      /* Recovery end required. */
+ +      if (unlikely(RSDegraded(rs)))
+ +              goto err;
+ +
+ +      /* Get a region to recover. */
+ +      r = stripe_recover_get_region(stripe);
+ +      switch (r) {
+ +      case 0: /* Got a new region: flag initial read before write. */
+ +              SetStripeRBW(stripe); /* Fall through. */
+ +      case 1: /* Have a region in the works. */
+ +              break;
+ +      case -EAGAIN:
+ +              /*
+ +               * No bandwidth/quiesced region yet, try later
+ +               * (then fall through to the suspend case to return 1).
+ +               */
+ +              if (!io_ref(rs))
+ +                      wake_do_raid_delayed(rs, HZ / 4);
+ +      case -EPERM:
+ +              /* Suspend. */
+ +              return 1;
+ +      case -ENOENT:   /* No more regions to recover. */
+ +              schedule_work(&rs->io.ws_do_table_event);
+ +              return 0;
+ +      default:
+ +              BUG();
+ +      }
+ +
+ +      /* Read/write a recover stripe. */
+ +      r = stripe_recover_rw(stripe);
+ +      if (r)
+ +              /* IO initiated. */
+ +              return 1;
+ +
+ +      /* Read and write finished-> update recovery position within region. */
+ +      addr->pos += stripe->io.size;
+ +
+ +      /* If we're at end of region (or of the device), update region hash. */
+ +      if (addr->pos >= addr->end ||
+ +          addr->pos >= rs->set.sectors_per_dev)
+ +              recover_rh_update(stripe, REC_SUCCESS);
+ +      else
+ +              /* Prepare to read next region segment. */
+ +              SetStripeRBW(stripe);
+ +
+ +      /* Schedule myself for another round... */
+ +      wake_do_raid(rs);
+ +      return 1;
+ +
+ +err:
+ +      /* FIXME: rather try recovering other regions on error? */
+ +      rs_check_degrade(stripe);
+ +      recover_rh_update(stripe, REC_FAILURE);
+ +
+ +      /* Check state of partially recovered array. */
+ +      if (RSDegraded(rs) && !RSDead(rs) &&
+ +          rs->set.dev_to_init != -1 &&
+ +          rs->set.ei != rs->set.dev_to_init) {
+ +              /* Broken drive != drive to recover -> FATAL. */
+ +              SetRSDead(rs);
+ +              DMERR("FATAL: failed device != device to initialize -> "
+ +                    "RAID set broken");
+ +      }
+ +
+ +      if (StripeError(stripe) || RSDegraded(rs)) {
+ +              char buf[BDEVNAME_SIZE];
+ +
+ +              DMERR("stopping recovery due to "
+ +                    "ERROR on /dev/%s, stripe at offset %llu",
+ +                    bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
+ +                    (unsigned long long) stripe->key);
+ +
+ +      }
+ +
+ +      /*
+ +       * Make sure, that all quiesced regions get released.
+ +       *
+ +       * NOTE(review): recover_rh_update() above either reset addr->reg
+ +       * to NULL or found it NULL already, so this loop body looks
+ +       * unreachable as written. Also, -EIO is non-zero and is passed
+ +       * where the other callers hand in REC_FAILURE (0) or REC_SUCCESS
+ +       * -- confirm the intended behaviour before relying on this.
+ +       */
+ +      while (addr->reg) {
+ +              dm_rh_recovery_end(addr->reg, -EIO);
+ +              addr->reg = dm_rh_recovery_start(rs->recover.rh);
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Called by main io daemon to recover regions.
+ + *
+ + * Returns the summed up results of _do_recovery() on all recovery
+ + * stripes; once that sum is 0 while the set is flagged for recovery,
+ + * recovery gets ended and the recovery stripes freed.
+ + */
+ +static int do_recovery(struct raid_set *rs)
+ +{
+ +      int busy = 0;
+ +      struct stripe *stripe;
+ +
+ +      /* Nothing to do unless the set is flagged for recovery. */
+ +      if (!RSRecover(rs))
+ +              return 0;
+ +
+ +      list_for_each_entry(stripe, &rs->recover.stripes, lists[LIST_RECOVER])
+ +              busy += _do_recovery(stripe);
+ +
+ +      if (!busy) {
+ +              /* No stripe has work left -> wrap up recovery. */
+ +              set_end_recovery(rs);
+ +              stripe_recover_free(rs);
+ +      }
+ +
+ +      return busy;
+ +}
+ +
+ +/*
+ + * END recovery functions
+ + */
+ +
+ +/*
+ + * End io process one stripe handed in by the endio() callback.
+ + *
+ + * Stripes still holding references afterwards get added to the tail of
+ + * @flush_list for requeueing; all others are put back on the LRU list.
+ + */
+ +static void _do_endios(struct raid_set *rs, struct stripe *stripe,
+ +                     struct list_head *flush_list)
+ +{
+ +      /* First unlock all required chunks. */
+ +      stripe_chunks_unlock(stripe);
+ +
+ +      /*
+ +       * If an io error on a stripe occurred, degrade the RAID set
+ +       * and try to endio as many bios as possible. If any bios can't
+ +       * be endio processed, requeue the stripe (stripe_ref() != 0).
+ +       */
+ +      if (TestClearStripeError(stripe)) {
+ +              /*
+ +               * FIXME: if read, rewrite the failed chunk after reconstruction
+ +               *        in order to trigger disk bad sector relocation.
+ +               */
+ +              rs_check_degrade(stripe); /* Resets ChunkError(). */
+ +              ClearStripeReconstruct(stripe);
+ +              ClearStripeReconstructed(stripe);
+ +
+ +              /*
+ +               * FIXME: if write, don't endio writes in flight and don't
+ +               *        allow for new writes until userspace has updated
+ +               *        its metadata.
+ +               */
+ +      }
+ +
+ +      /* Got to reconstruct a missing chunk. */
+ +      if (StripeReconstruct(stripe)) {
+ +              /*
+ +               * (*2*) We use StripeReconstruct() to allow for
+ +               *       all chunks to be xored into the reconstructed
+ +               *       one (see chunk_must_xor()).
+ +               */
+ +              stripe_reconstruct(stripe);
+ +
+ +              /*
+ +               * (*3*) Now we reset StripeReconstruct() and flag
+ +               *       StripeReconstructed() to show to stripe_rw(),
+ +               *       that we have reconstructed a missing chunk.
+ +               */
+ +              ClearStripeReconstruct(stripe);
+ +              SetStripeReconstructed(stripe);
+ +
+ +              /* FIXME: reschedule to be written in case of read. */
+ +              /* if (!RSDead && RSDegraded(rs) !StripeRBW(stripe)) {
+ +                      chunk_set(CHUNK(stripe, stripe->idx.recover), DIRTY);
+ +                      stripe_chunks_rw(stripe);
+ +              } */
+ +
+ +              /* No chunk to reconstruct any more. */
+ +              stripe->idx.recover = -1;
+ +      }
+ +
+ +      /*
+ +       * Now that we eventually got a complete stripe, we
+ +       * can process the rest of the end ios on reads.
+ +       */
+ +      stripe_endio(READ, stripe);
+ +
+ +      /* End io all merged writes if not prohibited. */
+ +      if (!RSProhibitWrites(rs) && StripeMerged(stripe)) {
+ +              ClearStripeMerged(stripe);
+ +              stripe_endio(WRITE_MERGED, stripe);
+ +      }
+ +
+ +      /* If RAID set is dead -> fail any ios to dead drives. */
+ +      if (RSDead(rs)) {
+ +              /* Log the message only the first time the flag gets set. */
+ +              if (!TestSetRSDeadEndioMessage(rs))
+ +                      DMERR("RAID set dead: failing ios to dead devices");
+ +
+ +              stripe_fail_io(stripe);
+ +      }
+ +
+ +      /*
+ +       * We have stripe references still,
+ +       * because of read before writes or IO errors ->
+ +       * got to put on flush list for processing.
+ +       */
+ +      if (stripe_ref(stripe)) {
+ +              BUG_ON(!list_empty(stripe->lists + LIST_LRU));
+ +              list_add_tail(stripe->lists + LIST_FLUSH, flush_list);
+ +              atomic_inc(rs->stats + S_REQUEUE); /* REMOVEME: statistics. */
+ +      } else
+ +              stripe_lru_add(stripe);
+ +}
+ +
+ +/*
+ + * Pop any endio stripes off of the endio list and belabour them.
+ + *
+ + * Stripes that need another io round are collected on a local list
+ + * and spliced to the head of the stripe cache's flush list afterwards.
+ + */
+ +static void do_endios(struct raid_set *rs)
+ +{
+ +      struct list_head requeue;
+ +      struct stripe *stripe;
+ +      struct stripe_cache *sc = &rs->sc;
+ +
+ +      INIT_LIST_HEAD(&requeue);
+ +
+ +      for (;;) {
+ +              stripe = stripe_endio_pop(sc);
+ +              if (!stripe)
+ +                      break;
+ +
+ +              /* Avoid endio on stripes with newly io'ed chunks. */
+ +              if (stripe_io_ref(stripe))
+ +                      continue;
+ +
+ +              _do_endios(rs, stripe, &requeue);
+ +      }
+ +
+ +      /*
+ +       * Requeued stripes keep their order and go
+ +       * to the beginning of the io (flush) list.
+ +       */
+ +      list_splice(&requeue, sc->lists + LIST_FLUSH);
+ +}
+ +
+ +/*
+ + * Flush any stripes on the io list.
+ + *
+ + * Returns the sum of the stripe_rw() results of all popped stripes.
+ + */
+ +static int do_flush(struct raid_set *rs)
+ +{
+ +      int sum = 0;
+ +      struct stripe *stripe = stripe_io_pop(&rs->sc);
+ +
+ +      while (stripe) {
+ +              /* Read/write stripe. */
+ +              sum += stripe_rw(stripe);
+ +              stripe = stripe_io_pop(&rs->sc);
+ +      }
+ +
+ +      return sum;
+ +}
+ +
+ +/*
+ + * Stripe cache resizing.
+ + *
+ + * A non-zero sc.stripes_to_set requests a resize to that many stripes;
+ + * the request is cleared once growing/shrinking returned 0.
+ + */
+ +static void do_sc_resize(struct raid_set *rs)
+ +{
+ +      int r;
+ +      unsigned cur, want = atomic_read(&rs->sc.stripes_to_set);
+ +
+ +      /* No resize requested. */
+ +      if (!want)
+ +              return;
+ +
+ +      cur = atomic_read(&rs->sc.stripes);
+ +      if (want > cur)
+ +              r = sc_grow(&rs->sc, want - cur, SC_GROW);
+ +      else
+ +              r = sc_shrink(&rs->sc, cur - want);
+ +
+ +      /* Flag end of resizing if ok. */
+ +      if (!r)
+ +              atomic_set(&rs->sc.stripes_to_set, 0);
+ +}
+ +
+ +/*
+ + * Process all ios
+ + *
+ + * We do different things with the io depending
+ + * on the state of the region that it is in:
+ + *
+ + * o reads: hang off stripe cache or postpone if full
+ + *
+ + * o writes:
+ + *
+ + *  CLEAN/DIRTY/NOSYNC:       increment pending and hang io off stripe's stripe set.
+ + *                    In case stripe cache is full or busy, postpone the io.
+ + *
+ + *  RECOVERING:               delay the io until recovery of the region completes.
+ + *
+ + * Bios which can't be processed now (barrier which has to wait, writes
+ + * while writes are prohibited, bios rejected by stripe_queue_bio())
+ + * end up at the head of @ios again for a later run.
+ + */
+ +static void do_ios(struct raid_set *rs, struct bio_list *ios)
+ +{
+ +      int r;
+ +      unsigned flush = 0, delay = 0;
+ +      sector_t sector;
+ +      struct dm_rh_client *rh = rs->recover.rh;
+ +      struct bio *bio;
+ +      struct bio_list reject;
+ +
+ +      bio_list_init(&reject);
+ +
+ +      /*
+ +       * Classify each io:
+ +       *    o delay ios to recovering regions
+ +       *    o queue io to all other regions
+ +       */
+ +      while ((bio = bio_list_pop(ios))) {
+ +              /*
+ +               * In case we get a barrier bio, push it back onto
+ +               * the input queue unless all work queues are empty
+ +               * and the stripe cache is inactive.
+ +               */
+ +              if (bio->bi_rw & REQ_FLUSH) {
+ +                      /* REMOVEME: statistics. */
+ +                      atomic_inc(rs->stats + S_BARRIER);
+ +                      if (delay ||
+ +                          !list_empty(rs->sc.lists + LIST_FLUSH) ||
+ +                          !bio_list_empty(&reject) ||
+ +                          sc_active(&rs->sc)) {
+ +                              bio_list_push(ios, bio);
+ +                              break;
+ +                      }
+ +              }
+ +
+ +              /* If writes prohibited because of failures -> postpone. */
+ +              if (RSProhibitWrites(rs) && bio_data_dir(bio) == WRITE) {
+ +                      bio_list_add(&reject, bio);
+ +                      continue;
+ +              }
+ +
+ +              /* Check for recovering regions. */
+ +              sector = _sector(rs, bio);
+ +              r = region_state(rs, sector, DM_RH_RECOVERING);
+ +              if (unlikely(r)) {
+ +                      delay++;
+ +                      /* Wait writing to recovering regions. */
+ +                      dm_rh_delay_by_region(rh, bio,
+ +                                            dm_rh_sector_to_region(rh,
+ +                                                                   sector));
+ +                      /* REMOVEME: statistics.*/
+ +                      atomic_inc(rs->stats + S_DELAYED_BIOS);
+ +                      atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
+ +
+ +                      /* Force bandwidth tests in recovery. */
+ +                      SetRSBandwidth(rs);
+ +              } else {
+ +                      /*
+ +                       * Process ios to non-recovering regions by queueing
+ +                       * them to stripes (does dm_rh_inc() for writes).
+ +                       */
+ +                      flush += stripe_queue_bio(rs, bio, &reject);
+ +              }
+ +      }
+ +
+ +      if (flush) {
+ +              /* FIXME: better error handling. */
+ +              r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
+ +              if (r)
+ +                      DMERR_LIMIT("dirty log flush");
+ +      }
+ +
+ +      /* Merge any rejected bios back to the head of the input list. */
+ +      bio_list_merge_head(ios, &reject);
+ +}
+ +
+ +/*
+ + * Send an event in case we're getting too busy.
+ + *
+ + * The event is scheduled only when the busy flag was not set before;
+ + * the flag is cleared again as soon as the stripe cache isn't busy.
+ + */
+ +static void do_busy_event(struct raid_set *rs)
+ +{
+ +      if (!sc_busy(rs)) {
+ +              ClearRSScBusy(rs);
+ +              return;
+ +      }
+ +
+ +      /* Already flagged busy -> event was thrown before. */
+ +      if (TestSetRSScBusy(rs))
+ +              return;
+ +
+ +      schedule_work(&rs->io.ws_do_table_event);
+ +}
+ +
+ +/* Throw an event on the table of the RAID set embedding work item @ws. */
+ +static void do_table_event(struct work_struct *ws)
+ +{
+ +      struct raid_set *rs;
+ +
+ +      rs = container_of(ws, struct raid_set, io.ws_do_table_event);
+ +      dm_table_event(rs->ti->table);
+ +}
+ +
+ +
+ +/*-----------------------------------------------------------------
+ + * RAID daemon
+ + *---------------------------------------------------------------*/
+ +/*
+ + * Main io worker of a RAID set, run as the set's delayed
+ + * work item (io.dws_do_raid):
+ + *
+ + * o belabour all end ios
+ + * o update the region hash states
+ + * o optionally resize the stripe cache
+ + * o optionally do recovery
+ + * o grab the input queue
+ + * o work on all requeued or new ios and perform stripe cache flushes
+ + * o check, if the stripe cache gets too busy and throw an event if so
+ + */
+ +static void do_raid(struct work_struct *ws)
+ +{
+ +      struct raid_set *rs = container_of(ws, struct raid_set,
+ +                                         io.dws_do_raid.work);
+ +      struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
+ +
+ +      /*
+ +       * We always need to end io, so that ios can get errored in
+ +       * case the set failed and the region counters get decremented
+ +       * before we update region hash states and go any further.
+ +       */
+ +      do_endios(rs);
+ +      dm_rh_update_states(rs->recover.rh, 1);
+ +
+ +      /*
+ +       * Now that we've end io'd, which may have put stripes on the LRU list
+ +       * to allow for shrinking, we resize the stripe cache if requested.
+ +       */
+ +      do_sc_resize(rs);
+ +
+ +      /* Try to recover regions; the result isn't needed in here. */
+ +      do_recovery(rs);
+ +
+ +      /* Quickly grab all new ios queued and add them to the work list. */
+ +      mutex_lock(&rs->io.in_lock);
+ +      bio_list_merge(ios, ios_in);
+ +      bio_list_init(ios_in);
+ +      mutex_unlock(&rs->io.in_lock);
+ +
+ +      if (!bio_list_empty(ios))
+ +              do_ios(rs, ios); /* Got ios to work into the cache. */
+ +
+ +      /* Flush any stripes on io list; the result isn't needed either. */
+ +      do_flush(rs);
+ +
+ +      do_busy_event(rs);      /* Check if we got too busy. */
+ +}
+ +
+ +/*
+ + * Region hash callback (handed to dm_region_hash_create()):
+ + * dispatches the delayed bios queued to recovered regions
+ + * (gets called via dm_rh_update_states()).
+ + *
+ + * @context is the RAID set, @bl the region hash private bio list,
+ + * which is moved to the head of the work list and left empty.
+ + */
+ +static void dispatch_delayed_bios(void *context, struct bio_list *bl)
+ +{
+ +      struct bio *bio;
+ +      struct raid_set *rs = context;
+ +
+ +      /* REMOVEME: statistics; one pending delayed bio less per entry. */
+ +      bio_list_for_each(bio, bl) {
+ +              atomic_dec(&rs->stats[S_DELAYED_BIOS]);
+ +      }
+ +
+ +      /* Move the bios over to the work list and empty the source list. */
+ +      bio_list_merge_head(&rs->io.work, bl);
+ +      bio_list_init(bl);
+ +
+ +      ClearRSBandwidth(rs);
+ +}
+ +
+ +/*************************************************************
+ + * Constructor helpers
+ + *************************************************************/
+ +/*
+ + * Calculate MB/sec.
+ + *
+ + * Scales rs->xor.speed by the number of data devices, @io_size and
+ + * HZ / XOR_SPEED_TICKS, then converts via to_bytes() and two shifts
+ + * by 10 bits.
+ + *
+ + * NOTE(review): the whole product is built before the division; if
+ + * the operands are 32 bit wide it can wrap for large speeds or io
+ + * sizes -- confirm the operand types.
+ + */
+ +static unsigned mbpers(struct raid_set *rs, unsigned io_size)
+ +{
+ +      return to_bytes((rs->xor.speed * rs->set.data_devs *
+ +                       io_size * HZ / XOR_SPEED_TICKS) >> 10) >> 10;
+ +}
+ +
+ +/*
+ + * Discover fastest xor algorithm and # of chunks combination.
+ + */
+ +/*
+ + * Calculate speed of particular algorithm and # of chunks.
+ + *
+ + * Busy-loops calling common_xor() on @stripe for XOR_SPEED_TICKS
+ + * jiffies and returns the highest number of xor runs which got
+ + * completed within a single jiffy.
+ + */
+ +static unsigned xor_speed(struct stripe *stripe)
+ +{
+ +      int ticks = XOR_SPEED_TICKS;
+ +      unsigned p = RS(stripe->sc)->set.raid_devs, r = 0;
+ +      unsigned long j;
+ +
+ +      /* Set uptodate so that common_xor()->xor() will belabour chunks. */
+ +      while (p--)
+ +              SetChunkUptodate(CHUNK(stripe, p));
+ +
+ +      /* Wait for next tick, so that counting starts on a jiffy boundary. */
+ +      for (j = jiffies; j == jiffies; );
+ +
+ +      /* Do xors for a few ticks. */
+ +      while (ticks--) {
+ +              unsigned xors = 0;
+ +
+ +              /* Count the xor runs fitting into the current jiffy. */
+ +              for (j = jiffies; j == jiffies; ) {
+ +                      mb();
+ +                      common_xor(stripe, stripe->io.size, 0, 0);
+ +                      mb();
+ +                      xors++;
+ +                      mb();
+ +              }
+ +
+ +              /* Keep the best tick. */
+ +              if (xors > r)
+ +                      r = xors;
+ +      }
+ +
+ +      return r;
+ +}
+ +
+ +/*
+ + * Define for xor multi recovery stripe optimization runs: with it
+ + * defined, xor_optimize() measures on every recovery stripe and logs
+ + * the results; without it, only the first recovery stripe is used.
+ + */
+ +#define DMRAID45_XOR_TEST
+ +
+ +/*
+ + * Optimize xor algorithm for this RAID set.
+ + *
+ + * Runs xor_speed() for every function in xor_funcs[] and every chunk
+ + * count from the function's maximum (capped at the number of raid
+ + * devices) down to 2, stores the fastest function/chunk count
+ + * combination in rs->xor and returns its speed.
+ + */
+ +static unsigned xor_optimize(struct raid_set *rs)
+ +{
+ +      unsigned chunks_max = 2, speed_max = 0;
+ +      struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
+ +      struct stripe *stripe;
+ +      unsigned io_size = 0, speed_hm = 0, speed_min = ~0, speed_xor_blocks = 0;
+ +
+ +      BUG_ON(list_empty(&rs->recover.stripes));
+ +#ifndef DMRAID45_XOR_TEST
+ +      stripe = list_first_entry(&rs->recover.stripes, struct stripe,
+ +                                lists[LIST_RECOVER]);
+ +#endif
+ +
+ +      /* Try all xor functions, last table entry first. */
+ +      while (f-- > xor_funcs) {
+ +              unsigned speed;
+ +
+ +#ifdef DMRAID45_XOR_TEST
+ +              list_for_each_entry(stripe, &rs->recover.stripes,
+ +                                  lists[LIST_RECOVER]) {
+ +                      io_size = stripe->io.size;
+ +#endif
+ +
+ +                      /* Set actual xor function for common_xor(). */
+ +                      rs->xor.f = f;
+ +                      rs->xor.chunks = (f->f == xor_blocks_wrapper ?
+ +                                        (MAX_XOR_BLOCKS + 1) :
+ +                                        XOR_CHUNKS_MAX);
+ +                      if (rs->xor.chunks > rs->set.raid_devs)
+ +                              rs->xor.chunks = rs->set.raid_devs;
+ +
+ +                      for ( ; rs->xor.chunks > 1; rs->xor.chunks--) {
+ +                              speed = xor_speed(stripe);
+ +
+ +#ifdef DMRAID45_XOR_TEST
+ +                              if (f->f == xor_blocks_wrapper) {
+ +                                      if (speed > speed_xor_blocks)
+ +                                              speed_xor_blocks = speed;
+ +                              } else if (speed > speed_hm)
+ +                                      speed_hm = speed;
+ +
+ +                              if (speed < speed_min)
+ +                                      speed_min = speed;
+ +#endif
+ +
+ +                              if (speed > speed_max) {
+ +                                      speed_max = speed;
+ +                                      chunks_max = rs->xor.chunks;
+ +                                      f_max = f;
+ +                              }
+ +                      }
+ +#ifdef DMRAID45_XOR_TEST
+ +              }
+ +#endif
+ +      }
+ +
+ +      /*
+ +       * Memorize optimal parameters.
+ +       *
+ +       * NOTE(review): f_max stays NULL if no measured speed exceeded 0,
+ +       * while rs_log() dereferences rs->xor.f -- confirm that this
+ +       * can't happen.
+ +       */
+ +      rs->xor.f = f_max;
+ +      rs->xor.chunks = chunks_max;
+ +#ifdef DMRAID45_XOR_TEST
+ +      DMINFO("%s stripes=%u/size=%u min=%u xor_blocks=%u hm=%u max=%u",
+ +             speed_max == speed_hm ? "HM" : "NB",
+ +             rs->recover.recovery_stripes, io_size, speed_min,
+ +             speed_xor_blocks, speed_hm, speed_max);
+ +#endif
+ +      return speed_max;
+ +}
+ +
+ +/*
+ + * Allocate a RAID context (a RAID set)
+ + *
+ + * context_alloc() creates the dirty log, allocates and initializes
+ + * the raid_set structure, creates the region hash and initializes
+ + * the stripe cache. On any failure it releases what had been set up
+ + * so far and hands out an ERR_PTR() value (directly or, presumably,
+ + * through TI_ERR_RET() -- TODO confirm the macro returns its 2nd arg).
+ + */
+ +/*
+ + * Structure for variable RAID parameters.
+ + *
+ + * get_raid_variable_parms() presets the plain members with their
+ + * defaults; context_alloc() transfers most members into the RAID set.
+ + * The *_parm members presumably keep the values as given on the
+ + * table line -- TODO confirm. raid_parms is the number of variable
+ + * parameters given; recovery is 1 by default and 0 for "nosync".
+ + */
+ +struct variable_parms {
+ +      int bandwidth;
+ +      int bandwidth_parm;
+ +      int chunk_size;
+ +      int chunk_size_parm;
+ +      int io_size;
+ +      int io_size_parm;
+ +      int stripes;
+ +      int stripes_parm;
+ +      int recover_io_size;
+ +      int recover_io_size_parm;
+ +      int raid_parms;
+ +      int recovery;
+ +      int recovery_stripes;
+ +      int recovery_stripes_parm;
+ +};
+ +
+ +static struct raid_set *
+ +context_alloc(struct raid_type *raid_type, struct variable_parms *p,
+ +            unsigned raid_devs, sector_t sectors_per_dev,
+ +            struct dm_target *ti, unsigned dl_parms, char **argv)
+ +{
+ +      int r;
+ +      size_t len;
+ +      sector_t region_size, ti_len;
+ +      struct raid_set *rs = NULL;
+ +      struct dm_dirty_log *dl;
+ +      struct recover *rec;
+ +
+ +      /*
+ +       * Create the dirty log
+ +       *
+ +       * We need to change length for the dirty log constructor,
+ +       * because we want an amount of regions for all stripes derived
+ +       * from the single device size, so that we can keep region
+ +       * size = 2^^n independent of the number of devices
+ +       * (ti->len gets restored right after the constructor call).
+ +       */
+ +      ti_len = ti->len;
+ +      ti->len = sectors_per_dev;
+ +      dl = dm_dirty_log_create(argv[0], ti, NULL, dl_parms, argv + 2);
+ +      ti->len = ti_len;
+ +      if (!dl)
+ +              goto bad_dirty_log;
+ +
+ +      /* Chunk size *must not* be larger than region size. */
+ +      region_size = dl->type->get_region_size(dl);
+ +      if (p->chunk_size > region_size)
+ +              goto bad_chunk_size;
+ +
+ +      /* Recover io size *must not* be larger than region size either. */
+ +      if (p->recover_io_size > region_size)
+ +              goto bad_recover_io_size;
+ +
+ +      /*
+ +       * Size and allocate the RAID set structure: the raid_set itself
+ +       * followed by one dev plus one data element per raid device.
+ +       */
+ +      len = sizeof(*rs->data) + sizeof(*rs->dev);
+ +      if (dm_array_too_big(sizeof(*rs), len, raid_devs))
+ +              goto bad_array;
+ +
+ +      len = sizeof(*rs) + raid_devs * len;
+ +      rs = kzalloc(len, GFP_KERNEL);
+ +      if (!rs)
+ +              goto bad_alloc;
+ +
+ +      rec = &rs->recover;
+ +      atomic_set(&rs->io.in_process, 0);
+ +      atomic_set(&rs->io.in_process_max, 0);
+ +      rec->io_size = p->recover_io_size;
+ +
+ +      /* Pointer to data array, located right behind the dev array. */
+ +      rs->data = (unsigned long **)
+ +                 ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
+ +      rec->dl = dl;
+ +      rs->set.raid_devs = raid_devs;
+ +      rs->set.data_devs = raid_devs - raid_type->parity_devs;
+ +      rs->set.raid_type = raid_type;
+ +
+ +      rs->set.raid_parms = p->raid_parms;
+ +      rs->set.chunk_size_parm = p->chunk_size_parm;
+ +      rs->set.io_size_parm = p->io_size_parm;
+ +      rs->sc.stripes_parm = p->stripes_parm;
+ +      rec->io_size_parm = p->recover_io_size_parm;
+ +      rec->bandwidth_parm = p->bandwidth_parm;
+ +      rec->recovery = p->recovery;
+ +      rec->recovery_stripes = p->recovery_stripes;
+ +
+ +      /*
+ +       * Set chunk and io size and respective shifts
+ +       * (used to avoid divisions)
+ +       */
+ +      rs->set.chunk_size = p->chunk_size;
+ +      rs->set.chunk_shift = ffs(p->chunk_size) - 1;
+ +
+ +      rs->set.io_size = p->io_size;
+ +      rs->set.io_mask = p->io_size - 1;
+ +      /* Mask to adjust address key in case io_size != chunk_size. */
+ +      rs->set.io_inv_mask = (p->chunk_size - 1) & ~rs->set.io_mask;
+ +
+ +      rs->set.sectors_per_dev = sectors_per_dev;
+ +
+ +      rs->set.ei = -1;        /* Indicate no failed device. */
+ +      atomic_set(&rs->set.failed_devs, 0);
+ +
+ +      rs->ti = ti;
+ +
+ +      atomic_set(rec->io_count + IO_WORK, 0);
+ +      atomic_set(rec->io_count + IO_RECOVER, 0);
+ +
+ +      /* Initialize io lock and queues. */
+ +      mutex_init(&rs->io.in_lock);
+ +      mutex_init(&rs->io.xor_lock);
+ +      bio_list_init(&rs->io.in);
+ +      bio_list_init(&rs->io.work);
+ +
+ +      init_waitqueue_head(&rs->io.suspendq);  /* Suspend waiters (dm-io). */
+ +
+ +      rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
+ +      rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios,
+ +                      wake_dummy, wake_do_raid, 0, p->recovery_stripes,
+ +                      dl, region_size, rec->nr_regions);
+ +      if (IS_ERR(rec->rh))
+ +              goto bad_rh;
+ +
+ +      /* Initialize stripe cache. */
+ +      r = sc_init(rs, p->stripes);
+ +      if (r)
+ +              goto bad_sc;
+ +
+ +      /* REMOVEME: statistics. */
+ +      stats_reset(rs);
+ +      ClearRSDevelStats(rs);  /* Disable development statistics. */
+ +      return rs;
+ +
+ +bad_dirty_log:
+ +      TI_ERR_RET("Error creating dirty log", ERR_PTR(-ENOMEM));
+ +
+ +bad_chunk_size:
+ +      dm_dirty_log_destroy(dl);
+ +      TI_ERR_RET("Chunk size larger than region size", ERR_PTR(-EINVAL));
+ +
+ +bad_recover_io_size:
+ +      dm_dirty_log_destroy(dl);
+ +      TI_ERR_RET("Recover stripe io size larger than region size",
+ +                      ERR_PTR(-EINVAL));
+ +
+ +bad_array:
+ +      dm_dirty_log_destroy(dl);
+ +      TI_ERR_RET("Arry too big", ERR_PTR(-EINVAL));
+ +
+ +bad_alloc:
+ +      dm_dirty_log_destroy(dl);
+ +      TI_ERR_RET("Cannot allocate raid context", ERR_PTR(-ENOMEM));
+ +
+ +bad_rh:
+ +      dm_dirty_log_destroy(dl);
+ +      ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
+ +      goto free_rs;
+ +
+ +bad_sc:
+ +      dm_region_hash_destroy(rec->rh); /* Destroys dirty log too. */
+ +      sc_exit(&rs->sc);
+ +      ti->error = DM_MSG_PREFIX "Error creating stripe cache";
+ +free_rs:
+ +      kfree(rs);
+ +      return ERR_PTR(-ENOMEM);
+ +}
+ +
+ +/*
+ + * Free a RAID context (a RAID set).
+ + *
+ + * Puts the first @p devices of the set (highest index first), tears
+ + * down stripe cache and region hash and frees the structure itself.
+ + */
+ +static void context_free(struct raid_set *rs, unsigned p)
+ +{
+ +      for ( ; p > 0; p--)
+ +              dm_put_device(rs->ti, rs->dev[p - 1].dev);
+ +
+ +      sc_exit(&rs->sc);
+ +
+ +      /* Destroys dirty log too. */
+ +      dm_region_hash_destroy(rs->recover.rh);
+ +      kfree(rs);
+ +}
+ +
+ +/*
+ + * Create work queue and initialize delayed work.
+ + *
+ + * Returns 0 on success; workqueue creation failure is reported
+ + * through TI_ERR_RET() with -ENOMEM.
+ + */
+ +static int rs_workqueue_init(struct raid_set *rs)
+ +{
+ +      struct dm_target *ti = rs->ti;
+ +
+ +      rs->io.wq = create_singlethread_workqueue(DAEMON);
+ +      if (rs->io.wq) {
+ +              /* Main io worker and table event thrower. */
+ +              INIT_DELAYED_WORK(&rs->io.dws_do_raid, do_raid);
+ +              INIT_WORK(&rs->io.ws_do_table_event, do_table_event);
+ +              return 0;
+ +      }
+ +
+ +      TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
+ +}
+ +
+ +/*
+ + * Return pointer to raid_type structure for raid name
+ + * or NULL if raid_types[] holds no entry named @name.
+ + */
+ +static struct raid_type *get_raid_type(char *name)
+ +{
+ +      struct raid_type *rt;
+ +
+ +      /* Search the table from its last entry down to the first one. */
+ +      for (rt = ARRAY_END(raid_types); rt-- > raid_types; )
+ +              if (strcmp(rt->name, name) == 0)
+ +                      return rt;
+ +
+ +      return NULL;
+ +}
+ +
+ +/*
+ + * Store the result of dividing @a by @b via sector_div() in @n and
+ + * return non-zero if that result multiplied by @b gives @a again.
+ + *
+ + * FIXME: factor out to dm core.
+ + */
+ +static int multiple(sector_t a, sector_t b, sector_t *n)
+ +{
+ +      sector_t quot = a;
+ +
+ +      /* sector_div() updates its first argument in place. */
+ +      sector_div(quot, b);
+ +      *n = quot;
+ +
+ +      return quot * b == a;
+ +}
+ +
+ +/*
+ + * Log RAID set information to kernel log: one line per
+ + * raid disk followed by a summary of sizes, xor algorithm and
+ + * device counts. @io_size is only used for the MB/s figure.
+ + */
+ +static void rs_log(struct raid_set *rs, unsigned io_size)
+ +{
+ +      char name[BDEVNAME_SIZE];
+ +      unsigned disk = 0;
+ +
+ +      while (disk < rs->set.raid_devs) {
+ +              const char *role = "";
+ +
+ +              /* Tag the disk whose index equals set.pi. */
+ +              if (disk == rs->set.pi)
+ +                      role = " (parity)";
+ +
+ +              DMINFO("/dev/%s is raid disk %u%s",
+ +                     bdevname(rs->dev[disk].dev->bdev, name), disk, role);
+ +              disk++;
+ +      }
+ +
+ +      DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes\n"
+ +             "algorithm \"%s\", %u chunks with %uMB/s\n"
+ +             "%s set with net %u/%u devices",
+ +             rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
+ +             atomic_read(&rs->sc.stripes),
+ +             rs->xor.f->name, rs->xor.chunks, mbpers(rs, io_size),
+ +             rs->set.raid_type->descr, rs->set.data_devs, rs->set.raid_devs);
+ +}
+ +
+ +/*
+ + * Get all devices and offsets.
+ + *
+ + * @argv holds one <device> <offset> pair per raid device. On return,
+ + * *p is the number of devices dm_get_device() succeeded on, which
+ + * presumably is what the caller hands to context_free() -- TODO confirm.
+ + * Returns 0 on success; failures are reported via TI_ERR()/TI_ERR_RET().
+ + */
+ +static int dev_parms(struct raid_set *rs, char **argv, int *p)
+ +{
+ +      struct dm_target *ti = rs->ti;
+ +
+ +DMINFO("rs->set.sectors_per_dev=%llu", (unsigned long long) rs->set.sectors_per_dev); /* NOTE(review): looks like a debug leftover. */
+ +      for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
+ +              int r;
+ +              unsigned long long tmp;
+ +              struct raid_dev *dev = rs->dev + *p;
+ +
+ +              /* Get offset and device; offset must not exceed device size. */
+ +              if (sscanf(argv[1], "%llu", &tmp) != 1 ||
+ +                  tmp > rs->set.sectors_per_dev)
+ +                      TI_ERR("Invalid RAID device offset parameter");
+ +
+ +              dev->start = tmp;
+ +              r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+ +                                &dev->dev);
+ +              if (r)
+ +                      TI_ERR_RET("RAID device lookup failure", r);
+ +
+ +              /* A lookup hit below the current index is a duplicate. */
+ +              r = raid_dev_lookup(rs, dev);
+ +              if (r != -ENODEV && r < *p) {
+ +                      (*p)++; /* Ensure dm_put_device() on actual device. */
+ +                      TI_ERR_RET("Duplicate RAID device", -ENXIO);
+ +              }
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Set recovery bandwidth.
+ + *
+ + * Stores @bandwidth and 100 / @bandwidth as bandwidth_work;
+ + * @bandwidth must not be 0 because of that division.
+ + */
+ +static void
+ +recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
+ +{
+ +      struct recover *rec = &rs->recover;
+ +
+ +      rec->bandwidth = bandwidth;
+ +      rec->bandwidth_work = 100 / bandwidth;
+ +}
+ +
+ +/*
+ + * Handle variable number of RAID parameters.
+ + *
+ + * argv[0] holds the count of variable parameters following it. Those are
+ + * positional and get checked against the constraints table below in this
+ + * order: chunk size, #stripes, io size, recovery io size, recovery
+ + * bandwidth, recovery switch ("sync"/"nosync") and #recovery stripes.
+ + *
+ + * Each numeric value as given is recorded in the respective *_parm member
+ + * of @vp; the effective members keep their preset defaults for -1 and are
+ + * only overwritten by values != -1.
+ + *
+ + * Returns 0 on success; invalid input is reported through TI_ERR().
+ + */
+ +static int get_raid_variable_parms(struct dm_target *ti, char **argv,
+ +                                 struct variable_parms *vp)
+ +{
+ +      int p, value;
+ +      struct {
+ +              int action; /* -1: skip, 0: no power2 check, 1: power2 check */
+ +              char *errmsg;
+ +              int min, max;
+ +              int *var, *var2, *var3;
+ +      } argctr[] = {
+ +              { 1,
+ +                "Invalid chunk size; must be -1 or 2^^n and <= 16384",
+ +                IO_SIZE_MIN, CHUNK_SIZE_MAX,
+ +                &vp->chunk_size_parm, &vp->chunk_size, &vp->io_size },
+ +              { 0,
+ +                "Invalid number of stripes: must be -1 or >= 8 and <= 16384",
+ +                STRIPES_MIN, STRIPES_MAX,
+ +                &vp->stripes_parm, &vp->stripes, NULL },
+ +              { 1,
+ +                "Invalid io size; must -1 or >= 8, 2^^n and less equal "
+ +                "min(BIO_MAX_SECTORS/2, chunk size)",
+ +                IO_SIZE_MIN, 0, /* Needs to be updated in loop below. */
+ +                &vp->io_size_parm, &vp->io_size, NULL },
+ +              { 1,
+ +                "Invalid recovery io size; must be -1 or "
+ +                "2^^n and less equal BIO_MAX_SECTORS/2",
+ +                RECOVER_IO_SIZE_MIN, BIO_MAX_SECTORS / 2,
+ +                &vp->recover_io_size_parm, &vp->recover_io_size, NULL },
+ +              { 0,
+ +                "Invalid recovery bandwidth percentage; "
+ +                "must be -1 or > 0 and <= 100",
+ +                BANDWIDTH_MIN, BANDWIDTH_MAX,
+ +                &vp->bandwidth_parm, &vp->bandwidth, NULL },
+ +              /* Handle sync argument separately in loop. */
+ +              { -1,
+ +                "Invalid recovery switch; must be \"sync\" or \"nosync\"" },
+ +              { 0,
+ +                /* Trailing blank keeps the concatenated message readable. */
+ +                "Invalid number of recovery stripes; "
+ +                "must be -1, > 0 and <= 64",
+ +                RECOVERY_STRIPES_MIN, RECOVERY_STRIPES_MAX,
+ +                &vp->recovery_stripes_parm, &vp->recovery_stripes, NULL },
+ +      }, *varp;
+ +
+ +      /*
+ +       * Fetch # of variable raid parameters.
+ +       *
+ +       * The upper limit is the number of entries in the constraints
+ +       * table, so that the walk below can never run past its end.
+ +       */
+ +      if (sscanf(*(argv++), "%d", &vp->raid_parms) != 1 ||
+ +          !range_ok(vp->raid_parms, 0, (int) (ARRAY_END(argctr) - argctr)))
+ +              TI_ERR("Bad variable raid parameters number");
+ +
+ +      /* Preset variable RAID parameters. */
+ +      vp->chunk_size = CHUNK_SIZE_DEFAULT;
+ +      vp->io_size = IO_SIZE_DEFAULT;
+ +      vp->stripes = STRIPES_DEFAULT;
+ +      vp->recover_io_size = RECOVER_IO_SIZE_DEFAULT;
+ +      vp->bandwidth = BANDWIDTH_DEFAULT;
+ +      vp->recovery = 1;
+ +      vp->recovery_stripes = RECOVERY_STRIPES_DEFAULT;
+ +
+ +      /* Walk the array of argument constraints for all given ones. */
+ +      for (p = 0, varp = argctr; p < vp->raid_parms; p++, varp++) {
+ +              BUG_ON(varp >= ARRAY_END(argctr));
+ +
+ +              /* Special case for "[no]sync" string argument. */
+ +              if (varp->action < 0) {
+ +                      if (!strcmp(*argv, "sync"))
+ +                              ;
+ +                      else if (!strcmp(*argv, "nosync"))
+ +                              vp->recovery = 0;
+ +                      else
+ +                              TI_ERR(varp->errmsg);
+ +
+ +                      argv++;
+ +                      continue;
+ +              }
+ +
+ +              /*
+ +               * Special case for io_size depending
+ +               * on previously set chunk size.
+ +               */
+ +              if (p == 2)
+ +                      varp->max = min(BIO_MAX_SECTORS / 2, vp->chunk_size);
+ +
+ +              /* -1 selects the default and bypasses power2/range checks. */
+ +              if (sscanf(*(argv++), "%d", &value) != 1 ||
+ +                  (value != -1 &&
+ +                   ((varp->action && !is_power_of_2(value)) ||
+ +                    !range_ok(value, varp->min, varp->max))))
+ +                      TI_ERR(varp->errmsg);
+ +
+ +              /* Remember the value as given; apply it unless default. */
+ +              *varp->var = value;
+ +              if (value != -1) {
+ +                      if (varp->var2)
+ +                              *varp->var2 = value;
+ +                      if (varp->var3)
+ +                              *varp->var3 = value;
+ +              }
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Parse optional locking parameters.
+ + *
+ + * If argv[0] is (an abbreviation of) the keyword "locking", argv[1] names
+ + * the locking type. Only "none" is accepted; "cluster" and anything else
+ + * are logged and rejected with -EINVAL.
+ + *
+ + * On success, *locking_parms is set to the number of arguments consumed
+ + * (2 if the keyword was given, 0 if not) and *locking_type to the
+ + * locking type to use. Returns 0 on success or -EINVAL.
+ + */
+ +static int get_raid_locking_parms(struct dm_target *ti, char **argv,
+ +                                int *locking_parms,
+ +                                struct dm_raid45_locking_type **locking_type)
+ +{
+ +      char *lckstr;
+ +      size_t lcksz;
+ +
+ +      /* Default: no locking arguments given -> no locking. */
+ +      *locking_parms = 0;
+ +      *locking_type = &locking_none;
+ +
+ +      if (strnicmp(argv[0], "locking", strlen(argv[0])))
+ +              return 0;
+ +
+ +      lckstr = argv[1];
+ +      lcksz = strlen(lckstr);
+ +
+ +      if (!strnicmp(lckstr, "none", lcksz)) {
+ +              /*
+ +               * Return right here, so the count of consumed arguments
+ +               * is reported to the caller instead of being reset to 0.
+ +               */
+ +              *locking_parms = 2;
+ +              return 0;
+ +      }
+ +
+ +      if (!strnicmp(lckstr, "cluster", lcksz))
+ +              DMERR("locking type \"%s\" not yet implemented", lckstr);
+ +      else
+ +              DMERR("unknown locking type \"%s\"", lckstr);
+ +
+ +      return -EINVAL;
+ +}
+ +
+ +/*
+ + * Set backing device read ahead properties of RAID set.
+ + *
+ + * @sectors is rounded up to pages. The RAID set itself gets
+ + * stripes * pages * data devices, each component device gets the plain
+ + * page count. Nothing is changed if the page count is zero.
+ + */
+ +static void rs_set_read_ahead(struct raid_set *rs,
+ +                            unsigned sectors, unsigned stripes)
+ +{
+ +      unsigned dev, pages = dm_div_up(sectors, SECTORS_PER_PAGE);
+ +      struct mapped_device *md = dm_table_get_md(rs->ti->table);
+ +      struct backing_dev_info *set_bdi =
+ +              &dm_disk(md)->queue->backing_dev_info;
+ +
+ +      if (!pages)
+ +              return;
+ +
+ +      /* Read-ahead of the RAID set... */
+ +      set_bdi->ra_pages = stripes * pages * rs->set.data_devs;
+ +
+ +      /* ...and of its component devices, last one first. */
+ +      for (dev = rs->set.raid_devs; dev--; ) {
+ +              struct request_queue *q =
+ +                      bdev_get_queue(rs->dev[dev].dev->bdev);
+ +
+ +              q->backing_dev_info.ra_pages = pages;
+ +      }
+ +}
+ +
+ +/*
+ + * Install rs_congested() together with the RAID set as its data
+ + * in the backing device info of the mapped device's queue.
+ + */
+ +static void rs_set_congested_fn(struct raid_set *rs)
+ +{
+ +      struct backing_dev_info *bdi;
+ +
+ +      bdi = &dm_disk(dm_table_get_md(rs->ti->table))->queue->backing_dev_info;
+ +      bdi->congested_fn = rs_congested;
+ +      bdi->congested_data = rs;
+ +}
+ +
+ +/*
+ + * Construct a RAID4/5 mapping:
+ + *
+ + * log_type #log_params <log_params> \
+ + * raid_type [#parity_dev] #raid_variable_params <raid_params> \
+ + * [locking "none"/"cluster"]
+ + * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
+ + *
+ + * log_type = "core"/"disk",
+ + * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
+ + * log_params = [dirty_log_path] region_size [[no]sync])
+ + *
+ + * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
+ + *
+ + * #parity_dev = N if raid_type = "raid4"
+ + * o N = -1: pick default = last device
+ + * o N >= 0 and < #raid_devs: parity device index
+ + *
+ + * #raid_variable_params = 0-7; raid_params (-1 = default):
+ + *   [chunk_size [#stripes [io_size [recover_io_size \
+ + *    [%recovery_bandwidth [recovery_switch [#recovery_stripes]]]]]]]
+ + *   o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
+ + *     and <= CHUNK_SIZE_MAX)
+ + *   o #stripes is number of stripes allocated to stripe cache
+ + *     (must be > 1 and < STRIPES_MAX)
+ + *   o io_size (io unit size per device in sectors; must be 2^^n and > 8)
+ + *   o recover_io_size (io unit size per device for recovery in sectors;
+ + *     must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
+ + *   o %recovery_bandwidth is the maximum amount spent on recovery during
+ + *     application io (1-100%)
+ + *   o recovery switch = [sync|nosync]
+ + *   o #recovery_stripes is the number of recovery stripes used for
+ + *     parallel recovery of the RAID set
+ + * If raid_variable_params = 0, defaults will be used.
+ + * Any raid_variable_param can be set to -1 to apply a default
+ + *
+ + * #raid_devs = N (N >= 3)
+ + *
+ + * #dev_to_initialize = N
+ + * -1: initialize parity on all devices
+ + * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
+ + * of a failed devices content after replacement
+ + *
+ + * <dev_path> = device_path (eg, /dev/sdd1)
+ + * <offset>   = begin at offset on <dev_path>
+ + *
+ + */
+ +#define       MIN_PARMS       13
+ +/*
+ + * Constructor: parse and validate the table line, allocate the RAID set
+ + * context, look up the component devices and start the io workqueue.
+ + *
+ + * Every argv index derived from user supplied counts is checked against
+ + * argc before it is dereferenced. Optional locking arguments are
+ + * accounted for in the device argument count and offset.
+ + *
+ + * Returns 0 on success or a negative error code; on failures after the
+ + * context got allocated, it is released via context_free().
+ + */
+ +static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ +{
+ +      int dev_to_init, dl_parms, i, locking_parms,
+ +          parity_parm, pi = -1, r, raid_devs;
+ +      sector_t tmp, sectors_per_dev;
+ +      struct dm_raid45_locking_type *locking;
+ +      struct raid_set *rs;
+ +      struct raid_type *raid_type;
+ +      struct variable_parms parms;
+ +
+ +      /* Ensure minimum number of parameters. */
+ +      if (argc < MIN_PARMS)
+ +              TI_ERR("Not enough parameters");
+ +
+ +      /* Fetch # of dirty log parameters. */
+ +      if (sscanf(argv[1], "%d", &dl_parms) != 1 ||
+ +          !range_ok(dl_parms, 1, 4711)) /* ;-) */
+ +              TI_ERR("Bad dirty log parameters number");
+ +
+ +      /*
+ +       * MIN_PARMS covers a table with one dirty log parameter: log_type,
+ +       * #log_params, raid_type, #raid_variable_params, #raid_devs,
+ +       * #dev_to_initialize and 3 device/offset pairs. Require the
+ +       * additional dirty log parameters too, which bounds all argv
+ +       * accesses up to and including the variable RAID parameters
+ +       * (at most 1 parity index + 7 values <= 4 pairs worth of slots).
+ +       */
+ +      if (argc < (unsigned) (MIN_PARMS - 1 + dl_parms))
+ +              TI_ERR("Not enough parameters");
+ +
+ +      /* Check raid_type. */
+ +      raid_type = get_raid_type(argv[dl_parms + 2]);
+ +      if (!raid_type)
+ +              TI_ERR("Bad raid type");
+ +
+ +      /* In case of RAID4, parity drive is selectable. */
+ +      parity_parm = !!(raid_type->level == raid4);
+ +
+ +      /* Handle variable number of RAID parameters. */
+ +      r = get_raid_variable_parms(ti, argv + dl_parms + parity_parm + 3,
+ +                                  &parms);
+ +      if (r)
+ +              return r;
+ +
+ +      /*
+ +       * Index of either the "locking" keyword or #raid_devs. Two
+ +       * arguments are needed from here in both cases.
+ +       */
+ +      i = dl_parms + parity_parm + parms.raid_parms + 4;
+ +      if ((unsigned) i + 1 >= argc)
+ +              TI_ERR("Not enough parameters");
+ +
+ +      /* Handle any locking parameters. */
+ +      r = get_raid_locking_parms(ti, argv + i, &locking_parms, &locking);
+ +      if (r)
+ +              return r;
+ +
+ +      /* Skip locking arguments; #raid_devs and #dev_to_init follow. */
+ +      i += locking_parms;
+ +      if ((unsigned) i + 1 >= argc)
+ +              TI_ERR("Not enough parameters");
+ +
+ +      /* # of raid devices. */
+ +      if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
+ +          raid_devs < raid_type->minimal_devs)
+ +              TI_ERR("Invalid number of raid devices");
+ +
+ +      /* In case of RAID4, check parity drive index is in limits. */
+ +      if (raid_type->level == raid4) {
+ +              /* Fetch index of parity device. */
+ +              if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
+ +                  (pi != -1 && !range_ok(pi, 0, raid_devs - 1)))
+ +                      TI_ERR("Invalid RAID4 parity device index");
+ +      }
+ +
+ +      /*
+ +       * Index of device to initialize starts at 0
+ +       *
+ +       * o -1 -> don't initialize a selected device;
+ +       *         initialize parity conforming to algorithm
+ +       * o 0..raid_devs-1 -> initialize respective device
+ +       *   (used for reconstruction of a replaced device)
+ +       */
+ +      if (sscanf(argv[i + 1], "%d", &dev_to_init) != 1 ||
+ +          !range_ok(dev_to_init, -1, raid_devs - 1))
+ +              TI_ERR("Invalid number for raid device to initialize");
+ +
+ +      /*
+ +       * Check # of raid device arguments: everything behind
+ +       * #dev_to_init has to be device/offset pairs.
+ +       */
+ +      if (argc - i - 2 != 2 * raid_devs)
+ +              TI_ERR("Wrong number of raid device/offset arguments");
+ +
+ +      /*
+ +       * Check that the table length is divisible
+ +       * w/o rest by (raid_devs - parity_devs)
+ +       */
+ +      if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
+ +                    &sectors_per_dev))
+ +              TI_ERR("Target length not divisible by number of data devices");
+ +
+ +      /*
+ +       * Check that the device size is
+ +       * divisible w/o rest by chunk size
+ +       */
+ +      if (!multiple(sectors_per_dev, parms.chunk_size, &tmp))
+ +              TI_ERR("Device length not divisible by chunk_size");
+ +
+ +      /****************************************************************
+ +       * Now that we checked the constructor arguments ->
+ +       * let's allocate the RAID set
+ +       ****************************************************************/
+ +      rs = context_alloc(raid_type, &parms, raid_devs, sectors_per_dev,
+ +                         ti, dl_parms, argv);
+ +      if (IS_ERR(rs))
+ +              return PTR_ERR(rs);
+ +
+ +
+ +      rs->set.dev_to_init = rs->set.dev_to_init_parm = dev_to_init;
+ +      rs->set.pi = rs->set.pi_parm = pi;
+ +
+ +      /* Set RAID4 parity drive index. */
+ +      if (raid_type->level == raid4)
+ +              rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
+ +
+ +      recover_set_bandwidth(rs, parms.bandwidth);
+ +
+ +      /* Use locking type to lock stripe access. */
+ +      rs->locking = locking;
+ +
+ +      /* Get the device/offset tuples, which start behind #dev_to_init. */
+ +      argv += i + 2;
+ +      r = dev_parms(rs, argv, &i);
+ +      if (r)
+ +              goto err;
+ +
+ +      /* Set backing device information (eg. read ahead). */
+ +      rs_set_read_ahead(rs, 2 * rs->set.chunk_size /* sectors per device */,
+ +                            2 /* # of stripes */);
+ +      rs_set_congested_fn(rs); /* Set congested function. */
+ +      SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
+ +      rs->xor.speed = xor_optimize(rs); /* Select best xor algorithm. */
+ +
+ +      /* Set for recovery of any nosync regions. */
+ +      if (parms.recovery)
+ +              SetRSRecover(rs);
+ +      else {
+ +              /*
+ +               * Need to free recovery stripe(s) here in case
+ +               * of nosync, because xor_optimize uses one.
+ +               */
+ +              set_start_recovery(rs);
+ +              set_end_recovery(rs);
+ +              stripe_recover_free(rs);
+ +      }
+ +
+ +      /*
+ +       * Enable parity chunk creation enforcement for
+ +       * little numbers of array members where it doesn't
+ +       * gain us performance to xor parity out and back in as
+ +       * with larger array member numbers.
+ +       */
+ +      if (rs->set.raid_devs <= rs->set.raid_type->minimal_devs + 1)
+ +              SetRSEnforceParityCreation(rs);
+ +
+ +      /*
+ +       * Make sure that dm core only hands maximum io size
+ +       * length down and pays attention to io boundaries.
+ +       */
+ +      ti->split_io = rs->set.io_size;
+ +      ti->private = rs;
+ +
+ +      /* Initialize work queue to handle this RAID set's io. */
+ +      r = rs_workqueue_init(rs);
+ +      if (r)
+ +              goto err;
+ +
+ +      rs_log(rs, rs->recover.io_size); /* Log information about RAID set. */
+ +      return 0;
+ +
+ +err:
+ +      /* i is the device count handed back by dev_parms(). */
+ +      context_free(rs, i);
+ +      return r;
+ +}
+ +
+ +/*
+ + * Destruct a raid mapping.
+ + *
+ + * Destroys the RAID set's io workqueue first and then releases the
+ + * RAID set context via context_free(), passing the number of RAID
+ + * devices of the set.
+ + */
+ +static void raid_dtr(struct dm_target *ti)
+ +{
+ +      struct raid_set *rs = ti->private;
+ +
+ +      destroy_workqueue(rs->io.wq);
+ +      context_free(rs, rs->set.raid_devs);
+ +}
+ +
+ +/*
+ + * Raid mapping function.
+ + *
+ + * Read-ahead bios are failed with -EIO. Any other bio is remapped
+ + * relative to the target's begin, accounted, put on the RAID set's
+ + * input list and left to the worker; DM_MAPIO_SUBMITTED is returned.
+ + */
+ +static int raid_map(struct dm_target *ti, struct bio *bio,
+ +                  union map_info *map_context)
+ +{
+ +      struct raid_set *rs;
+ +
+ +      /* I don't want to waste stripe cache capacity. */
+ +      if (bio_rw(bio) == READA)
+ +              return -EIO;
+ +
+ +      rs = ti->private;
+ +
+ +      /*
+ +       * Get io reference to be waiting for to drop
+ +       * to zero on device suspension/destruction.
+ +       */
+ +      io_get(rs);
+ +      bio->bi_sector -= ti->begin;    /* Remap sector. */
+ +
+ +      /*
+ +       * REMOVEME: statistics.
+ +       *
+ +       * Account before queueing: once the bio is on the input list,
+ +       * it presumably belongs to the worker, which may have ended it
+ +       * by the time we get to look at it again.
+ +       */
+ +      atomic_inc(rs->stats + (bio_data_dir(bio) == READ ?
+ +                              S_BIOS_READ : S_BIOS_WRITE));
+ +
+ +      /* Queue io to RAID set. */
+ +      mutex_lock(&rs->io.in_lock);
+ +      bio_list_add(&rs->io.in, bio);
+ +      mutex_unlock(&rs->io.in_lock);
+ +
+ +      /* Wake daemon to process input list. */
+ +      wake_do_raid(rs);
+ +
+ +      return DM_MAPIO_SUBMITTED;      /* Handle later. */
+ +}
+ +
+ +/*
+ + * Device suspend: flag the RAID set suspended, stop recovery if it is
+ + * enabled, quiesce the worker, wait for all ios being processed and
+ + * finally presuspend the dirty log if its type provides the method.
+ + */
+ +static void raid_presuspend(struct dm_target *ti)
+ +{
+ +      struct raid_set *rs = ti->private;
+ +      struct dm_dirty_log *log = rs->recover.dl;
+ +
+ +      SetRSSuspend(rs);
+ +
+ +      if (RSRecover(rs))
+ +              dm_rh_stop_recovery(rs->recover.rh);
+ +
+ +      cancel_delayed_work(&rs->io.dws_do_raid);
+ +      flush_workqueue(rs->io.wq);
+ +      wait_ios(rs);   /* Wait for completion of all ios being processed. */
+ +
+ +      if (!log->type->presuspend)
+ +              return;
+ +
+ +      /* FIXME: need better error handling. */
+ +      if (log->type->presuspend(log))
+ +              DMWARN("log presuspend failed");
+ +}
+ +
+ +/* Postsuspend the dirty log if its type provides the method. */
+ +static void raid_postsuspend(struct dm_target *ti)
+ +{
+ +      struct raid_set *rs = ti->private;
+ +      struct dm_dirty_log *log = rs->recover.dl;
+ +
+ +      if (!log->type->postsuspend)
+ +              return;
+ +
+ +      /* FIXME: need better error handling. */
+ +      if (log->type->postsuspend(log))
+ +              DMWARN("log postsuspend failed");
+ +}
+ +
+ +/*
+ + * Device resume: resume the dirty log, recalculate the number of
+ + * regions still to recover, restart recovery if it is enabled and
+ + * clear the suspend flag of the RAID set.
+ + */
+ +static void raid_resume(struct dm_target *ti)
+ +{
+ +      struct raid_set *rs = ti->private;
+ +      struct recover *rec = &rs->recover;
+ +      struct dm_dirty_log *log = rec->dl;
+ +
+ +      DMINFO("%s...", __func__);
+ +
+ +      /* Resume dirty log. */
+ +      if (log->type->resume) {
+ +              /* FIXME: need better error handling. */
+ +              if (log->type->resume(log))
+ +                      DMWARN("log resume failed");
+ +      }
+ +
+ +      /* Regions the log does not report in sync are left to recover. */
+ +      rec->nr_regions_to_recover =
+ +              rec->nr_regions - log->type->get_sync_count(log);
+ +
+ +      /* Restart any unfinished recovery. */
+ +      if (RSRecover(rs)) {
+ +              set_start_recovery(rs);
+ +              dm_rh_start_recovery(rec->rh);
+ +      }
+ +
+ +      ClearRSSuspend(rs);
+ +}
+ +
+ +/*
+ + * Return stripe cache size.
+ + *
+ + * The byte count is the current number of stripes (rs->sc.stripes)
+ + * multiplied by a per-stripe sum of sizeof(struct stripe),
+ + * sizeof(struct stripe_chunk), sizeof(struct page_list), the io size
+ + * in bytes times the number of RAID devices and - only while
+ + * rs->recover.end_jiffies is zero - the number of recovery stripes
+ + * times the bytes of (raid_devs * recovery io size).
+ + * to_sector() converts the result.
+ + *
+ + * NOTE(review): as parenthesized, only the io size is multiplied by
+ + * the number of RAID devices; the struct stripe_chunk and struct
+ + * page_list sizes are counted once per stripe, and the recovery term
+ + * is multiplied by the number of stripes. Confirm that is intended.
+ + */
+ +static unsigned sc_size(struct raid_set *rs)
+ +{
+ +      return to_sector(atomic_read(&rs->sc.stripes) *
+ +                       (sizeof(struct stripe) +
+ +                        (sizeof(struct stripe_chunk) +
+ +                         (sizeof(struct page_list) +
+ +                          to_bytes(rs->set.io_size) *
+ +                          rs->set.raid_devs)) +
+ +                        (rs->recover.end_jiffies ?
+ +                         0 : rs->recover.recovery_stripes *
+ +                         to_bytes(rs->set.raid_devs * rs->recover.io_size))));
+ +}
+ +
+ +/*
+ + * REMOVEME: status output for development.
+ + *
+ + * Appends version, xor function, io and stripe figures, the statistics
+ + * counters and recovery progress to @result, starting at offset *@size.
+ + * The offset behind the emitted text is stored back into *@size.
+ + */
+ +static void raid_devel_stats(struct dm_target *ti, char *result,
+ +                           unsigned *size, unsigned maxlen)
+ +{
+ +      unsigned sz = *size;    /* Used by DMEMIT() together with result. */
+ +      unsigned long j;
+ +      char buf[BDEVNAME_SIZE];
+ +      struct stats_map *sm;
+ +      struct raid_set *rs = ti->private;
+ +      struct recover *rec = &rs->recover;
+ +      struct timespec ts;
+ +
+ +      DMEMIT("%s %s=%u bw=%u\n",
+ +             version, rs->xor.f->name, rs->xor.chunks, rs->recover.bandwidth);
+ +      DMEMIT("act_ios=%d ", io_ref(rs));
+ +      DMEMIT("act_ios_max=%d\n", atomic_read(&rs->io.in_process_max));
+ +      DMEMIT("act_stripes=%d ", sc_active(&rs->sc));
+ +      DMEMIT("act_stripes_max=%d\n",
+ +             atomic_read(&rs->sc.active_stripes_max));
+ +
+ +      for (sm = stats_map; sm < ARRAY_END(stats_map); sm++)
+ +              DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
+ +
+ +      DMEMIT(" checkovr=%s\n", RSCheckOverwrite(rs) ? "on" : "off");
+ +      DMEMIT("sc=%u/%u/%u/%u/%u/%u/%u\n", rs->set.chunk_size,
+ +             atomic_read(&rs->sc.stripes), rs->set.io_size,
+ +             rec->recovery_stripes, rec->io_size, rs->sc.hash.buckets,
+ +             sc_size(rs));
+ +
+ +      /* Recovery time so far, or total if recovery has ended. */
+ +      j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
+ +          rec->start_jiffies;
+ +      jiffies_to_timespec(j, &ts);
+ +
+ +      /*
+ +       * Print seconds with two decimals. The nanoseconds have to be
+ +       * scaled to hundredths of a second and zero padded; cutting the
+ +       * unpadded nanoseconds after 2 digits would show eg. 1s and
+ +       * 5000000ns as "1.50" rather than "1.00".
+ +       */
+ +      snprintf(buf, sizeof(buf), "%ld.%02ld",
+ +               ts.tv_sec, ts.tv_nsec / (NSEC_PER_SEC / 100));
+ +
+ +      DMEMIT("rg=%llu/%llu/%llu/%u %s\n",
+ +             (unsigned long long) rec->nr_regions_recovered,
+ +             (unsigned long long) rec->nr_regions_to_recover,
+ +             (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
+ +
+ +      *size = sz;
+ +}
+ +
+ +/*
+ + * Status interface.
+ + *
+ + * STATUSTYPE_INFO:  optional development statistics, #raid_devs, the
+ + *                   device numbers, per device state characters
+ + *                   ('A'/'D' plus 'p' for the parity and 'i' for the
+ + *                   device to initialize), in-sync/total regions and
+ + *                   the dirty log status.
+ + * STATUSTYPE_TABLE: dirty log table, raid type, the variable RAID
+ + *                   parameters, #raid_devs, the device to initialize
+ + *                   and all device/offset pairs.
+ + *
+ + * Always returns 0.
+ + */
+ +static int raid_status(struct dm_target *ti, status_type_t type,
+ +                     char *result, unsigned maxlen)
+ +{
+ +      unsigned n, sz = 0;     /* sz/result/maxlen are used by DMEMIT(). */
+ +      char devname[BDEVNAME_SIZE];
+ +      struct raid_set *rs = ti->private;
+ +      struct dm_dirty_log *dl = rs->recover.dl;
+ +      /* Positional parameter values; -2 marks the [no]sync switch. */
+ +      int parm_values[] = {
+ +              rs->set.chunk_size_parm,
+ +              rs->sc.stripes_parm,
+ +              rs->set.io_size_parm,
+ +              rs->recover.io_size_parm,
+ +              rs->recover.bandwidth_parm,
+ +              -2,
+ +              rs->recover.recovery_stripes,
+ +      };
+ +
+ +      if (type == STATUSTYPE_INFO) {
+ +              /* REMOVEME: statistics. */
+ +              if (RSDevelStats(rs))
+ +                      raid_devel_stats(ti, result, &sz, maxlen);
+ +
+ +              DMEMIT("%u ", rs->set.raid_devs);
+ +
+ +              for (n = 0; n < rs->set.raid_devs; n++)
+ +                      DMEMIT("%s ",
+ +                             format_dev_t(devname,
+ +                                          rs->dev[n].dev->bdev->bd_dev));
+ +
+ +              DMEMIT("2 ");
+ +              for (n = 0; n < rs->set.raid_devs; n++) {
+ +                      DMEMIT("%c", DevFailed(rs->dev + n) ? 'D' : 'A');
+ +
+ +                      if (n == rs->set.pi)
+ +                              DMEMIT("p");
+ +
+ +                      if (n == rs->set.dev_to_init)
+ +                              DMEMIT("i");
+ +              }
+ +
+ +              DMEMIT(" %llu/%llu ",
+ +                    (unsigned long long) dl->type->get_sync_count(dl),
+ +                    (unsigned long long) rs->recover.nr_regions);
+ +
+ +              sz += dl->type->status(dl, type, result+sz, maxlen-sz);
+ +      } else if (type == STATUSTYPE_TABLE) {
+ +              /* The dirty log table comes first. */
+ +              sz = dl->type->status(dl, type, result, maxlen);
+ +              DMEMIT("%s %u ", rs->set.raid_type->name, rs->set.raid_parms);
+ +
+ +              for (n = 0; n < rs->set.raid_parms; n++) {
+ +                      if (parm_values[n] > -2)
+ +                              DMEMIT("%d ", parm_values[n]);
+ +                      else
+ +                              DMEMIT("%s ", rs->recover.recovery ?
+ +                                            "sync" : "nosync");
+ +              }
+ +
+ +              DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
+ +
+ +              for (n = 0; n < rs->set.raid_devs; n++)
+ +                      DMEMIT("%s %llu ",
+ +                             format_dev_t(devname,
+ +                                          rs->dev[n].dev->bdev->bd_dev),
+ +                             (unsigned long long) rs->dev[n].start);
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Message interface
+ + */
+ +/*
+ + * Turn a delta into an absolute value.
+ + *
+ + * @action selects how @r relates to the actual value @act:
+ + * "set" returns @r as is, "grow" returns @r + @act and "shrink"
+ + * returns @act - @r. Actions may be abbreviated down to 2 characters.
+ + * Any other action yields -EINVAL.
+ + */
+ +static int _absolute(char *action, int act, int r)
+ +{
+ +      size_t cmp_len = strlen(action);
+ +
+ +      /* Compare at least 2 characters. */
+ +      cmp_len = cmp_len < 2 ? 2 : cmp_len;
+ +
+ +      if (!strncmp("set", action, cmp_len))
+ +              return r;
+ +
+ +      if (!strncmp("grow", action, cmp_len))
+ +              return r + act;
+ +
+ +      if (!strncmp("shrink", action, cmp_len))
+ +              return act - r;
+ +
+ +      return -EINVAL;
+ +}
+ +
+ +/*
+ + * Change recovery io bandwidth.
+ + *
+ + * argv[0] is the action (see _absolute()), argv[1] the percentage.
+ + * Both the given and the resulting absolute value have to be within
+ + * BANDWIDTH_MIN..BANDWIDTH_MAX. Returns 0 on success, else -EINVAL.
+ + */
+ +static int bandwidth_change(struct raid_set *rs, int argc, char **argv,
+ +                          enum raid_set_flags flag)
+ +{
+ +      int current_bw = rs->recover.bandwidth, new_bw;
+ +
+ +      if (argc != 2)
+ +              return -EINVAL;
+ +
+ +      if (sscanf(argv[1], "%d", &new_bw) != 1 ||
+ +          !range_ok(new_bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+ +              return -EINVAL;
+ +
+ +      /* Make delta bandwidth absolute. */
+ +      new_bw = _absolute(argv[0], current_bw, new_bw);
+ +
+ +      /* Check range. */
+ +      if (!range_ok(new_bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+ +              return -EINVAL;
+ +
+ +      recover_set_bandwidth(rs, new_bw);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Set/reset development feature flags.
+ + *
+ + * argv[0] is "on", "off" or "reset" (at least 2 characters):
+ + * o "on":    -EPERM if @flag was set already
+ + * o "off":   -EPERM if @flag was not set
+ + * o "reset": for RS_DEVEL_STATS, reset the statistics if the flag
+ + *            is set, else -EPERM; any other flag just gets set
+ + * Anything else results in -EINVAL.
+ + */
+ +static int devel_flags(struct raid_set *rs, int argc, char **argv,
+ +                     enum raid_set_flags flag)
+ +{
+ +      size_t cmp_len;
+ +      char *action;
+ +
+ +      if (argc != 1)
+ +              return -EINVAL;
+ +
+ +      action = argv[0];
+ +      cmp_len = strlen(action);
+ +      if (cmp_len < 2)
+ +              cmp_len = 2;
+ +
+ +      if (!strncmp(action, "on", cmp_len))
+ +              return test_and_set_bit(flag, &rs->io.flags) ? -EPERM : 0;
+ +
+ +      if (!strncmp(action, "off", cmp_len))
+ +              return test_and_clear_bit(flag, &rs->io.flags) ? 0 : -EPERM;
+ +
+ +      if (strncmp(action, "reset", cmp_len))
+ +              return -EINVAL;
+ +
+ +      if (flag != RS_DEVEL_STATS) {
+ +              set_bit(flag, &rs->io.flags);
+ +              return 0;
+ +      }
+ +
+ +      /* Statistics can only be reset while they are switched on. */
+ +      if (!test_bit(flag, &rs->io.flags))
+ +              return -EPERM;
+ +
+ +      stats_reset(rs);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Resize the stripe cache.
+ + *
+ + * argv[0] is the action (see _absolute()), argv[1] a positive number
+ + * of stripes. The request is only recorded here and the worker woken
+ + * to carry it out. Returns -EPERM while a previous request is still
+ + * pending and -EINVAL on bad or unchanged sizes.
+ + */
+ +static int sc_resize(struct raid_set *rs, int argc, char **argv,
+ +                   enum raid_set_flags flag)
+ +{
+ +      int current_stripes, stripes;
+ +
+ +      if (argc != 2)
+ +              return -EINVAL;
+ +
+ +      /* Deny permission in case the daemon is still resizing!. */
+ +      if (atomic_read(&rs->sc.stripes_to_set))
+ +              return -EPERM;
+ +
+ +      if (sscanf(argv[1], "%d", &stripes) != 1 || stripes <= 0)
+ +              return -EINVAL;
+ +
+ +      /* Make delta stripes absolute. */
+ +      current_stripes = atomic_read(&rs->sc.stripes);
+ +      stripes = _absolute(argv[0], current_stripes, stripes);
+ +
+ +      /*
+ +       * Check range and that the # of stripes changes.
+ +       * We leave the resizing to the worker.
+ +       */
+ +      if (!range_ok(stripes, STRIPES_MIN, STRIPES_MAX) ||
+ +          stripes == atomic_read(&rs->sc.stripes))
+ +              return -EINVAL;
+ +
+ +      atomic_set(&rs->sc.stripes_to_set, stripes);
+ +      wake_do_raid(rs);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Change xor algorithm and number of chunks.
+ + *
+ + * argv[0] is the exact name of an entry in xor_funcs, argv[1] the
+ + * number of chunks (2..XOR_CHUNKS_MAX and not more than the number
+ + * of RAID devices). After switching, the speed of the new setting
+ + * is measured on a temporary stripe if one can be allocated.
+ + *
+ + * Returns 0 on success, else -EINVAL.
+ + */
+ +static int xor_set(struct raid_set *rs, int argc, char **argv,
+ +                 enum raid_set_flags flag)
+ +{
+ +      int chunks;
+ +      char *algorithm;
+ +      struct xor_func *f = ARRAY_END(xor_funcs);
+ +
+ +      if (argc != 2)
+ +              return -EINVAL;
+ +
+ +      algorithm = argv[0];
+ +      if (sscanf(argv[1], "%d", &chunks) != 1 ||
+ +          !range_ok(chunks, 2, XOR_CHUNKS_MAX) ||
+ +          chunks > rs->set.raid_devs)
+ +              return -EINVAL;
+ +
+ +      while (f-- > xor_funcs) {
+ +              unsigned io_size = 0;
+ +              struct stripe *stripe;
+ +
+ +              if (strcmp(algorithm, f->name))
+ +                      continue;
+ +
+ +              DMINFO("xor: %s", f->name);
+ +              if (f->f == xor_blocks_wrapper &&
+ +                  chunks > MAX_XOR_BLOCKS + 1) {
+ +                      DMERR("chunks > MAX_XOR_BLOCKS + 1");
+ +                      return -EINVAL;
+ +              }
+ +
+ +              /*
+ +               * Allocate the measuring stripe only now that the request
+ +               * is known to be valid, so the error path above has
+ +               * nothing to free.
+ +               */
+ +              stripe = stripe_alloc(&rs->sc, rs->sc.mem_cache_client,
+ +                                    SC_GROW);
+ +
+ +              mutex_lock(&rs->io.xor_lock);
+ +              rs->xor.f = f;
+ +              rs->xor.chunks = chunks;
+ +              rs->xor.speed = 0;
+ +              mutex_unlock(&rs->io.xor_lock);
+ +
+ +              /* Without a stripe, the speed stays 0. */
+ +              if (stripe) {
+ +                      rs->xor.speed = xor_speed(stripe);
+ +                      io_size = stripe->io.size;
+ +                      stripe_free(stripe, rs->sc.mem_cache_client);
+ +              }
+ +
+ +              rs_log(rs, io_size);
+ +              return 0;
+ +      }
+ +
+ +      return -EINVAL;
+ +}
+ +
+ +/*
+ + * Allow writes again after they got prohibited because of a
+ + * device failure.
+ + *
+ + * To be called once userspace has updated its metadata state in
+ + * reaction to the event thrown during device failure processing.
+ + *
+ + * Returns 0 and wakes the worker if writes were prohibited,
+ + * -EPERM otherwise.
+ + */
+ +static int allow_writes(struct raid_set *rs, int argc, char **argv,
+ +                      enum raid_set_flags flag)
+ +{
+ +      if (!TestClearRSProhibitWrites(rs))
+ +              return -EPERM;
+ +
+ +      DMINFO("%s waking", __func__);
+ +      wake_do_raid(rs);
+ +      return 0;
+ +}
+ +
+ +/* Parse the RAID message. */
+ +/*
+ + * Message names can be abbreviated to 3 or more characters,
+ + * their action arguments to 2 or more:
+ + *
+ + * 'all[ow_writes]'
+ + * 'ban[dwidth] {se[t],gr[ow],sh[rink]} #'    # e.g. 'ban se 50'
+ + * 'ove[rwrite] {on,of[f],re[set]}'           # e.g. 'ove of'
+ + * 'sta[tistics] {on,of[f],re[set]}'          # e.g. 'stat of'
+ + * 'str[ipe_cache] {se[t],gr[ow],sh[rink]} #' # e.g. 'stripe set 1024'
+ + * 'xor algorithm #chunks'                    # e.g. 'xor xor_8 5'
+ + *
+ + */
+ +/*
+ + * Dispatch a message to its handler.
+ + *
+ + * argv[0] is matched against the message names on at least 3
+ + * characters; the handler receives the remaining arguments plus the
+ + * flag of its table entry. Returns the handler's result or -EINVAL
+ + * for no or an unknown message.
+ + */
+ +static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
+ +{
+ +      size_t cmp_len, idx;
+ +      struct raid_set *rs;
+ +      struct {
+ +              const char *name;
+ +              int (*f) (struct raid_set *rs, int argc, char **argv,
+ +                        enum raid_set_flags flag);
+ +              enum raid_set_flags flag;
+ +      } msg_descr[] = {
+ +              { "allow_writes", allow_writes, 0 },
+ +              { "bandwidth", bandwidth_change, 0 },
+ +              { "overwrite", devel_flags, RS_CHECK_OVERWRITE },
+ +              { "statistics", devel_flags, RS_DEVEL_STATS },
+ +              { "stripe_cache", sc_resize, 0 },
+ +              { "xor", xor_set, 0 },
+ +      };
+ +
+ +      if (!argc)
+ +              return -EINVAL;
+ +
+ +      rs = ti->private;
+ +      cmp_len = strlen(argv[0]);
+ +      if (cmp_len < 3)
+ +              cmp_len = 3;
+ +
+ +      /* Search the table from its last entry to its first. */
+ +      for (idx = ARRAY_END(msg_descr) - msg_descr; idx--; ) {
+ +              if (!strncmp(argv[0], msg_descr[idx].name, cmp_len))
+ +                      return msg_descr[idx].f(rs, argc - 1, argv + 1,
+ +                                              msg_descr[idx].flag);
+ +      }
+ +
+ +      return -EINVAL;
+ +}
+ +/*
+ + * END message interface
+ + */
+ +
+ +/*
+ + * Provide io hints.
+ + *
+ + * blk_limits_io_min() and blk_limits_io_opt() take byte counts, whereas
+ + * the chunk size is presumably kept in sectors (the constructor checks
+ + * the per device sector count against it and derives the read ahead
+ + * sectors from it), hence convert before handing it down.
+ + *
+ + * Minimum io size is one chunk, optimal io size one chunk on each
+ + * data device.
+ + */
+ +static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
+ +{
+ +      struct raid_set *rs = ti->private;
+ +      unsigned chunk_bytes = to_bytes(rs->set.chunk_size);
+ +
+ +      blk_limits_io_min(limits, chunk_bytes);
+ +      blk_limits_io_opt(limits, chunk_bytes * rs->set.data_devs);
+ +}
+ +
+ +static struct target_type raid_target = { /* Registered by dm_raid_init(). */
+ +      .name = "raid45",
+ +      .version = {1, 0, 0},
+ +      .module = THIS_MODULE,
+ +      .ctr = raid_ctr, /* Construct a mapping from a table line. */
+ +      .dtr = raid_dtr, /* Destruct a mapping. */
+ +      .map = raid_map, /* Queue bios to the RAID set. */
+ +      .presuspend = raid_presuspend,
+ +      .postsuspend = raid_postsuspend,
+ +      .resume = raid_resume,
+ +      .status = raid_status, /* Info and table output. */
+ +      .message = raid_message, /* Runtime tuning messages. */
+ +      .io_hints = raid_io_hints,
+ +};
+ +
+ +/*
+ + * Log the outcome of target (un)registration: @good_msg together
+ + * with the version if @r is zero, else the failure with @bad_msg
+ + * as the prefix of "register" and the error code.
+ + */
+ +static void init_exit(const char *bad_msg, const char *good_msg, int r)
+ +{
+ +      if (!r) {
+ +              DMINFO("%s %s", good_msg, version);
+ +              return;
+ +      }
+ +
+ +      DMERR("Failed to %sregister target [%d]", bad_msg, r);
+ +}
+ +
+ +/* Register the raid45 target and log the result; returns that result. */
+ +static int __init dm_raid_init(void)
+ +{
+ +      int r;
+ +
+ +      r = dm_register_target(&raid_target);
+ +      init_exit("", "initialized", r);
+ +
+ +      return r;
+ +}
+ +
+ +static void __exit dm_raid_exit(void)
+ +{
+ +      dm_unregister_target(&raid_target); /* Undo dm_register_target(). */
+ +      init_exit("un", "exit", 0); /* r == 0: logs "exit" and the version. */
+ +}
+ +
+ +/*
+ + * Module hooks: dm_raid_init() is the module's init and
+ + * dm_raid_exit() its exit function. The aliases make the
+ + * module known as "dm-raid4" and "dm-raid5" as well.
+ + */
+ +module_init(dm_raid_init);
+ +module_exit(dm_raid_exit);
+ +
+ +MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
+ +MODULE_AUTHOR("Heinz Mauelshagen <heinzm@redhat.com>");
+ +MODULE_LICENSE("GPL");
+ +MODULE_ALIAS("dm-raid4");
+ +MODULE_ALIAS("dm-raid5");
diff --cc drivers/md/dm-table.c
Simple merge
diff --cc drivers/md/dm.c
Simple merge
diff --cc drivers/misc/Kconfig
Simple merge
diff --cc drivers/net/ethernet/dec/tulip/tulip_core.c

index 0000000,9656dd0..dbdd4c5

mode 000000,100644..100644
--- /dev/null
--- 2/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@@ -1,0 -1,2008 +1,2012 @@@
+ /*    tulip_core.c: A DEC 21x4x-family ethernet driver for Linux.
+ 
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+ 
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+ 
+       Please submit bugs to http://bugzilla.kernel.org/ .
+ */
+ 
+ #define pr_fmt(fmt) "tulip: " fmt     /* string prepended to each format passed through pr_fmt() */
+ 
+ #define DRV_NAME      "tulip"
+ #ifdef CONFIG_TULIP_NAPI
+ #define DRV_VERSION    "1.1.15-NAPI" /* Keep at least for test; NAPI builds carry a distinct version string */
+ #else
+ #define DRV_VERSION   "1.1.15"
+ #endif
+ #define DRV_RELDATE   "Feb 27, 2007"  /* embedded in the version[] banner string */
+ 
+ 
+ #include <linux/module.h>
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include "tulip.h"
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/etherdevice.h>
+ #include <linux/delay.h>
+ #include <linux/mii.h>
+ #include <linux/crc32.h>
+ #include <asm/unaligned.h>
+ #include <asm/uaccess.h>
+ 
+ #ifdef CONFIG_SPARC
+ #include <asm/prom.h>
+ #endif
+ 
+ static char version[] __devinitdata =         /* banner built from DRV_VERSION and DRV_RELDATE */
+       "Linux Tulip driver version " DRV_VERSION " (" DRV_RELDATE ")\n";
+ 
+ /* A few user-configurable values. */
+ 
+ /* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+ static unsigned int max_interrupt_work = 25;
+ 
+ #define MAX_UNITS 8   /* length of each of the per-unit arrays below */
+ /* Used to pass the full-duplex flag, etc. */
+ static int full_duplex[MAX_UNITS];
+ static int options[MAX_UNITS];
+ static int mtu[MAX_UNITS];                    /* Jumbo MTU for interfaces. NOTE(review): no module_param for mtu is visible near the other parameters; confirm how it is set. */
+ 
+ /*
+  * The possible media types that can be set in options[] are listed here.
+  *
+  * The table is indexed by media type number; this file prints entries
+  * using dev->if_port and the EEPROM default media value as the index.
+  */
+ const char * const medianame[32] = {
+       "10baseT", "10base2", "AUI", "100baseTx",
+       "10baseT-FDX", "100baseTx-FDX", "100baseT4", "100baseFx",
+       "100baseFx-FDX", "MII 10baseT", "MII 10baseT-FDX", "MII",
+       "10baseT(forced)", "MII 100baseTx", "MII 100baseTx-FDX", "MII 100baseT4",
+       "MII 100baseFx-HDX", "MII 100baseFx-FDX", "Home-PNA 1Mbps", "Invalid-19",
+       "","","","", "","","","",  "","","","Transceiver reset",
+ };
+ 
+ /*
+  * Set the copy breakpoint for the copy-only-tiny-buffer Rx structure.
+  *
+  * The default is 1518 on the architectures listed below and 100 on all
+  * others; it is also exposed as the rx_copybreak module parameter.
+  */
+ #if defined(__alpha__) || defined(__arm__) || defined(__hppa__) || \
+       defined(CONFIG_SPARC) || defined(__ia64__) || \
+       defined(__sh__) || defined(__mips__)
+ static int rx_copybreak = 1518;
+ #else
+ static int rx_copybreak = 100;
+ #endif
+ 
+ /*
+   Set the bus performance register.
+       Typical: Set 16 longword cache alignment, no burst limit.
+       Cache alignment bits 15:14           Burst length 13:8
+               0000    No alignment  0x00000000 unlimited              0800 8 longwords
+               4000    8  longwords            0100 1 longword         1000 16 longwords
+               8000    16 longwords            0200 2 longwords        2000 32 longwords
+               C000    32  longwords           0400 4 longwords
+       Warning: many older 486 systems are broken and require setting 0x00A04800
+          8 longword cache alignment, 8 longword burst.
+       ToDo: Non-Intel setting could be better.
+ 
+       The value picked below is a per-architecture default; it is also
+       exposed as the csr0 module parameter.
+ */
+ 
+ #if defined(__alpha__) || defined(__ia64__)
+ static int csr0 = 0x01A00000 | 0xE000;
+ #elif defined(__i386__) || defined(__powerpc__) || defined(__x86_64__)
+ static int csr0 = 0x01A00000 | 0x8000;
+ #elif defined(CONFIG_SPARC) || defined(__hppa__)
+ /* The UltraSparc PCI controllers will disconnect at every 64-byte
+  * crossing anyway, so it makes no sense to tell Tulip to burst
+  * any more than that.
+  */
+ static int csr0 = 0x01A00000 | 0x9000;
+ #elif defined(__arm__) || defined(__sh__)
+ static int csr0 = 0x01A00000 | 0x4800;
+ #elif defined(__mips__)
+ static int csr0 = 0x00200000 | 0x4000;
+ #else
+ #warning Processor architecture undefined!
+ static int csr0 = 0x00A00000 | 0x4800;        /* fallback for architectures not listed above */
+ #endif
+ 
+ /* Operational parameters that usually are not changed. */
+ /* Time in jiffies before concluding the transmitter is hung. */
+ #define TX_TIMEOUT  (4*HZ)
+ 
+ 
+ MODULE_AUTHOR("The Linux Kernel Team");
+ MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver");
+ MODULE_LICENSE("GPL");
+ MODULE_VERSION(DRV_VERSION);
+ module_param(tulip_debug, int, 0);
+ module_param(max_interrupt_work, int, 0);     /* NOTE(review): the variable is declared 'unsigned int' but registered as 'int'; confirm the intended type */
+ module_param(rx_copybreak, int, 0);
+ module_param(csr0, int, 0);
+ module_param_array(options, int, NULL, 0);
+ module_param_array(full_duplex, int, NULL, 0);
+ 
+ #ifdef TULIP_DEBUG    /* a build-time TULIP_DEBUG overrides the default debug level */
+ int tulip_debug = TULIP_DEBUG;
+ #else
+ int tulip_debug = 1;  /* default debug level */
+ #endif
+ 
+ /*
+  * Generic media timer callback.
+  *
+  * @data: the struct net_device pointer, passed as an unsigned long.
+  *
+  * The timer itself does no media handling; it only queues
+  * tp->media_work, and only while the interface is running.
+  */
+ static void tulip_timer(unsigned long data)
+ {
+       struct net_device *netdev = (struct net_device *)data;
+       struct tulip_private *priv;
+ 
+       if (!netif_running(netdev))
+               return;
+ 
+       priv = netdev_priv(netdev);
+       schedule_work(&priv->media_work);
+ }
+ 
+ /*
+  * This table is used during operation for chip capabilities and the media
+  * timer.
+  *
+  * It is indexed via the values in 'enum chips'.
+  *
+  * Each populated entry lists, in order: the chip name, a size value (128
+  * or 256; presumably the I/O region size -- confirm against struct
+  * tulip_chip_table), what appears to be the interrupt mask read as
+  * .valid_intrs in tulip_up(), the capability flags, the media timer
+  * callback (read as .media_timer in tulip_down()) and, for some chips, a
+  * media work function.
+  */
+ 
+ struct tulip_chip_table tulip_tbl[] = {
+   { }, /* placeholder for array, slot unused currently */
+   { }, /* placeholder for array, slot unused currently */
+ 
+   /* DC21140 */
+   { "Digital DS21140 Tulip", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_PCI_MWI, tulip_timer,
+       tulip_media_task },
+ 
+   /* DC21142, DC21143 */
+   { "Digital DS21142/43 Tulip", 128, 0x0801fbff,
+       HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY
+       | HAS_INTR_MITIGATION | HAS_PCI_MWI, tulip_timer, t21142_media_task },
+ 
+   /* LC82C168 */
+   { "Lite-On 82c168 PNIC", 256, 0x0001fbef,
+       HAS_MII | HAS_PNICNWAY, pnic_timer, },
+ 
+   /* MX98713 */
+   { "Macronix 98713 PMAC", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer, },
+ 
+   /* MX98715 */
+   { "Macronix 98715 PMAC", 256, 0x0001ebef,
+       HAS_MEDIA_TABLE, mxic_timer, },
+ 
+   /* MX98725 */
+   { "Macronix 98725 PMAC", 256, 0x0001ebef,
+       HAS_MEDIA_TABLE, mxic_timer, },
+ 
+   /* AX88140 */
+   { "ASIX AX88140", 128, 0x0001fbff,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | MC_HASH_ONLY
+       | IS_ASIX, tulip_timer, tulip_media_task },
+ 
+   /* PNIC2 */
+   { "Lite-On PNIC-II", 256, 0x0801fbff,
+       HAS_MII | HAS_NWAY | HAS_8023X | HAS_PCI_MWI, pnic2_timer, },
+ 
+   /* COMET */
+   { "ADMtek Comet", 256, 0x0001abef,
+       HAS_MII | MC_HASH_ONLY | COMET_MAC_ADDR, comet_timer, },
+ 
+   /* COMPEX9881 */
+   { "Compex 9881 PMAC", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer, },
+ 
+   /* I21145 */
+   { "Intel DS21145 Tulip", 128, 0x0801fbff,
+       HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI
+       | HAS_NWAY | HAS_PCI_MWI, tulip_timer, tulip_media_task },
+ 
+   /* DM910X */
+ #ifdef CONFIG_TULIP_DM910X
+   { "Davicom DM9102/DM9102A", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI,
+       tulip_timer, tulip_media_task },
+ #else
+   { NULL },   /* keeps the following entries at the same index when DM910X support is compiled out */
+ #endif
+ 
+   /* RS7112 */
+   { "Conexant LANfinity", 256, 0x0001ebef,
+       HAS_MII | HAS_ACPI, tulip_timer, tulip_media_task },
+ 
+ };
+ 
+ 
+ static DEFINE_PCI_DEVICE_TABLE(tulip_pci_tbl) = {     /* PCI IDs handled by this driver; the last field of each entry names the chip type */
+       { 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21140 },
+       { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21143 },
+       { 0x11AD, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, LC82C168 },
+       { 0x10d9, 0x0512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98713 },
+       { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 },
+ /*    { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98725 },*/
+       { 0x125B, 0x1400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AX88140 },
+       { 0x11AD, 0xc115, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PNIC2 },
+       { 0x1317, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1317, 0x0985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1317, 0x1985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1317, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x13D1, 0xAB02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x13D1, 0xAB03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x13D1, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x104A, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x104A, 0x2774, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1259, 0xa120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x11F6, 0x9881, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 },
+       { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 },
+ #ifdef CONFIG_TULIP_DM910X
+       { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X },
+       { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X },
+ #endif
+       { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 },
+       { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1186, 0x1541, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1186, 0x1561, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1186, 0x1591, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x14f1, 0x1803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CONEXANT },
+       { 0x1626, 0x8410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1737, 0xAB09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1737, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x17B3, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x10b7, 0x9300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, /* 3Com 3CSOHO100B-TX */
+       { 0x14ea, 0xab08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, /* Planex FNW-3602-TX */
+       { 0x1414, 0x0001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, /* Microsoft MN-120 */
+       { 0x1414, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { } /* terminate list */
+ };
+ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl);
+ 
+ 
+ /*
+  * A full-duplex map for media types.
+  *
+  * Indexed by media type number (e.g. dev->if_port or mleaf[].media); each
+  * entry is a bitmask that this file tests against MediaIsMII and
+  * MediaAlwaysFD.  Only the first 20 entries are listed; the remaining
+  * ones are implicitly zero.
+  */
+ const char tulip_media_cap[32] =
+ {0,0,0,16,  3,19,16,24,  27,4,7,5, 0,20,23,20,  28,31,0,0, };
+ 
+ static void tulip_tx_timeout(struct net_device *dev);
+ static void tulip_init_ring(struct net_device *dev);
+ static void tulip_free_ring(struct net_device *dev);
+ static netdev_tx_t tulip_start_xmit(struct sk_buff *skb,
+                                         struct net_device *dev);
+ static int tulip_open(struct net_device *dev);
+ static int tulip_close(struct net_device *dev);
+ static void tulip_up(struct net_device *dev);
+ static void tulip_down(struct net_device *dev);
+ static struct net_device_stats *tulip_get_stats(struct net_device *dev);
+ static int private_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+ static void set_rx_mode(struct net_device *dev);
+ static void tulip_set_wolopts(struct pci_dev *pdev, u32 wolopts);
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ static void poll_tulip(struct net_device *dev);
+ #endif
+ 
+ /*
+  * Select the chip's power mode through the CFDD PCI config register.
+  *
+  * @tp:     driver private data
+  * @sleep:  non-zero requests sleep mode (takes precedence over @snooze)
+  * @snooze: non-zero requests snooze mode when @sleep is zero
+  *
+  * Does nothing on chips without HAS_ACPI.  The register is written back
+  * only when the sleep/snooze bits actually change.
+  */
+ static void tulip_set_power_state (struct tulip_private *tp,
+                                  int sleep, int snooze)
+ {
+       u32 cur, want;
+ 
+       if (!(tp->flags & HAS_ACPI))
+               return;
+ 
+       pci_read_config_dword (tp->pdev, CFDD, &cur);
+ 
+       /* Start from "fully awake", then set at most one of the two bits. */
+       want = cur & ~(CFDD_Sleep | CFDD_Snooze);
+       if (sleep)
+               want |= CFDD_Sleep;
+       else if (snooze)
+               want |= CFDD_Snooze;
+ 
+       if (want != cur)
+               pci_write_config_dword (tp->pdev, CFDD, want);
+ }
+ 
+ 
+ static void tulip_up(struct net_device *dev)
+ {     /* Bring the chip up: wake it, reset it, program the descriptor ring
+        * addresses and the address filter, pick a starting media type,
+        * then enable Tx/Rx and interrupts and arm the media timer.
+        */
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       int next_tick = 3*HZ;   /* delay before the first media timer run */
+       u32 reg;
+       int i;
+ 
+ #ifdef CONFIG_TULIP_NAPI
+       napi_enable(&tp->napi);
+ #endif
+ 
+       /* Wake the chip from sleep/snooze mode. */
+       tulip_set_power_state (tp, 0, 0);
+ 
+       /* Disable all WOL events */
+       pci_enable_wake(tp->pdev, PCI_D3hot, 0);
+       pci_enable_wake(tp->pdev, PCI_D3cold, 0);
+       tulip_set_wolopts(tp->pdev, 0);
+ 
+       /* On some chip revs we must set the MII/SYM port before the reset!? */
+       if (tp->mii_cnt  ||  (tp->mtable  &&  tp->mtable->has_mii))
+               iowrite32(0x00040000, ioaddr + CSR6);
+ 
+       /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */
+       iowrite32(0x00000001, ioaddr + CSR0);
+       pci_read_config_dword(tp->pdev, PCI_COMMAND, &reg);  /* flush write */
+       udelay(100);
+ 
+       /* Deassert reset.
+          Wait the specified 50 PCI cycles after a reset by initializing
+          Tx and Rx queues and the address filter list. */
+       iowrite32(tp->csr0, ioaddr + CSR0);
+       pci_read_config_dword(tp->pdev, PCI_COMMAND, &reg);  /* flush write */
+       udelay(100);
+ 
+       if (tulip_debug > 1)
+               netdev_dbg(dev, "tulip_up(), irq==%d\n", dev->irq);
+ 
+       iowrite32(tp->rx_ring_dma, ioaddr + CSR3);
+       iowrite32(tp->tx_ring_dma, ioaddr + CSR4);
+       tp->cur_rx = tp->cur_tx = 0;
+       tp->dirty_rx = tp->dirty_tx = 0;
+ 
+       if (tp->flags & MC_HASH_ONLY) {
+               /* These chips take the station address through registers. */
+               u32 addr_low = get_unaligned_le32(dev->dev_addr);
+               u32 addr_high = get_unaligned_le16(dev->dev_addr + 4);
+               if (tp->chip_id == AX88140) {
+                       iowrite32(0, ioaddr + CSR13);
+                       iowrite32(addr_low,  ioaddr + CSR14);
+                       iowrite32(1, ioaddr + CSR13);
+                       iowrite32(addr_high, ioaddr + CSR14);
+               } else if (tp->flags & COMET_MAC_ADDR) {
+                       iowrite32(addr_low,  ioaddr + 0xA4);
+                       iowrite32(addr_high, ioaddr + 0xA8);
+                       iowrite32(0, ioaddr + CSR27);
+                       iowrite32(0, ioaddr + CSR28);
+               }
+       } else {
+               /* This is set_rx_mode(), but without starting the transmitter. */
+               u16 *eaddrs = (u16 *)dev->dev_addr;
+               u16 *setup_frm = &tp->setup_frame[15*6];
+               dma_addr_t mapping;
+ 
+               /* 21140 bug: you must add the broadcast address. */
+               memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame));
+               /* Fill the final entry of the table with our physical address
+                  (each 16-bit word of the address is stored twice). */
+               *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+               *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+               *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+ 
+               mapping = pci_map_single(tp->pdev, tp->setup_frame,
+                                        sizeof(tp->setup_frame),
+                                        PCI_DMA_TODEVICE);     /* NOTE(review): the mapping result is not checked for failure */
+               tp->tx_buffers[tp->cur_tx].skb = NULL;  /* a NULL skb is how tulip_clean_tx_ring() recognises setup frames */
+               tp->tx_buffers[tp->cur_tx].mapping = mapping;
+ 
+               /* Put the setup frame on the Tx list. */
+               tp->tx_ring[tp->cur_tx].length = cpu_to_le32(0x08000000 | 192);
+               tp->tx_ring[tp->cur_tx].buffer1 = cpu_to_le32(mapping);
+               tp->tx_ring[tp->cur_tx].status = cpu_to_le32(DescOwned);
+ 
+               tp->cur_tx++;
+       }
+ 
+       tp->saved_if_port = dev->if_port;       /* restored by tulip_down() */
+       if (dev->if_port == 0)
+               dev->if_port = tp->default_port;
+ 
+       /* Allow selecting a default media.  'i' ends up as the index into
+          tp->mtable->mleaf[] that is stored in tp->cur_index below; it stays
+          0 when there is no media table. */
+       i = 0;
+       if (tp->mtable == NULL)
+               goto media_picked;
+       if (dev->if_port) {
+               int looking_for = tulip_media_cap[dev->if_port] & MediaIsMII ? 11 :
+                       (dev->if_port == 12 ? 0 : dev->if_port);
+               for (i = 0; i < tp->mtable->leafcount; i++)
+                       if (tp->mtable->mleaf[i].media == looking_for) {
+                               dev_info(&dev->dev,
+                                        "Using user-specified media %s\n",
+                                        medianame[dev->if_port]);
+                               goto media_picked;
+                       }
+       }
+       if ((tp->mtable->defaultmedia & 0x0800) == 0) {
+               int looking_for = tp->mtable->defaultmedia & MEDIA_MASK;
+               for (i = 0; i < tp->mtable->leafcount; i++)
+                       if (tp->mtable->mleaf[i].media == looking_for) {
+                               dev_info(&dev->dev,
+                                        "Using EEPROM-set media %s\n",
+                                        medianame[looking_for]);
+                               goto media_picked;
+                       }
+       }
+       /* Start sensing first non-full-duplex media: scan mleaf[] from the
+          last entry downwards and stop at the first entry not flagged
+          MediaAlwaysFD, or at index 0.
+          NOTE(review): mleaf[i] is read before the 'i > 0' test, so a
+          leafcount of 0 would index mleaf[-1]; confirm leafcount is always
+          >= 1 whenever mtable is set. */
+       for (i = tp->mtable->leafcount - 1;
+                (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--)
+               ;
+ media_picked:
+ 
+       tp->csr6 = 0;
+       tp->cur_index = i;
+       tp->nwayset = 0;
+ 
+       /* Chip-specific media start-up; a forced if_port takes priority. */
+       if (dev->if_port) {
+               if (tp->chip_id == DC21143  &&
+                   (tulip_media_cap[dev->if_port] & MediaIsMII)) {
+                       /* We must reset the media CSRs when we force-select MII mode. */
+                       iowrite32(0x0000, ioaddr + CSR13);
+                       iowrite32(0x0000, ioaddr + CSR14);
+                       iowrite32(0x0008, ioaddr + CSR15);
+               }
+               tulip_select_media(dev, 1);
+       } else if (tp->chip_id == DC21142) {
+               if (tp->mii_cnt) {
+                       tulip_select_media(dev, 1);
+                       if (tulip_debug > 1)
+                               dev_info(&dev->dev,
+                                        "Using MII transceiver %d, status %04x\n",
+                                        tp->phys[0],
+                                        tulip_mdio_read(dev, tp->phys[0], 1));
+                       iowrite32(csr6_mask_defstate, ioaddr + CSR6);
+                       tp->csr6 = csr6_mask_hdcap;
+                       dev->if_port = 11;
+                       iowrite32(0x0000, ioaddr + CSR13);
+                       iowrite32(0x0000, ioaddr + CSR14);
+               } else
+                       t21142_start_nway(dev);
+       } else if (tp->chip_id == PNIC2) {
+               /* for initial startup advertise 10/100 Full and Half */
+               tp->sym_advertise = 0x01E0;
+                 /* enable autonegotiate end interrupt */
+               iowrite32(ioread32(ioaddr+CSR5)| 0x00008010, ioaddr + CSR5);
+               iowrite32(ioread32(ioaddr+CSR7)| 0x00008010, ioaddr + CSR7);
+               pnic2_start_nway(dev);
+       } else if (tp->chip_id == LC82C168  &&  ! tp->medialock) {
+               if (tp->mii_cnt) {
+                       dev->if_port = 11;
+                       tp->csr6 = 0x814C0000 | (tp->full_duplex ? 0x0200 : 0);
+                       iowrite32(0x0001, ioaddr + CSR15);
+               } else if (ioread32(ioaddr + CSR5) & TPLnkPass)
+                       pnic_do_nway(dev);
+               else {
+                       /* Start with 10mbps to do autonegotiation. */
+                       iowrite32(0x32, ioaddr + CSR12);
+                       tp->csr6 = 0x00420000;
+                       iowrite32(0x0001B078, ioaddr + 0xB8);
+                       iowrite32(0x0201B078, ioaddr + 0xB8);
+                       next_tick = 1*HZ;       /* run the media timer sooner in this case */
+               }
+       } else if ((tp->chip_id == MX98713 || tp->chip_id == COMPEX9881) &&
+                  ! tp->medialock) {
+               dev->if_port = 0;
+               tp->csr6 = 0x01880000 | (tp->full_duplex ? 0x0200 : 0);
+               iowrite32(0x0f370000 | ioread16(ioaddr + 0x80), ioaddr + 0x80);
+       } else if (tp->chip_id == MX98715 || tp->chip_id == MX98725) {
+               /* Provided by BOLO, Macronix - 12/10/1998. */
+               dev->if_port = 0;
+               tp->csr6 = 0x01a80200;
+               iowrite32(0x0f370000 | ioread16(ioaddr + 0x80), ioaddr + 0x80);
+               iowrite32(0x11000 | ioread16(ioaddr + 0xa0), ioaddr + 0xa0);
+       } else if (tp->chip_id == COMET || tp->chip_id == CONEXANT) {
+               /* Enable automatic Tx underrun recovery. */
+               iowrite32(ioread32(ioaddr + 0x88) | 1, ioaddr + 0x88);
+               dev->if_port = tp->mii_cnt ? 11 : 0;
+               tp->csr6 = 0x00040000;
+       } else if (tp->chip_id == AX88140) {
+               tp->csr6 = tp->mii_cnt ? 0x00040100 : 0x00000100;
+       } else
+               tulip_select_media(dev, 1);
+ 
+       /* Start the chip's Tx to process setup frame. */
+       tulip_stop_rxtx(tp);
+       barrier();
+       udelay(5);
+       iowrite32(tp->csr6 | TxOn, ioaddr + CSR6);
+ 
+       /* Enable interrupts by setting the interrupt mask. */
+       iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5);
+       iowrite32(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
+       tulip_start_rxtx(tp);
+       iowrite32(0, ioaddr + CSR2);            /* Rx poll demand */
+ 
+       if (tulip_debug > 2) {
+               netdev_dbg(dev, "Done tulip_up(), CSR0 %08x, CSR5 %08x CSR6 %08x\n",
+                          ioread32(ioaddr + CSR0),
+                          ioread32(ioaddr + CSR5),
+                          ioread32(ioaddr + CSR6));
+       }
+ 
+       /* Set the timer to switch to check for link beat and perhaps switch
+          to an alternate media type. */
+       tp->timer.expires = RUN_AT(next_tick);
+       add_timer(&tp->timer);
+ #ifdef CONFIG_TULIP_NAPI
+       init_timer(&tp->oom_timer);     /* NOTE(review): initialised only after NAPI and interrupts were enabled above; confirm nothing can arm it earlier */
+         tp->oom_timer.data = (unsigned long)dev;
+         tp->oom_timer.function = oom_timer;
+ #endif
+ }
+ 
+ /*
+  * Open the interface: set up the descriptor rings, claim the (shared)
+  * interrupt line, bring the chip up and start the transmit queue.
+  *
+  * Returns 0 on success.  If the IRQ cannot be obtained the rings are
+  * freed again and request_irq()'s error code is returned.
+  */
+ static int
+ tulip_open(struct net_device *dev)
+ {
+       int err;
+ 
+       tulip_init_ring (dev);
+ 
+       err = request_irq(dev->irq, tulip_interrupt, IRQF_SHARED, dev->name, dev);
+       if (err) {
+               /* No IRQ: release the buffers allocated just above. */
+               tulip_free_ring (dev);
+               return err;
+       }
+ 
+       tulip_up (dev);
+       netif_start_queue (dev);
+ 
+       return 0;
+ }
+ 
+ 
+ static void tulip_tx_timeout(struct net_device *dev)
+ {     /* Transmit-timeout handler.  Under tp->lock it logs the chip state
+        * for the detected chip family, then either hands recovery to the
+        * media work item (21140-class chips) or calls
+        * tulip_tx_timeout_complete(); finally it wakes the queue.
+        */
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       unsigned long flags;
+ 
+       spin_lock_irqsave (&tp->lock, flags);
+ 
+       if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+               /* Do nothing -- the media monitor should handle this. */
+               if (tulip_debug > 1)
+                       dev_warn(&dev->dev,
+                                "Transmit timeout using MII device\n");
+       } else if (tp->chip_id == DC21140 || tp->chip_id == DC21142 ||
+                  tp->chip_id == MX98713 || tp->chip_id == COMPEX9881 ||
+                  tp->chip_id == DM910X) {
+               dev_warn(&dev->dev,
+                        "21140 transmit timed out, status %08x, SIA %08x %08x %08x %08x, resetting...\n",
+                        ioread32(ioaddr + CSR5), ioread32(ioaddr + CSR12),
+                        ioread32(ioaddr + CSR13), ioread32(ioaddr + CSR14),
+                        ioread32(ioaddr + CSR15));
+               tp->timeout_recovery = 1;       /* flag left for the media work item queued below */
+               schedule_work(&tp->media_work);
+               goto out_unlock;        /* skips tulip_tx_timeout_complete() for these chips */
+       } else if (tp->chip_id == PNIC2) {
+               dev_warn(&dev->dev,
+                        "PNIC2 transmit timed out, status %08x, CSR6/7 %08x / %08x CSR12 %08x, resetting...\n",
+                        (int)ioread32(ioaddr + CSR5),
+                        (int)ioread32(ioaddr + CSR6),
+                        (int)ioread32(ioaddr + CSR7),
+                        (int)ioread32(ioaddr + CSR12));
+       } else {
+               dev_warn(&dev->dev,
+                        "Transmit timed out, status %08x, CSR12 %08x, resetting...\n",
+                        ioread32(ioaddr + CSR5), ioread32(ioaddr + CSR12));
+               dev->if_port = 0;       /* drop any selected media type */
+       }
+ 
+ #if defined(way_too_many_messages)    /* optional ring dump, compiled in only when this macro is defined */
+       if (tulip_debug > 3) {
+               int i;
+               for (i = 0; i < RX_RING_SIZE; i++) {
+                       u8 *buf = (u8 *)(tp->rx_ring[i].buffer1);
+                       int j;
+                       printk(KERN_DEBUG
+                              "%2d: %08x %08x %08x %08x  %02x %02x %02x\n",
+                              i,
+                              (unsigned int)tp->rx_ring[i].status,
+                              (unsigned int)tp->rx_ring[i].length,
+                              (unsigned int)tp->rx_ring[i].buffer1,
+                              (unsigned int)tp->rx_ring[i].buffer2,
+                              buf[0], buf[1], buf[2]);
+                       for (j = 0; buf[j] != 0xee && j < 1600; j++)
+                               if (j < 100)
+                                       pr_cont(" %02x", buf[j]);
+                       pr_cont(" j=%d\n", j);
+               }
+               printk(KERN_DEBUG "  Rx ring %p: ", tp->rx_ring);
+               for (i = 0; i < RX_RING_SIZE; i++)
+                       pr_cont(" %08x", (unsigned int)tp->rx_ring[i].status);
+               printk(KERN_DEBUG "  Tx ring %p: ", tp->tx_ring);
+               for (i = 0; i < TX_RING_SIZE; i++)
+                       pr_cont(" %08x", (unsigned int)tp->tx_ring[i].status);
+               pr_cont("\n");
+       }
+ #endif
+ 
+       tulip_tx_timeout_complete(tp, ioaddr);
+ 
+ out_unlock:
+       spin_unlock_irqrestore (&tp->lock, flags);
+       dev->trans_start = jiffies; /* prevent tx timeout */
+       netif_wake_queue (dev);
+ }
+ 
+ 
+ /*
+  * Initialize the Rx and Tx rings, along with various 'dev' bits.
+  *
+  * Every Rx descriptor is chained to its successor through buffer2 (the
+  * last one points back to the start and carries DESC_RING_WRAP), then as
+  * many Rx slots as possible get a freshly allocated, DMA-mapped skb and
+  * are handed to the chip.  Filling stops at the first slot whose skb
+  * cannot be allocated or mapped; tp->dirty_rx records how many slots are
+  * still empty.  Tx descriptors are only chained and cleared here.
+  */
+ static void tulip_init_ring(struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       int i;
+ 
+       tp->susp_rx = 0;
+       tp->ttimer = 0;
+       tp->nir = 0;
+ 
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               tp->rx_ring[i].status = 0x00000000;
+               tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ);
+               tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1));
+               tp->rx_buffers[i].skb = NULL;
+               tp->rx_buffers[i].mapping = 0;
+       }
+       /* Mark the last entry as wrapping the ring. */
+       tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP);
+       tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma);
+ 
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               dma_addr_t mapping;
+ 
+               /* Note the receive buffer must be longword aligned.
+                  dev_alloc_skb() provides 16 byte alignment.  But do *not*
+                  use skb_reserve() to align the IP header! */
+               struct sk_buff *skb = dev_alloc_skb(PKT_BUF_SZ);
+               if (skb == NULL)
+                       break;          /* slot stays empty, as set up above */
+               mapping = pci_map_single(tp->pdev, skb->data,
+                                        PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+               if (pci_dma_mapping_error(tp->pdev, mapping)) {
+                       /* Never hand an invalid DMA address to the chip:
+                          treat this like an allocation failure. */
+                       dev_kfree_skb(skb);
+                       break;
+               }
+               tp->rx_buffers[i].skb = skb;
+               tp->rx_buffers[i].mapping = mapping;
+               skb->dev = dev;                 /* Mark as being used by this device. */
+               tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */
+               tp->rx_ring[i].buffer1 = cpu_to_le32(mapping);
+       }
+       /* Zero when every slot was filled, otherwise minus the number of
+          slots still empty (stored as unsigned). */
+       tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
+ 
+       /* The Tx buffer descriptor is filled in as needed, but we
+          do need to clear the ownership bit. */
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               tp->tx_buffers[i].skb = NULL;
+               tp->tx_buffers[i].mapping = 0;
+               tp->tx_ring[i].status = 0x00000000;
+               tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1));
+       }
+       tp->tx_ring[i-1].buffer2 = cpu_to_le32(tp->tx_ring_dma);
+ }
+ 
+ static netdev_tx_t
+ tulip_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {     /* Queue one skb on the Tx ring under tp->lock: map it for DMA, fill
+        * the next descriptor, give it to the chip and issue a transmit
+        * demand.  Always returns NETDEV_TX_OK.
+        */
+       struct tulip_private *tp = netdev_priv(dev);
+       int entry;
+       u32 flag;
+       dma_addr_t mapping;
+       unsigned long flags;
+ 
+       spin_lock_irqsave(&tp->lock, flags);
+ 
+       /* Calculate the next Tx descriptor entry. */
+       entry = tp->cur_tx % TX_RING_SIZE;
+ 
+       tp->tx_buffers[entry].skb = skb;
+       mapping = pci_map_single(tp->pdev, skb->data,
+                                skb->len, PCI_DMA_TODEVICE);   /* NOTE(review): the mapping result is not checked for failure */
+       tp->tx_buffers[entry].mapping = mapping;
+       tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping);
+ 
+       /* Pick the descriptor flags from how many entries are outstanding
+          (cur_tx - dirty_tx). */
+       if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */
+               flag = 0x60000000; /* No interrupt */
+       } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) {
+               flag = 0xe0000000; /* Tx-done intr. */
+       } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) {
+               flag = 0x60000000; /* No Tx-done intr. */
+       } else {                /* Leave room for set_rx_mode() to fill entries. */
+               flag = 0xe0000000; /* Tx-done intr. */
+               netif_stop_queue(dev);
+       }
+       if (entry == TX_RING_SIZE-1)    /* the last ring slot always gets the wrap flag and a Tx-done interrupt */
+               flag = 0xe0000000 | DESC_RING_WRAP;
+ 
+       tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag);
+       /* if we were using Transmit Automatic Polling, we would need a
+        * wmb() here. */
+       tp->tx_ring[entry].status = cpu_to_le32(DescOwned);
+       wmb();  /* presumably orders the descriptor writes before the CSR1 write below -- confirm */
+ 
+       tp->cur_tx++;
+ 
+       /* Trigger an immediate transmit demand. */
+       iowrite32(0, tp->base_addr + CSR1);
+ 
+       spin_unlock_irqrestore(&tp->lock, flags);
+ 
+       return NETDEV_TX_OK;
+ }
+ 
+ /*
+  * Release every Tx ring entry between tp->dirty_tx and tp->cur_tx.
+  *
+  * Entries whose status still has the top bit set (negative when read as
+  * an int) are counted as tx_errors and their status is cleared.  Setup
+  * frames (entries without an skb) are only unmapped; ordinary packets are
+  * unmapped and their skb is freed.  In both cases the stored DMA handle
+  * is cleared afterwards, so walking the same entries again can never
+  * unmap a buffer twice.  tp->dirty_tx itself is not advanced here.
+  */
+ static void tulip_clean_tx_ring(struct tulip_private *tp)
+ {
+       unsigned int dirty_tx;
+ 
+       for (dirty_tx = tp->dirty_tx ; tp->cur_tx - dirty_tx > 0;
+               dirty_tx++) {
+               int entry = dirty_tx % TX_RING_SIZE;
+               int status = le32_to_cpu(tp->tx_ring[entry].status);
+ 
+               if (status < 0) {
+                       tp->dev->stats.tx_errors++;     /* It wasn't Txed */
+                       tp->tx_ring[entry].status = 0;
+               }
+ 
+               /* Check for Tx filter setup frames. */
+               if (tp->tx_buffers[entry].skb == NULL) {
+                       /* test because dummy frames not mapped */
+                       if (tp->tx_buffers[entry].mapping) {
+                               pci_unmap_single(tp->pdev,
+                                       tp->tx_buffers[entry].mapping,
+                                       sizeof(tp->setup_frame),
+                                       PCI_DMA_TODEVICE);
+                               /* Forget the handle: it is no longer valid. */
+                               tp->tx_buffers[entry].mapping = 0;
+                       }
+                       continue;
+               }
+ 
+               pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping,
+                               tp->tx_buffers[entry].skb->len,
+                               PCI_DMA_TODEVICE);
+ 
+               /* Free the original skb. */
+               dev_kfree_skb_irq(tp->tx_buffers[entry].skb);
+               tp->tx_buffers[entry].skb = NULL;
+               tp->tx_buffers[entry].mapping = 0;
+       }
+ }
+ 
+ static void tulip_down (struct net_device *dev)
+ {     /* Quiesce the chip: stop deferred work, NAPI and timers, then under
+        * tp->lock mask interrupts, stop Tx/Rx and release pending Tx
+        * buffers.  Finally re-initialise the media timer and leave the
+        * chip in snooze mode.
+        */
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       unsigned long flags;
+ 
+       cancel_work_sync(&tp->media_work);
+ 
+ #ifdef CONFIG_TULIP_NAPI
+       napi_disable(&tp->napi);
+ #endif
+ 
+       del_timer_sync (&tp->timer);
+ #ifdef CONFIG_TULIP_NAPI
+       del_timer_sync (&tp->oom_timer);
+ #endif
+       spin_lock_irqsave (&tp->lock, flags);
+ 
+       /* Disable interrupts by clearing the interrupt mask. */
+       iowrite32 (0x00000000, ioaddr + CSR7);
+ 
+       /* Stop the Tx and Rx processes. */
+       tulip_stop_rxtx(tp);
+ 
+       /* prepare receive buffers */
+       tulip_refill_rx(dev);
+ 
+       /* release any unconsumed transmit buffers */
+       tulip_clean_tx_ring(tp);
+ 
+       if (ioread32(ioaddr + CSR6) != 0xffffffff)      /* presumably an all-ones read means the device is no longer reachable -- confirm */
+               dev->stats.rx_missed_errors += ioread32(ioaddr + CSR8) & 0xffff;
+ 
+       spin_unlock_irqrestore (&tp->lock, flags);
+ 
+       init_timer(&tp->timer); /* re-prepared here; tulip_up() only sets .expires and calls add_timer() */
+       tp->timer.data = (unsigned long)dev;
+       tp->timer.function = tulip_tbl[tp->chip_id].media_timer;
+ 
+       dev->if_port = tp->saved_if_port;       /* undo the media choice recorded by tulip_up() */
+ 
+       /* Leave the driver in snooze, not sleep, mode. */
+       tulip_set_power_state (tp, 0, 1);
+ }
+ 
+ /*
+  * Release every buffer still attached to the Rx and Tx rings.
+  *
+  * Each Rx descriptor is handed back to the host (status 0, length 0) and
+  * pointed at a deliberately invalid address; any attached skb is unmapped
+  * and freed.  Each Tx slot that still holds an skb is unmapped and freed,
+  * and its bookkeeping is cleared.
+  */
+ static void tulip_free_ring (struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       int slot;
+ 
+       /* Rx ring: detach the buffer first, then poison the descriptor. */
+       for (slot = 0; slot < RX_RING_SIZE; slot++) {
+               struct sk_buff *rx_skb = tp->rx_buffers[slot].skb;
+               dma_addr_t rx_dma = tp->rx_buffers[slot].mapping;
+ 
+               tp->rx_buffers[slot].skb = NULL;
+               tp->rx_buffers[slot].mapping = 0;
+ 
+               /* Not owned by the Tulip chip any more. */
+               tp->rx_ring[slot].status = 0;
+               tp->rx_ring[slot].length = 0;
+               /* An invalid address. */
+               tp->rx_ring[slot].buffer1 = cpu_to_le32(0xBADF00D0);
+ 
+               if (!skb_is_null_guard(rx_skb))
+                       continue;
+       }
+ }
+ 
+ /*
+  * Stop handler for the interface (installed as .ndo_stop).
+  *
+  * Stops the transmit queue, quiesces the hardware through tulip_down(),
+  * releases the interrupt line and finally frees the ring buffers.
+  * Always returns 0.
+  */
+ static int tulip_close (struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+ 
+       netif_stop_queue (dev);
+ 
+       tulip_down (dev);
+ 
+       if (tulip_debug > 1)
+               netdev_dbg(dev, "Shutting down ethercard, status was %02x\n",
+                          ioread32 (ioaddr + CSR5));
+ 
+       /*
+        * The IRQ is released before the rings are freed (presumably so the
+        * interrupt handler can no longer run against freed buffers).
+        */
+       free_irq (dev->irq, dev);
+ 
+       tulip_free_ring (dev);
+ 
+       return 0;
+ }
+ 
+ /*
+  * Return the interface statistics.
+  *
+  * While the interface is running, the low 16 bits of CSR8 are first added
+  * to rx_missed_errors under tp->lock; otherwise the counters are returned
+  * untouched.
+  */
+ static struct net_device_stats *tulip_get_stats(struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       unsigned long irq_state;
+ 
+       /* Do not touch the hardware when the interface is down. */
+       if (!netif_running(dev))
+               return &dev->stats;
+ 
+       spin_lock_irqsave (&tp->lock, irq_state);
+       dev->stats.rx_missed_errors += ioread32(ioaddr + CSR8) & 0xffff;
+       spin_unlock_irqrestore(&tp->lock, irq_state);
+ 
+       return &dev->stats;
+ }
+ 
+ 
+ /*
+  * ethtool get_drvinfo callback: report the driver name, driver version and
+  * the PCI bus address of the adapter.
+  *
+  * The destination fields are fixed-size arrays inside *info, so every copy
+  * is bounded by the size of its destination and is always NUL-terminated.
+  */
+ static void tulip_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+ {
+       struct tulip_private *np = netdev_priv(dev);
+       strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+       strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+       strlcpy(info->bus_info, pci_name(np->pdev), sizeof(info->bus_info));
+ }
+ 
+ 
+ /*
+  * ethtool set_wol callback.
+  *
+  * Rejects the request with -EOPNOTSUPP when it asks for any wake option
+  * outside tp->wolinfo.supported; otherwise stores the options and updates
+  * the device's wakeup-enable state from them.  Returns 0 on success.
+  */
+ static int tulip_ethtool_set_wol(struct net_device *dev,
+                                struct ethtool_wolinfo *wolinfo)
+ {
+       struct tulip_private *priv = netdev_priv(dev);
+ 
+       /* Any requested bit that is not supported fails the whole request. */
+       if ((wolinfo->wolopts & ~priv->wolinfo.supported) != 0)
+               return -EOPNOTSUPP;
+ 
+       priv->wolinfo.wolopts = wolinfo->wolopts;
+       device_set_wakeup_enable(&priv->pdev->dev, priv->wolinfo.wolopts);
+ 
+       return 0;
+ }
+ 
+ /*
+  * ethtool get_wol callback: copy the supported and currently selected
+  * Wake-on-LAN options from the driver's private state into *wolinfo.
+  */
+ static void tulip_ethtool_get_wol(struct net_device *dev,
+                                 struct ethtool_wolinfo *wolinfo)
+ {
+       struct tulip_private *priv = netdev_priv(dev);
+ 
+       wolinfo->supported = priv->wolinfo.supported;
+       wolinfo->wolopts = priv->wolinfo.wolopts;
+ }
+ 
+ 
+ /*
+  * ethtool callbacks implemented by this driver: driver information and
+  * Wake-on-LAN get/set.
+  */
+ static const struct ethtool_ops ops = {
+       .get_drvinfo = tulip_get_drvinfo,
+       .set_wol     = tulip_ethtool_set_wol,
+       .get_wol     = tulip_ethtool_get_wol,
+ };
+ 
+ /*
+  * Provide ioctl() calls to examine the MII xcvr state.
+  *
+  * Handles SIOCGMIIPHY, SIOCGMIIREG and SIOCSMIIREG; every other command
+  * returns -EOPNOTSUPP.  PHY id 32 on chips flagged HAS_NWAY is treated
+  * specially: reads are synthesised from CSR6/CSR12/CSR14 and driver state,
+  * and writes update driver state or restart autonegotiation instead of
+  * going out over MDIO.
+  *
+  * Returns 0 on success, -ENODEV when no PHY address can be reported,
+  * -EINVAL for an out-of-range register number on write.
+  */
+ static int private_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       struct mii_ioctl_data *data = if_mii(rq);
+       const unsigned int phy_idx = 0;
+       int phy = tp->phys[phy_idx] & 0x1f;
+       unsigned int regnum = data->reg_num;
+ 
+       switch (cmd) {
+       case SIOCGMIIPHY:               /* Get address of MII PHY in use. */
+               if (tp->mii_cnt)
+                       data->phy_id = phy;
+               else if (tp->flags & HAS_NWAY)
+                       data->phy_id = 32;
+               else if (tp->chip_id == COMET)
+                       data->phy_id = 1;
+               else
+                       return -ENODEV;
+ 
+               /* Fall through: also read the requested register. */
+       case SIOCGMIIREG:               /* Read MII PHY register. */
+               /*
+                * NOTE(review): unlike the write path below, regnum is not
+                * range-checked before being passed to tulip_mdio_read() --
+                * confirm whether that helper masks it.
+                */
+               if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) {
+                       /* Emulate MII registers 0, 1, 4 and 5 from chip state. */
+                       int csr12 = ioread32 (ioaddr + CSR12);
+                       int csr14 = ioread32 (ioaddr + CSR14);
+                       switch (regnum) {
+                       case 0:
+                                 if (((csr14<<5) & 0x1000) ||
+                                         (dev->if_port == 5 && tp->nwayset))
+                                         data->val_out = 0x1000;
+                                 else
+                                         data->val_out = (tulip_media_cap[dev->if_port]&MediaIs100 ? 0x2000 : 0)
+                                                 | (tulip_media_cap[dev->if_port]&MediaIsFD ? 0x0100 : 0);
+                               break;
+                       case 1:
+                                 data->val_out =
+                                       0x1848 +
+                                       ((csr12&0x7000) == 0x5000 ? 0x20 : 0) +
+                                       ((csr12&0x06) == 6 ? 0 : 4);
+                                 data->val_out |= 0x6048;
+                               break;
+                       case 4:
+                                 /* Advertised value, bogus 10baseTx-FD value from CSR6. */
+                                 data->val_out =
+                                       ((ioread32(ioaddr + CSR6) >> 3) & 0x0040) +
+                                       ((csr14 >> 1) & 0x20) + 1;
+                                 data->val_out |= ((csr14 >> 9) & 0x03C0);
+                               break;
+                       case 5: data->val_out = tp->lpar; break;
+                       default: data->val_out = 0; break;
+                       }
+               } else {
+                       data->val_out = tulip_mdio_read (dev, data->phy_id & 0x1f, regnum);
+               }
+               return 0;
+ 
+       case SIOCSMIIREG:               /* Write MII PHY register. */
+               if (regnum & ~0x1f)
+                       return -EINVAL;
+               /* Mirror writes to the primary PHY into the driver's own state. */
+               if (data->phy_id == phy) {
+                       u16 value = data->val_in;
+                       switch (regnum) {
+                       case 0: /* Check for autonegotiation on or reset. */
+                               tp->full_duplex_lock = (value & 0x9000) ? 0 : 1;
+                               if (tp->full_duplex_lock)
+                                       tp->full_duplex = (value & 0x0100) ? 1 : 0;
+                               break;
+                       case 4:
+                               tp->advertising[phy_idx] =
+                               tp->mii_advertise = data->val_in;
+                               break;
+                       }
+               }
+               if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) {
+                       u16 value = data->val_in;
+                       if (regnum == 0) {
+                         /* Bits 0x1200 both set: restart autonegotiation. */
+                         if ((value & 0x1200) == 0x1200) {
+                           if (tp->chip_id == PNIC2) {
+                                    pnic2_start_nway (dev);
+                             } else {
+                                  t21142_start_nway (dev);
+                             }
+                         }
+                       } else if (regnum == 4)
+                               tp->sym_advertise = value;
+               } else {
+                       tulip_mdio_write (dev, data->phy_id & 0x1f, regnum, data->val_in);
+               }
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+ 
+       return -EOPNOTSUPP;
+ }
+ 
+ 
+ /* Set or clear the multicast filter for this adaptor.
+    Note that we only use exclusion around actually queueing the
+    new frame, not around filling tp->setup_frame.  This is non-deterministic
+    when re-entered but still correct. */
+ 
+ /*
+  * Set bit i in the byte array at p: byte i/8, bit i%8 within that byte.
+  * Any earlier definition of set_bit_le is discarded first.
+  */
+ #undef set_bit_le
+ #define set_bit_le(i,p) do { ((char *)(p))[(i)/8] |= (1<<((i)%8)); } while(0)
+ 
+ /*
+  * Build a hash-filter setup frame.
+  *
+  * A 512-bit hash table is filled with the broadcast entry (bit 255) and
+  * one bit per multicast address, indexed by the low 9 bits of the
+  * little-endian Ethernet CRC.  Each 16-bit word of the table is written
+  * twice to consecutive words at setup_frm.  The station address is then
+  * stored, likewise doubled, starting at tp->setup_frame[13*6].
+  */
+ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       u16 hash_table[32];
+       struct netdev_hw_addr *ha;
+       u16 *station = (u16 *)dev->dev_addr;
+       u16 *own_slot;
+       int w;
+ 
+       memset(hash_table, 0, sizeof(hash_table));
+       set_bit_le(255, hash_table);                    /* Broadcast entry */
+ 
+       /* This should work on big-endian machines as well. */
+       netdev_for_each_mc_addr(ha, dev) {
+               int index = ether_crc_le(ETH_ALEN, ha->addr) & 0x1ff;
+ 
+               set_bit_le(index, hash_table);
+       }
+ 
+       /* Every table word is emitted twice. */
+       for (w = 0; w < 32; w++) {
+               setup_frm[2 * w] = hash_table[w];
+               setup_frm[2 * w + 1] = hash_table[w];
+       }
+ 
+       /* Fill the final entry with our physical address. */
+       own_slot = &tp->setup_frame[13*6];
+       for (w = 0; w < 3; w++) {
+               own_slot[2 * w] = station[w];
+               own_slot[2 * w + 1] = station[w];
+       }
+ }
+ 
+ /*
+  * Build a perfect-filter setup frame.
+  *
+  * Each multicast address is written to setup_frm as three 16-bit words,
+  * every word doubled.  The remaining entries up to the 15th are filled
+  * with 0xff bytes (the broadcast address), and the station address is
+  * stored, likewise doubled, starting at tp->setup_frame[15*6].
+  */
+ static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       struct netdev_hw_addr *ha;
+       u16 *station = (u16 *)dev->dev_addr;
+       u16 *own_slot;
+       int w;
+ 
+       /* We have <= 14 addresses so we can use the wonderful
+          16 address perfect filtering of the Tulip. */
+       netdev_for_each_mc_addr(ha, dev) {
+               u16 *mc_words = (u16 *) ha->addr;
+ 
+               for (w = 0; w < 3; w++) {
+                       *setup_frm++ = mc_words[w];
+                       *setup_frm++ = mc_words[w];
+               }
+       }
+ 
+       /* Fill the unused entries with the broadcast address. */
+       memset(setup_frm, 0xff, (15 - netdev_mc_count(dev)) * 12);
+ 
+       /* Fill the final entry with our physical address. */
+       own_slot = &tp->setup_frame[15*6];
+       for (w = 0; w < 3; w++) {
+               own_slot[2 * w] = station[w];
+               own_slot[2 * w + 1] = station[w];
+       }
+ }
+ 
+ 
+ /*
+  * Program the receive filter from dev->flags and the multicast list.
+  *
+  * The filter-mode bits (mask 0x00D5) are cleared in both the value read
+  * from CSR6 and the cached tp->csr6, then one of four strategies is used:
+  *   - IFF_PROMISC: accept all physical and all multicast frames;
+  *   - more than 1000 multicast addresses or IFF_ALLMULTI: accept all
+  *     multicast frames;
+  *   - MC_HASH_ONLY chips: a 64-bit hash filter written to chip registers
+  *     (or accept-all-multicast when more than 64 addresses are listed);
+  *   - otherwise: a setup frame (hash filter for more than 14 addresses,
+  *     perfect filter for 14 or fewer) is queued on the Tx ring under
+  *     tp->lock.
+  * The resulting CSR6 value is written back at the end.
+  */
+ static void set_rx_mode(struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       int csr6;
+ 
+       csr6 = ioread32(ioaddr + CSR6) & ~0x00D5;
+ 
+       tp->csr6 &= ~0x00D5;
+       if (dev->flags & IFF_PROMISC) {                 /* Set promiscuous. */
+               tp->csr6 |= AcceptAllMulticast | AcceptAllPhys;
+               csr6 |= AcceptAllMulticast | AcceptAllPhys;
+       } else if ((netdev_mc_count(dev) > 1000) ||
+                  (dev->flags & IFF_ALLMULTI)) {
+               /* Too many to filter well -- accept all multicasts. */
+               tp->csr6 |= AcceptAllMulticast;
+               csr6 |= AcceptAllMulticast;
+       } else  if (tp->flags & MC_HASH_ONLY) {
+               /* Some work-alikes have only a 64-entry hash filter table. */
+               /* Should verify correctness on big-endian/__powerpc__ */
+               struct netdev_hw_addr *ha;
+               if (netdev_mc_count(dev) > 64) {
+                       /* Arbitrary non-effective limit. */
+                       tp->csr6 |= AcceptAllMulticast;
+                       csr6 |= AcceptAllMulticast;
+               } else {
+                       u32 mc_filter[2] = {0, 0};               /* Multicast hash filter */
+                       int filterbit;
+                       netdev_for_each_mc_addr(ha, dev) {
+                               if (tp->flags & COMET_MAC_ADDR)
+                                       filterbit = ether_crc_le(ETH_ALEN,
+                                                                ha->addr);
+                               else
+                                       filterbit = ether_crc(ETH_ALEN,
+                                                             ha->addr) >> 26;
+                               filterbit &= 0x3f;
+                               /*
+                                * Unsigned constant: the bit index can be 31,
+                                * and shifting a signed 1 into the sign bit
+                                * is undefined behaviour.
+                                */
+                               mc_filter[filterbit >> 5] |= 1U << (filterbit & 31);
+                               if (tulip_debug > 2)
+                                       dev_info(&dev->dev,
+                                                "Added filter for %pM  %08x bit %d\n",
+                                                ha->addr,
+                                                ether_crc(ETH_ALEN, ha->addr),
+                                                filterbit);
+                       }
+                       /* Only touch the chip when the filter actually changed. */
+                       if (mc_filter[0] == tp->mc_filter[0]  &&
+                               mc_filter[1] == tp->mc_filter[1])
+                               ;                               /* No change. */
+                       else if (tp->flags & IS_ASIX) {
+                               iowrite32(2, ioaddr + CSR13);
+                               iowrite32(mc_filter[0], ioaddr + CSR14);
+                               iowrite32(3, ioaddr + CSR13);
+                               iowrite32(mc_filter[1], ioaddr + CSR14);
+                       } else if (tp->flags & COMET_MAC_ADDR) {
+                               iowrite32(mc_filter[0], ioaddr + CSR27);
+                               iowrite32(mc_filter[1], ioaddr + CSR28);
+                       }
+                       tp->mc_filter[0] = mc_filter[0];
+                       tp->mc_filter[1] = mc_filter[1];
+               }
+       } else {
+               unsigned long flags;
+               /*
+                * Descriptor length/control word for the setup frame.
+                * Presumably 0x08000000 marks a setup packet, 192 is its
+                * length in bytes and 0x00400000 selects hash filtering --
+                * TODO confirm against the chip documentation.
+                */
+               u32 tx_flags = 0x08000000 | 192;
+ 
+               /* Note that only the low-address shortword of setup_frame is valid!
+                  The values are doubled for big-endian architectures. */
+               if (netdev_mc_count(dev) > 14) {
+                       /* Must use a multicast hash table. */
+                       build_setup_frame_hash(tp->setup_frame, dev);
+                       tx_flags = 0x08400000 | 192;
+               } else {
+                       build_setup_frame_perfect(tp->setup_frame, dev);
+               }
+ 
+               spin_lock_irqsave(&tp->lock, flags);
+ 
+               if (tp->cur_tx - tp->dirty_tx > TX_RING_SIZE - 2) {
+                       /*
+                        * Fewer than two free Tx slots remain, so the setup
+                        * frame is not queued.  (Historic comment: "Same
+                        * setup recently queued, we need not add it.")
+                        */
+               } else {
+                       unsigned int entry;
+                       int dummy = -1;
+ 
+                       /* Now add this frame to the Tx list. */
+ 
+                       entry = tp->cur_tx++ % TX_RING_SIZE;
+ 
+                       if (entry != 0) {
+                               /* Avoid a chip errata by prefixing a dummy entry. */
+                               tp->tx_buffers[entry].skb = NULL;
+                               tp->tx_buffers[entry].mapping = 0;
+                               tp->tx_ring[entry].length =
+                                       (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0;
+                               tp->tx_ring[entry].buffer1 = 0;
+                               /* Must set DescOwned later to avoid race with chip */
+                               dummy = entry;
+                               entry = tp->cur_tx++ % TX_RING_SIZE;
+ 
+                       }
+ 
+                       tp->tx_buffers[entry].skb = NULL;
+                       tp->tx_buffers[entry].mapping =
+                               pci_map_single(tp->pdev, tp->setup_frame,
+                                              sizeof(tp->setup_frame),
+                                              PCI_DMA_TODEVICE);
+                       /* Put the setup frame on the Tx list. */
+                       if (entry == TX_RING_SIZE-1)
+                               tx_flags |= DESC_RING_WRAP;             /* Wrap ring. */
+                       tp->tx_ring[entry].length = cpu_to_le32(tx_flags);
+                       tp->tx_ring[entry].buffer1 =
+                               cpu_to_le32(tp->tx_buffers[entry].mapping);
+                       /* Hand over the setup frame first, then the dummy. */
+                       tp->tx_ring[entry].status = cpu_to_le32(DescOwned);
+                       if (dummy >= 0)
+                               tp->tx_ring[dummy].status = cpu_to_le32(DescOwned);
+                       if (tp->cur_tx - tp->dirty_tx >= TX_RING_SIZE - 2)
+                               netif_stop_queue(dev);
+ 
+                       /* Trigger an immediate transmit demand. */
+                       iowrite32(0, ioaddr + CSR1);
+               }
+ 
+               spin_unlock_irqrestore(&tp->lock, flags);
+       }
+ 
+       iowrite32(csr6, ioaddr + CSR6);
+ }
+ 
+ #ifdef CONFIG_TULIP_MWI
+ /*
+  * Compute tp->csr0 (bus-mode settings) for chips that support PCI
+  * Memory-Write-Invalidate.
+  *
+  * Starts from MRM | MWI, tries to enable MWI in the PCI command register
+  * and drops the MWI bit again if the hardware refused it or the cache
+  * line size is zero.  For cache line sizes of 8, 16 or 32 it adds MRL plus
+  * matching cache-alignment and burst-length fields; for any other size it
+  * disables MWI and falls back to fixed default burst/alignment values.
+  * The result is stored in tp->csr0.
+  */
+ static void __devinit tulip_mwi_config (struct pci_dev *pdev,
+                                       struct net_device *dev)
+ {
+       struct tulip_private *tp = netdev_priv(dev);
+       u8 cache;
+       u16 pci_command;
+       u32 csr0;
+ 
+       if (tulip_debug > 3)
+               netdev_dbg(dev, "tulip_mwi_config()\n");
+ 
+       tp->csr0 = csr0 = 0;
+ 
+       /* if we have any cache line size at all, we can do MRM and MWI */
+       csr0 |= MRM | MWI;
+ 
+       /* Enable MWI in the standard PCI command bit.
+        * Check for the case where MWI is desired but not available
+        */
+       pci_try_set_mwi(pdev);
+ 
+       /* read result from hardware (in case bit refused to enable) */
+       pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+       if ((csr0 & MWI) && (!(pci_command & PCI_COMMAND_INVALIDATE)))
+               csr0 &= ~MWI;
+ 
+       /* if cache line size hardwired to zero, no MWI */
+       pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache);
+       if ((csr0 & MWI) && (cache == 0)) {
+               csr0 &= ~MWI;
+               pci_clear_mwi(pdev);
+       }
+ 
+       /* assign per-cacheline-size cache alignment and
+        * burst length values
+        */
+       switch (cache) {
+       case 8:
+               csr0 |= MRL | (1 << CALShift) | (16 << BurstLenShift);
+               break;
+       case 16:
+               csr0 |= MRL | (2 << CALShift) | (16 << BurstLenShift);
+               break;
+       case 32:
+               csr0 |= MRL | (3 << CALShift) | (32 << BurstLenShift);
+               break;
+       default:
+               /* Unsupported size: treat it like "no cache line size". */
+               cache = 0;
+               break;
+       }
+ 
+       /* if we have a good cache line size, we by now have a good
+        * csr0, so save it and exit
+        */
+       if (cache)
+               goto out;
+ 
+       /* we don't have a good csr0 or cache line size, disable MWI */
+       if (csr0 & MWI) {
+               pci_clear_mwi(pdev);
+               csr0 &= ~MWI;
+       }
+ 
+       /* sane defaults for burst length and cache alignment
+        * originally from de4x5 driver
+        */
+       csr0 |= (8 << BurstLenShift) | (1 << CALShift);
+ 
+ out:
+       tp->csr0 = csr0;
+       if (tulip_debug > 2)
+               netdev_dbg(dev, "MWI config cacheline=%d, csr0=%08x\n",
+                          cache, csr0);
+ }
+ #endif
+ 
+ /*
+  *    Report whether the device needs the MRM/reserved-bit and burst
+  *    quirks.  Returns 1 for PCI vendor 0x1282, device 0x9102, and 0 for
+  *    every other device.
+  */
+ 
+ static int tulip_uli_dm_quirk(struct pci_dev *pdev)
+ {
+       return pdev->vendor == 0x1282 && pdev->device == 0x9102;
+ }
+ 
+ /*
+  * net_device callbacks for the driver.  MTU, MAC-address and address
+  * validation use the generic Ethernet helpers; the poll-controller hook is
+  * only present when CONFIG_NET_POLL_CONTROLLER is set.
+  */
+ static const struct net_device_ops tulip_netdev_ops = {
+       .ndo_open               = tulip_open,
+       .ndo_start_xmit         = tulip_start_xmit,
+       .ndo_tx_timeout         = tulip_tx_timeout,
+       .ndo_stop               = tulip_close,
+       .ndo_get_stats          = tulip_get_stats,
+       .ndo_do_ioctl           = private_ioctl,
+       .ndo_set_rx_mode        = set_rx_mode,
+       .ndo_change_mtu         = eth_change_mtu,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller     = poll_tulip,
+ #endif
+ };
+ 
+ /*
+  * Early 486 host chipsets (Intel 82424, SiS 496).  tulip_init_one() checks
+  * for their presence with pci_dev_present() and, if one is found, forces a
+  * conservative csr0 burst/cache-alignment setting.
+  */
+ DEFINE_PCI_DEVICE_TABLE(early_486_chipsets) = {
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424) },
+       { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496) },
+       { },
+ };
+ 
+ static int __devinit tulip_init_one (struct pci_dev *pdev,
+                                    const struct pci_device_id *ent)
+ {
+       struct tulip_private *tp;
+       /* See note below on the multiport cards. */
+       static unsigned char last_phys_addr[6] = {0x00, 'L', 'i', 'n', 'u', 'x'};
+       static int last_irq;
+       static int multiport_cnt;       /* For four-port boards w/one EEPROM */
+       int i, irq;
+       unsigned short sum;
+       unsigned char *ee_data;
+       struct net_device *dev;
+       void __iomem *ioaddr;
+       static int board_idx = -1;
+       int chip_idx = ent->driver_data;
+       const char *chip_name = tulip_tbl[chip_idx].chip_name;
+       unsigned int eeprom_missing = 0;
+       unsigned int force_csr0 = 0;
+ 
+ #ifndef MODULE
+       if (tulip_debug > 0)
+               printk_once(KERN_INFO "%s", version);
+ #endif
+ 
+       board_idx++;
+ 
+       /*
+        *      Lan media wire a tulip chip to a wan interface. Needs a very
+        *      different driver (lmc driver)
+        */
+ 
+         if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) {
+               pr_err("skipping LMC card\n");
+               return -ENODEV;
+       } else if (pdev->subsystem_vendor == PCI_VENDOR_ID_SBE &&
+                  (pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_T3E3 ||
+                   pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_2T3E3_P0 ||
+                   pdev->subsystem_device == PCI_SUBDEVICE_ID_SBE_2T3E3_P1)) {
+               pr_err("skipping SBE T3E3 port\n");
+               return -ENODEV;
+       }
+ 
+       /*
+        *      DM910x chips should be handled by the dmfe driver, except
+        *      on-board chips on SPARC systems.  Also, early DM9100s need
+        *      software CRC which only the dmfe driver supports.
+        */
+ 
+ #ifdef CONFIG_TULIP_DM910X
+       if (chip_idx == DM910X) {
+               struct device_node *dp;
+ 
+               if (pdev->vendor == 0x1282 && pdev->device == 0x9100 &&
+                   pdev->revision < 0x30) {
+                       pr_info("skipping early DM9100 with Crc bug (use dmfe)\n");
+                       return -ENODEV;
+               }
+ 
+               dp = pci_device_to_OF_node(pdev);
+               if (!(dp && of_get_property(dp, "local-mac-address", NULL))) {
+                       pr_info("skipping DM910x expansion card (use dmfe)\n");
+                       return -ENODEV;
+               }
+       }
+ #endif
+ 
+       /*
+        *      Looks for early PCI chipsets where people report hangs
+        *      without the workarounds being on.
+        */
+ 
+       /* 1. Intel Saturn. Switch to 8 long words burst, 8 long word cache
+             aligned.  Aries might need this too. The Saturn errata are not
+             pretty reading but thankfully it's an old 486 chipset.
+ 
+          2. The dreaded SiS496 486 chipset. Same workaround as Intel
+             Saturn.
+       */
+ 
+       if (pci_dev_present(early_486_chipsets)) {
+               csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift);
+               force_csr0 = 1;
+       }
+ 
+       /* bugfix: the ASIX must have a burst limit or horrible things happen. */
+       if (chip_idx == AX88140) {
+               if ((csr0 & 0x3f00) == 0)
+                       csr0 |= 0x2000;
+       }
+ 
+       /* PNIC doesn't have MWI/MRL/MRM... */
+       if (chip_idx == LC82C168)
+               csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */
+ 
+       /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */
+       if (tulip_uli_dm_quirk(pdev)) {
+               csr0 &= ~0x01f100ff;
+ #if defined(CONFIG_SPARC)
+                 csr0 = (csr0 & ~0xff00) | 0xe000;
+ #endif
+       }
+       /*
+        *      And back to business
+        */
+ 
+       i = pci_enable_device(pdev);
+       if (i) {
+               pr_err("Cannot enable tulip board #%d, aborting\n", board_idx);
+               return i;
+       }
+ 
+       /* The chip will fail to enter a low-power state later unless
+        * first explicitly commanded into D0 */
+       if (pci_set_power_state(pdev, PCI_D0)) {
+               pr_notice("Failed to set power state to D0\n");
+       }
+ 
+       irq = pdev->irq;
+ 
+       /* alloc_etherdev ensures aligned and zeroed private structures */
+       dev = alloc_etherdev (sizeof (*tp));
+       if (!dev) {
+               pr_err("ether device alloc failed, aborting\n");
+               return -ENOMEM;
+       }
+ 
+       SET_NETDEV_DEV(dev, &pdev->dev);
+       if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
+               pr_err("%s: I/O region (0x%llx@0x%llx) too small, aborting\n",
+                      pci_name(pdev),
+                      (unsigned long long)pci_resource_len (pdev, 0),
+                      (unsigned long long)pci_resource_start (pdev, 0));
+               goto err_out_free_netdev;
+       }
+ 
+       /* grab all resources from both PIO and MMIO regions, as we
+        * don't want anyone else messing around with our hardware */
+       if (pci_request_regions (pdev, DRV_NAME))
+               goto err_out_free_netdev;
+ 
+       ioaddr =  pci_iomap(pdev, TULIP_BAR, tulip_tbl[chip_idx].io_size);
+ 
+       if (!ioaddr)
+               goto err_out_free_res;
+ 
+       /*
+        * initialize private data structure 'tp'
+        * it is zeroed and aligned in alloc_etherdev
+        */
+       tp = netdev_priv(dev);
+       tp->dev = dev;
+ 
+       tp->rx_ring = pci_alloc_consistent(pdev,
+                                          sizeof(struct tulip_rx_desc) * RX_RING_SIZE +
+                                          sizeof(struct tulip_tx_desc) * TX_RING_SIZE,
+                                          &tp->rx_ring_dma);
+       if (!tp->rx_ring)
+               goto err_out_mtable;
+       tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE);
+       tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE;
+ 
+       tp->chip_id = chip_idx;
+       tp->flags = tulip_tbl[chip_idx].flags;
+ 
+       tp->wolinfo.supported = 0;
+       tp->wolinfo.wolopts = 0;
+       /* COMET: Enable power management only for AN983B */
+       if (chip_idx == COMET ) {
+               u32 sig;
+               pci_read_config_dword (pdev, 0x80, &sig);
+               if (sig == 0x09811317) {
+                       tp->flags |= COMET_PM;
+                       tp->wolinfo.supported = WAKE_PHY | WAKE_MAGIC;
+                       pr_info("%s: Enabled WOL support for AN983B\n",
+                               __func__);
+               }
+       }
+       tp->pdev = pdev;
+       tp->base_addr = ioaddr;
+       tp->revision = pdev->revision;
+       tp->csr0 = csr0;
+       spin_lock_init(&tp->lock);
+       spin_lock_init(&tp->mii_lock);
+       init_timer(&tp->timer);
+       tp->timer.data = (unsigned long)dev;
+       tp->timer.function = tulip_tbl[tp->chip_id].media_timer;
+ 
+       INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task);
+ 
+       dev->base_addr = (unsigned long)ioaddr;
+ 
+ #ifdef CONFIG_TULIP_MWI
+       if (!force_csr0 && (tp->flags & HAS_PCI_MWI))
+               tulip_mwi_config (pdev, dev);
+ #endif
+ 
+       /* Stop the chip's Tx and Rx processes. */
+       tulip_stop_rxtx(tp);
+ 
+       pci_set_master(pdev);
+ 
+ #ifdef CONFIG_GSC
+       if (pdev->subsystem_vendor == PCI_VENDOR_ID_HP) {
+               switch (pdev->subsystem_device) {
+               default:
+                       break;
+               case 0x1061:
+               case 0x1062:
+               case 0x1063:
+               case 0x1098:
+               case 0x1099:
+               case 0x10EE:
+                       tp->flags |= HAS_SWAPPED_SEEPROM | NEEDS_FAKE_MEDIA_TABLE;
+                       chip_name = "GSC DS21140 Tulip";
+               }
+       }
+ #endif
+ 
+       /* Clear the missed-packet counter. */
+       ioread32(ioaddr + CSR8);
+ 
+       /* The station address ROM is read byte serially.  The register must
+          be polled, waiting for the value to be read bit serially from the
+          EEPROM.
+          */
+       ee_data = tp->eeprom;
+       memset(ee_data, 0, sizeof(tp->eeprom));
+       sum = 0;
+       if (chip_idx == LC82C168) {
+               for (i = 0; i < 3; i++) {
+                       int value, boguscnt = 100000;
+                       iowrite32(0x600 | i, ioaddr + 0x98);
+                       do {
+                               value = ioread32(ioaddr + CSR9);
+                       } while (value < 0  && --boguscnt > 0);
+                       put_unaligned_le16(value, ((__le16 *)dev->dev_addr) + i);
+                       sum += value & 0xffff;
+               }
+       } else if (chip_idx == COMET) {
+               /* No need to read the EEPROM. */
+               put_unaligned_le32(ioread32(ioaddr + 0xA4), dev->dev_addr);
+               put_unaligned_le16(ioread32(ioaddr + 0xA8), dev->dev_addr + 4);
+               for (i = 0; i < 6; i ++)
+                       sum += dev->dev_addr[i];
+       } else {
+               /* A serial EEPROM interface, we read now and sort it out later. */
+               int sa_offset = 0;
+               int ee_addr_size = tulip_read_eeprom(dev, 0xff, 8) & 0x40000 ? 8 : 6;
+               int ee_max_addr = ((1 << ee_addr_size) - 1) * sizeof(u16);
+ 
+               if (ee_max_addr > sizeof(tp->eeprom))
+                       ee_max_addr = sizeof(tp->eeprom);
+ 
+               for (i = 0; i < ee_max_addr ; i += sizeof(u16)) {
+                       u16 data = tulip_read_eeprom(dev, i/2, ee_addr_size);
+                       ee_data[i] = data & 0xff;
+                       ee_data[i + 1] = data >> 8;
+               }
+ 
+               /* DEC now has a specification (see Notes) but early board makers
+                  just put the address in the first EEPROM locations. */
+               /* This does  memcmp(ee_data, ee_data+16, 8) */
+               for (i = 0; i < 8; i ++)
+                       if (ee_data[i] != ee_data[16+i])
+                               sa_offset = 20;
+               if (chip_idx == CONEXANT) {
+                       /* Check that the tuple type and length is correct. */
+                       if (ee_data[0x198] == 0x04  &&  ee_data[0x199] == 6)
+                               sa_offset = 0x19A;
+               } else if (ee_data[0] == 0xff  &&  ee_data[1] == 0xff &&
+                                  ee_data[2] == 0) {
+                       sa_offset = 2;          /* Grrr, damn Matrox boards. */
+                       multiport_cnt = 4;
+               }
+ #ifdef CONFIG_MIPS_COBALT
+                if ((pdev->bus->number == 0) &&
+                    ((PCI_SLOT(pdev->devfn) == 7) ||
+                     (PCI_SLOT(pdev->devfn) == 12))) {
+                        /* Cobalt MAC address in first EEPROM locations. */
+                        sa_offset = 0;
+                      /* Ensure our media table fixup get's applied */
+                      memcpy(ee_data + 16, ee_data, 8);
+                }
+ #endif
+ #ifdef CONFIG_GSC
+               /* Check to see if we have a broken srom */
+               if (ee_data[0] == 0x61 && ee_data[1] == 0x10) {
+                       /* pci_vendor_id and subsystem_id are swapped */
+                       ee_data[0] = ee_data[2];
+                       ee_data[1] = ee_data[3];
+                       ee_data[2] = 0x61;
+                       ee_data[3] = 0x10;
+ 
+                       /* HSC-PCI boards need to be byte-swaped and shifted
+                        * up 1 word.  This shift needs to happen at the end
+                        * of the MAC first because of the 2 byte overlap.
+                        */
+                       for (i = 4; i >= 0; i -= 2) {
+                               ee_data[17 + i + 3] = ee_data[17 + i];
+                               ee_data[16 + i + 5] = ee_data[16 + i];
+                       }
+               }
+ #endif
+ 
+               for (i = 0; i < 6; i ++) {
+                       dev->dev_addr[i] = ee_data[i + sa_offset];
+                       sum += ee_data[i + sa_offset];
+               }
+       }
+       /* Lite-On boards have the address byte-swapped. */
+       if ((dev->dev_addr[0] == 0xA0 ||
+            dev->dev_addr[0] == 0xC0 ||
+            dev->dev_addr[0] == 0x02) &&
+           dev->dev_addr[1] == 0x00)
+               for (i = 0; i < 6; i+=2) {
+                       char tmp = dev->dev_addr[i];
+                       dev->dev_addr[i] = dev->dev_addr[i+1];
+                       dev->dev_addr[i+1] = tmp;
+               }
+       /* On the Zynx 315 Etherarray and other multiport boards only the
+          first Tulip has an EEPROM.
+          On Sparc systems the mac address is held in the OBP property
+          "local-mac-address".
+          The addresses of the subsequent ports are derived from the first.
+          Many PCI BIOSes also incorrectly report the IRQ line, so we correct
+          that here as well. */
+       if (sum == 0  || sum == 6*0xff) {
+ #if defined(CONFIG_SPARC)
+               struct device_node *dp = pci_device_to_OF_node(pdev);
+               const unsigned char *addr;
+               int len;
+ #endif
+               eeprom_missing = 1;
+               for (i = 0; i < 5; i++)
+                       dev->dev_addr[i] = last_phys_addr[i];
+               dev->dev_addr[i] = last_phys_addr[i] + 1;
+ #if defined(CONFIG_SPARC)
+               addr = of_get_property(dp, "local-mac-address", &len);
+               if (addr && len == 6)
+                       memcpy(dev->dev_addr, addr, 6);
+ #endif
+ #if defined(__i386__) || defined(__x86_64__)  /* Patch up x86 BIOS bug. */
+               if (last_irq)
+                       irq = last_irq;
+ #endif
+       }
+ 
+       for (i = 0; i < 6; i++)
+               last_phys_addr[i] = dev->dev_addr[i];
+       last_irq = irq;
+       dev->irq = irq;
+ 
+       /* The lower four bits are the media type. */
+       if (board_idx >= 0  &&  board_idx < MAX_UNITS) {
+               if (options[board_idx] & MEDIA_MASK)
+                       tp->default_port = options[board_idx] & MEDIA_MASK;
+               if ((options[board_idx] & FullDuplex) || full_duplex[board_idx] > 0)
+                       tp->full_duplex = 1;
+               if (mtu[board_idx] > 0)
+                       dev->mtu = mtu[board_idx];
+       }
+       if (dev->mem_start & MEDIA_MASK)
+               tp->default_port = dev->mem_start & MEDIA_MASK;
+       if (tp->default_port) {
+               pr_info(DRV_NAME "%d: Transceiver selection forced to %s\n",
+                       board_idx, medianame[tp->default_port & MEDIA_MASK]);
+               tp->medialock = 1;
+               if (tulip_media_cap[tp->default_port] & MediaAlwaysFD)
+                       tp->full_duplex = 1;
+       }
+       if (tp->full_duplex)
+               tp->full_duplex_lock = 1;
+ 
+       if (tulip_media_cap[tp->default_port] & MediaIsMII) {
+               static const u16 media2advert[] = {
+                       0x20, 0x40, 0x03e0, 0x60, 0x80, 0x100, 0x200
+               };
+               tp->mii_advertise = media2advert[tp->default_port - 9];
+               tp->mii_advertise |= (tp->flags & HAS_8023X); /* Matching bits! */
+       }
+ 
+       if (tp->flags & HAS_MEDIA_TABLE) {
+               sprintf(dev->name, DRV_NAME "%d", board_idx);   /* hack */
+               tulip_parse_eeprom(dev);
+               strcpy(dev->name, "eth%d");                     /* un-hack */
+       }
+ 
+       if ((tp->flags & ALWAYS_CHECK_MII) ||
+               (tp->mtable  &&  tp->mtable->has_mii) ||
+               ( ! tp->mtable  &&  (tp->flags & HAS_MII))) {
+               if (tp->mtable  &&  tp->mtable->has_mii) {
+                       for (i = 0; i < tp->mtable->leafcount; i++)
+                               if (tp->mtable->mleaf[i].media == 11) {
+                                       tp->cur_index = i;
+                                       tp->saved_if_port = dev->if_port;
+                                       tulip_select_media(dev, 2);
+                                       dev->if_port = tp->saved_if_port;
+                                       break;
+                               }
+               }
+ 
+               /* Find the connected MII xcvrs.
+                  Doing this in open() would allow detecting external xcvrs
+                  later, but takes much time. */
+               tulip_find_mii (dev, board_idx);
+       }
+ 
+       /* The Tulip-specific entries in the device structure. */
+       dev->netdev_ops = &tulip_netdev_ops;
+       dev->watchdog_timeo = TX_TIMEOUT;
+ #ifdef CONFIG_TULIP_NAPI
+       netif_napi_add(dev, &tp->napi, tulip_poll, 16);
+ #endif
+       SET_ETHTOOL_OPS(dev, &ops);
+ 
+       if (register_netdev(dev))
+               goto err_out_free_ring;
+ 
+       pci_set_drvdata(pdev, dev);
+ 
+       dev_info(&dev->dev,
+ #ifdef CONFIG_TULIP_MMIO
+                "%s rev %d at MMIO %#llx,%s %pM, IRQ %d\n",
+ #else
+                "%s rev %d at Port %#llx,%s %pM, IRQ %d\n",
+ #endif
+                chip_name, pdev->revision,
+                (unsigned long long)pci_resource_start(pdev, TULIP_BAR),
+                eeprom_missing ? " EEPROM not present," : "",
+                dev->dev_addr, irq);
+ 
+         if (tp->chip_id == PNIC2)
+               tp->link_change = pnic2_lnk_change;
+       else if (tp->flags & HAS_NWAY)
+               tp->link_change = t21142_lnk_change;
+       else if (tp->flags & HAS_PNICNWAY)
+               tp->link_change = pnic_lnk_change;
+ 
+       /* Reset the xcvr interface and turn on heartbeat. */
+       switch (chip_idx) {
+       case DC21140:
+       case DM910X:
+       default:
+               if (tp->mtable)
+                       iowrite32(tp->mtable->csr12dir | 0x100, ioaddr + CSR12);
+               break;
+       case DC21142:
+               if (tp->mii_cnt  ||  tulip_media_cap[dev->if_port] & MediaIsMII) {
+                       iowrite32(csr6_mask_defstate, ioaddr + CSR6);
+                       iowrite32(0x0000, ioaddr + CSR13);
+                       iowrite32(0x0000, ioaddr + CSR14);
+                       iowrite32(csr6_mask_hdcap, ioaddr + CSR6);
+               } else
+                       t21142_start_nway(dev);
+               break;
+       case PNIC2:
+               /* just do a reset for sanity sake */
+               iowrite32(0x0000, ioaddr + CSR13);
+               iowrite32(0x0000, ioaddr + CSR14);
+               break;
+       case LC82C168:
+               if ( ! tp->mii_cnt) {
+                       tp->nway = 1;
+                       tp->nwayset = 0;
+                       iowrite32(csr6_ttm | csr6_ca, ioaddr + CSR6);
+                       iowrite32(0x30, ioaddr + CSR12);
+                       iowrite32(0x0001F078, ioaddr + CSR6);
+                       iowrite32(0x0201F078, ioaddr + CSR6); /* Turn on autonegotiation. */
+               }
+               break;
+       case MX98713:
+       case COMPEX9881:
+               iowrite32(0x00000000, ioaddr + CSR6);
+               iowrite32(0x000711C0, ioaddr + CSR14); /* Turn on NWay. */
+               iowrite32(0x00000001, ioaddr + CSR13);
+               break;
+       case MX98715:
+       case MX98725:
+               iowrite32(0x01a80000, ioaddr + CSR6);
+               iowrite32(0xFFFFFFFF, ioaddr + CSR14);
+               iowrite32(0x00001000, ioaddr + CSR12);
+               break;
+       case COMET:
+               /* No initialization necessary. */
+               break;
+       }
+ 
+       /* put the chip in snooze mode until opened */
+       tulip_set_power_state (tp, 0, 1);
+ 
+       return 0;
+ 
+ err_out_free_ring:
+       pci_free_consistent (pdev,
+                            sizeof (struct tulip_rx_desc) * RX_RING_SIZE +
+                            sizeof (struct tulip_tx_desc) * TX_RING_SIZE,
+                            tp->rx_ring, tp->rx_ring_dma);
+ 
+ err_out_mtable:
+       kfree (tp->mtable);
+       pci_iounmap(pdev, ioaddr);
+ 
+ err_out_free_res:
+       pci_release_regions (pdev);
+ 
+ err_out_free_netdev:
+       free_netdev (dev);
+       return -ENODEV;
+ }
+ 
+ 
+ /*
+  * Program the Comet power-management registers for the wake-up sources
+  * requested in @wolopts.  Nothing is touched unless the chip has the
+  * COMET_PM flag set.
+  */
+ static void tulip_set_wolopts (struct pci_dev *pdev, u32 wolopts)
+ {
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct tulip_private *tp = netdev_priv(dev);
+       void __iomem *ioaddr = tp->base_addr;
+       unsigned int csr;
+ 
+       if (!(tp->flags & COMET_PM))
+               return;
+ 
+       /* CSR18: clear pmes_sticky, apm_mode and d3a, then set pm_mode. */
+       csr = ioread32(ioaddr + CSR18);
+       csr &= ~(comet_csr18_pmes_sticky | comet_csr18_apm_mode | comet_csr18_d3a);
+       csr |= comet_csr18_pm_mode;
+       iowrite32(csr, ioaddr + CSR18);
+ 
+       /* Set the Wake-up Control/Status Register to the given WOL options:
+          start with every enable bit off ... */
+       csr = ioread32(ioaddr + CSR13);
+       csr &= ~(comet_csr13_linkoffe | comet_csr13_linkone | comet_csr13_wfre | comet_csr13_lsce | comet_csr13_mpre);
+       /* ... and switch on only what the caller asked for. */
+       if (wolopts & WAKE_MAGIC)
+               csr |= comet_csr13_mpre;
+       if (wolopts & WAKE_PHY)
+               csr |= comet_csr13_linkoffe | comet_csr13_linkone | comet_csr13_lsce;
+       /* Clear the event flags */
+       csr |= comet_csr13_wfr | comet_csr13_mpr | comet_csr13_lsc;
+       iowrite32(csr, ioaddr + CSR13);
+ }
+ 
+ #ifdef CONFIG_PM
+ 
+ 
+ /*
+  * tulip_suspend - legacy PCI suspend callback.
+  * @pdev:  PCI device being suspended.
+  * @state: PM message describing the transition.
+  *
+  * A running interface is stopped first (tulip_down, detach, free IRQ).
+  * PCI state is then saved, the device disabled and moved to the power
+  * state picked by pci_choose_state().  For PM_EVENT_SUSPEND into a
+  * non-D0 state the stored Wake-on-LAN options are programmed and wake
+  * is enabled beforehand.
+  *
+  * Returns 0, or -EINVAL when no net_device is attached to @pdev.
+  */
+ static int tulip_suspend (struct pci_dev *pdev, pm_message_t state)
+ {
+       pci_power_t pstate;
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct tulip_private *tp;
+ 
+       if (!dev)
+               return -EINVAL;
+ 
+       /* Derive the private area only once dev is known to be valid. */
+       tp = netdev_priv(dev);
+ 
+       if (!netif_running(dev))
+               goto save_state;
+ 
+       tulip_down(dev);
+ 
+       netif_device_detach(dev);
+       free_irq(dev->irq, dev);
+ 
+ save_state:
+       pci_save_state(pdev);
+       pci_disable_device(pdev);
+       pstate = pci_choose_state(pdev, state);
+       if (state.event == PM_EVENT_SUSPEND && pstate != PCI_D0) {
+               int rc;
+ 
+               tulip_set_wolopts(pdev, tp->wolinfo.wolopts);
+               rc = pci_enable_wake(pdev, pstate, tp->wolinfo.wolopts);
+               if (rc)
+                       pr_err("pci_enable_wake failed (%d)\n", rc);
+       }
+       pci_set_power_state(pdev, pstate);
+ 
+       return 0;
+ }
+ 
+ 
+ /*
+  * tulip_resume - legacy PCI resume callback.
+  * @pdev: PCI device being resumed.
+  *
+  * Puts the device back into D0 and restores its PCI state.  If the
+  * interface was running, the device is re-enabled, its IRQ requested
+  * again, Comet wake-up state cleared, and the interface re-attached and
+  * brought up.
+  *
+  * Returns 0 on success, -EINVAL when no net_device is attached to @pdev,
+  * or the error from pci_enable_device()/request_irq().
+  */
+ static int tulip_resume(struct pci_dev *pdev)
+ {
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct tulip_private *tp;
+       void __iomem *ioaddr;
+       int retval;
+       unsigned int tmp;
+ 
+       if (!dev)
+               return -EINVAL;
+ 
+       /* Only touch the private area after the NULL check above;
+          reading tp->base_addr earlier would dereference a NULL dev. */
+       tp = netdev_priv(dev);
+       ioaddr = tp->base_addr;
+ 
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+ 
+       if (!netif_running(dev))
+               return 0;
+ 
+       if ((retval = pci_enable_device(pdev))) {
+               pr_err("pci_enable_device failed in resume\n");
+               return retval;
+       }
+ 
+       if ((retval = request_irq(dev->irq, tulip_interrupt, IRQF_SHARED, dev->name, dev))) {
+               pr_err("request_irq failed in resume\n");
+               return retval;
+       }
+ 
+       if (tp->flags & COMET_PM) {
+               pci_enable_wake(pdev, PCI_D3hot, 0);
+               pci_enable_wake(pdev, PCI_D3cold, 0);
+ 
+               /* Clear the PMES flag */
+               tmp = ioread32(ioaddr + CSR20);
+               tmp |= comet_csr20_pmes;
+               iowrite32(tmp, ioaddr + CSR20);
+ 
+               /* Disable all wake-up events */
+               tulip_set_wolopts(pdev, 0);
+       }
+       netif_device_attach(dev);
+ 
+       if (netif_running(dev))
+               tulip_up(dev);
+ 
+       return 0;
+ }
+ 
+ #endif /* CONFIG_PM */
+ 
+ /*
+  * tulip_remove_one - PCI remove callback for one Tulip device.
+  * @pdev: PCI device being removed.
+  *
+  * Returns immediately when no net_device is stored as driver data.
+  * Otherwise the PCI device is disabled, the net_device unregistered,
+  * and the descriptor memory, media table, register mapping, net_device
+  * and PCI regions are released, in that order.
+  */
+ static void __devexit tulip_remove_one (struct pci_dev *pdev)
+ {
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct tulip_private *tp;
+ 
+       if (!dev)
+               return;
+ 
+       tp = netdev_priv(dev);
++
++      /* shoot NIC in the head before deallocating descriptors:
++       * the device is disabled ahead of unregister_netdev() and ahead
++       * of the pci_free_consistent() call below */
++      pci_disable_device(tp->pdev);
++
+       unregister_netdev(dev);
+       pci_free_consistent (pdev,
+                            sizeof (struct tulip_rx_desc) * RX_RING_SIZE +
+                            sizeof (struct tulip_tx_desc) * TX_RING_SIZE,
+                            tp->rx_ring, tp->rx_ring_dma);     /* RX + TX descriptors, freed as one region */
+       kfree (tp->mtable);
+       pci_iounmap(pdev, tp->base_addr);
+       free_netdev (dev);      /* tp lives inside dev; do not use tp past this point */
+       pci_release_regions (pdev);
+       pci_set_drvdata (pdev, NULL);
+ 
+       /* pci_power_off (pdev, -1); */
+ }
+ 
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ /*
+  * Polling 'interrupt' - used by things like netconsole to send skbs
+  * without having to re-enable interrupts. It's not called while
+  * the interrupt routine is executing.
+  *
+  * @dev: net_device whose handler is run by hand.  tulip_interrupt() is
+  * called directly with dev->irq and dev as its arguments, bracketed by
+  * disable_irq()/enable_irq() on that same IRQ line.
+  */
+ 
+ static void poll_tulip (struct net_device *dev)
+ {
+       /* disable_irq here is not very nice, but with the lockless
+          interrupt handler we have no other choice. */
+       disable_irq(dev->irq);
+       tulip_interrupt (dev->irq, dev);
+       enable_irq(dev->irq);
+ }
+ #endif
+ /* PCI driver descriptor: probe/remove callbacks for the IDs listed in
+    tulip_pci_tbl; the suspend/resume hooks are only wired up when
+    CONFIG_PM is set. */
+ static struct pci_driver tulip_driver = {
+       .name           = DRV_NAME,
+       .id_table       = tulip_pci_tbl,
+       .probe          = tulip_init_one,
+       .remove         = __devexit_p(tulip_remove_one),
+ #ifdef CONFIG_PM
+       .suspend        = tulip_suspend,
+       .resume         = tulip_resume,
+ #endif /* CONFIG_PM */
+ };
+ 
+ /* Module entry point.  Prints the version string when built as a module,
+    copies the rx_copybreak and max_interrupt_work parameters into their
+    tulip_* globals and registers tulip_driver.  Returns whatever
+    pci_register_driver() returns. */
+ static int __init tulip_init (void)
+ {
+ #ifdef MODULE
+       pr_info("%s", version);
+ #endif
+ 
+       /* copy module parms into globals */
+       tulip_rx_copybreak = rx_copybreak;
+       tulip_max_interrupt_work = max_interrupt_work;
+ 
+       /* probe for and init boards */
+       return pci_register_driver(&tulip_driver);
+ }
+ 
+ /* Module exit point: unregisters tulip_driver from the PCI core. */
+ static void __exit tulip_cleanup (void)
+ {
+       pci_unregister_driver (&tulip_driver);
+ }
+ 
+ /* Hook tulip_init()/tulip_cleanup() up as the module's init and exit routines. */
+ module_init(tulip_init);
+ module_exit(tulip_cleanup);
diff --cc drivers/net/ethernet/ibm/ehea/ehea_main.c

index 0000000,37b70f7..5fdd5bb

mode 000000,100644..100644
--- /dev/null
--- 2/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@@ -1,0 -1,3556 +1,3568 @@@
+ /*
+  *  linux/drivers/net/ethernet/ibm/ehea/ehea_main.c
+  *
+  *  eHEA ethernet device driver for IBM eServer System p
+  *
+  *  (C) Copyright IBM Corp. 2006
+  *
+  *  Authors:
+  *     Christoph Raisch <raisch@de.ibm.com>
+  *     Jan-Bernd Themann <themann@de.ibm.com>
+  *     Thomas Klein <tklein@de.ibm.com>
+  *
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2, or (at your option)
+  * any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.        See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+  */
+ 
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ 
+ #include <linux/in.h>
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
+ #include <linux/if.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
+ #include <linux/if_ether.h>
+ #include <linux/notifier.h>
+ #include <linux/reboot.h>
+ #include <linux/memory.h>
+ #include <asm/kexec.h>
+ #include <linux/mutex.h>
+ #include <linux/prefetch.h>
+ 
+ #include <net/ip.h>
+ 
+ #include "ehea.h"
+ #include "ehea_qmr.h"
+ #include "ehea_phyp.h"
+ 
+ 
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+ MODULE_DESCRIPTION("IBM eServer HEA Driver");
+ MODULE_VERSION(DRV_VERSION);
+ 
+ /* Backing variables for the module parameters registered below. */
+ static int msg_level = -1;
+ static int rq1_entries = EHEA_DEF_ENTRIES_RQ1;
+ static int rq2_entries = EHEA_DEF_ENTRIES_RQ2;
+ static int rq3_entries = EHEA_DEF_ENTRIES_RQ3;
+ static int sq_entries = EHEA_DEF_ENTRIES_SQ;
+ static int use_mcs = 1;
+ static int prop_carrier_state;
+ /* Each variable is registered as an int parameter with permission argument 0. */
+ module_param(msg_level, int, 0);
+ module_param(rq1_entries, int, 0);
+ module_param(rq2_entries, int, 0);
+ module_param(rq3_entries, int, 0);
+ module_param(sq_entries, int, 0);
+ module_param(prop_carrier_state, int, 0);
+ module_param(use_mcs, int, 0);
+ /*
+  * Parameter descriptions.
+  * NOTE(review): the four queue-size descriptions end in ")" with no
+  * matching "(" - looks like a cosmetic slip in the text; confirm before
+  * touching the strings.
+  */
+ MODULE_PARM_DESC(msg_level, "msg_level");
+ MODULE_PARM_DESC(prop_carrier_state, "Propagate carrier state of physical "
+                "port to stack. 1:yes, 0:no.  Default = 0 ");
+ MODULE_PARM_DESC(rq3_entries, "Number of entries for Receive Queue 3 "
+                "[2^x - 1], x = [6..14]. Default = "
+                __MODULE_STRING(EHEA_DEF_ENTRIES_RQ3) ")");
+ MODULE_PARM_DESC(rq2_entries, "Number of entries for Receive Queue 2 "
+                "[2^x - 1], x = [6..14]. Default = "
+                __MODULE_STRING(EHEA_DEF_ENTRIES_RQ2) ")");
+ MODULE_PARM_DESC(rq1_entries, "Number of entries for Receive Queue 1 "
+                "[2^x - 1], x = [6..14]. Default = "
+                __MODULE_STRING(EHEA_DEF_ENTRIES_RQ1) ")");
+ MODULE_PARM_DESC(sq_entries, " Number of entries for the Send Queue  "
+                "[2^x - 1], x = [6..14]. Default = "
+                __MODULE_STRING(EHEA_DEF_ENTRIES_SQ) ")");
+ MODULE_PARM_DESC(use_mcs, " Multiple receive queues, 1: enable, 0: disable, "
+                "Default = 1");
+ /* Driver-wide state shared by the functions in this file. */
+ static int port_name_cnt;
+ static LIST_HEAD(adapter_list);         /* list of struct ehea_adapter, linked through ->list */
+ static unsigned long ehea_driver_flags; /* bit flags, e.g. __EHEA_STOP_XFER */
+ static DEFINE_MUTEX(dlpar_mem_lock);
+ struct ehea_fw_handle_array ehea_fw_handles;    /* rebuilt by ehea_update_firmware_handles() */
+ struct ehea_bcmc_reg_array ehea_bcmc_regs;      /* rebuilt by ehea_update_bcmc_registrations() */
+ 
+ /* Forward declarations: both are referenced by ehea_driver before their definitions. */
+ static int __devinit ehea_probe_adapter(struct platform_device *dev,
+                                       const struct of_device_id *id);
+ 
+ static int __devexit ehea_remove(struct platform_device *dev);
+ /* OF match table exported through MODULE_DEVICE_TABLE().  Besides the
+    "IBM,lhea" adapter node it also lists "IBM,lhea-ethernet" network
+    nodes; the driver itself matches through ehea_device_table instead. */
++static struct of_device_id ehea_module_device_table[] = {
++      {
++              .name = "lhea",
++              .compatible = "IBM,lhea",
++      },
++      {
++              .type = "network",
++              .compatible = "IBM,lhea-ethernet",
++      },
++      {},
++};
++MODULE_DEVICE_TABLE(of, ehea_module_device_table);
++/* OF match table used by ehea_driver (.of_match_table): only the
++   "IBM,lhea" adapter node, terminated by an empty entry. */
+ static struct of_device_id ehea_device_table[] = {
+       {
+               .name = "lhea",
+               .compatible = "IBM,lhea",
+       },
+       {},
+ };
- -MODULE_DEVICE_TABLE(of, ehea_device_table);
+ /* Platform (OF) driver descriptor: matches nodes from ehea_device_table
+    and dispatches to ehea_probe_adapter()/ehea_remove(). */
+ static struct of_platform_driver ehea_driver = {
+       .driver = {
+               .name = "ehea",
+               .owner = THIS_MODULE,
+               .of_match_table = ehea_device_table,
+       },
+       .probe = ehea_probe_adapter,
+       .remove = ehea_remove,
+ };
+ 
+ /*
+  * Log @len bytes starting at @adr in rows of 16 bytes, each row printed
+  * as two 64-bit hex words and prefixed with @msg, the row address and
+  * the row offset.  A final partial row is still printed as a full
+  * 16-byte row.
+  */
+ void ehea_dump(void *adr, int len, char *msg)
+ {
+       unsigned char *row = adr;
+       int ofs = 0;
+ 
+       while (ofs < len) {
+               u64 *words = (u64 *)row;
+ 
+               pr_info("%s adr=%p ofs=%04x %016llx %016llx\n",
+                       msg, row, ofs, words[0], words[1]);
+               row += 16;
+               ofs += 16;
+       }
+ }
+ 
+ /*
+  * Queue @port's reset_task work item, unless port resets are currently
+  * suppressed by the __EHEA_DISABLE_PORT_RESET bit in port->flags.
+  */
+ void ehea_schedule_port_reset(struct ehea_port *port)
+ {
+       if (test_bit(__EHEA_DISABLE_PORT_RESET, &port->flags))
+               return;
+ 
+       schedule_work(&port->reset_task);
+ }
+ /*
+  * Rebuild ehea_fw_handles.arr from the adapters on adapter_list.
+  *
+  * Pass 1 counts adapters, ports in state EHEA_PORT_UP and their
+  * num_def_qps port resources to size the new array.  Pass 2 stores one
+  * (adapter handle, firmware handle) pair per entry.  The whole update
+  * runs with ehea_fw_handles.lock held.  If the allocation fails the
+  * previous array is kept; if nothing was counted the old array is freed
+  * and the table becomes empty.
+  */
+ static void ehea_update_firmware_handles(void)
+ {
+       struct ehea_fw_handle_entry *arr = NULL;
+       struct ehea_adapter *adapter;
+       int num_adapters = 0;
+       int num_ports = 0;
+       int num_portres = 0;
+       int i = 0;      /* next free slot in arr; stored as num_entries at the end */
+       int num_fw_handles, k, l;
+ 
+       /* Determine number of handles (pass 1), under the table lock */
+       mutex_lock(&ehea_fw_handles.lock);
+ 
+       list_for_each_entry(adapter, &adapter_list, list) {
+               num_adapters++;
+ 
+               for (k = 0; k < EHEA_MAX_PORTS; k++) {
+                       struct ehea_port *port = adapter->port[k];
+ 
+                       if (!port || (port->state != EHEA_PORT_UP))
+                               continue;
+ 
+                       num_ports++;
+                       num_portres += port->num_def_qps;
+               }
+       }
+ 
+       num_fw_handles = num_adapters * EHEA_NUM_ADAPTER_FW_HANDLES +
+                        num_ports * EHEA_NUM_PORT_FW_HANDLES +
+                        num_portres * EHEA_NUM_PORTRES_FW_HANDLES;
+ 
+       if (num_fw_handles) {
+               arr = kcalloc(num_fw_handles, sizeof(*arr), GFP_KERNEL);
+               if (!arr)
+                       goto out;  /* Keep the existing array */
+       } else
+               goto out_update;        /* nothing to record: install an empty table */
+       /* Pass 2: fill the new array.  The counters from pass 1 are counted
+          back down and stop the fill once they reach zero - presumably so
+          that no more entries are written than were sized above. */
+       list_for_each_entry(adapter, &adapter_list, list) {
+               if (num_adapters == 0)
+                       break;
+ 
+               for (k = 0; k < EHEA_MAX_PORTS; k++) {
+                       struct ehea_port *port = adapter->port[k];
+ 
+                       if (!port || (port->state != EHEA_PORT_UP) ||
+                           (num_ports == 0))
+                               continue;
+ 
+                       /* six handles per port resource: qp, send/recv cq, eq, send/recv mr */
+                       for (l = 0; l < port->num_def_qps; l++) {
+                               struct ehea_port_res *pr = &port->port_res[l];
+ 
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->qp->fw_handle;
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->send_cq->fw_handle;
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->recv_cq->fw_handle;
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->eq->fw_handle;
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->send_mr.handle;
+                               arr[i].adh = adapter->handle;
+                               arr[i++].fwh = pr->recv_mr.handle;
+                       }
+                       /* one handle per port: its qp_eq */
+                       arr[i].adh = adapter->handle;
+                       arr[i++].fwh = port->qp_eq->fw_handle;
+                       num_ports--;
+               }
+ 
+               /* per adapter: the neq, plus the mr when its handle is non-zero */
+               arr[i].adh = adapter->handle;
+               arr[i++].fwh = adapter->neq->fw_handle;
+ 
+               if (adapter->mr.handle) {
+                       arr[i].adh = adapter->handle;
+                       arr[i++].fwh = adapter->mr.handle;
+               }
+               num_adapters--;
+       }
+ 
+ out_update:
+       /* swap in the new table (arr may be NULL with i == 0) */
+       kfree(ehea_fw_handles.arr);
+       ehea_fw_handles.arr = arr;
+       ehea_fw_handles.num_entries = i;
+ out:
+       mutex_unlock(&ehea_fw_handles.lock);
+ }
+ /*
+  * Rebuild ehea_bcmc_regs.arr from the ports of all adapters on
+  * adapter_list.
+  *
+  * Each port in state EHEA_PORT_UP contributes two broadcast entries for
+  * its own MAC address and two multicast entries per address on its
+  * mc_list (one EHEA_BCMC_UNTAGGED, one EHEA_BCMC_VLANID_ALL each).  The
+  * update runs under ehea_bcmc_regs.lock.  If the allocation fails the
+  * previous array is kept; with nothing to register the old array is
+  * freed and the table becomes empty.
+  */
+ static void ehea_update_bcmc_registrations(void)
+ {
+       unsigned long flags;
+       struct ehea_bcmc_reg_entry *arr = NULL;
+       struct ehea_adapter *adapter;
+       struct ehea_mc_list *mc_entry;
+       int num_registrations = 0;
+       int i = 0;      /* next free slot in arr; stored as num_entries at the end */
+       int k;
+ 
+       spin_lock_irqsave(&ehea_bcmc_regs.lock, flags);
+ 
+       /* Determine number of registrations */
+       list_for_each_entry(adapter, &adapter_list, list)
+               for (k = 0; k < EHEA_MAX_PORTS; k++) {
+                       struct ehea_port *port = adapter->port[k];
+ 
+                       if (!port || (port->state != EHEA_PORT_UP))
+                               continue;
+ 
+                       num_registrations += 2; /* Broadcast registrations */
+ 
+                       list_for_each_entry(mc_entry, &port->mc_list->list,list)
+                               num_registrations += 2;
+               }
+       /* GFP_ATOMIC: the allocation happens inside the spin_lock_irqsave() section */
+       if (num_registrations) {
+               arr = kcalloc(num_registrations, sizeof(*arr), GFP_ATOMIC);
+               if (!arr)
+                       goto out;  /* Keep the existing array */
+       } else
+               goto out_update;
+       /* Fill pass: num_registrations is counted back down and the fill
+          stops as soon as it reaches zero. */
+       list_for_each_entry(adapter, &adapter_list, list) {
+               for (k = 0; k < EHEA_MAX_PORTS; k++) {
+                       struct ehea_port *port = adapter->port[k];
+ 
+                       if (!port || (port->state != EHEA_PORT_UP))
+                               continue;
+ 
+                       if (num_registrations == 0)
+                               goto out_update;
+ 
+                       /* broadcast pair for the port's own MAC address */
+                       arr[i].adh = adapter->handle;
+                       arr[i].port_id = port->logical_port_id;
+                       arr[i].reg_type = EHEA_BCMC_BROADCAST |
+                                         EHEA_BCMC_UNTAGGED;
+                       arr[i++].macaddr = port->mac_addr;
+ 
+                       arr[i].adh = adapter->handle;
+                       arr[i].port_id = port->logical_port_id;
+                       arr[i].reg_type = EHEA_BCMC_BROADCAST |
+                                         EHEA_BCMC_VLANID_ALL;
+                       arr[i++].macaddr = port->mac_addr;
+                       num_registrations -= 2;
+ 
+                       /* multicast pair for every address on the port's mc_list */
+                       list_for_each_entry(mc_entry,
+                                           &port->mc_list->list, list) {
+                               if (num_registrations == 0)
+                                       goto out_update;
+ 
+                               arr[i].adh = adapter->handle;
+                               arr[i].port_id = port->logical_port_id;
+                               arr[i].reg_type = EHEA_BCMC_SCOPE_ALL |
+                                                 EHEA_BCMC_MULTICAST |
+                                                 EHEA_BCMC_UNTAGGED;
+                               arr[i++].macaddr = mc_entry->macaddr;
+ 
+                               arr[i].adh = adapter->handle;
+                               arr[i].port_id = port->logical_port_id;
+                               arr[i].reg_type = EHEA_BCMC_SCOPE_ALL |
+                                                 EHEA_BCMC_MULTICAST |
+                                                 EHEA_BCMC_VLANID_ALL;
+                               arr[i++].macaddr = mc_entry->macaddr;
+                               num_registrations -= 2;
+                       }
+               }
+       }
+ 
+ out_update:
+       /* swap in the new table (arr may be NULL with i == 0) */
+       kfree(ehea_bcmc_regs.arr);
+       ehea_bcmc_regs.arr = arr;
+       ehea_bcmc_regs.num_entries = i;
+ out:
+       spin_unlock_irqrestore(&ehea_bcmc_regs.lock, flags);
+ }
+ 
+ /*
+  * ehea_get_stats64 - fill @stats with the current counters of @dev.
+  * @dev:   net_device whose private data is the struct ehea_port.
+  * @stats: caller-provided buffer to fill.
+  *
+  * Packet and byte counters are summed over the port's num_def_qps port
+  * resources.  multicast and rx_errors are copied from port->stats, where
+  * ehea_update_stats() stores them.
+  *
+  * Returns @stats, the buffer that was filled.
+  */
+ static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev,
+                                       struct rtnl_link_stats64 *stats)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       u64 rx_packets = 0, tx_packets = 0, rx_bytes = 0, tx_bytes = 0;
+       int i;
+ 
+       for (i = 0; i < port->num_def_qps; i++) {
+               rx_packets += port->port_res[i].rx_packets;
+               rx_bytes   += port->port_res[i].rx_bytes;
+               tx_packets += port->port_res[i].tx_packets;
+               tx_bytes   += port->port_res[i].tx_bytes;
+       }
+ 
+       stats->tx_packets = tx_packets;
+       stats->rx_bytes = rx_bytes;
+       stats->tx_bytes = tx_bytes;
+       stats->rx_packets = rx_packets;
+ 
+       /* These two live only in port->stats; without the copy they never
+          reach the caller's buffer. */
+       stats->multicast = port->stats.multicast;
+       stats->rx_errors = port->stats.rx_errors;
+ 
+       /* Hand back the buffer that was filled, not the port-private copy. */
+       return stats;
+ }
+ 
+ /*
+  * Delayed-work handler: query port control block 2 and copy its rxmcp
+  * and rxuerr counters into port->stats (multicast and rx_errors).  The
+  * work item re-arms itself for 1000 ms later on every path, including
+  * allocation or query failure.
+  */
+ static void ehea_update_stats(struct work_struct *work)
+ {
+       struct ehea_port *port =
+               container_of(work, struct ehea_port, stats_work.work);
+       struct net_device *dev = port->netdev;
+       struct hcp_ehea_port_cb2 *cb2;
+       u64 hret;
+ 
+       cb2 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (cb2) {
+               hret = ehea_h_query_ehea_port(port->adapter->handle,
+                                             port->logical_port_id,
+                                             H_PORT_CB2, H_PORT_CB2_ALL, cb2);
+               if (hret == H_SUCCESS) {
+                       if (netif_msg_hw(port))
+                               ehea_dump(cb2, sizeof(*cb2), "net_device_stats");
+ 
+                       port->stats.multicast = cb2->rxmcp;
+                       port->stats.rx_errors = cb2->rxuerr;
+               } else {
+                       netdev_err(dev, "query_ehea_port failed\n");
+               }
+ 
+               free_page((unsigned long)cb2);
+       } else {
+               netdev_err(dev, "No mem for cb2. Some interface statistics were not updated\n");
+       }
+ 
+       /* Always re-arm, whatever happened above. */
+       schedule_delayed_work(&port->stats_work, msecs_to_jiffies(1000));
+ }
+ /*
+  * Refill receive queue 1 of @pr with up to nr_of_wqes skbs, plus any
+  * refills still owed from earlier calls (rq1_skba.os_skbs).
+  * @index is the array slot to start from; slots are walked downwards
+  * and wrapped with a mask of (rq1_skba.len - 1).  Slots already holding
+  * an skb are counted without allocating.  The number of slots processed
+  * is passed to ehea_update_rq1a(); refills that could not be done are
+  * remembered in rq1_skba.os_skbs.
+  */
+ static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes)
+ {
+       struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr;
+       struct net_device *dev = pr->port->netdev;
+       int max_index_mask = pr->rq1_skba.len - 1;      /* presumably len is a power of two - TODO confirm */
+       int fill_wqes = pr->rq1_skba.os_skbs + nr_of_wqes;
+       int adder = 0;  /* slots processed in this call */
+       int i;
+ 
+       pr->rq1_skba.os_skbs = 0;
+ 
+       /* Transfers stopped: allocate nothing, just remember what is owed. */
+       if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) {
+               if (nr_of_wqes > 0)
+                       pr->rq1_skba.index = index;
+               pr->rq1_skba.os_skbs = fill_wqes;
+               return;
+       }
+ 
+       for (i = 0; i < fill_wqes; i++) {
+               if (!skb_arr_rq1[index]) {
+                       skb_arr_rq1[index] = netdev_alloc_skb(dev,
+                                                             EHEA_L_PKT_SIZE);
+                       if (!skb_arr_rq1[index]) {
+                               netdev_info(dev, "Unable to allocate enough skb in the array\n");
+                               pr->rq1_skba.os_skbs = fill_wqes - i;   /* still owed */
+                               break;
+                       }
+               }
+               index--;
+               index &= max_index_mask;        /* wrap around below slot 0 */
+               adder++;
+       }
+ 
+       if (adder == 0)
+               return;
+ 
+       /* Ring doorbell */
+       ehea_update_rq1a(pr->qp, adder);
+ }
+ 
+ /*
+  * Initial fill of receive queue 1: allocate skbs for slots 0..nr_rq1a-1
+  * of the rq1 skb array and announce them through ehea_update_rq1a().
+  * @pr:      port resource owning the queue.
+  * @nr_rq1a: number of slots to populate; rejected with an error message
+  *           when larger than the skb array.
+  *
+  * Allocation stops at the first failure.  The doorbell value is one less
+  * than the number of skbs allocated, and the doorbell is not rung at all
+  * when nothing was allocated.
+  */
+ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a)
+ {
+       struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr;
+       struct net_device *dev = pr->port->netdev;
+       int i;
+ 
+       if (nr_rq1a > pr->rq1_skba.len) {
+               netdev_err(dev, "NR_RQ1A bigger than skb array len\n");
+               return;
+       }
+ 
+       for (i = 0; i < nr_rq1a; i++) {
+               skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE);
+               if (!skb_arr_rq1[i]) {
+                       netdev_info(dev, "Not enough memory to allocate skb array\n");
+                       break;
+               }
+       }
+ 
+       /* With no skb allocated, i - 1 would be -1; never post that. */
+       if (i == 0)
+               return;
+ 
+       /* Ring doorbell */
+       ehea_update_rq1a(pr->qp, i - 1);
+ }
+ 
+ /*
+  * Post fresh receive buffers to RQ2 or RQ3.
+  *
+  * @pr:          port resource owning the queue pair and receive MR
+  * @q_skba:      skb bookkeeping array of the queue being refilled
+  * @rq_nr:       receive queue number; 2 rings the RQ2 doorbell, any other
+  *               value rings the RQ3 doorbell
+  * @num_wqes:    number of WQEs to add on top of q_skba->os_skbs
+  * @wqe_type:    type tag encoded into each wr_id
+  * @packet_size: size of each allocated skb and of the posted segment
+  *
+  * Buffers that cannot be posted now are remembered in q_skba->os_skbs so
+  * that a later call retries them.
+  *
+  * Returns 0, or -ENOMEM when an skb allocation fails while the number of
+  * outstanding buffers equals q_skba->len - 2.
+  */
+ static int ehea_refill_rq_def(struct ehea_port_res *pr,
+                             struct ehea_q_skb_arr *q_skba, int rq_nr,
+                             int num_wqes, int wqe_type, int packet_size)
+ {
+       struct net_device *dev = pr->port->netdev;
+       struct ehea_qp *qp = pr->qp;
+       struct sk_buff **skb_arr = q_skba->arr;
+       struct ehea_rwqe *rwqe;
+       int i, index, max_index_mask, fill_wqes;
+       int adder = 0;
+       int ret = 0;
+ 
+       fill_wqes = q_skba->os_skbs + num_wqes;
+       q_skba->os_skbs = 0;
+ 
+       /* Transfers are stopped: just remember how many buffers are owed */
+       if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) {
+               q_skba->os_skbs = fill_wqes;
+               return ret;
+       }
+ 
+       index = q_skba->index;
+       max_index_mask = q_skba->len - 1;
+       for (i = 0; i < fill_wqes; i++) {
+               u64 tmp_addr;
+               struct sk_buff *skb;
+ 
+               skb = netdev_alloc_skb_ip_align(dev, packet_size);
+               if (!skb) {
+                       q_skba->os_skbs = fill_wqes - i;
+                       if (q_skba->os_skbs == q_skba->len - 2) {
+                               netdev_info(pr->port->netdev,
+                                           "rq%i ran dry - no mem for skb\n",
+                                           rq_nr);
+                               ret = -ENOMEM;
+                       }
+                       break;
+               }
+ 
+               tmp_addr = ehea_map_vaddr(skb->data);
+               if (tmp_addr == -1) {
+                       dev_kfree_skb(skb);
+                       q_skba->os_skbs = fill_wqes - i;
+                       ret = 0;
+                       break;
+               }
+ 
+               /*
+                * Record the skb only once it can really be posted, so the
+                * array never keeps a pointer to an skb that was just freed.
+                */
+               skb_arr[index] = skb;
+ 
+               rwqe = ehea_get_next_rwqe(qp, rq_nr);
+               rwqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, wqe_type)
+                           | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, index);
+               rwqe->sg_list[0].l_key = pr->recv_mr.lkey;
+               rwqe->sg_list[0].vaddr = tmp_addr;
+               rwqe->sg_list[0].len = packet_size;
+               rwqe->data_segments = 1;
+ 
+               index++;
+               index &= max_index_mask;
+               adder++;
+       }
+ 
+       q_skba->index = index;
+       if (adder == 0)
+               goto out;
+ 
+       /* Ring doorbell */
+       iosync();
+       if (rq_nr == 2)
+               ehea_update_rq2a(pr->qp, adder);
+       else
+               ehea_update_rq3a(pr->qp, adder);
+ out:
+       return ret;
+ }
+ 
+ 
+ /*
+  * Refill RQ2 with nr_of_wqes buffers: forwards to ehea_refill_rq_def()
+  * with the RQ2 skb array, EHEA_RWQE2_TYPE and EHEA_RQ2_PKT_SIZE, and
+  * returns its result.
+  */
+ static int ehea_refill_rq2(struct ehea_port_res *pr, int nr_of_wqes)
+ {
+       return ehea_refill_rq_def(pr, &pr->rq2_skba, 2,
+                                 nr_of_wqes, EHEA_RWQE2_TYPE,
+                                 EHEA_RQ2_PKT_SIZE);
+ }
+ 
+ 
+ /*
+  * Refill RQ3 with nr_of_wqes buffers: forwards to ehea_refill_rq_def()
+  * with the RQ3 skb array, EHEA_RWQE3_TYPE and EHEA_MAX_PACKET_SIZE, and
+  * returns its result.
+  */
+ static int ehea_refill_rq3(struct ehea_port_res *pr, int nr_of_wqes)
+ {
+       return ehea_refill_rq_def(pr, &pr->rq3_skba, 3,
+                                 nr_of_wqes, EHEA_RWQE3_TYPE,
+                                 EHEA_MAX_PACKET_SIZE);
+ }
+ 
+ /*
+  * Classify a receive completion.
+  *
+  * Always stores the receive queue number decoded from cqe->type in
+  * *rq_num.  Returns 0 when no error bit is set, or when the TCP error bit
+  * is set but the header length is zero; returns -EINVAL otherwise.
+  */
+ static inline int ehea_check_cqe(struct ehea_cqe *cqe, int *rq_num)
+ {
+       *rq_num = (cqe->type & EHEA_CQE_TYPE_RQ) >> 5;
+ 
+       if (!(cqe->status & EHEA_CQE_STAT_ERR_MASK))
+               return 0;
+ 
+       /* A TCP error flagged on a completion without header is accepted */
+       if ((cqe->status & EHEA_CQE_STAT_ERR_TCP) && !cqe->header_length)
+               return 0;
+ 
+       return -EINVAL;
+ }
+ 
+ /*
+  * Finish a received skb from its completion entry: set the data length
+  * (completion byte count minus the 4 CRC bytes), the protocol, the
+  * checksum state and the rx queue index of @pr within its port.
+  */
+ static inline void ehea_fill_skb(struct net_device *dev,
+                                struct sk_buff *skb, struct ehea_cqe *cqe,
+                                struct ehea_port_res *pr)
+ {
+       /* remove CRC */
+       skb_put(skb, cqe->num_bytes_transfered - 4);
+       skb->protocol = eth_type_trans(skb, dev);
+ 
+       if (!(cqe->status & EHEA_CQE_BLIND_CKSUM)) {
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       } else {
+               /*
+                * The packet was not an IPV4 packet so a complemented
+                * checksum was calculated. The value is found in the
+                * Internet Checksum field.
+                */
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               skb->csum = csum_unfold(~cqe->inet_checksum_value);
+       }
+ 
+       skb_record_rx_queue(skb, pr - &pr->port->port_res[0]);
+ }
+ 
+ /*
+  * Take the skb referenced by the completion's wr_id index out of
+  * @skb_array (the slot is cleared) and return it; may return NULL when
+  * the slot was empty.  Before that, the skb in the following slot
+  * (index + 1, wrapped with arr_len - 1 as mask) and the start of its data
+  * are prefetched.
+  */
+ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array,
+                                              int arr_len,
+                                              struct ehea_cqe *cqe)
+ {
+       int skb_index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id);
+       int next = (skb_index + 1) & (arr_len - 1);
+       struct sk_buff *upcoming = skb_array[next];
+       struct sk_buff *skb;
+ 
+       if (upcoming) {
+               void *hdr = upcoming;
+               void *payload = upcoming->data;
+               int line;
+ 
+               prefetchw(hdr);
+               prefetchw(hdr + EHEA_CACHE_LINE);
+ 
+               for (line = 0; line < 4; line++)
+                       prefetch(payload + EHEA_CACHE_LINE * line);
+       }
+ 
+       skb = skb_array[skb_index];
+       skb_array[skb_index] = NULL;
+       return skb;
+ }
+ 
+ /*
+  * Take the skb stored at @wqe_index out of @skb_array (the slot is
+  * cleared) and return it; may return NULL when the slot was empty.  The
+  * skb in the following slot (wrapped with arr_len - 1 as mask) and the
+  * start of its data are prefetched for writing first.
+  */
+ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array,
+                                                 int arr_len, int wqe_index)
+ {
+       int next = (wqe_index + 1) & (arr_len - 1);
+       struct sk_buff *upcoming = skb_array[next];
+       struct sk_buff *skb;
+ 
+       if (upcoming) {
+               void *hdr = upcoming;
+               void *payload = upcoming->data;
+ 
+               prefetchw(hdr);
+               prefetchw(hdr + EHEA_CACHE_LINE);
+               prefetchw(payload);
+               prefetchw(payload + EHEA_CACHE_LINE);
+       }
+ 
+       skb = skb_array[wqe_index];
+       skb_array[wqe_index] = NULL;
+       return skb;
+ }
+ 
+ /*
+  * Handle a receive completion that ehea_check_cqe() rejected.
+  *
+  * Updates the per-queue error counters, releases the skb belonging to an
+  * RQ2/RQ3 completion (bumping the matching processed counter) and, for a
+  * fatal status, schedules a port reset.
+  *
+  * Returns 1 when a port reset was scheduled, 0 otherwise.
+  */
+ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq,
+                                struct ehea_cqe *cqe, int *processed_rq2,
+                                int *processed_rq3)
+ {
+       struct sk_buff *skb;
+ 
+       if (cqe->status & EHEA_CQE_STAT_ERR_TCP)
+               pr->p_stats.err_tcp_cksum++;
+       if (cqe->status & EHEA_CQE_STAT_ERR_IP)
+               pr->p_stats.err_ip_cksum++;
+       if (cqe->status & EHEA_CQE_STAT_ERR_CRC)
+               pr->p_stats.err_frame_crc++;
+ 
+       switch (rq) {
+       case 2:
+               (*processed_rq2)++;
+               skb = get_skb_by_index(pr->rq2_skba.arr, pr->rq2_skba.len, cqe);
+               dev_kfree_skb(skb);
+               break;
+       case 3:
+               (*processed_rq3)++;
+               skb = get_skb_by_index(pr->rq3_skba.arr, pr->rq3_skba.len, cqe);
+               dev_kfree_skb(skb);
+               break;
+       default:
+               break;
+       }
+ 
+       if (!(cqe->status & EHEA_CQE_STAT_FAT_ERR_MASK))
+               return 0;
+ 
+       if (netif_msg_rx_err(pr->port)) {
+               pr_err("Critical receive error for QP %d. Resetting port.\n",
+                      pr->qp->init_attr.qp_nr);
+               ehea_dump(cqe, sizeof(*cqe), "CQE");
+       }
+       ehea_schedule_port_reset(pr->port);
+       return 1;
+ }
+ 
+ /*
+  * Process receive completions of one port resource.
+  *
+  * @dev:    net device the packets are delivered on
+  * @pr:     port resource whose receive queues are polled
+  * @budget: maximum number of completions to consume
+  *
+  * Completions are polled through ehea_poll_rq1().  Good completions are
+  * turned into skbs (data copied out of the CQE for RQ1, skb looked up by
+  * wr_id index for RQ2/RQ3) and handed to napi_gro_receive(); bad ones go
+  * to ehea_treat_poll_error().  Afterwards the rx counters are updated and
+  * all three receive queues are refilled with the number of entries
+  * consumed from each.
+  *
+  * Returns the number of completions consumed.
+  */
+ static int ehea_proc_rwqes(struct net_device *dev,
+                          struct ehea_port_res *pr,
+                          int budget)
+ {
+       struct ehea_port *port = pr->port;
+       struct ehea_qp *qp = pr->qp;
+       struct ehea_cqe *cqe;
+       struct sk_buff *skb;
+       struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr;
+       struct sk_buff **skb_arr_rq2 = pr->rq2_skba.arr;
+       struct sk_buff **skb_arr_rq3 = pr->rq3_skba.arr;
+       int skb_arr_rq1_len = pr->rq1_skba.len;
+       int skb_arr_rq2_len = pr->rq2_skba.len;
+       int skb_arr_rq3_len = pr->rq3_skba.len;
+       int processed, processed_rq1, processed_rq2, processed_rq3;
+       u64 processed_bytes = 0;
+       int wqe_index, last_wqe_index, rq, port_reset;
+ 
+       processed = processed_rq1 = processed_rq2 = processed_rq3 = 0;
+       last_wqe_index = 0;
+ 
+       cqe = ehea_poll_rq1(qp, &wqe_index);
+       while ((processed < budget) && cqe) {
+               /* The entry counts as consumed before it is examined */
+               ehea_inc_rq1(qp);
+               processed_rq1++;
+               processed++;
+               if (netif_msg_rx_status(port))
+                       ehea_dump(cqe, sizeof(*cqe), "CQE");
+ 
+               last_wqe_index = wqe_index;
+               rmb();
+               if (!ehea_check_cqe(cqe, &rq)) {
+                       if (rq == 1) {
+                               /* LL RQ1 */
+                               skb = get_skb_by_index_ll(skb_arr_rq1,
+                                                         skb_arr_rq1_len,
+                                                         wqe_index);
+                               if (unlikely(!skb)) {
+                                       netif_info(port, rx_err, dev,
+                                                 "LL rq1: skb=NULL\n");
+ 
+                                       /* Slot was empty: allocate on the spot */
+                                       skb = netdev_alloc_skb(dev,
+                                                              EHEA_L_PKT_SIZE);
+                                       if (!skb) {
+                                               netdev_err(dev, "Not enough memory to allocate skb\n");
+                                               break;
+                                       }
+                               }
+                               /* Packet data is read from offset 64 of the CQE; 4 CRC bytes are dropped */
+                               skb_copy_to_linear_data(skb, ((char *)cqe) + 64,
+                                                cqe->num_bytes_transfered - 4);
+                               ehea_fill_skb(dev, skb, cqe, pr);
+                       } else if (rq == 2) {
+                               /* RQ2 */
+                               skb = get_skb_by_index(skb_arr_rq2,
+                                                      skb_arr_rq2_len, cqe);
+                               if (unlikely(!skb)) {
+                                       netif_err(port, rx_err, dev,
+                                                 "rq2: skb=NULL\n");
+                                       break;
+                               }
+                               ehea_fill_skb(dev, skb, cqe, pr);
+                               processed_rq2++;
+                       } else {
+                               /* RQ3 */
+                               skb = get_skb_by_index(skb_arr_rq3,
+                                                      skb_arr_rq3_len, cqe);
+                               if (unlikely(!skb)) {
+                                       netif_err(port, rx_err, dev,
+                                                 "rq3: skb=NULL\n");
+                                       break;
+                               }
+                               ehea_fill_skb(dev, skb, cqe, pr);
+                               processed_rq3++;
+                       }
+ 
+                       processed_bytes += skb->len;
+ 
+                       if (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT)
+                               __vlan_hwaccel_put_tag(skb, cqe->vlan_tag);
+ 
+                       napi_gro_receive(&pr->napi, skb);
+               } else {
+                       pr->p_stats.poll_receive_errors++;
+                       port_reset = ehea_treat_poll_error(pr, rq, cqe,
+                                                          &processed_rq2,
+                                                          &processed_rq3);
+                       /* A reset has been scheduled: stop processing */
+                       if (port_reset)
+                               break;
+               }
+               cqe = ehea_poll_rq1(qp, &wqe_index);
+       }
+ 
+       pr->rx_packets += processed;
+       pr->rx_bytes += processed_bytes;
+ 
+       /* Give back as many buffers per queue as were consumed from it */
+       ehea_refill_rq1(pr, last_wqe_index, processed_rq1);
+       ehea_refill_rq2(pr, processed_rq2);
+       ehea_refill_rq3(pr, processed_rq3);
+ 
+       return processed;
+ }
+ 
+ /*
+  * wr_id value written by check_sqs() into its probe SWQE; ehea_proc_cqes()
+  * compares completed wr_ids against it.
+  */
+ #define SWQE_RESTART_CHECK 0xdeadbeaff00d0000ull
+ 
+ /*
+  * Clear sq_restart_flag on every default QP of @port, then wake up the
+  * waiters on port->restart_wq.
+  */
+ static void reset_sq_restart_flag(struct ehea_port *port)
+ {
+       int qp_idx;
+ 
+       for (qp_idx = 0; qp_idx < port->num_def_qps; qp_idx++)
+               port->port_res[qp_idx].sq_restart_flag = 0;
+ 
+       wake_up(&port->restart_wq);
+ }
+ 
+ /*
+  * Probe every send queue of @port.
+  *
+  * For each default QP a purge SWQE tagged with SWQE_RESTART_CHECK is
+  * posted, then the function waits up to 100 ms on port->restart_wq for
+  * that QP's sq_restart_flag to read 0.  On timeout an error is logged, a
+  * port reset is scheduled and the remaining QPs are not probed.
+  */
+ static void check_sqs(struct ehea_port *port)
+ {
+       struct ehea_swqe *swqe;
+       int swqe_index;
+       int i;
+ 
+       for (i = 0; i < port->num_def_qps; i++) {
+               struct ehea_port_res *pr = &port->port_res[i];
+               int ret;
+ 
+               swqe = ehea_get_swqe(pr->qp, &swqe_index);
+               memset(swqe, 0, SWQE_HEADER_SIZE);
+               /* The probe occupies one send WQE */
+               atomic_dec(&pr->swqe_avail);
+ 
+               swqe->tx_control |= EHEA_SWQE_PURGE;
+               swqe->wr_id = SWQE_RESTART_CHECK;
+               swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION;
+               swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT;
+               swqe->immediate_data_length = 80;
+ 
+               ehea_post_swqe(pr->qp, swqe);
+ 
+               ret = wait_event_timeout(port->restart_wq,
+                                        pr->sq_restart_flag == 0,
+                                        msecs_to_jiffies(100));
+ 
+               if (!ret) {
+                       pr_err("HW/SW queues out of sync\n");
+                       ehea_schedule_port_reset(pr->port);
+                       return;
+               }
+       }
+ }
+ 
+ 
+ /*
+  * Process send completions of one port resource.
+  *
+  * @pr:       port resource whose send CQ is polled
+  * @my_quota: maximum number of ordinary completions to handle
+  *
+  * Frees the skbs of completed SWQE2-type work requests, returns the freed
+  * send WQEs to pr->swqe_avail, wakes the tx queue once enough WQEs are
+  * available again and wakes waiters on swqe_avail_wq.
+  *
+  * Returns the CQE the loop stopped on, i.e. NULL when the send CQ was
+  * drained.
+  */
+ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
+ {
+       struct sk_buff *skb;
+       struct ehea_cq *send_cq = pr->send_cq;
+       struct ehea_cqe *cqe;
+       int quota = my_quota;
+       int cqe_counter = 0;
+       int swqe_av = 0;
+       int index;
+       struct netdev_queue *txq = netdev_get_tx_queue(pr->port->netdev,
+                                               pr - &pr->port->port_res[0]);
+ 
+       cqe = ehea_poll_cq(send_cq);
+       while (cqe && (quota > 0)) {
+               ehea_inc_cq(send_cq);
+ 
+               cqe_counter++;
+               rmb();
+ 
+               /* Completion of the probe SWQE posted by check_sqs() */
+               if (cqe->wr_id == SWQE_RESTART_CHECK) {
+                       pr->sq_restart_flag = 1;
+                       swqe_av++;
+                       break;
+               }
+ 
+               if (cqe->status & EHEA_CQE_STAT_ERR_MASK) {
+                       pr_err("Bad send completion status=0x%04X\n",
+                              cqe->status);
+ 
+                       if (netif_msg_tx_err(pr->port))
+                               ehea_dump(cqe, sizeof(*cqe), "Send CQE");
+ 
+                       if (cqe->status & EHEA_CQE_STAT_RESET_MASK) {
+                               pr_err("Resetting port\n");
+                               ehea_schedule_port_reset(pr->port);
+                               break;
+                       }
+               }
+ 
+               if (netif_msg_tx_done(pr->port))
+                       ehea_dump(cqe, sizeof(*cqe), "CQE");
+ 
+               /* Only SWQE2-type requests have an skb in sq_skba to release */
+               if (likely(EHEA_BMASK_GET(EHEA_WR_ID_TYPE, cqe->wr_id)
+                          == EHEA_SWQE2_TYPE)) {
+ 
+                       index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id);
+                       skb = pr->sq_skba.arr[index];
+                       dev_kfree_skb(skb);
+                       pr->sq_skba.arr[index] = NULL;
+               }
+ 
+               swqe_av += EHEA_BMASK_GET(EHEA_WR_ID_REFILL, cqe->wr_id);
+               quota--;
+ 
+               cqe = ehea_poll_cq(send_cq);
+       }
+ 
+       ehea_update_feca(send_cq, cqe_counter);
+       atomic_add(swqe_av, &pr->swqe_avail);
+ 
+       /* Unlocked check first, repeated under the tx lock before waking */
+       if (unlikely(netif_tx_queue_stopped(txq) &&
+                    (atomic_read(&pr->swqe_avail) >= pr->swqe_refill_th))) {
+               __netif_tx_lock(txq, smp_processor_id());
+               if (netif_tx_queue_stopped(txq) &&
+                   (atomic_read(&pr->swqe_avail) >= pr->swqe_refill_th))
+                       netif_tx_wake_queue(txq);
+               __netif_tx_unlock(txq);
+       }
+ 
+       wake_up(&pr->port->swqe_avail_wq);
+ 
+       return cqe;
+ }
+ 
+ /* Quota that ehea_poll() passes to ehea_proc_cqes() for send completions */
+ #define EHEA_POLL_MAX_CQES 65535
+ 
+ /*
+  * NAPI poll callback of a port resource.
+  *
+  * Handles send completions and up to @budget receive completions.  While
+  * the budget is not used up, polling is completed and the completion
+  * queues are reset; both queues are then polled once more, and if either
+  * still holds an entry and the NAPI context can be rescheduled, another
+  * processing round is run.
+  *
+  * Returns the number of receive completions processed.
+  */
+ static int ehea_poll(struct napi_struct *napi, int budget)
+ {
+       struct ehea_port_res *pr = container_of(napi, struct ehea_port_res,
+                                               napi);
+       struct net_device *dev = pr->port->netdev;
+       struct ehea_cqe *cqe;
+       struct ehea_cqe *cqe_skb = NULL;
+       int wqe_index;
+       int rx = 0;
+ 
+       cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES);
+       rx += ehea_proc_rwqes(dev, pr, budget - rx);
+ 
+       while (rx != budget) {
+               napi_complete(napi);
+               ehea_reset_cq_ep(pr->recv_cq);
+               ehea_reset_cq_ep(pr->send_cq);
+               ehea_reset_cq_n1(pr->recv_cq);
+               ehea_reset_cq_n1(pr->send_cq);
+               rmb();
+               /* Look again for entries that arrived in the meantime */
+               cqe = ehea_poll_rq1(pr->qp, &wqe_index);
+               cqe_skb = ehea_poll_cq(pr->send_cq);
+ 
+               if (!cqe && !cqe_skb)
+                       return rx;
+ 
+               if (!napi_reschedule(napi))
+                       return rx;
+ 
+               cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES);
+               rx += ehea_proc_rwqes(dev, pr, budget - rx);
+       }
+ 
+       return rx;
+ }
+ 
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ /* Netpoll hook: schedule the NAPI context of every default QP of @dev. */
+ static void ehea_netpoll(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       int qp_idx = 0;
+ 
+       while (qp_idx < port->num_def_qps) {
+               napi_schedule(&port->port_res[qp_idx].napi);
+               qp_idx++;
+       }
+ }
+ #endif
+ 
+ /*
+  * Interrupt handler registered per port resource by ehea_reg_interrupts():
+  * @param is the struct ehea_port_res whose NAPI context gets scheduled.
+  * Always returns IRQ_HANDLED.
+  */
+ static irqreturn_t ehea_recv_irq_handler(int irq, void *param)
+ {
+       struct ehea_port_res *pr = param;
+ 
+       napi_schedule(&pr->napi);
+ 
+       return IRQ_HANDLED;
+ }
+ 
+ /*
+  * Interrupt handler for QP affiliated errors of a port (@param is the
+  * struct ehea_port).
+  *
+  * Drains port->qp_eq, queries the error data of the QP named by each
+  * entry's token and schedules a port reset when a QP error carries one of
+  * the reset bits, or when the failing resource is not a QP.
+  * Always returns IRQ_HANDLED.
+  */
+ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
+ {
+       struct ehea_port *port = param;
+       struct ehea_eqe *eqe;
+       struct ehea_qp *qp;
+       u64 resource_type, aer, aerr;
+       u32 qp_token;
+       int reset_port = 0;
+ 
+       for (eqe = ehea_poll_eq(port->qp_eq); eqe;
+            eqe = ehea_poll_eq(port->qp_eq)) {
+               qp_token = EHEA_BMASK_GET(EHEA_EQE_QP_TOKEN, eqe->entry);
+               pr_err("QP aff_err: entry=0x%llx, token=0x%x\n",
+                      eqe->entry, qp_token);
+ 
+               qp = port->port_res[qp_token].qp;
+ 
+               resource_type = ehea_error_data(port->adapter, qp->fw_handle,
+                                               &aer, &aerr);
+ 
+               if (resource_type != EHEA_AER_RESTYPE_QP) {
+                       /* Reset in case of CQ or EQ error */
+                       reset_port = 1;
+                       continue;
+               }
+ 
+               if ((aer & EHEA_AER_RESET_MASK) ||
+                   (aerr & EHEA_AERR_RESET_MASK))
+                       reset_port = 1;
+       }
+ 
+       if (reset_port) {
+               pr_err("Resetting port\n");
+               ehea_schedule_port_reset(port);
+       }
+ 
+       return IRQ_HANDLED;
+ }
+ 
+ /*
+  * Look up the port of @adapter whose logical_port_id equals
+  * @logical_port.  Returns NULL when no populated slot matches.
+  */
+ static struct ehea_port *ehea_get_port(struct ehea_adapter *adapter,
+                                      int logical_port)
+ {
+       int slot;
+ 
+       for (slot = 0; slot < EHEA_MAX_PORTS; slot++) {
+               struct ehea_port *candidate = adapter->port[slot];
+ 
+               if (candidate && candidate->logical_port_id == logical_port)
+                       return candidate;
+       }
+ 
+       return NULL;
+ }
+ 
+ /*
+  * Query control block 0 of @port and cache its attributes in the port
+  * structure: MAC address, speed/duplex, autoneg, num_mcs and the number
+  * of default QPs.
+  *
+  * Returns 0 on success, -ENOMEM when no page is available, -EIO when the
+  * query fails, -EADDRNOTAVAIL for an invalid MAC address and -EINVAL when
+  * the resulting number of default QPs is zero.
+  */
+ int ehea_sense_port_attr(struct ehea_port *port)
+ {
+       int ret;
+       u64 hret;
+       struct hcp_ehea_port_cb0 *cb0;
+ 
+       /* may be called via ehea_neq_tasklet() */
+       cb0 = (void *)get_zeroed_page(GFP_ATOMIC);
+       if (!cb0) {
+               pr_err("no mem for cb0\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       hret = ehea_h_query_ehea_port(port->adapter->handle,
+                                     port->logical_port_id, H_PORT_CB0,
+                                     EHEA_BMASK_SET(H_PORT_CB0_ALL, 0xFFFF),
+                                     cb0);
+       if (hret != H_SUCCESS) {
+               ret = -EIO;
+               goto out_free;
+       }
+ 
+       /* MAC address */
+       port->mac_addr = cb0->port_mac_addr << 16;
+ 
+       if (!is_valid_ether_addr((u8 *)&port->mac_addr)) {
+               ret = -EADDRNOTAVAIL;
+               goto out_free;
+       }
+ 
+       /* Port speed */
+       switch (cb0->port_speed) {
+       case H_SPEED_10M_H:
+               port->port_speed = EHEA_SPEED_10M;
+               port->full_duplex = 0;
+               break;
+       case H_SPEED_10M_F:
+               port->port_speed = EHEA_SPEED_10M;
+               port->full_duplex = 1;
+               break;
+       case H_SPEED_100M_H:
+               port->port_speed = EHEA_SPEED_100M;
+               port->full_duplex = 0;
+               break;
+       case H_SPEED_100M_F:
+               port->port_speed = EHEA_SPEED_100M;
+               port->full_duplex = 1;
+               break;
+       case H_SPEED_1G_F:
+               port->port_speed = EHEA_SPEED_1G;
+               port->full_duplex = 1;
+               break;
+       case H_SPEED_10G_F:
+               port->port_speed = EHEA_SPEED_10G;
+               port->full_duplex = 1;
+               break;
+       default:
+               port->port_speed = 0;
+               port->full_duplex = 0;
+               break;
+       }
+ 
+       port->autoneg = 1;
+       port->num_mcs = cb0->num_default_qps;
+ 
+       /* Number of default QPs */
+       if (use_mcs)
+               port->num_def_qps = cb0->num_default_qps;
+       else
+               port->num_def_qps = 1;
+ 
+       if (!port->num_def_qps) {
+               ret = -EINVAL;
+               goto out_free;
+       }
+ 
+       ret = 0;
+ out_free:
+       /* The control block is dumped on any error, or when probe messages are on */
+       if (ret || netif_msg_probe(port))
+               ehea_dump(cb0, sizeof(*cb0), "ehea_sense_port_attr");
+       free_page((unsigned long)cb0);
+ out:
+       return ret;
+ }
+ 
+ /*
+  * Request a new speed setting for @port and read back the result.
+  *
+  * @port:       port to reconfigure
+  * @port_speed: value written to the speed field of control block 4;
+  *              EHEA_SPEED_AUTONEG turns port->autoneg on
+  *
+  * The carrier is switched off while the request is made and switched on
+  * again afterwards unless prop_carrier_state is set and the physical link
+  * is not up.
+  *
+  * Returns 0 on success, -ENOMEM when no page is available, -EPERM when
+  * the hypervisor answers H_AUTHORITY, and -EIO for any other failure of
+  * the modify or the follow-up query.
+  */
+ int ehea_set_portspeed(struct ehea_port *port, u32 port_speed)
+ {
+       struct hcp_ehea_port_cb4 *cb4;
+       u64 hret;
+       int ret = 0;
+ 
+       cb4 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb4) {
+               pr_err("no mem for cb4\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       cb4->port_speed = port_speed;
+ 
+       netif_carrier_off(port->netdev);
+ 
+       hret = ehea_h_modify_ehea_port(port->adapter->handle,
+                                      port->logical_port_id,
+                                      H_PORT_CB4, H_PORT_CB4_SPEED, cb4);
+       if (hret == H_SUCCESS) {
+               port->autoneg = port_speed == EHEA_SPEED_AUTONEG ? 1 : 0;
+ 
+               /* Read back the speed that is now in effect */
+               hret = ehea_h_query_ehea_port(port->adapter->handle,
+                                             port->logical_port_id,
+                                             H_PORT_CB4, H_PORT_CB4_SPEED,
+                                             cb4);
+               if (hret == H_SUCCESS) {
+                       switch (cb4->port_speed) {
+                       case H_SPEED_10M_H:
+                               port->port_speed = EHEA_SPEED_10M;
+                               port->full_duplex = 0;
+                               break;
+                       case H_SPEED_10M_F:
+                               port->port_speed = EHEA_SPEED_10M;
+                               port->full_duplex = 1;
+                               break;
+                       case H_SPEED_100M_H:
+                               port->port_speed = EHEA_SPEED_100M;
+                               port->full_duplex = 0;
+                               break;
+                       case H_SPEED_100M_F:
+                               port->port_speed = EHEA_SPEED_100M;
+                               port->full_duplex = 1;
+                               break;
+                       case H_SPEED_1G_F:
+                               port->port_speed = EHEA_SPEED_1G;
+                               port->full_duplex = 1;
+                               break;
+                       case H_SPEED_10G_F:
+                               port->port_speed = EHEA_SPEED_10G;
+                               port->full_duplex = 1;
+                               break;
+                       default:
+                               port->port_speed = 0;
+                               port->full_duplex = 0;
+                               break;
+                       }
+               } else {
+                       pr_err("Failed sensing port speed\n");
+                       ret = -EIO;
+               }
+       } else {
+               if (hret == H_AUTHORITY) {
+                       pr_info("Hypervisor denied setting port speed\n");
+                       ret = -EPERM;
+               } else {
+                       ret = -EIO;
+                       pr_err("Failed setting port speed\n");
+               }
+       }
+       /* Runs on success and failure alike: undo the carrier_off above */
+       if (!prop_carrier_state || (port->phy_link == EHEA_PHY_LINK_UP))
+               netif_carrier_on(port->netdev);
+ 
+       free_page((unsigned long)cb4);
+ out:
+       return ret;
+ }
+ 
+ /*
+  * Handle one notification event queue entry.
+  *
+  * @adapter: adapter the event was raised on
+  * @eqe:     raw event entry; event code and port number are decoded
+  *           from it
+  *
+  * Port state changes update the carrier state, the tx queues and
+  * port->phy_link; a port malfunction takes the carrier down.  The port
+  * lookup may fail for an unknown port number, so the net device is only
+  * dereferenced once the port is known to exist; events that need a port
+  * are dropped with an error message in that case.
+  */
+ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe)
+ {
+       int ret;
+       u8 ec;
+       u8 portnum;
+       struct ehea_port *port;
+       struct net_device *dev;
+ 
+       ec = EHEA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+       portnum = EHEA_BMASK_GET(NEQE_PORTNUM, eqe);
+       port = ehea_get_port(adapter, portnum);
+       /* ehea_get_port() returns NULL for an unknown port number */
+       dev = port ? port->netdev : NULL;
+ 
+       switch (ec) {
+       case EHEA_EC_PORTSTATE_CHG:     /* port state change */
+ 
+               if (!port) {
+                       netdev_err(dev, "unknown portnum %x\n", portnum);
+                       break;
+               }
+ 
+               if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) {
+                       if (!netif_carrier_ok(dev)) {
+                               ret = ehea_sense_port_attr(port);
+                               if (ret) {
+                                       netdev_err(dev, "failed resensing port attributes\n");
+                                       break;
+                               }
+ 
+                               netif_info(port, link, dev,
+                                          "Logical port up: %dMbps %s Duplex\n",
+                                          port->port_speed,
+                                          port->full_duplex == 1 ?
+                                          "Full" : "Half");
+ 
+                               netif_carrier_on(dev);
+                               netif_wake_queue(dev);
+                       }
+               } else
+                       if (netif_carrier_ok(dev)) {
+                               netif_info(port, link, dev,
+                                          "Logical port down\n");
+                               netif_carrier_off(dev);
+                               netif_tx_disable(dev);
+                       }
+ 
+               if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PORT_UP, eqe)) {
+                       port->phy_link = EHEA_PHY_LINK_UP;
+                       netif_info(port, link, dev,
+                                  "Physical port up\n");
+                       if (prop_carrier_state)
+                               netif_carrier_on(dev);
+               } else {
+                       port->phy_link = EHEA_PHY_LINK_DOWN;
+                       netif_info(port, link, dev,
+                                  "Physical port down\n");
+                       if (prop_carrier_state)
+                               netif_carrier_off(dev);
+               }
+ 
+               if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PRIMARY, eqe))
+                       netdev_info(dev,
+                                   "External switch port is primary port\n");
+               else
+                       netdev_info(dev,
+                                   "External switch port is backup port\n");
+ 
+               break;
+       case EHEA_EC_ADAPTER_MALFUNC:
+               netdev_err(dev, "Adapter malfunction\n");
+               break;
+       case EHEA_EC_PORT_MALFUNC:
+               /* The carrier/tx calls below need a real net device */
+               if (!port) {
+                       netdev_err(dev, "unknown portnum %x\n", portnum);
+                       break;
+               }
+ 
+               netdev_info(dev, "Port malfunction\n");
+               netif_carrier_off(dev);
+               netif_tx_disable(dev);
+               break;
+       default:
+               netdev_err(dev, "unknown event code %x, eqe=0x%llX\n", ec, eqe);
+               break;
+       }
+ }
+ 
+ /*
+  * Tasklet body for the notification event queue; @data is the
+  * struct ehea_adapter pointer.  Every pending entry is passed to
+  * ehea_parse_eqe(), then the port-state-change, adapter-malfunction and
+  * port-malfunction events are reset through ehea_h_reset_events().
+  */
+ static void ehea_neq_tasklet(unsigned long data)
+ {
+       struct ehea_adapter *adapter = (struct ehea_adapter *)data;
+       struct ehea_eqe *eqe = ehea_poll_eq(adapter->neq);
+       u64 event_mask;
+ 
+       pr_debug("eqe=%p\n", eqe);
+ 
+       while (eqe != NULL) {
+               pr_debug("*eqe=%lx\n", (unsigned long) eqe->entry);
+               ehea_parse_eqe(adapter, eqe->entry);
+ 
+               eqe = ehea_poll_eq(adapter->neq);
+               pr_debug("next eqe=%p\n", eqe);
+       }
+ 
+       event_mask = EHEA_BMASK_SET(NELR_PORTSTATE_CHG, 1);
+       event_mask |= EHEA_BMASK_SET(NELR_ADAPTER_MALFUNC, 1);
+       event_mask |= EHEA_BMASK_SET(NELR_PORT_MALFUNC, 1);
+ 
+       ehea_h_reset_events(adapter->handle,
+                           adapter->neq->fw_handle, event_mask);
+ }
+ 
+ /*
+  * Interrupt handler taking the struct ehea_adapter as @param: the work is
+  * deferred to adapter->neq_tasklet, scheduled with high priority.
+  * Always returns IRQ_HANDLED.
+  */
+ static irqreturn_t ehea_interrupt_neq(int irq, void *param)
+ {
+       struct ehea_adapter *adapter = param;
+       tasklet_hi_schedule(&adapter->neq_tasklet);
+       return IRQ_HANDLED;
+ }
+ 
+ 
+ /*
+  * Initially fill all three receive queues of @pr: RQ1 over the whole skb
+  * array, RQ2 and RQ3 with one entry less than their actual WQE count.
+  *
+  * Returns the bitwise OR of the RQ2 and RQ3 refill results (0 when both
+  * succeeded).
+  */
+ static int ehea_fill_port_res(struct ehea_port_res *pr)
+ {
+       struct ehea_qp_init_attr *init_attr = &pr->qp->init_attr;
+       int rq2_ret, rq3_ret;
+ 
+       ehea_init_fill_rq1(pr, pr->rq1_skba.len);
+ 
+       rq2_ret = ehea_refill_rq2(pr, init_attr->act_nr_rwqes_rq2 - 1);
+       rq3_ret = ehea_refill_rq3(pr, init_attr->act_nr_rwqes_rq3 - 1);
+ 
+       return rq2_ret | rq3_ret;
+ }
+ 
+ /*
+  * Register the interrupts of a port: one for QP affiliated errors
+  * (ehea_qp_aff_irq_handler) and one per default QP
+  * (ehea_recv_irq_handler).
+  *
+  * Returns 0 on success or the error of the failing
+  * ibmebus_request_irq().  On failure only the interrupts that were
+  * actually registered before the failing one are released again.
+  */
+ static int ehea_reg_interrupts(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_port_res *pr;
+       int i, ret;
+ 
+ 
+       snprintf(port->int_aff_name, EHEA_IRQ_NAME_SIZE - 1, "%s-aff",
+                dev->name);
+ 
+       ret = ibmebus_request_irq(port->qp_eq->attr.ist1,
+                                 ehea_qp_aff_irq_handler,
+                                 IRQF_DISABLED, port->int_aff_name, port);
+       if (ret) {
+               netdev_err(dev, "failed registering irq for qp_aff_irq_handler:ist=%X\n",
+                          port->qp_eq->attr.ist1);
+               /* Nothing has been registered yet, so nothing to release */
+               goto out;
+       }
+ 
+       netif_info(port, ifup, dev,
+                  "irq_handle 0x%X for function qp_aff_irq_handler registered\n",
+                  port->qp_eq->attr.ist1);
+ 
+ 
+       for (i = 0; i < port->num_def_qps; i++) {
+               pr = &port->port_res[i];
+               snprintf(pr->int_send_name, EHEA_IRQ_NAME_SIZE - 1,
+                        "%s-queue%d", dev->name, i);
+               ret = ibmebus_request_irq(pr->eq->attr.ist1,
+                                         ehea_recv_irq_handler,
+                                         IRQF_DISABLED, pr->int_send_name,
+                                         pr);
+               if (ret) {
+                       netdev_err(dev, "failed registering irq for ehea_queue port_res_nr:%d, ist=%X\n",
+                                  i, pr->eq->attr.ist1);
+                       goto out_free_req;
+               }
+               netif_info(port, ifup, dev,
+                          "irq_handle 0x%X for function ehea_queue_int %d registered\n",
+                          pr->eq->attr.ist1, i);
+       }
+ out:
+       return ret;
+ 
+ 
+ out_free_req:
+       /* Release the queue interrupts registered before the failing one */
+       while (--i >= 0) {
+               u32 ist = port->port_res[i].eq->attr.ist1;
+               ibmebus_free_irq(ist, &port->port_res[i]);
+       }
+ 
+       /* The QP affiliated interrupt was registered successfully above */
+       ibmebus_free_irq(port->qp_eq->attr.ist1, port);
+ 
+       goto out;
+ 
+ }
+ 
+ /*
+  * Release the interrupts of a port: first the one of every default QP,
+  * then the QP affiliated error interrupt.  Each release is reported
+  * through netif_info().
+  */
+ static void ehea_free_interrupts(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       int qp_idx;
+ 
+       /* send */
+       for (qp_idx = 0; qp_idx < port->num_def_qps; qp_idx++) {
+               struct ehea_port_res *pr = &port->port_res[qp_idx];
+ 
+               ibmebus_free_irq(pr->eq->attr.ist1, pr);
+               netif_info(port, intr, dev,
+                          "free send irq for res %d with handle 0x%X\n",
+                          qp_idx, pr->eq->attr.ist1);
+       }
+ 
+       /* associated events */
+       ibmebus_free_irq(port->qp_eq->attr.ist1, port);
+       netif_info(port, intr, dev,
+                  "associated event interrupt for handle 0x%X freed\n",
+                  port->qp_eq->attr.ist1);
+ }
+ 
+ /*
+  * Write control block 0 of the logical port: the receive-control flags and
+  * the default QP number array.
+  *
+  * Returns 0 on success, -ENOMEM when the control-block page cannot be
+  * allocated and -EIO when the hypervisor call does not return H_SUCCESS.
+  */
+ static int ehea_configure_port(struct ehea_port *port)
+ {
+       struct hcp_ehea_port_cb0 *cb0;
+       u64 hret, mask;
+       int ret, i;
+ 
+       cb0 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb0)
+               return -ENOMEM;
+ 
+       cb0->port_rc = EHEA_BMASK_SET(PXLY_RC_VALID, 1)
+                    | EHEA_BMASK_SET(PXLY_RC_IP_CHKSUM, 1)
+                    | EHEA_BMASK_SET(PXLY_RC_TCP_UDP_CHKSUM, 1)
+                    | EHEA_BMASK_SET(PXLY_RC_VLAN_XTRACT, 1)
+                    | EHEA_BMASK_SET(PXLY_RC_VLAN_TAG_FILTER,
+                                     PXLY_RC_VLAN_FILTER)
+                    | EHEA_BMASK_SET(PXLY_RC_JUMBO_FRAME, 1);
+ 
+       /* without use_mcs every slot refers to the QP of port resource 0 */
+       for (i = 0; i < port->num_mcs; i++) {
+               int src = use_mcs ? i : 0;
+ 
+               cb0->default_qpn_arr[i] =
+                       port->port_res[src].qp->init_attr.qp_nr;
+       }
+ 
+       if (netif_msg_ifup(port))
+               ehea_dump(cb0, sizeof(*cb0), "ehea_configure_port");
+ 
+       mask = EHEA_BMASK_SET(H_PORT_CB0_PRC, 1)
+            | EHEA_BMASK_SET(H_PORT_CB0_DEFQPNARRAY, 1);
+ 
+       hret = ehea_h_modify_ehea_port(port->adapter->handle,
+                                      port->logical_port_id,
+                                      H_PORT_CB0, mask, cb0);
+       ret = (hret == H_SUCCESS) ? 0 : -EIO;
+ 
+       free_page((unsigned long)cb0);
+       return ret;
+ }
+ 
+ /*
+  * Generate the send and receive SMRs of a port resource from the adapter MR.
+  * If the receive SMR cannot be generated the send SMR is removed again.
+  *
+  * Returns 0 on success and -EIO on any failure.
+  */
+ int ehea_gen_smrs(struct ehea_port_res *pr)
+ {
+       struct ehea_adapter *adapter = pr->port->adapter;
+ 
+       if (ehea_gen_smr(adapter, &adapter->mr, &pr->send_mr))
+               goto fail;
+ 
+       if (ehea_gen_smr(adapter, &adapter->mr, &pr->recv_mr)) {
+               ehea_rem_mr(&pr->send_mr);
+               goto fail;
+       }
+ 
+       return 0;
+ 
+ fail:
+       pr_err("Generating SMRS failed\n");
+       return -EIO;
+ }
+ 
+ /*
+  * Remove the send SMR and then the receive SMR of a port resource.
+  * The receive SMR is only attempted when removing the send SMR succeeded.
+  *
+  * Returns 0 on success and -EIO if either removal reports an error.
+  */
+ int ehea_rem_smrs(struct ehea_port_res *pr)
+ {
+       if (ehea_rem_mr(&pr->send_mr))
+               return -EIO;
+ 
+       if (ehea_rem_mr(&pr->recv_mr))
+               return -EIO;
+ 
+       return 0;
+ }
+ 
+ /*
+  * Allocate a zeroed array of max_q_entries pointers for an skb bookkeeping
+  * array and reset its length, index and os_skbs counter.
+  *
+  * Returns 0 on success and -ENOMEM if the array cannot be allocated.
+  */
+ static int ehea_init_q_skba(struct ehea_q_skb_arr *q_skba, int max_q_entries)
+ {
+       int bytes = sizeof(void *) * max_q_entries;
+ 
+       q_skba->arr = vzalloc(bytes);
+       if (q_skba->arr == NULL)
+               return -ENOMEM;
+ 
+       q_skba->os_skbs = 0;
+       q_skba->index = 0;
+       q_skba->len = max_q_entries;
+ 
+       return 0;
+ }
+ 
+ /*
+  * (Re)initialise one port resource: event queue, receive and send completion
+  * queues, the queue pair, the skb bookkeeping arrays, the SMRs and the NAPI
+  * context.
+  *
+  * The structure is wiped with memset(); the four traffic counters
+  * (tx/rx bytes and packets) are saved beforehand and restored afterwards so
+  * they survive a re-initialisation.
+  *
+  * @port:        port the resource belongs to
+  * @pr:          port resource to initialise
+  * @pr_cfg:      requested queue sizes
+  * @queue_token: value stored as qp_token in the QP init attributes
+  *
+  * Returns 0 on success, -ENOMEM or -EIO on failure.  On failure everything
+  * acquired so far is released again.
+  */
+ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
+                             struct port_res_cfg *pr_cfg, int queue_token)
+ {
+       struct ehea_adapter *adapter = port->adapter;
+       enum ehea_eq_type eq_type = EHEA_EQ;
+       struct ehea_qp_init_attr *init_attr = NULL;
+       int ret = -EIO;
+       u64 tx_bytes, rx_bytes, tx_packets, rx_packets;
+ 
+       /* save the statistics across the memset() below */
+       tx_bytes = pr->tx_bytes;
+       tx_packets = pr->tx_packets;
+       rx_bytes = pr->rx_bytes;
+       rx_packets = pr->rx_packets;
+ 
+       memset(pr, 0, sizeof(struct ehea_port_res));
+ 
+       /* restore each counter from its own saved value */
+       pr->tx_bytes = tx_bytes;
+       pr->tx_packets = tx_packets;
+       pr->rx_bytes = rx_bytes;
+       pr->rx_packets = rx_packets;
+ 
+       pr->port = port;
+ 
+       pr->eq = ehea_create_eq(adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 0);
+       if (!pr->eq) {
+               pr_err("create_eq failed (eq)\n");
+               goto out_free;
+       }
+ 
+       pr->recv_cq = ehea_create_cq(adapter, pr_cfg->max_entries_rcq,
+                                    pr->eq->fw_handle,
+                                    port->logical_port_id);
+       if (!pr->recv_cq) {
+               pr_err("create_cq failed (cq_recv)\n");
+               goto out_free;
+       }
+ 
+       pr->send_cq = ehea_create_cq(adapter, pr_cfg->max_entries_scq,
+                                    pr->eq->fw_handle,
+                                    port->logical_port_id);
+       if (!pr->send_cq) {
+               pr_err("create_cq failed (cq_send)\n");
+               goto out_free;
+       }
+ 
+       if (netif_msg_ifup(port))
+               pr_info("Send CQ: act_nr_cqes=%d, Recv CQ: act_nr_cqes=%d\n",
+                       pr->send_cq->attr.act_nr_of_cqes,
+                       pr->recv_cq->attr.act_nr_of_cqes);
+ 
+       init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+       if (!init_attr) {
+               ret = -ENOMEM;
+               pr_err("no mem for ehea_qp_init_attr\n");
+               goto out_free;
+       }
+ 
+       init_attr->low_lat_rq1 = 1;
+       init_attr->signalingtype = 1;   /* generate CQE if specified in WQE */
+       init_attr->rq_count = 3;
+       init_attr->qp_token = queue_token;
+       init_attr->max_nr_send_wqes = pr_cfg->max_entries_sq;
+       init_attr->max_nr_rwqes_rq1 = pr_cfg->max_entries_rq1;
+       init_attr->max_nr_rwqes_rq2 = pr_cfg->max_entries_rq2;
+       init_attr->max_nr_rwqes_rq3 = pr_cfg->max_entries_rq3;
+       init_attr->wqe_size_enc_sq = EHEA_SG_SQ;
+       init_attr->wqe_size_enc_rq1 = EHEA_SG_RQ1;
+       init_attr->wqe_size_enc_rq2 = EHEA_SG_RQ2;
+       init_attr->wqe_size_enc_rq3 = EHEA_SG_RQ3;
+       init_attr->rq2_threshold = EHEA_RQ2_THRESHOLD;
+       init_attr->rq3_threshold = EHEA_RQ3_THRESHOLD;
+       init_attr->port_nr = port->logical_port_id;
+       init_attr->send_cq_handle = pr->send_cq->fw_handle;
+       init_attr->recv_cq_handle = pr->recv_cq->fw_handle;
+       init_attr->aff_eq_handle = port->qp_eq->fw_handle;
+ 
+       pr->qp = ehea_create_qp(adapter, adapter->pd, init_attr);
+       if (!pr->qp) {
+               pr_err("create_qp failed\n");
+               ret = -EIO;
+               goto out_free;
+       }
+ 
+       if (netif_msg_ifup(port))
+               pr_info("QP: qp_nr=%d\n act_nr_snd_wqe=%d\n nr_rwqe_rq1=%d\n nr_rwqe_rq2=%d\n nr_rwqe_rq3=%d\n",
+                       init_attr->qp_nr,
+                       init_attr->act_nr_send_wqes,
+                       init_attr->act_nr_rwqes_rq1,
+                       init_attr->act_nr_rwqes_rq2,
+                       init_attr->act_nr_rwqes_rq3);
+ 
+       pr->sq_skba_size = init_attr->act_nr_send_wqes + 1;
+ 
+       /* each skb array gets one slot more than the actual queue depth */
+       ret = ehea_init_q_skba(&pr->sq_skba, pr->sq_skba_size);
+       ret |= ehea_init_q_skba(&pr->rq1_skba, init_attr->act_nr_rwqes_rq1 + 1);
+       ret |= ehea_init_q_skba(&pr->rq2_skba, init_attr->act_nr_rwqes_rq2 + 1);
+       ret |= ehea_init_q_skba(&pr->rq3_skba, init_attr->act_nr_rwqes_rq3 + 1);
+       if (ret)
+               goto out_free;
+ 
+       pr->swqe_refill_th = init_attr->act_nr_send_wqes / 10;
+       if (ehea_gen_smrs(pr) != 0) {
+               ret = -EIO;
+               goto out_free;
+       }
+ 
+       atomic_set(&pr->swqe_avail, init_attr->act_nr_send_wqes - 1);
+ 
+       kfree(init_attr);
+ 
+       netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64);
+ 
+       ret = 0;
+       goto out;
+ 
+ out_free:
+       /* members never set are still zero from the memset() above */
+       kfree(init_attr);
+       vfree(pr->sq_skba.arr);
+       vfree(pr->rq1_skba.arr);
+       vfree(pr->rq2_skba.arr);
+       vfree(pr->rq3_skba.arr);
+       ehea_destroy_qp(pr->qp);
+       ehea_destroy_cq(pr->send_cq);
+       ehea_destroy_cq(pr->recv_cq);
+       ehea_destroy_eq(pr->eq);
+ out:
+       return ret;
+ }
+ 
+ /*
+  * Tear down one port resource.  The NAPI context is removed when a QP
+  * exists, then the QP is destroyed.  Only if that succeeds are the CQs and
+  * the EQ destroyed, any skbs still referenced from the bookkeeping arrays
+  * freed, the arrays themselves released and the SMRs removed.
+  *
+  * Returns the error of ehea_destroy_qp() if it fails, otherwise the result
+  * of ehea_rem_smrs().
+  */
+ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
+ {
+       int rc, n;
+ 
+       if (pr->qp)
+               netif_napi_del(&pr->napi);
+ 
+       rc = ehea_destroy_qp(pr->qp);
+       if (rc)
+               return rc;
+ 
+       ehea_destroy_cq(pr->send_cq);
+       ehea_destroy_cq(pr->recv_cq);
+       ehea_destroy_eq(pr->eq);
+ 
+       /* drop skbs that are still attached to the queues */
+       for (n = 0; n < pr->rq1_skba.len; n++) {
+               if (pr->rq1_skba.arr[n])
+                       dev_kfree_skb(pr->rq1_skba.arr[n]);
+       }
+ 
+       for (n = 0; n < pr->rq2_skba.len; n++) {
+               if (pr->rq2_skba.arr[n])
+                       dev_kfree_skb(pr->rq2_skba.arr[n]);
+       }
+ 
+       for (n = 0; n < pr->rq3_skba.len; n++) {
+               if (pr->rq3_skba.arr[n])
+                       dev_kfree_skb(pr->rq3_skba.arr[n]);
+       }
+ 
+       for (n = 0; n < pr->sq_skba.len; n++) {
+               if (pr->sq_skba.arr[n])
+                       dev_kfree_skb(pr->sq_skba.arr[n]);
+       }
+ 
+       vfree(pr->rq1_skba.arr);
+       vfree(pr->rq2_skba.arr);
+       vfree(pr->rq3_skba.arr);
+       vfree(pr->sq_skba.arr);
+ 
+       return ehea_rem_smrs(pr);
+ }
+ 
+ /*
+  * Fill the immediate-data area of a type-2 send WQE from the linear part of
+  * the skb.  Linear data that does not fit into the immediate area is
+  * described by the first scatter-gather entry, and swqe->descriptors is
+  * left at 1 in that case and at 0 otherwise.
+  */
+ static void write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe,
+                                 u32 lkey)
+ {
+       struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
+       u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
+       int skb_data_size = skb_headlen(skb);
+       unsigned int immediate_len;
+       int gso = skb_is_gso(skb);
+ 
+       swqe->descriptors = 0;
+ 
+       if (!gso && skb_data_size < SWQE2_MAX_IMM) {
+               /* the whole linear area fits into the immediate area */
+               skb_copy_from_linear_data(skb, imm_data, skb_data_size);
+               swqe->immediate_data_length = skb_data_size;
+               return;
+       }
+ 
+       if (gso) {
+               swqe->tx_control |= EHEA_SWQE_TSO;
+               swqe->mss = skb_shinfo(skb)->gso_size;
+               /*
+                * For TSO packets we only copy the headers into the
+                * immediate area.
+                */
+               immediate_len = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
+       } else {
+               immediate_len = SWQE2_MAX_IMM;
+       }
+ 
+       skb_copy_from_linear_data(skb, imm_data, immediate_len);
+       swqe->immediate_data_length = immediate_len;
+ 
+       /* remaining linear bytes go into the first sg entry */
+       if (skb_data_size > immediate_len) {
+               sg1entry->l_key = lkey;
+               sg1entry->len = skb_data_size - immediate_len;
+               sg1entry->vaddr = ehea_map_vaddr(skb->data + immediate_len);
+               swqe->descriptors++;
+       }
+ }
+ 
+ /*
+  * Build the data description of a type-2 send WQE: immediate data first
+  * (see write_swqe2_immediate()), then one scatter-gather entry per page
+  * fragment.  If the first sg entry was not consumed by linear data it takes
+  * fragment 0; all further fragments go into the sg list in order.
+  */
+ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
+                                   struct ehea_swqe *swqe, u32 lkey)
+ {
+       struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
+       struct ehea_vsgentry *slot =
+               (struct ehea_vsgentry *)&swqe->u.immdata_desc.sg_list;
+       int nfrags = skb_shinfo(skb)->nr_frags;
+       skb_frag_t *frag;
+       int idx = 0;
+ 
+       write_swqe2_immediate(skb, swqe, lkey);
+ 
+       if (nfrags <= 0)
+               return;
+ 
+       if (swqe->descriptors == 0) {
+               /* sg1entry is still free: it describes the first fragment */
+               frag = &skb_shinfo(skb)->frags[0];
+ 
+               sg1entry->l_key = lkey;
+               sg1entry->len = skb_frag_size(frag);
+               sg1entry->vaddr = ehea_map_vaddr(skb_frag_address(frag));
+               swqe->descriptors++;
+               idx = 1;
+       }
+ 
+       /* the remaining fragments fill the sg list from its first slot */
+       for (; idx < nfrags; idx++, slot++) {
+               frag = &skb_shinfo(skb)->frags[idx];
+ 
+               slot->l_key = lkey;
+               slot->len = skb_frag_size(frag);
+               slot->vaddr = ehea_map_vaddr(skb_frag_address(frag));
+               swqe->descriptors++;
+       }
+ }
+ 
+ /*
+  * Register or deregister (depending on @hcallid, H_REG_BCMC or
+  * H_DEREG_BCMC) the broadcast address for the port's current MAC, first for
+  * untagged packets and then for all VLAN ids.  The VLAN step is skipped if
+  * the untagged step fails.
+  *
+  * Returns 0 on success and -EIO if either hypervisor call fails.
+  */
+ static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid)
+ {
+       int ret = 0;
+       u64 hret;
+       u8 reg_type;
+ 
+       /* De/Register untagged packets */
+       reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_UNTAGGED;
+       hret = ehea_h_reg_dereg_bcmc(port->adapter->handle,
+                                    port->logical_port_id,
+                                    reg_type, port->mac_addr, 0, hcallid);
+       if (hret != H_SUCCESS) {
+               /* this step handles the untagged registration */
+               pr_err("%sregistering bc address failed (untagged)\n",
+                      hcallid == H_REG_BCMC ? "" : "de");
+               ret = -EIO;
+               goto out_herr;
+       }
+ 
+       /* De/Register VLAN packets */
+       reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_VLANID_ALL;
+       hret = ehea_h_reg_dereg_bcmc(port->adapter->handle,
+                                    port->logical_port_id,
+                                    reg_type, port->mac_addr, 0, hcallid);
+       if (hret != H_SUCCESS) {
+               pr_err("%sregistering bc address failed (vlan)\n",
+                      hcallid == H_REG_BCMC ? "" : "de");
+               ret = -EIO;
+       }
+ out_herr:
+       return ret;
+ }
+ 
+ /*
+  * Change the MAC address of the port.
+  *
+  * The new address is first written to the port's control block 0 through
+  * the hypervisor; only then is dev->dev_addr updated.  When the port is up
+  * the broadcast registration is removed for the old address and re-added
+  * for the new one.
+  *
+  * Returns 0 on success, -EADDRNOTAVAIL for an invalid ethernet address,
+  * -ENOMEM if no control-block page is available, -EIO if the hypervisor
+  * rejects the change, or the error of the broadcast (de)registration.
+  */
+ static int ehea_set_mac_addr(struct net_device *dev, void *sa)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct sockaddr *mac_addr = sa;
+       struct hcp_ehea_port_cb0 *cb0;
+       int ret;
+       u64 hret;
+ 
+       if (!is_valid_ether_addr(mac_addr->sa_data)) {
+               ret = -EADDRNOTAVAIL;
+               goto out;
+       }
+ 
+       cb0 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb0) {
+               pr_err("no mem for cb0\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       /*
+        * Copy the six address bytes to the start of the field, then shift
+        * right by 16 bits (presumably to right-align the address on this
+        * big-endian platform -- confirm).
+        */
+       memcpy(&(cb0->port_mac_addr), &(mac_addr->sa_data[0]), ETH_ALEN);
+ 
+       cb0->port_mac_addr = cb0->port_mac_addr >> 16;
+ 
+       hret = ehea_h_modify_ehea_port(port->adapter->handle,
+                                      port->logical_port_id, H_PORT_CB0,
+                                      EHEA_BMASK_SET(H_PORT_CB0_MAC, 1), cb0);
+       if (hret != H_SUCCESS) {
+               ret = -EIO;
+               goto out_free;
+       }
+ 
+       memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len);
+ 
+       /* Deregister old MAC in pHYP */
+       if (port->state == EHEA_PORT_UP) {
+               ret = ehea_broadcast_reg_helper(port, H_DEREG_BCMC);
+               if (ret)
+                       goto out_upregs;
+       }
+ 
+       /* port->mac_addr keeps the address shifted back left by 16 bits */
+       port->mac_addr = cb0->port_mac_addr << 16;
+ 
+       /* Register new MAC in pHYP */
+       if (port->state == EHEA_PORT_UP) {
+               ret = ehea_broadcast_reg_helper(port, H_REG_BCMC);
+               if (ret)
+                       goto out_upregs;
+       }
+ 
+       ret = 0;
+ 
+ out_upregs:
+       /* reached on success as well as on (de)registration failure */
+       ehea_update_bcmc_registrations();
+ out_free:
+       free_page((unsigned long)cb0);
+ out:
+       return ret;
+ }
+ 
+ /*
+  * Log a failed promiscuous-mode change.  H_AUTHORITY is reported at info
+  * level as a denial by the hypervisor, every other code as an error.
+  */
+ static void ehea_promiscuous_error(u64 hret, int enable)
+ {
+       const char *prefix = enable == 1 ? "en" : "dis";
+ 
+       if (hret != H_AUTHORITY) {
+               pr_err("failed %sabling promiscuous mode\n", prefix);
+               return;
+       }
+ 
+       pr_info("Hypervisor denied %sabling promiscuous mode\n", prefix);
+ }
+ 
+ /*
+  * Switch promiscuous mode on (enable == 1) or off by writing the default
+  * unicast QP number in control block 7 of the port.  Nothing is done when
+  * the requested state equals port->promisc; port->promisc is only updated
+  * after the hypervisor call succeeded.
+  */
+ static void ehea_promiscuous(struct net_device *dev, int enable)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct hcp_ehea_port_cb7 *cb7;
+       u64 hret;
+ 
+       if (port->promisc == enable)
+               return;
+ 
+       cb7 = (void *)get_zeroed_page(GFP_ATOMIC);
+       if (!cb7) {
+               pr_err("no mem for cb7\n");
+               goto out;
+       }
+ 
+       /* Modify Pxs_DUCQPN in CB7 */
+       if (enable == 1)
+               cb7->def_uc_qpn = port->port_res[0].qp->fw_handle;
+       else
+               cb7->def_uc_qpn = 0;
+ 
+       hret = ehea_h_modify_ehea_port(port->adapter->handle,
+                                      port->logical_port_id,
+                                      H_PORT_CB7, H_PORT_CB7_DUCQPN, cb7);
+       if (!hret)
+               port->promisc = enable;
+       else
+               ehea_promiscuous_error(hret, enable);
+ out:
+       free_page((unsigned long)cb7);
+ }
+ 
+ /*
+  * Register or deregister (per @hcallid) a multicast MAC address with scope
+  * "all", first for untagged packets and then for all VLAN ids.  The second
+  * call is skipped when the first one fails.
+  *
+  * Returns the hypervisor return code of the last call made.
+  */
+ static u64 ehea_multicast_reg_helper(struct ehea_port *port, u64 mc_mac_addr,
+                                    u32 hcallid)
+ {
+       u8 reg_type;
+       u64 hret;
+ 
+       /* untagged packets */
+       reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST
+                | EHEA_BCMC_UNTAGGED;
+       hret = ehea_h_reg_dereg_bcmc(port->adapter->handle,
+                                    port->logical_port_id,
+                                    reg_type, mc_mac_addr, 0, hcallid);
+       if (hret)
+               return hret;
+ 
+       /* all VLAN ids */
+       reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST
+                | EHEA_BCMC_VLANID_ALL;
+       return ehea_h_reg_dereg_bcmc(port->adapter->handle,
+                                    port->logical_port_id,
+                                    reg_type, mc_mac_addr, 0, hcallid);
+ }
+ 
+ /*
+  * Deregister every address on the port's multicast list and free the list
+  * entries.  An entry is removed and freed even if its deregistration fails.
+  *
+  * Returns 0 if all deregistrations succeeded, -EIO otherwise.
+  */
+ static int ehea_drop_multicast_list(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_mc_list *entry, *next;
+       int ret = 0;
+ 
+       list_for_each_entry_safe(entry, next, &port->mc_list->list, list) {
+               if (ehea_multicast_reg_helper(port, entry->macaddr,
+                                             H_DEREG_BCMC)) {
+                       pr_err("failed deregistering mcast MAC\n");
+                       ret = -EIO;
+               }
+ 
+               list_del(&entry->list);
+               kfree(entry);
+       }
+ 
+       return ret;
+ }
+ 
+ /*
+  * Enable or disable reception of all multicast traffic.  Only an actual
+  * state change triggers a hypervisor call; port->allmulti is updated only
+  * if that call succeeds.  Enabling first drops the individual multicast
+  * registrations.
+  */
+ static void ehea_allmulti(struct net_device *dev, int enable)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       u64 hret;
+ 
+       if (enable && !port->allmulti) {
+               /* Enable ALLMULTI */
+               ehea_drop_multicast_list(dev);
+               hret = ehea_multicast_reg_helper(port, 0, H_REG_BCMC);
+               if (hret)
+                       netdev_err(dev, "failed enabling IFF_ALLMULTI\n");
+               else
+                       port->allmulti = 1;
+               return;
+       }
+ 
+       if (!enable && port->allmulti) {
+               /* Disable ALLMULTI */
+               hret = ehea_multicast_reg_helper(port, 0, H_DEREG_BCMC);
+               if (hret)
+                       netdev_err(dev, "failed disabling IFF_ALLMULTI\n");
+               else
+                       port->allmulti = 0;
+       }
+ }
+ 
+ /*
+  * Register one multicast MAC address with the hypervisor and, on success,
+  * add it to the port's multicast list.  Failures are logged; the entry is
+  * freed again if the registration fails.
+  */
+ static void ehea_add_multicast_entry(struct ehea_port *port, u8 *mc_mac_addr)
+ {
+       struct ehea_mc_list *entry;
+ 
+       entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+       if (!entry) {
+               pr_err("no mem for mcl_entry\n");
+               return;
+       }
+ 
+       INIT_LIST_HEAD(&entry->list);
+       memcpy(&entry->macaddr, mc_mac_addr, ETH_ALEN);
+ 
+       if (ehea_multicast_reg_helper(port, entry->macaddr, H_REG_BCMC)) {
+               pr_err("failed registering mcast MAC\n");
+               kfree(entry);
+               return;
+       }
+ 
+       list_add(&entry->list, &port->mc_list->list);
+ }
+ 
+ /*
+  * Bring the port's receive filtering in line with the flags and multicast
+  * list of the net device.
+  *
+  * The requested promiscuous state is taken from dev->flags (IFF_PROMISC).
+  * port->promisc only records the state currently programmed and is what
+  * ehea_promiscuous() compares against, so testing it here would turn every
+  * call into a no-op.
+  */
+ static void ehea_set_multicast_list(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct netdev_hw_addr *ha;
+       int ret;
+ 
+       if (dev->flags & IFF_PROMISC) {
+               ehea_promiscuous(dev, 1);
+               return;
+       }
+       ehea_promiscuous(dev, 0);
+ 
+       if (dev->flags & IFF_ALLMULTI) {
+               ehea_allmulti(dev, 1);
+               goto out;
+       }
+       ehea_allmulti(dev, 0);
+ 
+       if (!netdev_mc_empty(dev)) {
+               /* rebuild the registrations from scratch */
+               ret = ehea_drop_multicast_list(dev);
+               if (ret) {
+                       /* Dropping the current multicast list failed.
+                        * Enabling ALL_MULTI is the best we can do.
+                        */
+                       ehea_allmulti(dev, 1);
+               }
+ 
+               if (netdev_mc_count(dev) > port->adapter->max_mc_mac) {
+                       pr_info("Mcast registration limit reached (0x%llx). Use ALLMULTI!\n",
+                               port->adapter->max_mc_mac);
+                       goto out;
+               }
+ 
+               netdev_for_each_mc_addr(ha, dev)
+                       ehea_add_multicast_entry(port, ha->addr);
+ 
+       }
+ out:
+       ehea_update_bcmc_registrations();
+ }
+ 
+ /*
+  * Set a new MTU.  Values below 68 or above EHEA_MAX_PACKET_SIZE are
+  * rejected with -EINVAL; otherwise dev->mtu is updated and 0 is returned.
+  */
+ static int ehea_change_mtu(struct net_device *dev, int new_mtu)
+ {
+       if (new_mtu >= 68 && new_mtu <= EHEA_MAX_PACKET_SIZE) {
+               dev->mtu = new_mtu;
+               return 0;
+       }
+ 
+       return -EINVAL;
+ }
+ 
+ /*
+  * Set the WQE fields shared by both transmit variants: the immediate-data
+  * and CRC control bits and, for IPv4 packets, the IP header bounds plus the
+  * checksum field offset for TCP and UDP.  The checksum control bits are
+  * only set when the skb is marked CHECKSUM_PARTIAL.
+  */
+ static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe)
+ {
+       const int hw_csum = (skb->ip_summed == CHECKSUM_PARTIAL);
+       size_t csum_off;
+ 
+       swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC;
+ 
+       if (skb->protocol != htons(ETH_P_IP))
+               return;
+ 
+       if (hw_csum)
+               swqe->tx_control |= EHEA_SWQE_IP_CHECKSUM;
+ 
+       swqe->ip_start = skb_network_offset(skb);
+       swqe->ip_end = swqe->ip_start + ip_hdrlen(skb) - 1;
+ 
+       /* only UDP and TCP carry a checksum field handled here */
+       if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+               csum_off = offsetof(struct udphdr, check);
+       else if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+               csum_off = offsetof(struct tcphdr, check);
+       else
+               return;
+ 
+       if (hw_csum)
+               swqe->tx_control |= EHEA_SWQE_TCP_CHECKSUM;
+ 
+       swqe->tcp_offset = swqe->ip_end + 1 + csum_off;
+ }
+ 
+ /*
+  * Prepare a type-2 send WQE: common control fields, the
+  * descriptors-present flag and the immediate data / scatter-gather
+  * description of the skb.
+  */
+ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
+                      struct ehea_swqe *swqe, u32 lkey)
+ {
+       xmit_common(skb, swqe);
+       swqe->tx_control |= EHEA_SWQE_DESCRIPTORS_PRESENT;
+ 
+       write_swqe2_data(skb, dev, swqe, lkey);
+ }
+ 
+ /*
+  * Prepare a type-3 send WQE: the complete packet is copied into the
+  * immediate-data area of the WQE and the skb is freed right away.
+  */
+ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
+                      struct ehea_swqe *swqe)
+ {
+       u8 *dst = &swqe->u.immdata_nodesc.immediate_data[0];
+ 
+       xmit_common(skb, swqe);
+ 
+       /* paged data needs skb_copy_bits(), linear-only skbs do not */
+       if (skb->data_len)
+               skb_copy_bits(skb, 0, dst, skb->len);
+       else
+               skb_copy_from_linear_data(skb, dst, skb->len);
+ 
+       swqe->immediate_data_length = skb->len;
+       dev_kfree_skb(skb);
+ }
+ 
+ /*
+  * Transmit one skb.
+  *
+  * The port resource and TX queue are selected by the skb's queue mapping.
+  * Packets of at most SWQE3_MAX_IMM bytes are copied completely into the
+  * send WQE (type 3); larger packets use a type-2 WQE and the skb is
+  * remembered in sq_skba.  The queue is stopped when at most one send WQE
+  * is left or when __EHEA_STOP_XFER is set.
+  *
+  * Always returns NETDEV_TX_OK.
+  */
+ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_swqe *swqe;
+       u32 lkey;
+       int swqe_index;
+       struct ehea_port_res *pr;
+       struct netdev_queue *txq;
+ 
+       pr = &port->port_res[skb_get_queue_mapping(skb)];
+       txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ 
+       /* take the next send WQE and clear its header */
+       swqe = ehea_get_swqe(pr->qp, &swqe_index);
+       memset(swqe, 0, SWQE_HEADER_SIZE);
+       atomic_dec(&pr->swqe_avail);
+ 
+       if (vlan_tx_tag_present(skb)) {
+               swqe->tx_control |= EHEA_SWQE_VLAN_INSERT;
+               swqe->vlan_tag = vlan_tx_tag_get(skb);
+       }
+ 
+       /* statistics are read before ehea_xmit3() may free the skb */
+       pr->tx_packets++;
+       pr->tx_bytes += skb->len;
+ 
+       if (skb->len <= SWQE3_MAX_IMM) {
+               u32 sig_iv = port->sig_comp_iv;
+               u32 swqe_num = pr->swqe_id_counter;
+               ehea_xmit3(skb, dev, swqe);
+               swqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE3_TYPE)
+                       | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, swqe_num);
+               /* request a signalled completion once every sig_iv WQEs */
+               if (pr->swqe_ll_count >= (sig_iv - 1)) {
+                       swqe->wr_id |= EHEA_BMASK_SET(EHEA_WR_ID_REFILL,
+                                                     sig_iv);
+                       swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION;
+                       pr->swqe_ll_count = 0;
+               } else
+                       pr->swqe_ll_count += 1;
+       } else {
+               swqe->wr_id =
+                       EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE2_TYPE)
+                     | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, pr->swqe_id_counter)
+                     | EHEA_BMASK_SET(EHEA_WR_ID_REFILL, 1)
+                     | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, pr->sq_skba.index);
+               /* remember the skb under the index encoded in wr_id */
+               pr->sq_skba.arr[pr->sq_skba.index] = skb;
+ 
+               /* NOTE(review): the mask assumes len is a power of two */
+               pr->sq_skba.index++;
+               pr->sq_skba.index &= (pr->sq_skba.len - 1);
+ 
+               lkey = pr->send_mr.lkey;
+               ehea_xmit2(skb, dev, swqe, lkey);
+               swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION;
+       }
+       pr->swqe_id_counter += 1;
+ 
+       netif_info(port, tx_queued, dev,
+                  "post swqe on QP %d\n", pr->qp->init_attr.qp_nr);
+       if (netif_msg_tx_queued(port))
+               ehea_dump(swqe, 512, "swqe");
+ 
+       /* transfers are being stopped: mark the WQE as purge and stop */
+       if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) {
+               netif_tx_stop_queue(txq);
+               swqe->tx_control |= EHEA_SWQE_PURGE;
+       }
+ 
+       ehea_post_swqe(pr->qp, swqe);
+ 
+       /* (almost) out of send WQEs: stop the queue and count the event */
+       if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) {
+               pr->p_stats.queue_stopped++;
+               netif_tx_stop_queue(txq);
+       }
+ 
+       return NETDEV_TX_OK;
+ }
+ 
+ /*
+  * Add a VLAN id to the port's VLAN filter: read control block 1, set the
+  * bit belonging to @vid and write the block back.  Failures are logged
+  * only.
+  */
+ static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_adapter *adapter = port->adapter;
+       struct hcp_ehea_port_cb1 *cb1;
+       u64 vid_bit;
+       u64 hret;
+ 
+       cb1 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb1) {
+               pr_err("no mem for cb1\n");
+               goto out;
+       }
+ 
+       hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id,
+                                     H_PORT_CB1, H_PORT_CB1_ALL, cb1);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_port failed\n");
+               goto out;
+       }
+ 
+       /* 64 ids per filter word, counted from the most significant bit */
+       vid_bit = (u64)(0x8000000000000000 >> (vid & 0x3F));
+       cb1->vlan_filter[vid / 64] |= vid_bit;
+ 
+       hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
+                                      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
+       if (hret != H_SUCCESS)
+               pr_err("modify_ehea_port failed\n");
+ out:
+       free_page((unsigned long)cb1);
+ }
+ 
+ /*
+  * Remove a VLAN id from the port's VLAN filter: read control block 1,
+  * clear the bit belonging to @vid and write the block back.  Failures are
+  * logged only.
+  */
+ static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_adapter *adapter = port->adapter;
+       struct hcp_ehea_port_cb1 *cb1;
+       u64 vid_bit;
+       u64 hret;
+ 
+       cb1 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb1) {
+               pr_err("no mem for cb1\n");
+               goto out;
+       }
+ 
+       hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id,
+                                     H_PORT_CB1, H_PORT_CB1_ALL, cb1);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_port failed\n");
+               goto out;
+       }
+ 
+       /* 64 ids per filter word, counted from the most significant bit */
+       vid_bit = (u64)(0x8000000000000000 >> (vid & 0x3F));
+       cb1->vlan_filter[vid / 64] &= ~vid_bit;
+ 
+       hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
+                                      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
+       if (hret != H_SUCCESS)
+               pr_err("modify_ehea_port failed\n");
+ out:
+       free_page((unsigned long)cb1);
+ }
+ 
+ /*
+  * Activate a queue pair by stepping its control register through three
+  * values: INITIALIZED, ENABLED | INITIALIZED and ENABLED | RDY2SND.  Before
+  * each step control block 0 of the QP is queried, and it is queried once
+  * more after the last step.
+  *
+  * Returns 0 on success, -ENOMEM if no control-block page is available and
+  * -EIO if any hypervisor call does not return H_SUCCESS.
+  */
+ int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp)
+ {
+       int ret = -EIO;
+       u64 hret;
+       /* receive the output parameters of modify_ehea_qp; never read here */
+       u16 dummy16 = 0;
+       u64 dummy64 = 0;
+       struct hcp_modify_qp_cb0 *cb0;
+ 
+       cb0 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb0) {
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       /* step 1: INITIALIZED */
+       hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                   EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_qp failed (1)\n");
+               goto out;
+       }
+ 
+       cb0->qp_ctl_reg = H_QP_CR_STATE_INITIALIZED;
+       hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                    EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0,
+                                    &dummy64, &dummy64, &dummy16, &dummy16);
+       if (hret != H_SUCCESS) {
+               pr_err("modify_ehea_qp failed (1)\n");
+               goto out;
+       }
+ 
+       /* step 2: ENABLED | INITIALIZED */
+       hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                   EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_qp failed (2)\n");
+               goto out;
+       }
+ 
+       cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_INITIALIZED;
+       hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                    EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0,
+                                    &dummy64, &dummy64, &dummy16, &dummy16);
+       if (hret != H_SUCCESS) {
+               pr_err("modify_ehea_qp failed (2)\n");
+               goto out;
+       }
+ 
+       /* step 3: ENABLED | RDY2SND */
+       hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                   EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_qp failed (3)\n");
+               goto out;
+       }
+ 
+       cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_RDY2SND;
+       hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                    EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0,
+                                    &dummy64, &dummy64, &dummy16, &dummy16);
+       if (hret != H_SUCCESS) {
+               pr_err("modify_ehea_qp failed (3)\n");
+               goto out;
+       }
+ 
+       /* final query; only its return code is checked */
+       hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                   EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0);
+       if (hret != H_SUCCESS) {
+               pr_err("query_ehea_qp failed (4)\n");
+               goto out;
+       }
+ 
+       ret = 0;
+ out:
+       /* also reached with cb0 == NULL when the allocation failed */
+       free_page((unsigned long)cb0);
+       return ret;
+ }
+ 
+ /*
+  * Create the port's QP event queue and initialise the first @def_qps port
+  * resources, using the module-wide queue size settings (sq_entries,
+  * rq1_entries, rq2_entries, rq3_entries).
+  *
+  * Returns 0 on success.  On failure the port resources initialised so far
+  * are cleaned up again, the event queue is destroyed and an error code is
+  * returned (-EINVAL if the event queue could not be created, otherwise the
+  * error of ehea_init_port_res()).
+  */
+ static int ehea_port_res_setup(struct ehea_port *port, int def_qps)
+ {
+       int ret, i;
+       struct port_res_cfg pr_cfg;
+       enum ehea_eq_type eq_type = EHEA_EQ;
+ 
+       port->qp_eq = ehea_create_eq(port->adapter, eq_type,
+                                  EHEA_MAX_ENTRIES_EQ, 1);
+       if (!port->qp_eq) {
+               ret = -EINVAL;
+               pr_err("ehea_create_eq failed (qp_eq)\n");
+               goto out_kill_eq;
+       }
+ 
+       /* the receive CQ must hold the entries of all three receive queues */
+       pr_cfg.max_entries_rcq = rq1_entries + rq2_entries + rq3_entries;
+       pr_cfg.max_entries_scq = sq_entries * 2;
+       pr_cfg.max_entries_sq = sq_entries;
+       pr_cfg.max_entries_rq1 = rq1_entries;
+       pr_cfg.max_entries_rq2 = rq2_entries;
+       pr_cfg.max_entries_rq3 = rq3_entries;
+ 
+       for (i = 0; i < def_qps; i++) {
+               ret = ehea_init_port_res(port, &port->port_res[i], &pr_cfg, i);
+               if (ret)
+                       goto out_clean_pr;
+       }
+ 
+       return 0;
+ 
+ out_clean_pr:
+       /* undo only the resources that were initialised successfully */
+       while (--i >= 0)
+               ehea_clean_portres(port, &port->port_res[i]);
+ 
+ out_kill_eq:
+       ehea_destroy_eq(port->qp_eq);
+       return ret;
+ }
+ 
+ /*
+  * Clean every default-QP port resource of @port and destroy its QP event
+  * queue.  The individual results are OR-ed together, so the return value
+  * is 0 only if every step returned 0.
+  */
+ static int ehea_clean_all_portres(struct ehea_port *port)
+ {
+       int idx = 0;
+       int err = 0;
+ 
+       while (idx < port->num_def_qps) {
+               err |= ehea_clean_portres(port, &port->port_res[idx]);
+               idx++;
+       }
+ 
+       err |= ehea_destroy_eq(port->qp_eq);
+       return err;
+ }
+ 
+ /* Remove the adapter's memory region once no port is active any more. */
+ static void ehea_remove_adapter_mr(struct ehea_adapter *adapter)
+ {
+       if (!adapter->active_ports)
+               ehea_rem_mr(&adapter->mr);
+ }
+ 
+ /*
+  * Register the adapter's kernel memory region unless a port is already
+  * active.  Returns 0 when nothing had to be done, otherwise the result of
+  * ehea_reg_kernel_mr().
+  */
+ static int ehea_add_adapter_mr(struct ehea_adapter *adapter)
+ {
+       int rc = 0;
+ 
+       if (!adapter->active_ports)
+               rc = ehea_reg_kernel_mr(adapter, &adapter->mr);
+ 
+       return rc;
+ }
+ 
+ /*
+  * Bring @dev's port into the EHEA_PORT_UP state.
+  *
+  * Sets up the port resources, configures the port, registers the
+  * interrupts, activates and fills every default QP and registers for
+  * broadcast.  Returns 0 immediately if the port is already up.
+  *
+  * On failure the steps already taken are undone in reverse order and a
+  * non-zero error is returned.  Except for the "already up" shortcut, the
+  * broadcast/multicast registrations and firmware handles are refreshed on
+  * every exit path.
+  */
+ static int ehea_up(struct net_device *dev)
+ {
+       int ret, i;
+       struct ehea_port *port = netdev_priv(dev);
+ 
+       if (port->state == EHEA_PORT_UP)
+               return 0;
+ 
+       ret = ehea_port_res_setup(port, port->num_def_qps);
+       if (ret) {
+               netdev_err(dev, "port_res_failed\n");
+               goto out;
+       }
+ 
+       /* Set default QP for this port */
+       ret = ehea_configure_port(port);
+       if (ret) {
+               netdev_err(dev, "ehea_configure_port failed. ret:%d\n", ret);
+               goto out_clean_pr;
+       }
+ 
+       ret = ehea_reg_interrupts(dev);
+       if (ret) {
+               netdev_err(dev, "reg_interrupts failed. ret:%d\n", ret);
+               goto out_clean_pr;
+       }
+ 
+       for (i = 0; i < port->num_def_qps; i++) {
+               ret = ehea_activate_qp(port->adapter, port->port_res[i].qp);
+               if (ret) {
+                       netdev_err(dev, "activate_qp failed\n");
+                       goto out_free_irqs;
+               }
+       }
+ 
+       for (i = 0; i < port->num_def_qps; i++) {
+               ret = ehea_fill_port_res(&port->port_res[i]);
+               if (ret) {
+                       /* name the failing step, not the goto label */
+                       netdev_err(dev, "fill_port_res failed\n");
+                       goto out_free_irqs;
+               }
+       }
+ 
+       ret = ehea_broadcast_reg_helper(port, H_REG_BCMC);
+       if (ret) {
+               /* the helper's own code is replaced by -EIO for the caller */
+               ret = -EIO;
+               goto out_free_irqs;
+       }
+ 
+       port->state = EHEA_PORT_UP;
+ 
+       ret = 0;
+       goto out;
+ 
+ out_free_irqs:
+       ehea_free_interrupts(dev);
+ 
+ out_clean_pr:
+       ehea_clean_all_portres(port);
+ out:
+       if (ret)
+               netdev_info(dev, "Failed starting. ret=%i\n", ret);
+ 
+       ehea_update_bcmc_registrations();
+       ehea_update_firmware_handles();
+ 
+       return ret;
+ }
+ 
+ /* Disable NAPI on every default-QP port resource of @port. */
+ static void port_napi_disable(struct ehea_port *port)
+ {
+       int qp = 0;
+ 
+       while (qp < port->num_def_qps) {
+               napi_disable(&port->port_res[qp].napi);
+               qp++;
+       }
+ }
+ 
+ /* Enable NAPI on every default-QP port resource of @port. */
+ static void port_napi_enable(struct ehea_port *port)
+ {
+       int qp = 0;
+ 
+       while (qp < port->num_def_qps) {
+               napi_enable(&port->port_res[qp].napi);
+               qp++;
+       }
+ }
+ 
+ /*
+  * ndo_open handler: bring the port up under port_lock and, on success,
+  * enable NAPI and start all TX queues.  The statistics work is scheduled
+  * in either case.  Returns the ehea_up() result.
+  */
+ static int ehea_open(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       int rc;
+ 
+       mutex_lock(&port->port_lock);
+ 
+       netif_info(port, ifup, dev, "enabling port\n");
+ 
+       rc = ehea_up(dev);
+       if (rc)
+               goto unlock;
+ 
+       port_napi_enable(port);
+       netif_tx_start_all_queues(dev);
+ 
+ unlock:
+       mutex_unlock(&port->port_lock);
+       schedule_delayed_work(&port->stats_work, msecs_to_jiffies(1000));
+ 
+       return rc;
+ }
+ 
+ /*
+  * Take @dev's port down: drop the multicast list, deregister broadcast,
+  * free the interrupts, mark the port EHEA_PORT_DOWN and release all port
+  * resources.  Returns 0 if the port was already down, otherwise the
+  * result of ehea_clean_all_portres().
+  */
+ static int ehea_down(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       int rc = 0;
+ 
+       if (port->state != EHEA_PORT_DOWN) {
+               ehea_drop_multicast_list(dev);
+               ehea_broadcast_reg_helper(port, H_DEREG_BCMC);
+ 
+               ehea_free_interrupts(dev);
+ 
+               port->state = EHEA_PORT_DOWN;
+ 
+               ehea_update_bcmc_registrations();
+ 
+               rc = ehea_clean_all_portres(port);
+               if (rc)
+                       netdev_info(dev, "Failed freeing resources. ret=%i\n",
+                                   rc);
+ 
+               ehea_update_firmware_handles();
+       }
+ 
+       return rc;
+ }
+ 
+ /*
+  * ndo_stop handler.  __EHEA_DISABLE_PORT_RESET stays set for the whole
+  * shutdown; pending reset and statistics work is cancelled before the
+  * port is taken down under port_lock.  Returns the ehea_down() result.
+  */
+ static int ehea_stop(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       int rc;
+ 
+       netif_info(port, ifdown, dev, "disabling port\n");
+ 
+       set_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
+ 
+       /* wait for outstanding work items before taking the lock */
+       cancel_work_sync(&port->reset_task);
+       cancel_delayed_work_sync(&port->stats_work);
+ 
+       mutex_lock(&port->port_lock);
+       netif_tx_stop_all_queues(dev);
+       port_napi_disable(port);
+       rc = ehea_down(dev);
+       mutex_unlock(&port->port_lock);
+ 
+       clear_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
+ 
+       return rc;
+ }
+ 
+ /*
+  * Set EHEA_SWQE_PURGE in the tx_control field of act_nr_send_wqes send
+  * WQEs.  The WQEs are fetched through a local copy of *@orig_qp, so
+  * @orig_qp itself is not written by this function.
+  */
+ static void ehea_purge_sq(struct ehea_qp *orig_qp)
+ {
+       struct ehea_qp qp_copy = *orig_qp;
+       struct ehea_swqe *wqe;
+       int wqe_idx;
+       int n = 0;
+ 
+       while (n < qp_copy.init_attr.act_nr_send_wqes) {
+               wqe = ehea_get_swqe(&qp_copy, &wqe_idx);
+               wqe->tx_control |= EHEA_SWQE_PURGE;
+               n++;
+       }
+ }
+ 
+ /*
+  * Wait, for at most 100 ms per port resource, until swqe_avail has
+  * reached sq_skba_size - 2 - swqe_ll_count.  Gives up on the first port
+  * resource that times out and logs a warning.
+  */
+ static void ehea_flush_sq(struct ehea_port *port)
+ {
+       int qp;
+ 
+       for (qp = 0; qp < port->num_def_qps; qp++) {
+               struct ehea_port_res *pr = &port->port_res[qp];
+               int wanted = pr->sq_skba_size - 2 - pr->swqe_ll_count;
+               int remaining;
+ 
+               remaining = wait_event_timeout(port->swqe_avail_wq,
+                               atomic_read(&pr->swqe_avail) >= wanted,
+                               msecs_to_jiffies(100));
+               if (remaining)
+                       continue;
+ 
+               pr_err("WARNING: sq not flushed completely\n");
+               break;
+       }
+ }
+ 
+ /*
+  * For every default QP of @dev's port: purge the send queue, clear
+  * H_QP_CR_ENABLED in the QP control register via the hypervisor and
+  * remove the port resource's shared memory regions.
+  *
+  * Returns 0 on success, -ENOMEM if the control block page cannot be
+  * allocated and -EIO on the first failing step; QPs handled before the
+  * failure are not restored.
+  */
+ int ehea_stop_qps(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_adapter *adapter = port->adapter;
+       struct hcp_modify_qp_cb0 *cb0;
+       u64 dummy64 = 0;
+       u16 dummy16 = 0;
+       u64 hret;
+       int rc = -ENOMEM;
+       int n;
+ 
+       cb0 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb0)
+               goto out;
+ 
+       /* from here on every failure is reported as -EIO */
+       rc = -EIO;
+ 
+       for (n = 0; n < port->num_def_qps; n++) {
+               struct ehea_port_res *pr = &port->port_res[n];
+               struct ehea_qp *qp = pr->qp;
+ 
+               /* Purge send queue */
+               ehea_purge_sq(qp);
+ 
+               /* Disable queue pair */
+               hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                           EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF),
+                                           cb0);
+               if (hret != H_SUCCESS) {
+                       pr_err("query_ehea_qp failed (1)\n");
+                       goto out;
+               }
+ 
+               cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8;
+               cb0->qp_ctl_reg &= ~H_QP_CR_ENABLED;
+ 
+               hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                            EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG,
+                                                           1), cb0, &dummy64,
+                                            &dummy64, &dummy16, &dummy16);
+               if (hret != H_SUCCESS) {
+                       pr_err("modify_ehea_qp failed (1)\n");
+                       goto out;
+               }
+ 
+               hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                           EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF),
+                                           cb0);
+               if (hret != H_SUCCESS) {
+                       pr_err("query_ehea_qp failed (2)\n");
+                       goto out;
+               }
+ 
+               /* deregister shared memory regions */
+               if (ehea_rem_smrs(pr)) {
+                       pr_err("unreg shared memory region failed\n");
+                       goto out;
+               }
+       }
+ 
+       rc = 0;
+ out:
+       free_page((unsigned long)cb0);
+ 
+       return rc;
+ }
+ 
+ /*
+  * Walk act_nr_rwqes_rq2 + 1 receive WQEs of RQ2 and act_nr_rwqes_rq3 + 1
+  * of RQ3, store the lkey of pr->recv_mr in each one and, where the
+  * matching skb array slot is populated, re-map its data address.
+  *
+  * The WQEs are fetched through a local copy of *@orig_qp.
+  */
+ void ehea_update_rqs(struct ehea_qp *orig_qp, struct ehea_port_res *pr)
+ {
+       struct ehea_qp qp_copy = *orig_qp;
+       struct sk_buff **rq2_skbs = pr->rq2_skba.arr;
+       struct sk_buff **rq3_skbs = pr->rq3_skba.arr;
+       u32 lkey = pr->recv_mr.lkey;
+       struct ehea_rwqe *wqe;
+       struct sk_buff *skb;
+       int slot;
+       int n;
+ 
+       /* receive queue 2 */
+       n = 0;
+       while (n < qp_copy.init_attr.act_nr_rwqes_rq2 + 1) {
+               wqe = ehea_get_next_rwqe(&qp_copy, 2);
+               wqe->sg_list[0].l_key = lkey;
+               slot = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, wqe->wr_id);
+               skb = rq2_skbs[slot];
+               if (skb)
+                       wqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data);
+               n++;
+       }
+ 
+       /* receive queue 3 */
+       n = 0;
+       while (n < qp_copy.init_attr.act_nr_rwqes_rq3 + 1) {
+               wqe = ehea_get_next_rwqe(&qp_copy, 3);
+               wqe->sg_list[0].l_key = lkey;
+               slot = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, wqe->wr_id);
+               skb = rq3_skbs[slot];
+               if (skb)
+                       wqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data);
+               n++;
+       }
+ }
+ 
+ /*
+  * For every default QP of @dev's port: recreate the shared memory
+  * regions, update the receive WQEs, set H_QP_CR_ENABLED in the QP control
+  * register via the hypervisor and refill all three receive queues.
+  *
+  * Returns 0 on success, -ENOMEM if the control block page cannot be
+  * allocated, the ehea_gen_smrs() error, or -EFAULT when a hypervisor call
+  * fails.  Processing stops at the first failure; QPs restarted before it
+  * are left as they are.
+  */
+ int ehea_restart_qps(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+       struct ehea_adapter *adapter = port->adapter;
+       int ret = 0;
+       int i;
+ 
+       struct hcp_modify_qp_cb0 *cb0;
+       u64 hret;
+       u64 dummy64 = 0;
+       u16 dummy16 = 0;
+ 
+       cb0 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb0) {
+               ret = -ENOMEM;
+               goto out;
+       }
+ 
+       for (i = 0; i < (port->num_def_qps); i++) {
+               struct ehea_port_res *pr =  &port->port_res[i];
+               struct ehea_qp *qp = pr->qp;
+ 
+               ret = ehea_gen_smrs(pr);
+               if (ret) {
+                       netdev_err(dev, "creation of shared memory regions failed\n");
+                       goto out;
+               }
+ 
+               ehea_update_rqs(qp, pr);
+ 
+               /*
+                * Enable queue pair.  ret is 0 at this point, so each
+                * hypervisor failure below must set an error code itself;
+                * otherwise the caller would see a failed restart as success.
+                */
+               hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                           EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF),
+                                           cb0);
+               if (hret != H_SUCCESS) {
+                       netdev_err(dev, "query_ehea_qp failed (1)\n");
+                       ret = -EFAULT;
+                       goto out;
+               }
+ 
+               cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8;
+               cb0->qp_ctl_reg |= H_QP_CR_ENABLED;
+ 
+               hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                            EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG,
+                                                           1), cb0, &dummy64,
+                                            &dummy64, &dummy16, &dummy16);
+               if (hret != H_SUCCESS) {
+                       netdev_err(dev, "modify_ehea_qp failed (1)\n");
+                       ret = -EFAULT;
+                       goto out;
+               }
+ 
+               hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle,
+                                           EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF),
+                                           cb0);
+               if (hret != H_SUCCESS) {
+                       netdev_err(dev, "query_ehea_qp failed (2)\n");
+                       ret = -EFAULT;
+                       goto out;
+               }
+ 
+               /* refill entire queue */
+               ehea_refill_rq1(pr, pr->rq1_skba.index, 0);
+               ehea_refill_rq2(pr, 0);
+               ehea_refill_rq3(pr, 0);
+       }
+ out:
+       free_page((unsigned long)cb0);
+ 
+       return ret;
+ }
+ 
+ /*
+  * Work handler behind port->reset_task: take the port down and bring it
+  * up again while holding dlpar_mem_lock and port_lock.  The multicast
+  * list, NAPI and the TX queues are only restored when ehea_up() succeeds.
+  */
+ static void ehea_reset_port(struct work_struct *work)
+ {
+       struct ehea_port *port =
+               container_of(work, struct ehea_port, reset_task);
+       struct net_device *dev = port->netdev;
+       int rc;
+ 
+       mutex_lock(&dlpar_mem_lock);
+       port->resets++;
+       mutex_lock(&port->port_lock);
+ 
+       netif_tx_disable(dev);
+       port_napi_disable(port);
+       ehea_down(dev);
+ 
+       rc = ehea_up(dev);
+       if (!rc) {
+               ehea_set_multicast_list(dev);
+ 
+               netif_info(port, timer, dev, "reset successful\n");
+ 
+               port_napi_enable(port);
+               netif_tx_wake_all_queues(dev);
+       }
+ 
+       mutex_unlock(&port->port_lock);
+       mutex_unlock(&dlpar_mem_lock);
+ }
+ 
+ /*
+  * Re-register the memory region of every adapter that has active ports.
+  *
+  * Pass 1 stops the QPs of all ports whose netdev is IFF_UP and removes
+  * each adapter's memory region.  __EHEA_STOP_XFER is then cleared, and
+  * pass 2 registers a new memory region per adapter and restarts the QPs.
+  *
+  * Stopping QPs, removing an MR or registering an MR may fail; in that
+  * case the function returns immediately and the remaining adapters are
+  * not processed.  A port whose QPs cannot be restarted is only logged.
+  */
+ static void ehea_rereg_mrs(void)
+ {
+       struct ehea_adapter *adapter;
+       int ret, i;
+ 
+       pr_info("LPAR memory changed - re-initializing driver\n");
+ 
+       list_for_each_entry(adapter, &adapter_list, list) {
+               if (!adapter->active_ports)
+                       continue;
+ 
+               /* Shutdown all ports */
+               for (i = 0; i < EHEA_MAX_PORTS; i++) {
+                       struct ehea_port *port = adapter->port[i];
+                       struct net_device *dev;
+ 
+                       if (!port)
+                               continue;
+ 
+                       dev = port->netdev;
+ 
+                       if (dev->flags & IFF_UP) {
+                               mutex_lock(&port->port_lock);
+                               netif_tx_disable(dev);
+                               ehea_flush_sq(port);
+                               ret = ehea_stop_qps(dev);
+                               if (ret) {
+                                       mutex_unlock(&port->port_lock);
+                                       return;
+                               }
+                               port_napi_disable(port);
+                               mutex_unlock(&port->port_lock);
+                       }
+                       reset_sq_restart_flag(port);
+               }
+ 
+               /* Unregister old memory region */
+               ret = ehea_rem_mr(&adapter->mr);
+               if (ret) {
+                       pr_err("unregister MR failed - driver inoperable!\n");
+                       return;
+               }
+       }
+ 
+       clear_bit(__EHEA_STOP_XFER, &ehea_driver_flags);
+ 
+       list_for_each_entry(adapter, &adapter_list, list) {
+               if (!adapter->active_ports)
+                       continue;
+ 
+               /* Register new memory region */
+               ret = ehea_reg_kernel_mr(adapter, &adapter->mr);
+               if (ret) {
+                       pr_err("register MR failed - driver inoperable!\n");
+                       return;
+               }
+ 
+               /* Restart all ports */
+               for (i = 0; i < EHEA_MAX_PORTS; i++) {
+                       struct ehea_port *port = adapter->port[i];
+                       struct net_device *dev;
+ 
+                       if (!port)
+                               continue;
+ 
+                       dev = port->netdev;
+                       if (!(dev->flags & IFF_UP))
+                               continue;
+ 
+                       mutex_lock(&port->port_lock);
+                       ret = ehea_restart_qps(dev);
+                       if (ret) {
+                               netdev_err(dev, "Unable to restart QPS\n");
+                       } else {
+                               check_sqs(port);
+                               port_napi_enable(port);
+                               netif_tx_wake_all_queues(dev);
+                       }
+                       mutex_unlock(&port->port_lock);
+               }
+       }
+ 
+       pr_info("re-initializing driver complete\n");
+ }
+ 
+ /*
+  * ndo_tx_timeout handler: schedule a port reset, but only while the
+  * carrier is up and __EHEA_STOP_XFER is not set.
+  */
+ static void ehea_tx_watchdog(struct net_device *dev)
+ {
+       struct ehea_port *port = netdev_priv(dev);
+ 
+       if (!netif_carrier_ok(dev))
+               return;
+ 
+       if (test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))
+               return;
+ 
+       ehea_schedule_port_reset(port);
+ }
+ 
+ /*
+  * Query the adapter attributes from the hypervisor and store
+  * max_mc_mac - 1 in adapter->max_mc_mac.
+  *
+  * Returns 0 on success, -ENOMEM if the control block page cannot be
+  * allocated and -EIO if the query fails.
+  */
+ int ehea_sense_adapter_attr(struct ehea_adapter *adapter)
+ {
+       struct hcp_query_ehea *cb;
+       u64 hret;
+       int rc;
+ 
+       cb = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb)
+               return -ENOMEM;
+ 
+       hret = ehea_h_query_ehea(adapter->handle, cb);
+       if (hret == H_SUCCESS) {
+               adapter->max_mc_mac = cb->max_mc_mac - 1;
+               rc = 0;
+       } else {
+               rc = -EIO;
+       }
+ 
+       free_page((unsigned long)cb);
+       return rc;
+ }
+ 
+ /*
+  * Report through @jumbo whether jumbo frames are enabled on @port, trying
+  * to enable them if they are not.
+  *
+  * *@jumbo is 1 when the port already had jumbo frames enabled or the
+  * enabling call succeeded, and 0 otherwise.  Returns 0 unless the control
+  * block page cannot be allocated (-ENOMEM) or the port query fails
+  * (-EINVAL); a failed enabling attempt is not reported as an error.
+  */
+ int ehea_get_jumboframe_status(struct ehea_port *port, int *jumbo)
+ {
+       struct hcp_ehea_port_cb4 *cb4;
+       u64 hret;
+       int rc = 0;
+ 
+       *jumbo = 0;
+ 
+       /* (Try to) enable *jumbo frames */
+       cb4 = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!cb4) {
+               pr_err("no mem for cb4\n");
+               return -ENOMEM;
+       }
+ 
+       hret = ehea_h_query_ehea_port(port->adapter->handle,
+                                     port->logical_port_id,
+                                     H_PORT_CB4,
+                                     H_PORT_CB4_JUMBO, cb4);
+       if (hret != H_SUCCESS) {
+               rc = -EINVAL;
+               goto out_free;
+       }
+ 
+       if (cb4->jumbo_frame) {
+               *jumbo = 1;
+               goto out_free;
+       }
+ 
+       /* not enabled yet: ask for it and report whether that worked */
+       cb4->jumbo_frame = 1;
+       hret = ehea_h_modify_ehea_port(port->adapter->handle,
+                                      port->logical_port_id,
+                                      H_PORT_CB4,
+                                      H_PORT_CB4_JUMBO,
+                                      cb4);
+       if (hret == H_SUCCESS)
+               *jumbo = 1;
+ 
+ out_free:
+       free_page((unsigned long)cb4);
+       return rc;
+ }
+ 
+ /*
+  * sysfs show handler of "log_port_id": prints the logical port id of the
+  * port that embeds @dev, in decimal and without a trailing newline.
+  */
+ static ssize_t ehea_show_port_id(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+ {
+       struct ehea_port *owner;
+ 
+       owner = container_of(dev, struct ehea_port, ofdev.dev);
+ 
+       return sprintf(buf, "%d", owner->logical_port_id);
+ }
+ 
+ /* read-only for user, group and others */
+ static DEVICE_ATTR(log_port_id,
+                  S_IRUSR | S_IRGRP | S_IROTH,
+                  ehea_show_port_id, NULL);
+ 
+ /*
+  * Release callback of the logical port device: drops the device-tree
+  * node reference that ehea_register_port() took with of_node_get().
+  *
+  * Deliberately not marked __devinit: the callback is installed as
+  * dev.release and is invoked when the device goes away, so it must not
+  * be placed in an init-only section.
+  */
+ static void logical_port_release(struct device *dev)
+ {
+       struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev);
+       of_node_put(port->ofdev.dev.of_node);
+ }
+ 
+ /*
+  * Register @port's embedded of-device as "port<N>" on the ibmebus below
+  * the adapter device and create its log_port_id attribute.
+  *
+  * Takes a reference on @dn.  Returns the registered struct device, or
+  * NULL if registration or attribute creation failed.
+  */
+ static struct device *ehea_register_port(struct ehea_port *port,
+                                        struct device_node *dn)
+ {
+       struct device *pdev = &port->ofdev.dev;
+       int rc;
+ 
+       pdev->of_node = of_node_get(dn);
+       pdev->parent = &port->adapter->ofdev->dev;
+       pdev->bus = &ibmebus_bus_type;
+ 
+       dev_set_name(pdev, "port%d", port_name_cnt++);
+       pdev->release = logical_port_release;
+ 
+       rc = of_device_register(&port->ofdev);
+       if (rc) {
+               pr_err("failed to register device. ret=%d\n", rc);
+               return NULL;
+       }
+ 
+       rc = device_create_file(pdev, &dev_attr_log_port_id);
+       if (rc) {
+               pr_err("failed to register attributes, ret=%d\n", rc);
+               of_device_unregister(&port->ofdev);
+               return NULL;
+       }
+ 
+       return pdev;
+ }
+ 
+ /* Remove the log_port_id attribute and unregister @port's of-device. */
+ static void ehea_unregister_port(struct ehea_port *port)
+ {
+       struct device *pdev = &port->ofdev.dev;
+ 
+       device_remove_file(pdev, &dev_attr_log_port_id);
+       of_device_unregister(&port->ofdev);
+ }
+ 
+ static const struct net_device_ops ehea_netdev_ops = {
+       .ndo_open               = ehea_open,
+       .ndo_stop               = ehea_stop,
+       .ndo_start_xmit         = ehea_start_xmit,
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = ehea_netpoll,
+ #endif
+       .ndo_get_stats64        = ehea_get_stats64,
+       .ndo_set_mac_address    = ehea_set_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_rx_mode        = ehea_set_multicast_list,
+       .ndo_change_mtu         = ehea_change_mtu,
+       .ndo_vlan_rx_add_vid    = ehea_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = ehea_vlan_rx_kill_vid,
+       .ndo_tx_timeout         = ehea_tx_watchdog,
+ };
+ 
+ /*
+  * Allocate, initialise and register the net_device for one logical port.
+  *
+  * @adapter:         adapter the port belongs to
+  * @logical_port_id: logical port number
+  * @dn:              device-tree node of the port
+  *
+  * On success adapter->active_ports is incremented and the new port is
+  * returned.  On failure everything set up so far is undone, the error
+  * code is logged and NULL is returned.  A failure to determine the jumbo
+  * frame status is only logged and does not fail the setup.
+  */
+ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
+                                        u32 logical_port_id,
+                                        struct device_node *dn)
+ {
+       int ret;
+       struct net_device *dev;
+       struct ehea_port *port;
+       struct device *port_dev;
+       int jumbo;
+ 
+       /* allocate memory for the port structures */
+       dev = alloc_etherdev_mq(sizeof(struct ehea_port), EHEA_MAX_PORT_RES);
+ 
+       if (!dev) {
+               pr_err("no mem for net_device\n");
+               ret = -ENOMEM;
+               goto out_err;
+       }
+ 
+       port = netdev_priv(dev);
+ 
+       mutex_init(&port->port_lock);
+       port->state = EHEA_PORT_DOWN;
+       port->sig_comp_iv = sq_entries / 10;
+ 
+       port->adapter = adapter;
+       port->netdev = dev;
+       port->logical_port_id = logical_port_id;
+ 
+       port->msg_enable = netif_msg_init(msg_level, EHEA_MSG_DEFAULT);
+ 
+       port->mc_list = kzalloc(sizeof(struct ehea_mc_list), GFP_KERNEL);
+       if (!port->mc_list) {
+               ret = -ENOMEM;
+               goto out_free_ethdev;
+       }
+ 
+       INIT_LIST_HEAD(&port->mc_list->list);
+ 
+       ret = ehea_sense_port_attr(port);
+       if (ret)
+               goto out_free_mc_list;
+ 
+       netif_set_real_num_rx_queues(dev, port->num_def_qps);
+       netif_set_real_num_tx_queues(dev, port->num_def_qps);
+ 
+       port_dev = ehea_register_port(port, dn);
+       if (!port_dev) {
+               /*
+                * ehea_register_port() only reports NULL, and ret is still 0
+                * from the call above; set a real error for the log below.
+                */
+               ret = -EIO;
+               goto out_free_mc_list;
+       }
+ 
+       SET_NETDEV_DEV(dev, port_dev);
+ 
+       /* initialize net_device structure */
+       memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN);
+ 
+       dev->netdev_ops = &ehea_netdev_ops;
+       ehea_set_ethtool_ops(dev);
+ 
+       dev->hw_features = NETIF_F_SG | NETIF_F_TSO
+                     | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_LRO;
+       dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO
+                     | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX
+                     | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER
+                     | NETIF_F_RXCSUM;
+       dev->vlan_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HIGHDMA |
+                       NETIF_F_IP_CSUM;
+       dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT;
+ 
+       INIT_WORK(&port->reset_task, ehea_reset_port);
+       INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats);
+ 
+       init_waitqueue_head(&port->swqe_avail_wq);
+       init_waitqueue_head(&port->restart_wq);
+ 
+       memset(&port->stats, 0, sizeof(struct net_device_stats));
+       ret = register_netdev(dev);
+       if (ret) {
+               pr_err("register_netdev failed. ret=%d\n", ret);
+               goto out_unreg_port;
+       }
+ 
+       /* jumbo is always written by the callee, even when it fails */
+       ret = ehea_get_jumboframe_status(port, &jumbo);
+       if (ret)
+               netdev_err(dev, "failed determining jumbo frame status\n");
+ 
+       netdev_info(dev, "Jumbo frames are %sabled\n",
+                   jumbo == 1 ? "en" : "dis");
+ 
+       adapter->active_ports++;
+ 
+       return port;
+ 
+ out_unreg_port:
+       ehea_unregister_port(port);
+ 
+ out_free_mc_list:
+       kfree(port->mc_list);
+ 
+ out_free_ethdev:
+       free_netdev(dev);
+ 
+ out_err:
+       pr_err("setting up logical port with id=%d failed, ret=%d\n",
+              logical_port_id, ret);
+       return NULL;
+ }
+ 
+ /*
+  * Tear down one logical port: cancel its pending work, unregister the
+  * net_device and the port device, free the multicast list and the
+  * net_device, and decrement the adapter's active port count.
+  */
+ static void ehea_shutdown_single_port(struct ehea_port *port)
+ {
+       /* read these up front: @port lives inside the netdev freed below */
+       struct ehea_adapter *adapter = port->adapter;
+       struct net_device *ndev = port->netdev;
+ 
+       cancel_work_sync(&port->reset_task);
+       cancel_delayed_work_sync(&port->stats_work);
+ 
+       unregister_netdev(ndev);
+       ehea_unregister_port(port);
+ 
+       kfree(port->mc_list);
+       free_netdev(ndev);
+ 
+       adapter->active_ports--;
+ }
+ 
+ /*
+  * Set up one port for every child node of the adapter's device-tree node
+  * that carries an "ibm,hea-port-no" property.
+  *
+  * Ports are stored in adapter->port[] in discovery order; a port that
+  * fails to set up leaves a NULL slot.  At most EHEA_MAX_PORTS slots are
+  * used, further nodes are ignored with an error message.
+  *
+  * Returns 0, or -EIO if the adapter memory region cannot be created.
+  */
+ static int ehea_setup_ports(struct ehea_adapter *adapter)
+ {
+       struct device_node *lhea_dn;
+       struct device_node *eth_dn = NULL;
+ 
+       const u32 *dn_log_port_id;
+       int i = 0;
+ 
+       lhea_dn = adapter->ofdev->dev.of_node;
+       while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) {
+ 
+               dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no",
+                                                NULL);
+               if (!dn_log_port_id) {
+                       pr_err("bad device node: eth_dn name=%s\n",
+                              eth_dn->full_name);
+                       continue;
+               }
+ 
+               /* never index past the end of adapter->port[] */
+               if (i >= EHEA_MAX_PORTS) {
+                       pr_err("too many ports, ignoring eth_dn name=%s\n",
+                              eth_dn->full_name);
+                       /* leaving the loop early: drop the node reference */
+                       of_node_put(eth_dn);
+                       break;
+               }
+ 
+               if (ehea_add_adapter_mr(adapter)) {
+                       pr_err("creating MR failed\n");
+                       of_node_put(eth_dn);
+                       return -EIO;
+               }
+ 
+               adapter->port[i] = ehea_setup_single_port(adapter,
+                                                         *dn_log_port_id,
+                                                         eth_dn);
+               if (adapter->port[i])
+                       netdev_info(adapter->port[i]->netdev,
+                                   "logical port id #%d\n", *dn_log_port_id);
+               else
+                       ehea_remove_adapter_mr(adapter);
+ 
+               i++;
+       }
+       return 0;
+ }
+ 
+ /*
+  * Find the child of the adapter's device-tree node whose
+  * "ibm,hea-port-no" property equals @logical_port_id.
+  *
+  * Returns the node as handed out by of_get_next_child(), or NULL when no
+  * child matches.
+  */
+ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter,
+                                          u32 logical_port_id)
+ {
+       struct device_node *parent = adapter->ofdev->dev.of_node;
+       struct device_node *child;
+       const u32 *port_no;
+ 
+       for (child = of_get_next_child(parent, NULL); child;
+            child = of_get_next_child(parent, child)) {
+               port_no = of_get_property(child, "ibm,hea-port-no", NULL);
+               if (port_no && *port_no == logical_port_id)
+                       return child;
+       }
+ 
+       return NULL;
+ }
+ 
+ /*
+  * sysfs store handler of "probe_port": add the logical port whose id is
+  * written to the attribute.
+  *
+  * Returns @count on success, -EINVAL if the input is not a number, the
+  * port is already configured or no matching device-tree node exists, and
+  * -EIO if the memory region or the port cannot be set up.
+  */
+ static ssize_t ehea_probe_port(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
+ {
+       struct ehea_adapter *adapter = dev_get_drvdata(dev);
+       struct ehea_port *port;
+       struct device_node *eth_dn = NULL;
+       int i;
+ 
+       u32 logical_port_id;
+ 
+       /* reject unparsable input instead of using an uninitialised id */
+       if (sscanf(buf, "%u", &logical_port_id) != 1)
+               return -EINVAL;
+ 
+       port = ehea_get_port(adapter, logical_port_id);
+ 
+       if (port) {
+               netdev_info(port->netdev, "adding port with logical port id=%d failed: port already configured\n",
+                           logical_port_id);
+               return -EINVAL;
+       }
+ 
+       eth_dn = ehea_get_eth_dn(adapter, logical_port_id);
+ 
+       if (!eth_dn) {
+               pr_info("no logical port with id %d found\n", logical_port_id);
+               return -EINVAL;
+       }
+ 
+       if (ehea_add_adapter_mr(adapter)) {
+               pr_err("creating MR failed\n");
+               /* drop the node reference on this exit path as well */
+               of_node_put(eth_dn);
+               return -EIO;
+       }
+ 
+       port = ehea_setup_single_port(adapter, logical_port_id, eth_dn);
+ 
+       of_node_put(eth_dn);
+ 
+       if (port) {
+               /*
+                * NOTE(review): if no slot is free the port stays registered
+                * but is not recorded in adapter->port[] - confirm whether
+                * that can happen in practice.
+                */
+               for (i = 0; i < EHEA_MAX_PORTS; i++)
+                       if (!adapter->port[i]) {
+                               adapter->port[i] = port;
+                               break;
+                       }
+ 
+               netdev_info(port->netdev, "added: (logical port id=%d)\n",
+                           logical_port_id);
+       } else {
+               ehea_remove_adapter_mr(adapter);
+               return -EIO;
+       }
+ 
+       return (ssize_t) count;
+ }
+ 
+ /*
+  * sysfs store handler of "remove_port": shut down the logical port whose
+  * id is written to the attribute and clear its slot in adapter->port[].
+  *
+  * Returns @count on success and -EINVAL if the input is not a number or
+  * no port with that id is configured.
+  */
+ static ssize_t ehea_remove_port(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+ {
+       struct ehea_adapter *adapter = dev_get_drvdata(dev);
+       struct ehea_port *port;
+       int i;
+       u32 logical_port_id;
+ 
+       /* reject unparsable input instead of using an uninitialised id */
+       if (sscanf(buf, "%u", &logical_port_id) != 1)
+               return -EINVAL;
+ 
+       port = ehea_get_port(adapter, logical_port_id);
+ 
+       if (port) {
+               netdev_info(port->netdev, "removed: (logical port id=%d)\n",
+                           logical_port_id);
+ 
+               ehea_shutdown_single_port(port);
+ 
+               /* port is freed by now; it is only compared, not accessed */
+               for (i = 0; i < EHEA_MAX_PORTS; i++)
+                       if (adapter->port[i] == port) {
+                               adapter->port[i] = NULL;
+                               break;
+                       }
+       } else {
+               pr_err("removing port with logical port id=%d failed. port not configured.\n",
+                      logical_port_id);
+               return -EINVAL;
+       }
+ 
+       ehea_remove_adapter_mr(adapter);
+ 
+       return (ssize_t) count;
+ }
+ 
+ /* write-only (owner) adapter attributes for adding and removing ports */
+ static DEVICE_ATTR(remove_port, S_IWUSR, NULL, ehea_remove_port);
+ static DEVICE_ATTR(probe_port, S_IWUSR, NULL, ehea_probe_port);
+ 
+ /*
+  * Create the probe_port and remove_port attributes on @dev.
+  *
+  * Returns 0 on success or the device_create_file() error.  If the second
+  * attribute cannot be created the first one is removed again, so a
+  * failure never leaves a lone attribute behind.
+  */
+ int ehea_create_device_sysfs(struct platform_device *dev)
+ {
+       int ret = device_create_file(&dev->dev, &dev_attr_probe_port);
+       if (ret)
+               goto out;
+ 
+       ret = device_create_file(&dev->dev, &dev_attr_remove_port);
+       if (ret)
+               device_remove_file(&dev->dev, &dev_attr_probe_port);
+ out:
+       return ret;
+ }
+ 
+ /* Remove the probe_port and remove_port attributes from @dev. */
+ void ehea_remove_device_sysfs(struct platform_device *dev)
+ {
+       struct device *d = &dev->dev;
+ 
+       device_remove_file(d, &dev_attr_probe_port);
+       device_remove_file(d, &dev_attr_remove_port);
+ }
+ 
+ /*
+  * Probe callback for one eHEA adapter.
+  *
+  * Allocates the adapter, reads its "ibm,hea-handle" property, creates the
+  * notification event queue (NEQ) with its tasklet and interrupt, creates the
+  * device sysfs attributes and finally sets up the ports.
+  *
+  * Returns 0 on success or a negative errno. On failure everything acquired
+  * so far is released in reverse order. ehea_update_firmware_handles() is
+  * called on every exit path, successful or not.
+  */
+ static int __devinit ehea_probe_adapter(struct platform_device *dev,
+                                       const struct of_device_id *id)
+ {
+       struct ehea_adapter *adapter;
+       const u64 *adapter_handle;
+       int ret;
+ 
+       if (!dev || !dev->dev.of_node) {
+               pr_err("Invalid ibmebus device probed\n");
+               return -EINVAL;
+       }
+ 
+       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+       if (!adapter) {
+               ret = -ENOMEM;
+               dev_err(&dev->dev, "no mem for ehea_adapter\n");
+               goto out;
+       }
+ 
+       list_add(&adapter->list, &adapter_list);
+ 
+       adapter->ofdev = dev;
+ 
+       adapter_handle = of_get_property(dev->dev.of_node, "ibm,hea-handle",
+                                        NULL);
+       if (adapter_handle)
+               adapter->handle = *adapter_handle;
+ 
+       /* kzalloc() zeroed the handle, so this also covers a missing property */
+       if (!adapter->handle) {
+               dev_err(&dev->dev, "failed getting handle for adapter"
+                       " '%s'\n", dev->dev.of_node->full_name);
+               ret = -ENODEV;
+               goto out_free_ad;
+       }
+ 
+       adapter->pd = EHEA_PD_ID;
+ 
+       dev_set_drvdata(&dev->dev, adapter);
+ 
+ 
+       /* initialize adapter and ports */
+       /* get adapter properties */
+       ret = ehea_sense_adapter_attr(adapter);
+       if (ret) {
+               dev_err(&dev->dev, "sense_adapter_attr failed: %d\n", ret);
+               goto out_free_ad;
+       }
+ 
+       adapter->neq = ehea_create_eq(adapter,
+                                     EHEA_NEQ, EHEA_MAX_ENTRIES_EQ, 1);
+       if (!adapter->neq) {
+               ret = -EIO;
+               dev_err(&dev->dev, "NEQ creation failed\n");
+               goto out_free_ad;
+       }
+ 
+       tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet,
+                    (unsigned long)adapter);
+ 
+       ret = ibmebus_request_irq(adapter->neq->attr.ist1,
+                                 ehea_interrupt_neq, IRQF_DISABLED,
+                                 "ehea_neq", adapter);
+       if (ret) {
+               dev_err(&dev->dev, "requesting NEQ IRQ failed\n");
+               goto out_kill_eq;
+       }
+ 
+       ret = ehea_create_device_sysfs(dev);
+       if (ret)
+               goto out_free_irq;
+ 
+       ret = ehea_setup_ports(adapter);
+       if (ret) {
+               dev_err(&dev->dev, "setup_ports failed\n");
+               goto out_rem_dev_sysfs;
+       }
+ 
+       ret = 0;
+       goto out;
+ 
+ out_rem_dev_sysfs:
+       ehea_remove_device_sysfs(dev);
+ 
+ out_free_irq:
+       ibmebus_free_irq(adapter->neq->attr.ist1, adapter);
+       /*
+        * Same order as ehea_remove(): once the IRQ is gone, make sure the
+        * NEQ tasklet is neither pending nor running before the adapter it
+        * points to is freed.
+        */
+       tasklet_kill(&adapter->neq_tasklet);
+ 
+ out_kill_eq:
+       ehea_destroy_eq(adapter->neq);
+ 
+ out_free_ad:
+       /* do not leave driver data pointing at the adapter freed below */
+       dev_set_drvdata(&dev->dev, NULL);
+       list_del(&adapter->list);
+       kfree(adapter);
+ 
+ out:
+       ehea_update_firmware_handles();
+ 
+       return ret;
+ }
+ 
+ /*
+  * Remove callback for one eHEA adapter.
+  *
+  * Shuts down every configured port, removes the sysfs attributes, releases
+  * the NEQ interrupt, tasklet and event queue, drops the adapter's memory
+  * region and frees the adapter itself. Always returns 0.
+  */
+ static int __devexit ehea_remove(struct platform_device *dev)
+ {
+       struct ehea_adapter *adapter = dev_get_drvdata(&dev->dev);
+       int idx;
+ 
+       for (idx = 0; idx < EHEA_MAX_PORTS; idx++) {
+               struct ehea_port *port = adapter->port[idx];
+ 
+               /* unused slot */
+               if (!port)
+                       continue;
+ 
+               ehea_shutdown_single_port(port);
+               adapter->port[idx] = NULL;
+       }
+ 
+       ehea_remove_device_sysfs(dev);
+ 
+       /* free the interrupt first, then wait for the tasklet to finish */
+       ibmebus_free_irq(adapter->neq->attr.ist1, adapter);
+       tasklet_kill(&adapter->neq_tasklet);
+ 
+       ehea_destroy_eq(adapter->neq);
+       ehea_remove_adapter_mr(adapter);
+       list_del(&adapter->list);
+       kfree(adapter);
+ 
+       ehea_update_firmware_handles();
+ 
+       return 0;
+ }
+ 
+ /*
+  * Crash shutdown hook (registered via crash_shutdown_register() in
+  * ehea_module_init()).
+  *
+  * Force-frees every firmware handle recorded in ehea_fw_handles and
+  * deregisters every entry recorded in ehea_bcmc_regs. Either table is
+  * skipped when its array has not been allocated.
+  */
+ void ehea_crash_handler(void)
+ {
+       int idx;
+ 
+       if (ehea_fw_handles.arr) {
+               idx = 0;
+               while (idx < ehea_fw_handles.num_entries) {
+                       ehea_h_free_resource(ehea_fw_handles.arr[idx].adh,
+                                            ehea_fw_handles.arr[idx].fwh,
+                                            FORCE_FREE);
+                       idx++;
+               }
+       }
+ 
+       if (ehea_bcmc_regs.arr) {
+               idx = 0;
+               while (idx < ehea_bcmc_regs.num_entries) {
+                       ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[idx].adh,
+                                             ehea_bcmc_regs.arr[idx].port_id,
+                                             ehea_bcmc_regs.arr[idx].reg_type,
+                                             ehea_bcmc_regs.arr[idx].macaddr,
+                                             0, H_DEREG_BCMC);
+                       idx++;
+               }
+       }
+ }
+ 
+ /*
+  * Memory hotplug notifier.
+  *
+  * For memory coming online (or an offline request being cancelled) the
+  * affected pfn range is added to the driver's section bitmap; for memory
+  * going offline it is removed. In both cases __EHEA_STOP_XFER is set and the
+  * memory regions are re-registered afterwards. Other actions only refresh
+  * the firmware handle table.
+  *
+  * The whole callback runs under dlpar_mem_lock. Returns NOTIFY_OK, or
+  * NOTIFY_BAD when updating the section bitmap fails.
+  */
+ static int ehea_mem_notifier(struct notifier_block *nb,
+                              unsigned long action, void *data)
+ {
+       int ret = NOTIFY_BAD;
+       struct memory_notify *arg = data;
+ 
+       mutex_lock(&dlpar_mem_lock);
+ 
+       switch (action) {
+       case MEM_CANCEL_OFFLINE:
+               pr_info("memory offlining canceled\n");
+               /* Readd canceled memory block */
+               /* fall through */
+       case MEM_ONLINE:
+               pr_info("memory is going online\n");
+               set_bit(__EHEA_STOP_XFER, &ehea_driver_flags);
+               if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages))
+                       goto out_unlock;
+               ehea_rereg_mrs();
+               break;
+       case MEM_GOING_OFFLINE:
+               pr_info("memory is going offline\n");
+               set_bit(__EHEA_STOP_XFER, &ehea_driver_flags);
+               if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages))
+                       goto out_unlock;
+               ehea_rereg_mrs();
+               break;
+       default:
+               break;
+       }
+ 
+       ehea_update_firmware_handles();
+       ret = NOTIFY_OK;
+ 
+ out_unlock:
+       mutex_unlock(&dlpar_mem_lock);
+       return ret;
+ }
+ 
+ static struct notifier_block ehea_mem_nb = {
+       .notifier_call = ehea_mem_notifier, /* registered in ehea_module_init() */
+ };
+ 
+ /*
+  * Reboot notifier: on SYS_RESTART the driver is unregistered from the
+  * ibmebus. Every other action is ignored. Always returns NOTIFY_DONE.
+  */
+ static int ehea_reboot_notifier(struct notifier_block *nb,
+                               unsigned long action, void *unused)
+ {
+       if (action != SYS_RESTART)
+               return NOTIFY_DONE;
+ 
+       pr_info("Reboot: freeing all eHEA resources\n");
+       ibmebus_unregister_driver(&ehea_driver);
+ 
+       return NOTIFY_DONE;
+ }
+ 
+ static struct notifier_block ehea_reboot_nb = {
+       .notifier_call = ehea_reboot_notifier, /* registered in ehea_module_init() */
+ };
+ 
+ /*
+  * Range-check the queue size module parameters.
+  *
+  * Each of rq1_entries, rq2_entries, rq3_entries and sq_entries must lie
+  * between EHEA_MIN_ENTRIES_QP and its own EHEA_MAX_ENTRIES_* limit. Every
+  * offending parameter is reported; returns -EINVAL if any was out of range,
+  * 0 otherwise.
+  */
+ static int check_module_parm(void)
+ {
+       int rc = 0;
+ 
+       if (rq1_entries > EHEA_MAX_ENTRIES_RQ1 ||
+           rq1_entries < EHEA_MIN_ENTRIES_QP) {
+               pr_info("Bad parameter: rq1_entries\n");
+               rc = -EINVAL;
+       }
+ 
+       if (rq2_entries > EHEA_MAX_ENTRIES_RQ2 ||
+           rq2_entries < EHEA_MIN_ENTRIES_QP) {
+               pr_info("Bad parameter: rq2_entries\n");
+               rc = -EINVAL;
+       }
+ 
+       if (rq3_entries > EHEA_MAX_ENTRIES_RQ3 ||
+           rq3_entries < EHEA_MIN_ENTRIES_QP) {
+               pr_info("Bad parameter: rq3_entries\n");
+               rc = -EINVAL;
+       }
+ 
+       if (sq_entries > EHEA_MAX_ENTRIES_SQ ||
+           sq_entries < EHEA_MIN_ENTRIES_QP) {
+               pr_info("Bad parameter: sq_entries\n");
+               rc = -EINVAL;
+       }
+ 
+       return rc;
+ }
+ 
+ /*
+  * Show callback of the driver's "capabilities" attribute: writes
+  * EHEA_CAPABILITIES as a decimal number into buf and returns the number of
+  * characters written.
+  */
+ static ssize_t ehea_show_capabilities(struct device_driver *drv,
+                                     char *buf)
+ {
+       int written = sprintf(buf, "%d", EHEA_CAPABILITIES);
+ 
+       return written;
+ }
+ 
+ static DRIVER_ATTR(capabilities, S_IRUSR | S_IRGRP | S_IROTH,
+                  ehea_show_capabilities, NULL); /* readable by user, group and others; no store callback */
+ 
+ /*
+  * Module initialisation.
+  *
+  * Resets the global firmware-handle and broadcast/multicast tables, checks
+  * the module parameters, builds the busmap, registers the reboot, memory
+  * and crash notifiers (a failure there is only logged), registers the
+  * driver on the ibmebus and creates the "capabilities" driver attribute.
+  *
+  * Returns 0 on success or a negative errno. On failure after the busmap has
+  * been created, the same resources that ehea_module_exit() releases are
+  * released here as well.
+  */
+ int __init ehea_module_init(void)
+ {
+       int ret;
+ 
+       pr_info("IBM eHEA ethernet device driver (Release %s)\n", DRV_VERSION);
+ 
+       memset(&ehea_fw_handles, 0, sizeof(ehea_fw_handles));
+       memset(&ehea_bcmc_regs, 0, sizeof(ehea_bcmc_regs));
+ 
+       mutex_init(&ehea_fw_handles.lock);
+       spin_lock_init(&ehea_bcmc_regs.lock);
+ 
+       ret = check_module_parm();
+       if (ret)
+               goto out;
+ 
+       ret = ehea_create_busmap();
+       if (ret)
+               goto out;
+ 
+       /* Notifier registration failures are reported but not fatal. */
+       ret = register_reboot_notifier(&ehea_reboot_nb);
+       if (ret)
+               pr_info("failed registering reboot notifier\n");
+ 
+       ret = register_memory_notifier(&ehea_mem_nb);
+       if (ret)
+               pr_info("failed registering memory remove notifier\n");
+ 
+       ret = crash_shutdown_register(ehea_crash_handler);
+       if (ret)
+               pr_info("failed registering crash handler\n");
+ 
+       ret = ibmebus_register_driver(&ehea_driver);
+       if (ret) {
+               pr_err("failed registering eHEA device driver on ebus\n");
+               goto out2;
+       }
+ 
+       ret = driver_create_file(&ehea_driver.driver,
+                                &driver_attr_capabilities);
+       if (ret) {
+               pr_err("failed to register capabilities attribute, ret=%d\n",
+                      ret);
+               goto out3;
+       }
+ 
+       return ret;
+ 
+ out3:
+       ibmebus_unregister_driver(&ehea_driver);
+ out2:
+       unregister_memory_notifier(&ehea_mem_nb);
+       unregister_reboot_notifier(&ehea_reboot_nb);
+       crash_shutdown_unregister(ehea_crash_handler);
+       /*
+        * Match ehea_module_exit(): drop the handle tables and the busmap
+        * created above. Both arrays are NULL after the memset() unless they
+        * were allocated since, and kfree(NULL) is a no-op.
+        */
+       kfree(ehea_fw_handles.arr);
+       kfree(ehea_bcmc_regs.arr);
+       ehea_destroy_busmap();
+ out:
+       return ret;
+ }
+ 
+ /*
+  * Module teardown: removes the "capabilities" attribute, unregisters the
+  * driver and the reboot, crash and memory notifiers, then frees the global
+  * handle tables and the busmap. A failing crash handler unregistration is
+  * only logged.
+  */
+ static void __exit ehea_module_exit(void)
+ {
+       driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities);
+       ibmebus_unregister_driver(&ehea_driver);
+       unregister_reboot_notifier(&ehea_reboot_nb);
+ 
+       if (crash_shutdown_unregister(ehea_crash_handler))
+               pr_info("failed unregistering crash handler\n");
+ 
+       unregister_memory_notifier(&ehea_mem_nb);
+ 
+       kfree(ehea_fw_handles.arr);
+       kfree(ehea_bcmc_regs.arr);
+       ehea_destroy_busmap();
+ }
+ 
+ module_init(ehea_module_init); /* declares ehea_module_init as the module's init function */
+ module_exit(ehea_module_exit); /* declares ehea_module_exit as the module's exit function */
diff --cc drivers/net/wireless/b43/main.c
Simple merge
diff --cc drivers/scsi/device_handler/scsi_dh.c

index dc3a589,23149b9..a60b10b
--- 1/drivers/scsi/device_handler/scsi_dh.c
--- 2/drivers/scsi/device_handler/scsi_dh.c
+++ b/drivers/scsi/device_handler/scsi_dh.c
@@@ -440,8 -441,16 +441,16 @@@ int scsi_dh_activate(struct request_que
         struct device *dev = NULL;
   
         spin_lock_irqsave(q->queue_lock, flags);
- -      sdev = q->queuedata;
+ +      sdev = scsi_device_from_queue(q);
-       if (sdev && sdev->scsi_dh_data)
+       if (!sdev) {
+               spin_unlock_irqrestore(q->queue_lock, flags);
+               err = SCSI_DH_NOSYS;
+               if (fn)
+                       fn(data, err);
+               return err;
+       }
+ 
+       if (sdev->scsi_dh_data)
                 scsi_dh = sdev->scsi_dh_data->scsi_dh;
         dev = get_device(&sdev->sdev_gendev);
         if (!scsi_dh || !dev ||
diff --cc drivers/scsi/megaraid/megaraid_mbox.c
Simple merge
diff --cc drivers/scsi/scsi_error.c
Simple merge
diff --cc drivers/scsi/scsi_lib.c
Simple merge
diff --cc drivers/scsi/scsi_netlink.c
Simple merge
diff --cc drivers/scsi/scsi_scan.c
Simple merge
diff --cc drivers/scsi/sd.c
Simple merge
diff --cc drivers/tty/serial/8250.c
Simple merge
diff --cc drivers/tty/vt/keyboard.c

index 092beb0,a605549..474fb75
--- 1/drivers/tty/vt/keyboard.c
--- 2/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@@ -43,8 -42,8 +42,10 @@@
   #include <linux/notifier.h>
   #include <linux/jiffies.h>
   
+ #include <asm/irq_regs.h>
+ 
+ +#include <linux/bootsplash.h>
+ +
   extern void ctrl_alt_del(void);
   
   /*
diff --cc drivers/tty/vt/vt.c
Simple merge
diff --cc drivers/video/Kconfig
Simple merge
diff --cc drivers/video/Makefile
Simple merge
diff --cc drivers/video/console/vgacon.c
Simple merge
diff --cc fs/Kconfig
Simple merge
diff --cc fs/Makefile
Simple merge
diff --cc fs/ext4/ext4.h
Simple merge
diff --cc fs/ext4/file.c
Simple merge
diff --cc fs/ext4/ialloc.c
Simple merge
diff --cc fs/ext4/inode.c

index fa42836,240f6e2..8061ce5
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -42,9 -42,7 +42,8 @@@
   #include "ext4_jbd2.h"
   #include "xattr.h"
   #include "acl.h"
- #include "ext4_extents.h"
   #include "truncate.h"
+ +#include "richacl.h"
   
   #include <trace/events/ext4.h>
   
@@@ -3415,12 -3791,9 +3792,12 @@@ struct inode *ext4_iget(struct super_bl
                 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
         }
-       inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
   
         ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
+ +#ifdef CONFIG_EXT4_FS_RICHACL
+ +      ei->i_richacl = EXT4_RICHACL_NOT_CACHED;
+ +#endif
         ei->i_dir_start_lookup = 0;
         ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
         /* We now have enough fields to check if the inode was active or not.
diff --cc fs/ext4/namei.c
Simple merge
diff --cc fs/ext4/super.c

index 820b807,9953d80..e63a7be
--- 1/fs/ext4/super.c
--- 2/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -3247,6 -3253,36 +3276,33 @@@ static int ext4_fill_super(struct super
                            &journal_ioprio, NULL, 0))
                 goto failed_mount;
   
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
+                           "with data=journal disables delayed "
+                           "allocation and O_DIRECT support!\n");
+               if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and delalloc");
+                       goto failed_mount;
+               }
+               if (test_opt(sb, DIOREAD_NOLOCK)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and delalloc");
+                       goto failed_mount;
+               }
+               if (test_opt(sb, DELALLOC))
+                       clear_opt(sb, DELALLOC);
+       }
+ 
+       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+       if (test_opt(sb, DIOREAD_NOLOCK)) {
+               if (blocksize < PAGE_SIZE) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "dioread_nolock if block size != PAGE_SIZE");
+                       goto failed_mount;
+               }
+       }
+ 
- -      sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- -              (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
- -
         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
diff --cc fs/ext4/xattr.c
Simple merge
diff --cc fs/namei.c
Simple merge
diff --cc fs/nfs/inode.c
Simple merge
diff --cc fs/super.c

index b8f7b5f,afd0f1a..5210e18
--- 1/fs/super.c
--- 2/fs/super.c
+++ b/fs/super.c
@@@ -722,8 -727,13 +722,13 @@@ static int __do_remount_sb(struct super
   
         if (sb->s_op->remount_fs) {
                 retval = sb->s_op->remount_fs(sb, &flags, data);
-               if (retval)
-                       return retval;
+               if (retval) {
- -                      if (!force)
++                      if (!(rflags & REMOUNT_FORCE))
+                               return retval;
+                       /* If forced remount, go ahead despite any errors */
+                       WARN(1, "forced remount of a %s fs returned %i\n",
+                            sb->s_type->name, retval);
+               }
         }
         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
   
diff --cc include/acpi/acpiosxf.h
Simple merge
diff --cc include/asm-generic/vmlinux.lds.h
Simple merge
diff --cc include/linux/acpi.h
Simple merge
diff --cc include/linux/blkdev.h
Simple merge
diff --cc include/linux/device.h

index def6bac,ffbcf95..3151417
--- 1/include/linux/device.h
--- 2/include/linux/device.h
+++ b/include/linux/device.h
@@@ -785,95 -803,58 +803,93 @@@ extern const char *dev_driver_string(co
   
   #ifdef CONFIG_PRINTK
   
+ extern int __dev_printk(const char *level, const struct device *dev,
+                       struct va_format *vaf);
+ extern __printf(3, 4)
++
+ +#if defined(KMSG_COMPONENT) && (defined(CONFIG_KMSG_IDS) || defined(__KMSG_CHECKER))
+ +/* dev_printk_hash for message documentation */
+ +#if defined(__KMSG_CHECKER) && defined(KMSG_COMPONENT)
+ +
+ +/* generate magic string for scripts/kmsg-doc to parse */
+ +#define dev_printk_hash(level, dev, format, arg...) \
+ +      __KMSG_DEV(level _FMT_ format _ARGS_ dev, ## arg _END_)
+ +
+ +#elif defined(CONFIG_KMSG_IDS) && defined(KMSG_COMPONENT)
+ +
+ +int printk_dev_hash(const char *, const char *, const char *, ...);
+ +#define dev_printk_hash(level, dev, format, arg...) \
+ +      printk_dev_hash(level "%s.%06x: ", dev_driver_string(dev), \
+ +                      "%s: " format, dev_name(dev), ## arg)
+ +
+ +#endif
+ +
+ +#define dev_printk(level, dev, format, arg...)                \
+ +      dev_printk_hash(level , dev, format, ## arg)
+ +#define dev_emerg(dev, format, arg...)                \
+ +      dev_printk_hash(KERN_EMERG , dev , format , ## arg)
+ +#define dev_alert(dev, format, arg...)                \
+ +      dev_printk_hash(KERN_ALERT , dev , format , ## arg)
+ +#define dev_crit(dev, format, arg...)         \
+ +      dev_printk_hash(KERN_CRIT , dev , format , ## arg)
+ +#define dev_err(dev, format, arg...)          \
+ +      dev_printk_hash(KERN_ERR , dev , format , ## arg)
+ +#define dev_warn(dev, format, arg...)         \
+ +      dev_printk_hash(KERN_WARNING , dev , format , ## arg)
+ +#define dev_notice(dev, format, arg...)               \
+ +      dev_printk_hash(KERN_NOTICE , dev , format , ## arg)
+ +#define _dev_info(dev, format, arg...)                \
+ +      dev_printk_hash(KERN_INFO , dev , format , ## arg)
+ +#else
- extern int dev_printk(const char *level, const struct device *dev,
-                     const char *fmt, ...)
-       __attribute__ ((format (printf, 3, 4)));
- extern int dev_emerg(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int dev_alert(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int dev_crit(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int dev_err(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int dev_warn(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int dev_notice(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- extern int _dev_info(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
+ int dev_printk(const char *level, const struct device *dev,
+              const char *fmt, ...)
+       ;
+ extern __printf(2, 3)
+ int dev_emerg(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int dev_alert(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int dev_crit(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int dev_err(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int dev_warn(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int dev_notice(const struct device *dev, const char *fmt, ...);
+ extern __printf(2, 3)
+ int _dev_info(const struct device *dev, const char *fmt, ...);
- -
+ +#endif
   #else
   
- static inline int dev_printk(const char *level, const struct device *dev,
-                     const char *fmt, ...)
-       __attribute__ ((format (printf, 3, 4)));
- static inline int dev_printk(const char *level, const struct device *dev,
-                     const char *fmt, ...)
-        { return 0; }
- 
- static inline int dev_emerg(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_emerg(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int dev_crit(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_crit(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int dev_alert(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_alert(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int dev_err(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_err(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int dev_warn(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_warn(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int dev_notice(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int dev_notice(const struct device *dev, const char *fmt, ...)
-       { return 0; }
- static inline int _dev_info(const struct device *dev, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
- static inline int _dev_info(const struct device *dev, const char *fmt, ...)
-       { return 0; }
+ static inline int __dev_printk(const char *level, const struct device *dev,
+                              struct va_format *vaf)
+ { return 0; }
+ static inline __printf(3, 4)
+ int dev_printk(const char *level, const struct device *dev,
+              const char *fmt, ...)
+ { return 0; }
+ 
+ static inline __printf(2, 3)
+ int dev_emerg(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int dev_crit(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int dev_alert(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int dev_err(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int dev_warn(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int dev_notice(const struct device *dev, const char *fmt, ...)
+ { return 0; }
+ static inline __printf(2, 3)
+ int _dev_info(const struct device *dev, const char *fmt, ...)
+ { return 0; }
   
   #endif
   
diff --cc include/linux/fs.h
Simple merge
diff --cc include/linux/kernel.h

index bfc31d2,e8b1597..cb08848
--- 1/include/linux/kernel.h
--- 2/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@@ -371,16 -371,8 +373,17 @@@ extern enum system_states 
   #define TAINT_WARN                    9
   #define TAINT_CRAP                    10
   #define TAINT_FIRMWARE_WORKAROUND     11
+ #define TAINT_OOT_MODULE              12
   
+ +#ifdef CONFIG_ENTERPRISE_SUPPORT
+ +/*
+ + * Take the upper bits to hopefully allow them
+ + * to stay the same for more than one release.
+ + */
+ +#define TAINT_NO_SUPPORT              30
+ +#define TAINT_EXTERNAL_SUPPORT                31
+ +#endif
+ +
   extern const char hex_asc[];
   #define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
   #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4]
diff --cc include/linux/mm.h
Simple merge
diff --cc include/linux/module.h
Simple merge
diff --cc include/linux/nfs_fs.h

index 2233710,ab2c634..3b141fd
--- 1/include/linux/nfs_fs.h
--- 2/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@@ -230,10 -229,7 +229,11 @@@ struct nfs_inode 
   #define NFS_INO_COMMIT                (7)             /* inode is committing unstable writes */
   #define NFS_INO_PNFS_COMMIT   (8)             /* use pnfs code for commit */
   #define NFS_INO_LAYOUTCOMMIT  (9)             /* layoutcommit required */
- #define NFS_INO_SEEN_GETATTR  (10)            /* flag to track if app is calling
+ #define NFS_INO_LAYOUTCOMMITTING (10)         /* layoutcommit inflight */
++#define NFS_INO_SEEN_GETATTR  (11)            /* flag to track if app is calling
+ +                                               * getattr in a directory during
+ +                                               * readdir
+ +                                               */
   
   static inline struct nfs_inode *NFS_I(const struct inode *inode)
   {
diff --cc include/linux/printk.h
Simple merge
diff --cc init/Kconfig
Simple merge
diff --cc init/main.c
Simple merge
diff --cc kernel/Kconfig.preempt
Simple merge
diff --cc kernel/Makefile
Simple merge
diff --cc kernel/ksysfs.c
Simple merge
diff --cc kernel/module.c
Simple merge
diff --cc kernel/panic.c

index d8470c4,b265936..c2ed5d9
--- 1/kernel/panic.c
--- 2/kernel/panic.c
+++ b/kernel/panic.c
@@@ -177,10 -177,7 +177,11 @@@ static const struct tnt tnts[] = 
         { TAINT_WARN,                   'W', ' ' },
         { TAINT_CRAP,                   'C', ' ' },
         { TAINT_FIRMWARE_WORKAROUND,    'I', ' ' },
+       { TAINT_OOT_MODULE,             'O', ' ' },
+ +#ifdef CONFIG_ENTERPRISE_SUPPORT
+ +      { TAINT_NO_SUPPORT,             'N', ' ' },
+ +      { TAINT_EXTERNAL_SUPPORT,       'X', ' ' },
+ +#endif
   };
   
   /**
@@@ -198,8 -195,7 +199,9 @@@
    *  'W' - Taint on warning.
    *  'C' - modules from drivers/staging are loaded.
    *  'I' - Working around severe firmware bug.
+  *  'O' - Out-of-tree module has been loaded.
+ + *  'N' - Unsupported modules loaded.
+ + *  'X' - Modules with external support loaded.
    *
    *    The string is overwritten by the next call to print_tainted().
    */
diff --cc kernel/printk.c
Simple merge
diff --cc kernel/sysctl.c
Simple merge
diff --cc kernel/sysctl_binary.c
Simple merge
diff --cc lib/Kconfig.debug

index cf3f767,82928f5..e4f9aa8
--- 1/lib/Kconfig.debug
--- 2/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -1099,8 -1093,7 +1111,8 @@@ config FAULT_INJECTION_STACKTRACE_FILTE
         depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
         depends on !X86_64
         select STACKTRACE
-       select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !X86
- -      select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
++      select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !X86 && !ARM_UNWIND
+ +      select UNWIND_INFO if X86 && !FRAME_POINTER
         help
           Provide stacktrace filter for fault-injection capabilities
   
@@@ -1110,8 -1103,7 +1122,8 @@@ config LATENCYTO
         depends on DEBUG_KERNEL
         depends on STACKTRACE_SUPPORT
         depends on PROC_FS
-       select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !X86
- -      select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
++      select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !X86 && !ARM_UNWIND
+ +      select UNWIND_INFO if X86 && !FRAME_POINTER
         select KALLSYMS
         select KALLSYMS_ALL
         select STACKTRACE
diff --cc mm/page_alloc.c

index 1f3c55c,9dd443d..a6b804e
--- 1/mm/page_alloc.c
--- 2/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@@ -1791,14 -1785,8 +1785,14 @@@ void warn_alloc_failed(gfp_t gfp_mask, 
                 va_end(args);
         }
   
- -      pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+ +      if (!(gfp_mask & __GFP_WAIT)) {
+ +              pr_info("The following is only an harmless informational message.\n");
+ +              pr_info("Unless you get a _continuous_flood_ of these messages it means\n");
+ +              pr_info("everything is working fine. Allocations from irqs cannot be\n");
+ +              pr_info("perfectly reliable and the kernel is designed to handle that.\n");
+ +      }
+ +      pr_info("%s: page allocation failure. order:%d, mode:0x%x\n",
-                  current->comm, order, gfp_mask);
+               current->comm, order, gfp_mask);
   
         dump_stack();
         if (!should_suppress_show_mem())
diff --cc mm/thrash.c
Simple merge
diff --cc mm/truncate.c
Simple merge
diff --cc net/bridge/br_if.c
Simple merge
diff --cc net/netfilter/Kconfig
Simple merge
diff --cc scripts/Makefile.build
Simple merge
diff --cc scripts/genksyms/genksyms.c

index e1136ea,8a10649..fcb87e3
--- 1/scripts/genksyms/genksyms.c
--- 2/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@@ -40,10 -40,11 +40,11 @@@ static struct symbol *symtab[HASH_BUCKE
   static FILE *debugfile;
   
   int cur_line = 1;
- char *cur_filename;
+ char *cur_filename, *source_file;
+ int in_source_file;
   
   static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types,
- -         flag_preserve, flag_warnings;
+ +         flag_override, flag_preserve, flag_warnings;
   static const char *arch = "";
   static const char *mod_prefix = "";
   
diff --cc scripts/kconfig/Makefile
Simple merge
diff --cc scripts/mod/modpost.c

index a4e371a,2bd594e..21177d7
--- 1/scripts/mod/modpost.c
--- 2/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@@ -2231,8 -2175,8 +2237,9 @@@ int main(int argc, char **argv
                 buf.pos = 0;
   
                 add_header(&buf, mod);
+               add_intree_flag(&buf, !external_module);
                 add_staging_flag(&buf, mod->name);
+ +              add_supported_flag(&buf, mod);
                 err |= add_versions(&buf, mod);
                 add_depends(&buf, mod, modules);
                 add_moddevtable(&buf, mod);
diff --cc security/apparmor/apparmorfs.c
Simple merge
diff --cc security/apparmor/policy_unpack.c
Simple merge
diff --cc virt/kvm/ioapic.c
Simple merge
author	Jeff Mahoney <jeffm@suse.com>
	Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)
committer	Jeff Mahoney <jeffm@suse.com>
	Wed, 16 Nov 2011 05:32:19 +0000 (00:32 -0500)
		1	2
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/sysctl/kernel.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/ia64/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/ia64/kernel/acpi.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/exceptions-64s.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/legacy_serial.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/prom_init.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/platforms/pseries/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/xmon/xmon.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/acpi/boot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic/bigsmp_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic/probe_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apm_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perf_event.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/dumpstack_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/e820.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/entry_32.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/hpet.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/reboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/vmlinux.lds.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/acpi/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/acpi/ec_sys.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/acpi/osl.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/char/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/connector/cn_proc.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/cpufreq/cpufreq_ondemand.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/hid/hid-apple.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/hid/hid-core.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/hid/hid-ids.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/idle/intel_idle.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/input/mouse/synaptics.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/input/mouse/synaptics.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/input/touchscreen/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/input/touchscreen/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/isdn/mISDN/socket.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm-least-pending.c	patch \|	diff1 \|	\|	blob \| history
drivers/md/dm-memcache.c	patch \|	diff1 \|	\|	blob \| history
drivers/md/dm-raid45.c	patch \|	diff1 \|	\|	blob \| history
drivers/md/dm-table.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/dec/tulip/tulip_core.c	patch \|	\|	diff2 \|	blob \| history
drivers/net/ethernet/ibm/ehea/ehea_main.c	patch \|	\|	diff2 \|	blob \| history
drivers/net/wireless/b43/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/device_handler/scsi_dh.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/megaraid/megaraid_mbox.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/scsi_error.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/scsi_lib.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/scsi_netlink.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/scsi_scan.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/sd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/tty/serial/8250.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/tty/vt/keyboard.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/tty/vt/vt.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/video/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/video/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/video/console/vgacon.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
fs/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/ext4.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/ialloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/xattr.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/acpi/acpiosxf.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-generic/vmlinux.lds.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/acpi.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/blkdev.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/device.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/kernel.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/module.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/nfs_fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/printk.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/Kconfig.preempt	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/ksysfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/panic.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/printk.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl_binary.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig.debug	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page_alloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/thrash.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/truncate.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/bridge/br_if.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/netfilter/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/Makefile.build	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/genksyms/genksyms.c	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/kconfig/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/mod/modpost.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/apparmor/apparmorfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/apparmor/policy_unpack.c	patch \|	diff1 \|	diff2 \|	blob \| history
virt/kvm/ioapic.c	patch \|	diff1 \|	diff2 \|	blob \| history