- Update to 2.6.37-rc1-git11.
author Jeff Mahoney <jeffm@suse.com>
Mon, 15 Nov 2010 20:30:37 +0000 (15:30 -0500)
committer Jeff Mahoney <jeffm@suse.com>
Mon, 15 Nov 2010 20:30:37 +0000 (15:30 -0500)
  - Eliminated 2 patches.

suse-commit: 5959a6775d5f483080cab2903c7fb45691110a1c

36 files changed:
Documentation/kernel-parameters.txt
Documentation/sysctl/kernel.txt
Makefile
arch/x86/include/asm/apic.h
arch/x86/kvm/x86.c
drivers/Makefile
drivers/char/Makefile
drivers/input/serio/i8042-x86ia64io.h
drivers/md/dm-raid45.c
drivers/net/ixgbe/ixgbe_main.c
drivers/scsi/scsi_error.c
drivers/tty/n_tty.c
drivers/tty/tty_io.c
drivers/tty/vt/keyboard.c
drivers/tty/vt/vt.c
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/super.c
include/linux/blkdev.h
include/linux/kernel.h
include/net/sock.h
kernel/printk.c
kernel/sysctl.c
mm/filemap.c
mm/vmscan.c
mm/vmstat.c
net/caif/caif_socket.c
net/core/dev.c
net/core/filter.c
net/core/sock.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv6/reassembly.c
net/ipv6/route.c
security/apparmor/lsm.c
security/apparmor/policy.c

Simple merge
Simple merge
diff --cc Makefile
+++ b/Makefile
@@@ -1,7 -1,7 +1,7 @@@
  VERSION = 2
  PATCHLEVEL = 6
  SUBLEVEL = 37
--EXTRAVERSION = -rc1
++EXTRAVERSION = -rc1-git11
  NAME = Flesh-Eating Bats with Fangs
  
  # *DOCUMENTATION*
Simple merge
Simple merge
@@@ -26,10 -26,18 +26,11 @@@ obj-$(CONFIG_REGULATOR)            += regulator
  
  # char/ comes before serial/ etc so that the VT console is the boot-time
  # default.
+ obj-y                         += tty/
  obj-y                         += char/
  
 -# gpu/ comes after char for AGP vs DRM startup
 -obj-y                         += gpu/
 -
  obj-$(CONFIG_CONNECTOR)               += connector/
  
 -# i810fb and intelfb depend on char/agp/
 -obj-$(CONFIG_FB_I810)           += video/i810/
 -obj-$(CONFIG_FB_INTEL)          += video/intelfb/
 -
  obj-y                         += serial/
  obj-$(CONFIG_PARPORT)         += parport/
  obj-y                         += base/ block/ misc/ mfd/
Simple merge
Simple merge
index 31f05a7,0000000..71a6d31
mode 100644,000000..100644
--- /dev/null
@@@ -1,4716 -1,0 +1,4712 @@@
 +/*
 + * Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
 + *
 + * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
 + *
 + * This file is released under the GPL.
 + *
 + *
 + * Linux 2.6 Device Mapper RAID4 and RAID5 target.
 + *
 + * Tested-by: Intel; Marcin.Labun@intel.com, krzysztof.wojcik@intel.com
 + *
 + *
 + * Supports the following ATARAID vendor solutions (and SNIA DDF):
 + *
 + *    Adaptec HostRAID ASR
 + *    SNIA DDF1
 + *    Highpoint 37x
 + *    Highpoint 45x
 + *    Intel IMSM
 + *    Jmicron ATARAID
 + *    LSI Logic MegaRAID
 + *    NVidia RAID
 + *    Promise FastTrack
 + *    Silicon Image Medley
 + *    VIA Software RAID
 + *
 + * via the dmraid application.
 + *
 + *
 + * Features:
 + *
 + *    o RAID4 with dedicated and selectable parity device
 + *    o RAID5 with rotating parity (left+right, symmetric+asymmetric)
 + *    o recovery of out of sync device for initial
 + *      RAID set creation or after dead drive replacement
 + *    o run time optimization of xor algorithm used to calculate parity
 + *
 + *
 + * Thanks to MD for:
 + *    o the raid address calculation algorithm
 + *    o the base of the biovec <-> page list copier.
 + *
 + *
 + * Uses region hash to keep track of how many writes are in flight to
 + * regions in order to use dirty log to keep state of regions to recover:
 + *
 + *    o clean regions (those which are synchronized
 + *    and don't have write io in flight)
 + *    o dirty regions (those with write io in flight)
 + *
 + *
 + * On startup, any dirty regions are migrated to the
 + * 'nosync' state and are subject to recovery by the daemon.
 + *
 + * See raid_ctr() for table definition.
 + *
 + * ANALYZEME: recovery bandwidth
 + */
 +
 +static const char *version = "v0.2597k";
 +
 +#include "dm.h"
 +#include "dm-memcache.h"
 +#include "dm-raid45.h"
 +
 +#include <linux/kernel.h>
 +#include <linux/vmalloc.h>
 +#include <linux/raid/xor.h>
 +#include <linux/slab.h>
 +
 +#include <linux/bio.h>
 +#include <linux/dm-io.h>
 +#include <linux/dm-dirty-log.h>
 +#include <linux/dm-region-hash.h>
 +
 +
 +/*
 + * Configurable parameters
 + */
 +
 +/* Minimum/maximum and default # of selectable stripes. */
 +#define       STRIPES_MIN             8
 +#define       STRIPES_MAX             16384
 +#define       STRIPES_DEFAULT         80
 +
 +/* Maximum and default chunk size in sectors if not set in constructor. */
 +#define       CHUNK_SIZE_MIN          8
 +#define       CHUNK_SIZE_MAX          16384
 +#define       CHUNK_SIZE_DEFAULT      64
 +
 +/* Default io size in sectors if not set in constructor. */
 +#define       IO_SIZE_MIN             CHUNK_SIZE_MIN
 +#define       IO_SIZE_DEFAULT         IO_SIZE_MIN
 +
 +/* Recover io size default in sectors. */
 +#define       RECOVER_IO_SIZE_MIN             64
 +#define       RECOVER_IO_SIZE_DEFAULT         256
 +
 +/* Default, minimum and maximum percentage of recover io bandwidth. */
 +#define       BANDWIDTH_DEFAULT       10
 +#define       BANDWIDTH_MIN           1
 +#define       BANDWIDTH_MAX           100
 +
 +/* # of parallel recovered regions */
 +#define RECOVERY_STRIPES_MIN  1
 +#define RECOVERY_STRIPES_MAX  64
 +#define RECOVERY_STRIPES_DEFAULT      RECOVERY_STRIPES_MIN
 +/*
 + * END Configurable parameters
 + */
 +
 +#define       TARGET  "dm-raid45"
 +#define       DAEMON  "kraid45d"
 +#define       DM_MSG_PREFIX   TARGET
 +
 +#define       SECTORS_PER_PAGE        (PAGE_SIZE >> SECTOR_SHIFT)
 +
 +/* Amount/size for __xor(). */
 +#define       XOR_SIZE        PAGE_SIZE
 +
 +/* Ticks to run xor_speed() test for. */
 +#define       XOR_SPEED_TICKS 5
 +
 +/* Check value in range. */
 +#define       range_ok(i, min, max)   ((i) >= (min) && (i) <= (max))
 +
 +/* Structure access macros. */
 +/* Derive raid_set from stripe_cache pointer. */
 +#define       RS(x)   container_of(x, struct raid_set, sc)
 +
 +/* Page reference. */
 +#define PAGE(stripe, p)  ((stripe)->obj[(p)].pl->page)
 +
 +/* Stripe chunk reference. */
 +#define CHUNK(stripe, p) ((stripe)->chunk + (p))
 +
 +/* Bio list reference. */
 +#define       BL(stripe, p, rw)       ((stripe)->chunk[(p)].bl + (rw))
 +#define       BL_CHUNK(chunk, rw)     ((chunk)->bl + (rw))
 +
 +/* Page list reference. */
 +#define       PL(stripe, p)           ((stripe)->obj[(p)].pl)
 +/* END: structure access macros. */
 +
 +/* Factor out to dm-bio-list.h */
 +static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
 +{
 +      bio->bi_next = bl->head;
 +      bl->head = bio;
 +
 +      if (!bl->tail)
 +              bl->tail = bio;
 +}
 +
 +/* Factor out to dm.h */
 +#define TI_ERR_RET(str, ret) \
 +      do { ti->error = str; return ret; } while (0)
 +#define TI_ERR(str)     TI_ERR_RET(str, -EINVAL)
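 +/*
 + * Usage sketch (illustration only; the "num_parms" check is hypothetical):
 + *
 + *   if (num_parms < 0)
 + *           TI_ERR("Invalid parameter count");
 + *
 + * sets ti->error and returns -EINVAL from the calling target method.
 + */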
 +
 +/* Macro to define IO-flag access inline functions. */
 +#define       BITOPS(name, what, var, flag) \
 +static inline int TestClear ## name ## what(struct var *v) \
 +{ return test_and_clear_bit(flag, &v->io.flags); } \
 +static inline int TestSet ## name ## what(struct var *v) \
 +{ return test_and_set_bit(flag, &v->io.flags); } \
 +static inline void Clear ## name ## what(struct var *v) \
 +{ clear_bit(flag, &v->io.flags); } \
 +static inline void Set ## name ## what(struct var *v) \
 +{ set_bit(flag, &v->io.flags); } \
 +static inline int name ## what(struct var *v) \
 +{ return test_bit(flag, &v->io.flags); }
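 +
 +/*
 + * For illustration: BITOPS(Chunk, Dirty, stripe_chunk, CHUNK_DIRTY)
 + * below expands to accessors equivalent to
 + *
 + *   static inline void SetChunkDirty(struct stripe_chunk *v)
 + *   { set_bit(CHUNK_DIRTY, &v->io.flags); }
 + *   static inline int ChunkDirty(struct stripe_chunk *v)
 + *   { return test_bit(CHUNK_DIRTY, &v->io.flags); }
 + *
 + * plus the TestClear/TestSet/Clear variants built the same way.
 + */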
 +
 +/*-----------------------------------------------------------------
 + * Stripe cache
 + *
 + * Cache for all reads and writes to raid sets (operational or degraded)
 + *
 + * We need to run all data to and from a RAID set through this cache,
 + * because parity chunks need to get calculated from data chunks
 + * or, in the degraded/resynchronization case, missing chunks need
 + * to be reconstructed using the other chunks of the stripe.
 + *---------------------------------------------------------------*/
 +/* Unique kmem cache name suffix # counter. */
 +static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1); /* kmem cache # counter. */
 +
 +/* A chunk within a stripe (holds bios hanging off). */
 +/* IO status flags for chunks of a stripe. */
 +enum chunk_flags {
 +      CHUNK_DIRTY,            /* Pages of chunk dirty; need writing. */
 +      CHUNK_ERROR,            /* IO error on any chunk page. */
 +      CHUNK_IO,               /* Allow/prohibit IO on chunk pages. */
 +      CHUNK_LOCKED,           /* Chunk pages locked during IO. */
 +      CHUNK_MUST_IO,          /* Chunk must io. */
 +      CHUNK_UNLOCK,           /* Enforce chunk unlock. */
 +      CHUNK_UPTODATE,         /* Chunk pages are uptodate. */
 +};
 +
- #if READ != 0 || WRITE != 1
- #error dm-raid45: READ/WRITE != 0/1 used as index!!!
- #endif
 +enum bl_type {
 +      WRITE_QUEUED = WRITE + 1,
 +      WRITE_MERGED,
 +      NR_BL_TYPES,    /* Must be last one! */
 +};
 +struct stripe_chunk {
 +      atomic_t cnt;           /* Reference count. */
 +      struct stripe *stripe;  /* Backpointer to stripe for endio(). */
 +      /* Bio lists for reads, writes, and writes merged. */
 +      struct bio_list bl[NR_BL_TYPES];
 +      struct {
 +              unsigned long flags; /* IO status flags. */
 +      } io;
 +};
 +
 +/* Define chunk bit operations. */
 +BITOPS(Chunk, Dirty,   stripe_chunk, CHUNK_DIRTY)
 +BITOPS(Chunk, Error,   stripe_chunk, CHUNK_ERROR)
 +BITOPS(Chunk, Io,      stripe_chunk, CHUNK_IO)
 +BITOPS(Chunk, Locked,  stripe_chunk, CHUNK_LOCKED)
 +BITOPS(Chunk, MustIo,  stripe_chunk, CHUNK_MUST_IO)
 +BITOPS(Chunk, Unlock,  stripe_chunk, CHUNK_UNLOCK)
 +BITOPS(Chunk, Uptodate,        stripe_chunk, CHUNK_UPTODATE)
 +
 +/*
 + * Stripe linked list indexes. Keep order, because the stripe
 + * and the stripe cache rely on the first 3!
 + */
 +enum list_types {
 +      LIST_FLUSH,     /* Stripes to flush for io. */
 +      LIST_ENDIO,     /* Stripes to endio. */
 +      LIST_LRU,       /* Least recently used stripes. */
 +      SC_NR_LISTS,    /* # of lists in stripe cache. */
 +      LIST_HASH = SC_NR_LISTS,        /* Hashed stripes. */
 +      LIST_RECOVER = LIST_HASH, /* For recovery type stripes only. */
 +      STRIPE_NR_LISTS,/* To size array in struct stripe. */
 +};
 +
 +/* Addressing region recovery. */
 +struct recover_addr {
 +      struct dm_region *reg;  /* Actual region to recover. */
 +      sector_t pos;   /* Position within region to recover. */
 +      sector_t end;   /* End of region to recover. */
 +};
 +
 +/* A stripe: the io object to handle all reads and writes to a RAID set. */
 +struct stripe {
 +      atomic_t cnt;                   /* Reference count. */
 +      struct stripe_cache *sc;        /* Backpointer to stripe cache. */
 +
 +      /*
 +       * 4 linked lists:
 +       *   o io list to flush io
 +       *   o endio list
 +       *   o LRU list to put stripes w/o reference count on
 +       *   o stripe cache hash
 +       */
 +      struct list_head lists[STRIPE_NR_LISTS];
 +
 +      sector_t key;    /* Hash key. */
 +      region_t region; /* Region stripe is mapped to. */
 +
 +      struct {
 +              unsigned long flags;    /* Stripe state flags (see below). */
 +
 +              /*
 +               * Pending ios in flight:
 +               *
 +               * used to control move of stripe to endio list
 +               */
 +              atomic_t pending;
 +
 +              /* Sectors to read and write for multi page stripe sets. */
 +              unsigned size;
 +      } io;
 +
 +      /* Address region recovery. */
 +      struct recover_addr *recover;
 +
 +      /* Lock on stripe (Future: for clustering). */
 +      void *lock;
 +
 +      struct {
 +              unsigned short parity;  /* Parity chunk index. */
 +              short recover;          /* Recovery chunk index. */
 +      } idx;
 +
 +      /*
 +       * This stripe's memory cache object (dm-mem-cache);
 +       * i.e. the io chunk pages.
 +       */
 +      struct dm_mem_cache_object *obj;
 +
 +      /* Array of stripe sets (dynamically allocated). */
 +      struct stripe_chunk chunk[0];
 +};
 +
 +/* States stripes can be in (flags field). */
 +enum stripe_states {
 +      STRIPE_ERROR,           /* io error on stripe. */
 +      STRIPE_MERGED,          /* Writes got merged to be written. */
 +      STRIPE_RBW,             /* Read-before-write stripe. */
 +      STRIPE_RECONSTRUCT,     /* Reconstruct of a missing chunk required. */
 +      STRIPE_RECONSTRUCTED,   /* Reconstructed of a missing chunk. */
 +      STRIPE_RECOVER,         /* Stripe used for RAID set recovery. */
 +};
 +
 +/* Define stripe bit operations. */
 +BITOPS(Stripe, Error,       stripe, STRIPE_ERROR)
 +BITOPS(Stripe, Merged,        stripe, STRIPE_MERGED)
 +BITOPS(Stripe, RBW,         stripe, STRIPE_RBW)
 +BITOPS(Stripe, Reconstruct,   stripe, STRIPE_RECONSTRUCT)
 +BITOPS(Stripe, Reconstructed, stripe, STRIPE_RECONSTRUCTED)
 +BITOPS(Stripe, Recover,             stripe, STRIPE_RECOVER)
 +
 +/* A stripe hash. */
 +struct stripe_hash {
 +      struct list_head *hash;
 +      unsigned buckets;
 +      unsigned mask;
 +      unsigned prime;
 +      unsigned shift;
 +};
 +
 +enum sc_lock_types {
 +      LOCK_ENDIO,     /* Protect endio list. */
 +      NR_LOCKS,       /* To size array in struct stripe_cache. */
 +};
 +
 +/* A stripe cache. */
 +struct stripe_cache {
 +      /* Stripe hash. */
 +      struct stripe_hash hash;
 +
 +      spinlock_t locks[NR_LOCKS];     /* Locks to protect lists. */
 +
 +      /* Stripes with io to flush, stripes to endio and LRU lists. */
 +      struct list_head lists[SC_NR_LISTS];
 +
 +      /* Slab cache to allocate stripes from. */
 +      struct {
 +              struct kmem_cache *cache;       /* Cache itself. */
 +              char name[32];  /* Unique name. */
 +      } kc;
 +
 +      struct dm_io_client *dm_io_client; /* dm-io client resource context. */
 +
 +      /* dm-mem-cache client resource context. */
 +      struct dm_mem_cache_client *mem_cache_client;
 +
 +      int stripes_parm;           /* # stripes parameter from constructor. */
 +      atomic_t stripes;           /* actual # of stripes in cache. */
 +      atomic_t stripes_to_set;    /* # of stripes to resize cache to. */
 +      atomic_t stripes_last;      /* last # of stripes in cache. */
 +      atomic_t active_stripes;    /* actual # of active stripes in cache. */
 +
 +      /* REMOVEME: */
 +      atomic_t active_stripes_max; /* actual # of active stripes in cache. */
 +};
 +
 +/* Flag specs for raid_dev. */
 +enum raid_dev_flags {
 +      DEV_FAILED,     /* Device failed. */
 +      DEV_IO_QUEUED,  /* Io got queued to device. */
 +};
 +
 +/* The raid device in a set. */
 +struct raid_dev {
 +      struct dm_dev *dev;
 +      sector_t start;         /* Offset to map to. */
 +      struct {        /* Using struct to be able to BITOPS(). */
 +              unsigned long flags;    /* raid_dev_flags. */
 +      } io;
 +};
 +
 +BITOPS(Dev, Failed,   raid_dev, DEV_FAILED)
 +BITOPS(Dev, IoQueued, raid_dev, DEV_IO_QUEUED)
 +
 +/* Flags spec for raid_set. */
 +enum raid_set_flags {
 +      RS_CHECK_OVERWRITE,     /* Check for chunk overwrites. */
 +      RS_DEAD,                /* RAID set inoperational. */
 +      RS_DEAD_ENDIO_MESSAGE,  /* RAID set dead endio one-off message. */
 +      RS_DEGRADED,            /* Io errors on RAID device. */
 +      RS_DEVEL_STATS,         /* REMOVEME: display status information. */
 +      RS_ENFORCE_PARITY_CREATION,/* Enforce parity creation. */
 +      RS_PROHIBIT_WRITES,     /* Prohibit writes on device failure. */
 +      RS_RECOVER,             /* Do recovery. */
 +      RS_RECOVERY_BANDWIDTH,  /* Allow recovery bandwidth (delayed bios). */
 +      RS_SC_BUSY,             /* Stripe cache busy -> send an event. */
 +      RS_SUSPEND,             /* Suspend RAID set. */
 +};
 +
 +/* REMOVEME: devel stats counters. */
 +enum stats_types {
 +      S_BIOS_READ,
 +      S_BIOS_ADDED_READ,
 +      S_BIOS_ENDIO_READ,
 +      S_BIOS_WRITE,
 +      S_BIOS_ADDED_WRITE,
 +      S_BIOS_ENDIO_WRITE,
 +      S_CAN_MERGE,
 +      S_CANT_MERGE,
 +      S_CONGESTED,
 +      S_DM_IO_READ,
 +      S_DM_IO_WRITE,
 +      S_BANDWIDTH,
 +      S_BARRIER,
 +      S_BIO_COPY_PL_NEXT,
 +      S_DEGRADED,
 +      S_DELAYED_BIOS,
 +      S_FLUSHS,
 +      S_HITS_1ST,
 +      S_IOS_POST,
 +      S_INSCACHE,
 +      S_MAX_LOOKUP,
 +      S_CHUNK_LOCKED,
 +      S_NO_BANDWIDTH,
 +      S_NOT_CONGESTED,
 +      S_NO_RW,
 +      S_NOSYNC,
 +      S_OVERWRITE,
 +      S_PROHIBITCHUNKIO,
 +      S_RECONSTRUCT_EI,
 +      S_RECONSTRUCT_DEV,
 +      S_RECONSTRUCT_SET,
 +      S_RECONSTRUCTED,
 +      S_REQUEUE,
 +      S_STRIPE_ERROR,
 +      S_SUM_DELAYED_BIOS,
 +      S_XORS,
 +      S_NR_STATS,     /* # of stats counters. Must be last! */
 +};
 +
 +/* Status type -> string mappings. */
 +struct stats_map {
 +      const enum stats_types type;
 +      const char *str;
 +};
 +
 +static struct stats_map stats_map[] = {
 +      { S_BIOS_READ, "r=" },
 +      { S_BIOS_ADDED_READ, "/" },
 +      { S_BIOS_ENDIO_READ, "/" },
 +      { S_BIOS_WRITE, " w=" },
 +      { S_BIOS_ADDED_WRITE, "/" },
 +      { S_BIOS_ENDIO_WRITE, "/" },
 +      { S_DM_IO_READ, " rc=" },
 +      { S_DM_IO_WRITE, " wc=" },
 +      { S_BANDWIDTH, "\nbw=" },
 +      { S_NO_BANDWIDTH, " no_bw=" },
 +      { S_BARRIER, "\nbarrier=" },
 +      { S_BIO_COPY_PL_NEXT, "\nbio_cp_next=" },
 +      { S_CAN_MERGE, "\nmerge=" },
 +      { S_CANT_MERGE, "/no_merge=" },
 +      { S_CHUNK_LOCKED, "\nchunk_locked=" },
 +      { S_CONGESTED, "\ncgst=" },
 +      { S_NOT_CONGESTED, "/not_cgst=" },
 +      { S_DEGRADED, "\ndegraded=" },
 +      { S_DELAYED_BIOS, "\ndel_bios=" },
 +      { S_SUM_DELAYED_BIOS, "/sum_del_bios=" },
 +      { S_FLUSHS, "\nflushs=" },
 +      { S_HITS_1ST, "\nhits_1st=" },
 +      { S_IOS_POST, " ios_post=" },
 +      { S_INSCACHE, " inscache=" },
 +      { S_MAX_LOOKUP, " maxlookup=" },
 +      { S_NO_RW, "\nno_rw=" },
 +      { S_NOSYNC, " nosync=" },
 +      { S_OVERWRITE, " ovr=" },
 +      { S_PROHIBITCHUNKIO, " prhbt_io=" },
 +      { S_RECONSTRUCT_EI, "\nrec_ei=" },
 +      { S_RECONSTRUCT_DEV, " rec_dev=" },
 +      { S_RECONSTRUCT_SET, " rec_set=" },
 +      { S_RECONSTRUCTED, " rec=" },
 +      { S_REQUEUE, " requeue=" },
 +      { S_STRIPE_ERROR, " stripe_err=" },
 +      { S_XORS, " xors=" },
 +};
 +
 +/*
 + * A RAID set.
 + */
 +#define       dm_rh_client    dm_region_hash
 +enum count_type { IO_WORK = 0, IO_RECOVER, IO_NR_COUNT };
 +typedef void (*xor_function_t)(unsigned count, unsigned long **data);
 +struct raid_set {
 +      struct dm_target *ti;   /* Target pointer. */
 +
 +      struct {
 +              unsigned long flags;    /* State flags. */
 +              struct mutex in_lock;   /* Protects central input list below. */
 +              struct mutex xor_lock;  /* Protects xor algorithm set. */
 +              struct bio_list in;     /* Pending ios (central input list). */
 +              struct bio_list work;   /* ios work set. */
 +              wait_queue_head_t suspendq;     /* suspend synchronization. */
 +              atomic_t in_process;    /* counter of queued bios (suspendq). */
 +              atomic_t in_process_max;/* counter of queued bios max. */
 +
 +              /* io work. */
 +              struct workqueue_struct *wq;
 +              struct delayed_work dws_do_raid;        /* For main worker. */
 +              struct work_struct ws_do_table_event;   /* For event worker. */
 +      } io;
 +
 +      /* Stripe locking abstraction. */
 +      struct dm_raid45_locking_type *locking;
 +
 +      struct stripe_cache sc; /* Stripe cache for this set. */
 +
 +      /* Xor optimization. */
 +      struct {
 +              struct xor_func *f;
 +              unsigned chunks;
 +              unsigned speed;
 +      } xor;
 +
 +      /* Recovery parameters. */
 +      struct recover {
 +              struct dm_dirty_log *dl;        /* Dirty log. */
 +              struct dm_rh_client *rh;        /* Region hash. */
 +
 +              struct dm_io_client *dm_io_client; /* recovery dm-io client. */
 +              /* dm-mem-cache client resource context for recovery stripes. */
 +              struct dm_mem_cache_client *mem_cache_client;
 +
 +              struct list_head stripes;       /* List of recovery stripes. */
 +
 +              region_t nr_regions;
 +              region_t nr_regions_to_recover;
 +              region_t nr_regions_recovered;
 +              unsigned long start_jiffies;
 +              unsigned long end_jiffies;
 +
 +              unsigned bandwidth;      /* Recovery bandwidth [%]. */
 +              unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
 +              unsigned bandwidth_parm; /*  " constructor parm. */
 +              unsigned io_size;        /* recovery io size <= region size. */
 +              unsigned io_size_parm;   /* recovery io size ctr parameter. */
 +              unsigned recovery;       /* Recovery allowed/prohibited. */
 +              unsigned recovery_stripes; /* # of parallel recovery stripes. */
 +
 +              /* recovery io throttling. */
 +              atomic_t io_count[IO_NR_COUNT]; /* counter recover/regular io.*/
 +              unsigned long last_jiffies;
 +      } recover;
 +
 +      /* RAID set parameters. */
 +      struct {
 +              struct raid_type *raid_type;    /* RAID type (eg, RAID4). */
 +              unsigned raid_parms;    /* # variable raid parameters. */
 +
 +              unsigned chunk_size;    /* Sectors per chunk. */
 +              unsigned chunk_size_parm;
 +              unsigned chunk_shift;   /* rsector chunk size shift. */
 +
 +              unsigned io_size;       /* Sectors per io. */
 +              unsigned io_size_parm;
 +              unsigned io_mask;       /* Mask for bio_copy_page_list(). */
 +              unsigned io_inv_mask;   /* Mask for raid_address(). */
 +
 +              sector_t sectors_per_dev;       /* Sectors per device. */
 +
 +              atomic_t failed_devs;           /* Amount of devices failed. */
 +
 +              /* Index of device to initialize. */
 +              int dev_to_init;
 +              int dev_to_init_parm;
 +
 +              /* Raid devices dynamically allocated. */
 +              unsigned raid_devs;     /* # of RAID devices below. */
 +              unsigned data_devs;     /* # of RAID data devices. */
 +
 +              int ei;         /* index of failed RAID device. */
 +
 +              /* Index of dedicated parity device (i.e. RAID4). */
 +              int pi;
 +              int pi_parm;    /* constructor parm for status output. */
 +      } set;
 +
 +      /* REMOVEME: devel stats counters. */
 +      atomic_t stats[S_NR_STATS];
 +
 +      /* Dynamically allocated temporary pointers for xor(). */
 +      unsigned long **data;
 +
 +      /* Dynamically allocated RAID devices. Alignment? */
 +      struct raid_dev dev[0];
 +};
 +
 +/* Define RAID set bit operations. */
 +BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
 +BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
 +BITOPS(RS, Dead, raid_set, RS_DEAD)
 +BITOPS(RS, DeadEndioMessage, raid_set, RS_DEAD_ENDIO_MESSAGE)
 +BITOPS(RS, Degraded, raid_set, RS_DEGRADED)
 +BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
 +BITOPS(RS, EnforceParityCreation, raid_set, RS_ENFORCE_PARITY_CREATION)
 +BITOPS(RS, ProhibitWrites, raid_set, RS_PROHIBIT_WRITES)
 +BITOPS(RS, Recover, raid_set, RS_RECOVER)
 +BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
 +BITOPS(RS, Suspend, raid_set, RS_SUSPEND)
 +#undef BITOPS
 +
 +/*-----------------------------------------------------------------
 + * Raid-4/5 set structures.
 + *---------------------------------------------------------------*/
 +/* RAID level definitions. */
 +enum raid_level {
 +      raid4,
 +      raid5,
 +};
 +
 +/* Symmetric/Asymmetric, Left/Right parity rotating algorithms. */
 +enum raid_algorithm {
 +      none,
 +      left_asym,
 +      right_asym,
 +      left_sym,
 +      right_sym,
 +};
 +
 +struct raid_type {
 +      const char *name;               /* RAID algorithm. */
 +      const char *descr;              /* Descriptor text for logging. */
 +      const unsigned parity_devs;     /* # of parity devices. */
 +      const unsigned minimal_devs;    /* minimal # of devices in set. */
 +      const enum raid_level level;            /* RAID level. */
 +      const enum raid_algorithm algorithm;    /* RAID algorithm. */
 +};
 +
 +/* Supported raid types and properties. */
 +static struct raid_type raid_types[] = {
 +      {"raid4",    "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
 +      {"raid5_la", "RAID5 (left asymmetric)",       1, 3, raid5, left_asym},
 +      {"raid5_ra", "RAID5 (right asymmetric)",      1, 3, raid5, right_asym},
 +      {"raid5_ls", "RAID5 (left symmetric)",        1, 3, raid5, left_sym},
 +      {"raid5_rs", "RAID5 (right symmetric)",       1, 3, raid5, right_sym},
 +};
 +
 +/* Address as calculated by raid_address(). */
 +struct raid_address {
 +      sector_t key;           /* Hash key (address of stripe % chunk_size). */
 +      unsigned di, pi;        /* Data and parity disks index. */
 +};
 +
 +/* REMOVEME: reset statistics counters. */
 +static void stats_reset(struct raid_set *rs)
 +{
 +      unsigned s = S_NR_STATS;
 +
 +      while (s--)
 +              atomic_set(rs->stats + s, 0);
 +}
 +
 +/*----------------------------------------------------------------
 + * RAID set management routines.
 + *--------------------------------------------------------------*/
 +/*
 + * Begin small helper functions.
 + */
 +/* No need to be called from region hash indirectly at dm_rh_dec(). */
 +static void wake_dummy(void *context) {}
 +
 +/* Return # of io reference. */
 +static int io_ref(struct raid_set *rs)
 +{
 +      return atomic_read(&rs->io.in_process);
 +}
 +
 +/* Get an io reference. */
 +static void io_get(struct raid_set *rs)
 +{
 +      int p = atomic_inc_return(&rs->io.in_process);
 +
 +      if (p > atomic_read(&rs->io.in_process_max))
 +              atomic_set(&rs->io.in_process_max, p); /* REMOVEME: max. */
 +}
 +
 +/* Put the io reference and conditionally wake io waiters. */
 +static void io_put(struct raid_set *rs)
 +{
 +      /* Intel: rebuild data corrupter? */
 +      if (atomic_dec_and_test(&rs->io.in_process))
 +              wake_up(&rs->io.suspendq);
 +      else
 +              BUG_ON(io_ref(rs) < 0);
 +}
 +
 +/* Wait until all io has been processed. */
 +static void wait_ios(struct raid_set *rs)
 +{
 +      wait_event(rs->io.suspendq, !io_ref(rs));
 +}
 +
 +/* Queue (optionally delayed) io work. */
 +static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
 +{
 +      queue_delayed_work(rs->io.wq, &rs->io.dws_do_raid, delay);
 +}
 +
 +/* Queue io work immediately (called from region hash too). */
 +static void wake_do_raid(void *context)
 +{
 +      struct raid_set *rs = context;
 +
 +      queue_work(rs->io.wq, &rs->io.dws_do_raid.work);
 +}
 +
 +/* Calculate device sector offset. */
 +static sector_t _sector(struct raid_set *rs, struct bio *bio)
 +{
 +      sector_t sector = bio->bi_sector;
 +
 +      sector_div(sector, rs->set.data_devs);
 +      return sector;
 +}
 +
 +/* Return # of active stripes in stripe cache. */
 +static int sc_active(struct stripe_cache *sc)
 +{
 +      return atomic_read(&sc->active_stripes);
 +}
 +
 +/* Stripe cache busy indicator. */
 +static int sc_busy(struct raid_set *rs)
 +{
 +      return sc_active(&rs->sc) >
 +             atomic_read(&rs->sc.stripes) - (STRIPES_MIN / 2);
 +}
 +
 +/* Set chunks states. */
 +enum chunk_dirty_type { CLEAN, DIRTY, ERROR };
 +static void chunk_set(struct stripe_chunk *chunk, enum chunk_dirty_type type)
 +{
 +      switch (type) {
 +      case CLEAN:
 +              ClearChunkDirty(chunk);
 +              break;
 +      case DIRTY:
 +              SetChunkDirty(chunk);
 +              break;
 +      case ERROR:
 +              SetChunkError(chunk);
 +              SetStripeError(chunk->stripe);
 +              return;
 +      default:
 +              BUG();
 +      }
 +
 +      SetChunkUptodate(chunk);
 +      SetChunkIo(chunk);
 +      ClearChunkError(chunk);
 +}
 +
 +/* Return region state for a sector. */
 +static int region_state(struct raid_set *rs, sector_t sector,
 +                      enum dm_rh_region_states state)
 +{
 +      struct dm_rh_client *rh = rs->recover.rh;
 +      region_t region = dm_rh_sector_to_region(rh, sector);
 +
 +      return !!(dm_rh_get_state(rh, region, 1) & state);
 +}
 +
 +/*
 + * Return true in case a chunk should be read/written
 + *
 + * Conditions to read/write:
 + *    o chunk not uptodate
 + *    o chunk dirty
 + *
 + * Conditions to avoid io:
 + *    o io already ongoing on chunk
 + *    o io explicitly prohibited
 + */
 +static int chunk_io(struct stripe_chunk *chunk)
 +{
 +      /* 2nd run optimization (flag set below on first run). */
 +      if (TestClearChunkMustIo(chunk))
 +              return 1;
 +
 +      /* Avoid io if prohibited or a locked chunk. */
 +      if (!ChunkIo(chunk) || ChunkLocked(chunk))
 +              return 0;
 +
 +      if (!ChunkUptodate(chunk) || ChunkDirty(chunk)) {
 +              SetChunkMustIo(chunk); /* 2nd run optimization. */
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Call a function on each chunk needing io unless device failed. */
 +static unsigned for_each_io_dev(struct stripe *stripe,
 +                              void (*f_io)(struct stripe *stripe, unsigned p))
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned p, r = 0;
 +
 +      for (p = 0; p < rs->set.raid_devs; p++) {
 +              if (chunk_io(CHUNK(stripe, p)) && !DevFailed(rs->dev + p)) {
 +                      f_io(stripe, p);
 +                      r++;
 +              }
 +      }
 +
 +      return r;
 +}
 +
 +/*
 + * Index of device to calculate parity on.
 + *
 + * Either the parity device index *or* the selected
 + * device to init after a spare replacement.
 + */
 +static int dev_for_parity(struct stripe *stripe, int *sync)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int r = region_state(rs, stripe->key, DM_RH_NOSYNC | DM_RH_RECOVERING);
 +
 +      *sync = !r;
 +
 +      /* Reconstruct a particular device? */
 +      if (r && rs->set.dev_to_init > -1)
 +              return rs->set.dev_to_init;
 +      else if (rs->set.raid_type->level == raid4)
 +              return rs->set.pi;
 +      else if (!StripeRecover(stripe))
 +              return stripe->idx.parity;
 +      else
 +              return -1;
 +}
 +
 +/* RAID set congested function. */
 +static int rs_congested(void *congested_data, int bdi_bits)
 +{
 +      int r;
 +      unsigned p;
 +      struct raid_set *rs = congested_data;
 +
 +      if (sc_busy(rs) || RSSuspend(rs) || RSProhibitWrites(rs))
 +              r = 1;
 +      else for (r = 0, p = rs->set.raid_devs; !r && p--; ) {
 +              /* If any of our component devices are overloaded. */
 +              struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
 +
 +              r |= bdi_congested(&q->backing_dev_info, bdi_bits);
 +      }
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
 +      return r;
 +}
 +
 +/* RAID device degrade check. */
 +static void rs_check_degrade_dev(struct raid_set *rs,
 +                               struct stripe *stripe, unsigned p)
 +{
 +      if (TestSetDevFailed(rs->dev + p))
 +              return;
 +
 +      /* Throw an event in case of member device errors. */
 +      if ((atomic_inc_return(&rs->set.failed_devs) >
 +           rs->set.raid_type->parity_devs) &&
 +           !TestSetRSDead(rs)) {
 +              /* Display RAID set dead message once. */
 +              unsigned p;
 +              char buf[BDEVNAME_SIZE];
 +
 +              DMERR("FATAL: too many devices failed -> RAID set broken");
 +              for (p = 0; p < rs->set.raid_devs; p++) {
 +                      if (DevFailed(rs->dev + p))
 +                              DMERR("device /dev/%s failed",
 +                                    bdevname(rs->dev[p].dev->bdev, buf));
 +              }
 +      }
 +
 +      /* Only log the first member error. */
 +      if (!TestSetRSDegraded(rs)) {
 +              char buf[BDEVNAME_SIZE];
 +
 +              /* Store index for recovery. */
 +              rs->set.ei = p;
 +              DMERR("CRITICAL: %sio error on device /dev/%s "
 +                    "in region=%llu; DEGRADING RAID set\n",
 +                    stripe ? "" : "FAKED ",
 +                    bdevname(rs->dev[p].dev->bdev, buf),
 +                    (unsigned long long) (stripe ? stripe->key : 0));
 +              DMERR("further device error messages suppressed");
 +      }
 +
 +      /* Prohibit further writes to allow userspace to update metadata. */
 +      SetRSProhibitWrites(rs);
 +      schedule_work(&rs->io.ws_do_table_event);
 +}
 +
 +/* RAID set degrade check. */
 +static void rs_check_degrade(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned p = rs->set.raid_devs;
 +
 +      while (p--) {
 +              if (ChunkError(CHUNK(stripe, p)))
 +                      rs_check_degrade_dev(rs, stripe, p);
 +      }
 +}
 +
 +/* Lookup a RAID device by name or by major:minor number. */
 +static int raid_dev_lookup(struct raid_set *rs, struct raid_dev *dev_lookup)
 +{
 +      unsigned p;
 +      struct raid_dev *dev;
 +
 +      /*
 +       * Must be an incremental loop, because the device array
 +       * can still have empty slots when called from raid_ctr()
 +       */
 +      for (dev = rs->dev, p = 0;
 +           dev->dev && p < rs->set.raid_devs;
 +           dev++, p++) {
 +              if (dev_lookup->dev->bdev->bd_dev == dev->dev->bdev->bd_dev)
 +                      return p;
 +      }
 +
 +      return -ENODEV;
 +}
 +/*
 + * End small helper functions.
 + */
 +
 +/*
 + * Stripe hash functions
 + */
 +/* Initialize/destroy stripe hash. */
 +static int hash_init(struct stripe_hash *hash, unsigned stripes)
 +{
 +      unsigned buckets = roundup_pow_of_two(stripes >> 1);
 +      static unsigned hash_primes[] = {
 +              /* Table of primes for hash_fn/table size optimization. */
 +              1, 2, 3, 7, 13, 27, 53, 97, 193, 389, 769,
 +              1543, 3079, 6151, 12289, 24593, 49157, 98317,
 +      };
 +
 +      /* Allocate stripe hash buckets. */
 +      hash->hash = vmalloc(buckets * sizeof(*hash->hash));
 +      if (!hash->hash)
 +              return -ENOMEM;
 +
 +      hash->buckets = buckets;
 +      hash->mask = buckets - 1;
 +      hash->shift = ffs(buckets);
 +      if (hash->shift >= ARRAY_SIZE(hash_primes))
 +              hash->shift = ARRAY_SIZE(hash_primes) - 1;
 +
 +      BUG_ON(hash->shift < 2);
 +      hash->prime = hash_primes[hash->shift];
 +
 +      /* Initialize buckets. */
 +      while (buckets--)
 +              INIT_LIST_HEAD(hash->hash + buckets);
 +      return 0;
 +}
 +
 +static void hash_exit(struct stripe_hash *hash)
 +{
 +      if (hash->hash) {
 +              vfree(hash->hash);
 +              hash->hash = NULL;
 +      }
 +}
 +
 +static unsigned hash_fn(struct stripe_hash *hash, sector_t key)
 +{
 +      return (unsigned) (((key * hash->prime) >> hash->shift) & hash->mask);
 +}
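 +
 +/*
 + * Worked example (illustration only): with STRIPES_DEFAULT = 80,
 + * hash_init() computes buckets = roundup_pow_of_two(80 >> 1) = 64,
 + * hence mask = 63, shift = ffs(64) = 7 and prime = hash_primes[7] = 97;
 + * key 1024 then hashes to ((1024 * 97) >> 7) & 63 = 776 & 63 = 8.
 + */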
 +
 +static struct list_head *hash_bucket(struct stripe_hash *hash, sector_t key)
 +{
 +      return hash->hash + hash_fn(hash, key);
 +}
 +
 +/* Insert an entry into a hash. */
 +static void stripe_insert(struct stripe_hash *hash, struct stripe *stripe)
 +{
 +      list_add(stripe->lists + LIST_HASH, hash_bucket(hash, stripe->key));
 +}
 +
 +/* Lookup an entry in the stripe hash. */
 +static struct stripe *stripe_lookup(struct stripe_cache *sc, sector_t key)
 +{
 +      unsigned look = 0;
 +      struct stripe *stripe;
 +      struct list_head *bucket = hash_bucket(&sc->hash, key);
 +
 +      list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
 +              look++;
 +
 +              if (stripe->key == key) {
 +                      /* REMOVEME: statistics. */
 +                      if (look > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
 +                              atomic_set(RS(sc)->stats + S_MAX_LOOKUP, look);
 +                      return stripe;
 +              }
 +      }
 +
 +      return NULL;
 +}
 +
 +/* Resize the stripe cache hash on size changes. */
 +static int sc_hash_resize(struct stripe_cache *sc)
 +{
 +      /* Resize indicated? */
 +      if (atomic_read(&sc->stripes) != atomic_read(&sc->stripes_last)) {
 +              int r;
 +              struct stripe_hash hash;
 +
 +              r = hash_init(&hash, atomic_read(&sc->stripes));
 +              if (r)
 +                      return r;
 +
 +              if (sc->hash.hash) {
 +                      unsigned b = sc->hash.buckets;
 +                      struct list_head *pos, *tmp;
 +
 +                      /* Walk old buckets and insert into new. */
 +                      while (b--) {
 +                              list_for_each_safe(pos, tmp, sc->hash.hash + b)
 +                                  stripe_insert(&hash,
 +                                                list_entry(pos, struct stripe,
 +                                                           lists[LIST_HASH]));
 +                      }
 +
 +              }
 +
 +              hash_exit(&sc->hash);
 +              memcpy(&sc->hash, &hash, sizeof(sc->hash));
 +              atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
 +      }
 +
 +      return 0;
 +}
 +/* End stripe hash functions. */
 +
 +/* List add, delete, push and pop functions. */
 +/* Delete a list entry if it is on a list. */
 +#define       DEL_LIST(lh) \
 +      do { \
 +              if (!list_empty(lh)) \
 +                      list_del_init(lh); \
 +      } while (0)
 +
 +/* Delete stripe from hash. */
 +static void stripe_hash_del(struct stripe *stripe)
 +{
 +      DEL_LIST(stripe->lists + LIST_HASH);
 +}
 +
 +/* Return stripe reference count. */
 +static inline int stripe_ref(struct stripe *stripe)
 +{
 +      return atomic_read(&stripe->cnt);
 +}
 +
 +/* Add stripe to flush list. */
 +static void stripe_flush_add(struct stripe *stripe)
 +{
 +      struct stripe_cache *sc = stripe->sc;
 +      struct list_head *lh = stripe->lists + LIST_FLUSH;
 +
 +      if (!StripeReconstruct(stripe) && list_empty(lh))
 +              list_add_tail(lh, sc->lists + LIST_FLUSH);
 +}
 +
 +/*
 + * Add stripe to LRU (inactive) list.
 + *
 + * Need lock, because of concurrent access from message interface.
 + */
 +static void stripe_lru_add(struct stripe *stripe)
 +{
 +      if (!StripeRecover(stripe)) {
 +              struct list_head *lh = stripe->lists + LIST_LRU;
 +
 +              if (list_empty(lh))
 +                      list_add_tail(lh, stripe->sc->lists + LIST_LRU);
 +      }
 +}
 +
 +#define POP_LIST(list) \
 +      do { \
 +              if (list_empty(sc->lists + (list))) \
 +                      stripe = NULL; \
 +              else { \
 +                      stripe = list_first_entry(sc->lists + (list), \
 +                                                struct stripe, \
 +                                                lists[(list)]); \
 +                      list_del_init(stripe->lists + (list)); \
 +              } \
 +      } while (0)
 +
 +/* Pop an available stripe off the LRU list. */
 +static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
 +{
 +      struct stripe *stripe;
 +
 +      POP_LIST(LIST_LRU);
 +      return stripe;
 +}
 +
 +/* Pop an available stripe off the io list. */
 +static struct stripe *stripe_io_pop(struct stripe_cache *sc)
 +{
 +      struct stripe *stripe;
 +
 +      POP_LIST(LIST_FLUSH);
 +      return stripe;
 +}
 +
 +/* Push a stripe safely onto the endio list to be handled by do_endios(). */
 +static void stripe_endio_push(struct stripe *stripe)
 +{
 +      unsigned long flags;
 +      struct stripe_cache *sc = stripe->sc;
 +      struct list_head *stripe_list = stripe->lists + LIST_ENDIO,
 +                       *sc_list = sc->lists + LIST_ENDIO;
 +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
 +
 +      /* This runs in parallel with do_endios(). */
 +      spin_lock_irqsave(lock, flags);
 +      if (list_empty(stripe_list))
 +              list_add_tail(stripe_list, sc_list);
 +      spin_unlock_irqrestore(lock, flags);
 +
 +      wake_do_raid(RS(sc)); /* Wake myself. */
 +}
 +
 +/* Safely pop a stripe off the endio list. */
 +static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
 +{
 +      struct stripe *stripe;
 +      spinlock_t *lock = sc->locks + LOCK_ENDIO;
 +
 +      /* This runs in parallel with endio(). */
 +      spin_lock_irq(lock);
 +      POP_LIST(LIST_ENDIO);
 +      spin_unlock_irq(lock);
 +      return stripe;
 +}
 +#undef POP_LIST
 +
 +/*
 + * Stripe cache locking functions
 + */
 +/* Dummy lock function for single host RAID4+5. */
 +static void *no_lock(sector_t key, enum dm_lock_type type)
 +{
 +      return &no_lock;
 +}
 +
 +/* Dummy unlock function for single host RAID4+5. */
 +static void no_unlock(void *lock_handle)
 +{
 +}
 +
 +/* No locking (for single host RAID 4+5). */
 +static struct dm_raid45_locking_type locking_none = {
 +      .lock = no_lock,
 +      .unlock = no_unlock,
 +};
 +
 +/* Lock a stripe (for clustering). */
 +static int
 +stripe_lock(struct stripe *stripe, int rw, sector_t key)
 +{
 +      stripe->lock = RS(stripe->sc)->locking->lock(key, rw == READ ? DM_RAID45_SHARED : DM_RAID45_EX);
 +      return stripe->lock ? 0 : -EPERM;
 +}
 +
 +/* Unlock a stripe (for clustering). */
 +static void stripe_unlock(struct stripe *stripe)
 +{
 +      RS(stripe->sc)->locking->unlock(stripe->lock);
 +      stripe->lock = NULL;
 +}
 +
 +/* Test io pending on stripe. */
 +static int stripe_io_ref(struct stripe *stripe)
 +{
 +      return atomic_read(&stripe->io.pending);
 +}
 +
 +static void stripe_io_get(struct stripe *stripe)
 +{
 +      if (atomic_inc_return(&stripe->io.pending) == 1)
 +              /* REMOVEME: statistics */
 +              atomic_inc(&stripe->sc->active_stripes);
 +      else
 +              BUG_ON(stripe_io_ref(stripe) < 0);
 +}
 +
 +static void stripe_io_put(struct stripe *stripe)
 +{
 +      if (atomic_dec_and_test(&stripe->io.pending)) {
 +              if (unlikely(StripeRecover(stripe)))
 +                      /* Don't put recovery stripe on endio list. */
 +                      wake_do_raid(RS(stripe->sc));
 +              else
 +                      /* Add regular stripe to endio list and wake daemon. */
 +                      stripe_endio_push(stripe);
 +
 +              /* REMOVEME: statistics */
 +              atomic_dec(&stripe->sc->active_stripes);
 +      } else
 +              BUG_ON(stripe_io_ref(stripe) < 0);
 +}
 +
 +/* Take stripe reference out. */
 +static int stripe_get(struct stripe *stripe)
 +{
 +      int r;
 +      struct list_head *lh = stripe->lists + LIST_LRU;
 +
 +      /* Delete stripe from LRU (inactive) list if on. */
 +      DEL_LIST(lh);
 +      BUG_ON(stripe_ref(stripe) < 0);
 +
 +      /* Lock stripe on first reference */
 +      r = (atomic_inc_return(&stripe->cnt) == 1) ?
 +          stripe_lock(stripe, WRITE, stripe->key) : 0;
 +
 +      return r;
 +}
 +#undef DEL_LIST
 +
 +/* Return references on a chunk. */
 +static int chunk_ref(struct stripe_chunk *chunk)
 +{
 +      return atomic_read(&chunk->cnt);
 +}
 +
 +/* Take out reference on a chunk. */
 +static int chunk_get(struct stripe_chunk *chunk)
 +{
 +      return atomic_inc_return(&chunk->cnt);
 +}
 +
 +/* Drop reference on a chunk. */
 +static void chunk_put(struct stripe_chunk *chunk)
 +{
 +      BUG_ON(atomic_dec_return(&chunk->cnt) < 0);
 +}
 +
 +/*
 + * Drop reference on a stripe.
 + *
 + * Move it to list of LRU stripes if zero.
 + */
 +static void stripe_put(struct stripe *stripe)
 +{
 +      if (atomic_dec_and_test(&stripe->cnt)) {
 +              BUG_ON(stripe_io_ref(stripe));
 +              stripe_unlock(stripe);
 +      } else
 +              BUG_ON(stripe_ref(stripe) < 0);
 +}
 +
 +/* Helper needed by for_each_io_dev(). */
 +static void stripe_get_references(struct stripe *stripe, unsigned p)
 +{
 +      /*
 +       * Another one to reference the stripe in
 +       * order to protect vs. LRU list moves.
 +       */
 +      io_get(RS(stripe->sc)); /* Global io references. */
 +      stripe_get(stripe);
 +      stripe_io_get(stripe);  /* One for each chunk io. */
 +}
 +
 +/* Helper for endio() to put all taken references. */
 +static void stripe_put_references(struct stripe *stripe)
 +{
 +      stripe_io_put(stripe);  /* One for each chunk io. */
 +      stripe_put(stripe);
 +      io_put(RS(stripe->sc));
 +}
 +
 +/*
 + * Stripe cache functions.
 + */
 +/*
 + * Invalidate all chunks (i.e. their pages) of a stripe.
 + *
 + * I only keep state for the whole chunk.
 + */
 +static inline void stripe_chunk_invalidate(struct stripe_chunk *chunk)
 +{
 +      chunk->io.flags = 0;
 +}
 +
 +static void
 +stripe_chunks_invalidate(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--)
 +              stripe_chunk_invalidate(CHUNK(stripe, p));
 +}
 +
 +/* Prepare stripe for (re)use. */
 +static void stripe_invalidate(struct stripe *stripe)
 +{
 +      stripe->io.flags = 0;
 +      stripe->idx.parity = stripe->idx.recover = -1;
 +      stripe_chunks_invalidate(stripe);
 +}
 +
 +/*
 + * Allow io on all chunks of a stripe.
 + * If not set, IO will not occur; i.e. it's prohibited.
 + *
 + * Actual IO submission for allowed chunks depends
 + * on their !uptodate or dirty state.
 + */
 +static void stripe_allow_io(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--)
 +              SetChunkIo(CHUNK(stripe, p));
 +}
 +
 +/* Initialize a stripe. */
 +static void stripe_init(struct stripe_cache *sc, struct stripe *stripe)
 +{
 +      unsigned i, p = RS(sc)->set.raid_devs;
 +
 +      /* Work all io chunks. */
 +      while (p--) {
 +              struct stripe_chunk *chunk = CHUNK(stripe, p);
 +
 +              atomic_set(&chunk->cnt, 0);
 +              chunk->stripe = stripe;
 +              i = ARRAY_SIZE(chunk->bl);
 +              while (i--)
 +                      bio_list_init(chunk->bl + i);
 +      }
 +
 +      stripe->sc = sc;
 +
 +      i = ARRAY_SIZE(stripe->lists);
 +      while (i--)
 +              INIT_LIST_HEAD(stripe->lists + i);
 +
 +      stripe->io.size = RS(sc)->set.io_size;
 +      atomic_set(&stripe->cnt, 0);
 +      atomic_set(&stripe->io.pending, 0);
 +      stripe_invalidate(stripe);
 +}
 +
 +/* Number of pages per chunk. */
 +static inline unsigned chunk_pages(unsigned sectors)
 +{
 +      return dm_div_up(sectors, SECTORS_PER_PAGE);
 +}
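 +
 +/*
 + * E.g. with 4 KiB pages (SECTORS_PER_PAGE = 8), the default 64-sector
 + * chunk needs chunk_pages(64) = 8 pages (illustration only).
 + */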
 +
 +/* Number of pages per stripe. */
 +static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
 +{
 +      return chunk_pages(io_size) * rs->set.raid_devs;
 +}
 +
 +/* Initialize part of page_list (recovery). */
 +static void stripe_zero_pl_part(struct stripe *stripe, int p,
 +                              unsigned start, unsigned count)
 +{
 +      unsigned o = start / SECTORS_PER_PAGE, pages = chunk_pages(count);
 +      /* Get offset into the page_list. */
 +      struct page_list *pl = pl_elem(PL(stripe, p), o);
 +
 +      BUG_ON(!pl);
 +      while (pl && pages--) {
 +              BUG_ON(!pl->page);
 +              memset(page_address(pl->page), 0, PAGE_SIZE);
 +              pl = pl->next;
 +      }
 +}
 +
 +/* Initialize parity chunk of stripe. */
 +static void stripe_zero_chunk(struct stripe *stripe, int p)
 +{
 +      if (p > -1)
 +              stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
 +}
 +
 +/* Return dynamic stripe structure size. */
 +static size_t stripe_size(struct raid_set *rs)
 +{
 +      return sizeof(struct stripe) +
 +                    rs->set.raid_devs * sizeof(struct stripe_chunk);
 +}
 +
 +/* Allocate a stripe and its memory object. */
 +/* XXX adjust to cope with stripe cache and recovery stripe caches. */
 +enum grow { SC_GROW, SC_KEEP };
 +static struct stripe *stripe_alloc(struct stripe_cache *sc,
 +                                 struct dm_mem_cache_client *mc,
 +                                 enum grow grow)
 +{
 +      int r;
 +      struct stripe *stripe;
 +
 +      stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
 +      if (stripe) {
 +              /* Grow the dm-mem-cache by one object. */
 +              if (grow == SC_GROW) {
 +                      r = dm_mem_cache_grow(mc, 1);
 +                      if (r)
 +                              goto err_free;
 +              }
 +
 +              stripe->obj = dm_mem_cache_alloc(mc);
 +              if (IS_ERR(stripe->obj))
 +                      goto err_shrink;
 +
 +              stripe_init(sc, stripe);
 +      }
 +
 +      return stripe;
 +
 +err_shrink:
 +      if (grow == SC_GROW)
 +              dm_mem_cache_shrink(mc, 1);
 +err_free:
 +      kmem_cache_free(sc->kc.cache, stripe);
 +      return NULL;
 +}
 +
 +/*
 + * Free a stripes memory object, shrink the
 + * memory cache and free the stripe itself.
 + */
 +static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
 +{
 +      dm_mem_cache_free(mc, stripe->obj);
 +      dm_mem_cache_shrink(mc, 1);
 +      kmem_cache_free(stripe->sc->kc.cache, stripe);
 +}
 +
 +/* Free the recovery stripe. */
 +static void stripe_recover_free(struct raid_set *rs)
 +{
 +      struct recover *rec = &rs->recover;
 +      struct dm_mem_cache_client *mc;
 +
 +      mc = rec->mem_cache_client;
 +      rec->mem_cache_client = NULL;
 +      if (mc) {
 +              struct stripe *stripe;
 +
 +              while (!list_empty(&rec->stripes)) {
 +                      stripe = list_first_entry(&rec->stripes, struct stripe,
 +                                                lists[LIST_RECOVER]);
 +                      list_del(stripe->lists + LIST_RECOVER);
 +                      kfree(stripe->recover);
 +                      stripe_free(stripe, mc);
 +              }
 +
 +              dm_mem_cache_client_destroy(mc);
 +              dm_io_client_destroy(rec->dm_io_client);
 +              rec->dm_io_client = NULL;
 +      }
 +}
 +
 +/* Grow stripe cache. */
 +static int sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
 +{
 +      int r = 0;
 +
 +      /* Try to allocate this many (additional) stripes. */
 +      while (stripes--) {
 +              struct stripe *stripe =
 +                      stripe_alloc(sc, sc->mem_cache_client, grow);
 +
 +              if (likely(stripe)) {
 +                      stripe_lru_add(stripe);
 +                      atomic_inc(&sc->stripes);
 +              } else {
 +                      r = -ENOMEM;
 +                      break;
 +              }
 +      }
 +
 +      return r ? r : sc_hash_resize(sc);
 +}
 +
 +/* Shrink stripe cache. */
 +static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
 +{
 +      int r = 0;
 +
 +      /* Try to get unused stripe from LRU list. */
 +      while (stripes--) {
 +              struct stripe *stripe;
 +
 +              stripe = stripe_lru_pop(sc);
 +              if (stripe) {
 +                      /* An LRU stripe may never have ios pending! */
 +                      BUG_ON(stripe_io_ref(stripe));
 +                      BUG_ON(stripe_ref(stripe));
 +                      atomic_dec(&sc->stripes);
 +                      /* Remove from hash if on before deletion. */
 +                      stripe_hash_del(stripe);
 +                      stripe_free(stripe, sc->mem_cache_client);
 +              } else {
 +                      r = -ENOENT;
 +                      break;
 +              }
 +      }
 +
 +      /* Check if stats are still sane. */
 +      if (atomic_read(&sc->active_stripes_max) >
 +          atomic_read(&sc->stripes))
 +              atomic_set(&sc->active_stripes_max, 0);
 +
 +      if (r)
 +              return r;
 +
 +      return atomic_read(&sc->stripes) ? sc_hash_resize(sc) : 0;
 +}
 +
 +/* Create stripe cache and recovery. */
 +static int sc_init(struct raid_set *rs, unsigned stripes)
 +{
 +      unsigned i, r, rstripes;
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +      struct recover *rec = &rs->recover;
 +      struct mapped_device *md;
 +      struct gendisk *disk;
 +
 +      /* Initialize lists and locks. */
 +      i = ARRAY_SIZE(sc->lists);
 +      while (i--)
 +              INIT_LIST_HEAD(sc->lists + i);
 +
 +      INIT_LIST_HEAD(&rec->stripes);
 +
 +      /* Initialize endio and LRU list locks. */
 +      i = NR_LOCKS;
 +      while (i--)
 +              spin_lock_init(sc->locks + i);
 +
 +      /* Initialize atomic variables. */
 +      atomic_set(&sc->stripes, 0);
 +      atomic_set(&sc->stripes_to_set, 0);
 +      atomic_set(&sc->active_stripes, 0);
 +      atomic_set(&sc->active_stripes_max, 0); /* REMOVEME: statistics. */
 +
 +      /*
 +       * We need a runtime unique # to suffix the kmem cache name
 +       * because we'll have one for each active RAID set.
 +       */
 +      md = dm_table_get_md(rs->ti->table);
 +      disk = dm_disk(md);
 +      snprintf(sc->kc.name, sizeof(sc->kc.name), "%s-%d.%d", TARGET,
 +               disk->first_minor, atomic_inc_return(&_stripe_sc_nr));
 +      sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
 +                                       0, 0, NULL);
 +      if (!sc->kc.cache)
 +              return -ENOMEM;
 +
 +      /* Create memory cache client context for RAID stripe cache. */
 +      sc->mem_cache_client =
 +              dm_mem_cache_client_create(stripes, rs->set.raid_devs,
 +                                         chunk_pages(rs->set.io_size));
 +      if (IS_ERR(sc->mem_cache_client))
 +              return PTR_ERR(sc->mem_cache_client);
 +
 +      /* Create memory cache client context for RAID recovery stripe(s). */
 +      rstripes = rec->recovery_stripes;
 +      rec->mem_cache_client =
 +              dm_mem_cache_client_create(rstripes, rs->set.raid_devs,
 +                                         chunk_pages(rec->io_size));
 +      if (IS_ERR(rec->mem_cache_client))
 +              return PTR_ERR(rec->mem_cache_client);
 +
 +      /* Create dm-io client context for IO stripes. */
 +      sc->dm_io_client =
 +              dm_io_client_create((stripes > 32 ? 32 : stripes) *
 +                                  rs->set.raid_devs *
 +                                  chunk_pages(rs->set.io_size));
 +      if (IS_ERR(sc->dm_io_client))
 +              return PTR_ERR(sc->dm_io_client);
 +
 +      /* FIXME: intermingled with stripe cache initialization. */
 +      /* Create dm-io client context for recovery stripes. */
 +      rec->dm_io_client =
 +              dm_io_client_create(rstripes * rs->set.raid_devs *
 +                                  chunk_pages(rec->io_size));
 +      if (IS_ERR(rec->dm_io_client))
 +              return PTR_ERR(rec->dm_io_client);
 +
 +      /* Allocate stripes for set recovery. */
 +      while (rstripes--) {
 +              stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
 +              if (!stripe)
 +                      return -ENOMEM;
 +
 +              stripe->recover = kzalloc(sizeof(*stripe->recover), GFP_KERNEL);
 +              if (!stripe->recover) {
 +                      stripe_free(stripe, rec->mem_cache_client);
 +                      return -ENOMEM;
 +              }
 +
 +              SetStripeRecover(stripe);
 +              stripe->io.size = rec->io_size;
 +              list_add_tail(stripe->lists + LIST_RECOVER, &rec->stripes);
 +              /* Don't add recovery stripes to LRU list! */
 +      }
 +
 +      /*
 +       * Allocate the stripe objects from the
 +       * cache and add them to the LRU list.
 +       */
 +      r = sc_grow(sc, stripes, SC_KEEP);
 +      if (!r)
 +              atomic_set(&sc->stripes_last, stripes);
 +
 +      return r;
 +}
 +
 +/* Destroy the stripe cache. */
 +static void sc_exit(struct stripe_cache *sc)
 +{
 +      struct raid_set *rs = RS(sc);
 +
 +      if (sc->kc.cache) {
 +              stripe_recover_free(rs);
 +              BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes)));
 +              kmem_cache_destroy(sc->kc.cache);
 +              sc->kc.cache = NULL;
 +
 +              if (sc->mem_cache_client && !IS_ERR(sc->mem_cache_client))
 +                      dm_mem_cache_client_destroy(sc->mem_cache_client);
 +
 +              if (sc->dm_io_client && !IS_ERR(sc->dm_io_client))
 +                      dm_io_client_destroy(sc->dm_io_client);
 +
 +              hash_exit(&sc->hash);
 +      }
 +}
 +
 +/*
 + * Calculate RAID address
 + *
 + * Delivers a tuple with the index of the data disk holding the chunk
 + * in the set, the parity disk's index and the start of the stripe
 + * within the address space of the set (used as the stripe cache hash key).
 + * A worked example follows the function.
 + */
 +/* Thanks, MD. */
 +static struct raid_address *raid_address(struct raid_set *rs, sector_t sector,
 +                                       struct raid_address *addr)
 +{
 +      sector_t stripe, tmp;
 +
 +      /*
 +       * chunk_number = sector / chunk_size
 +       * stripe_number = chunk_number / data_devs
 +       * di = chunk_number % data_devs;
 +       */
 +      stripe = sector >> rs->set.chunk_shift;
 +      addr->di = sector_div(stripe, rs->set.data_devs);
 +
 +      switch (rs->set.raid_type->level) {
 +      case raid4:
 +              addr->pi = rs->set.pi;
 +              goto check_shift_di;
 +      case raid5:
 +              tmp = stripe;
 +              addr->pi = sector_div(tmp, rs->set.raid_devs);
 +
 +              switch (rs->set.raid_type->algorithm) {
 +              case left_asym:         /* Left asymmetric. */
 +                      addr->pi = rs->set.data_devs - addr->pi;
 +              case right_asym:        /* Right asymmetric. */
 +check_shift_di:
 +                      if (addr->di >= addr->pi)
 +                              addr->di++;
 +                      break;
 +              case left_sym:          /* Left symmetric. */
 +                      addr->pi = rs->set.data_devs - addr->pi;
 +              case right_sym:         /* Right symmetric. */
 +                      addr->di = (addr->pi + addr->di + 1) %
 +                                 rs->set.raid_devs;
 +                      break;
 +              case none: /* Can't happen: RAID4 algorithm placeholder. */
 +                      BUG();
 +              }
 +      }
 +
 +      /*
 +       * Start offset of the stripe's chunk on any single device of the RAID
 +       * set, adjusted in case io size differs from chunk size.
 +       */
 +      addr->key = (stripe << rs->set.chunk_shift) +
 +                  (sector & rs->set.io_inv_mask);
 +      return addr;
 +}
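 +/*
 + * Worked example (hypothetical numbers, not from a real set): for a RAID5
 + * left-symmetric set with raid_devs = 4 (data_devs = 3) and an 8 sector
 + * chunk (chunk_shift = 3), sector 100 gives chunk_number = 12, hence
 + * di = 12 % 3 = 0 and stripe = 4; pi = 4 % 4 = 0 becomes data_devs - 0 = 3
 + * and di = (3 + 0 + 1) % 4 = 0. So the data chunk lives on device 0,
 + * parity on device 3 and, assuming io_size == chunk_size (so the
 + * io_inv_mask term contributes 0), the hash key is 4 << 3 = 32.
 + */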
 +
 +/*
 + * Copy data across between stripe pages and bio vectors.
 + *
 + * Pay attention to data alignment in stripe and bio pages.
 + */
 +static void bio_copy_page_list(int rw, struct stripe *stripe,
 +                             struct page_list *pl, struct bio *bio)
 +{
 +      unsigned i, page_offset;
 +      void *page_addr;
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct bio_vec *bv;
 +
 +      /* Get start page in page list for this sector. */
 +      i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
 +      pl = pl_elem(pl, i);
 +      BUG_ON(!pl);
 +      BUG_ON(!pl->page);
 +
 +      page_addr = page_address(pl->page);
 +      page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
 +
 +      /* Walk all segments and copy data across between bio_vecs and pages. */
 +      bio_for_each_segment(bv, bio, i) {
 +              int len = bv->bv_len, size;
 +              unsigned bio_offset = 0;
 +              void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
 +redo:
 +              size = (page_offset + len > PAGE_SIZE) ?
 +                     PAGE_SIZE - page_offset : len;
 +
 +              if (rw == READ)
 +                      memcpy(bio_addr + bio_offset,
 +                             page_addr + page_offset, size);
 +              else
 +                      memcpy(page_addr + page_offset,
 +                             bio_addr + bio_offset, size);
 +
 +              page_offset += size;
 +              if (page_offset == PAGE_SIZE) {
 +                      /*
 +                       * We reached the end of the chunk page ->
 +                       * need to refer to the next one to copy more data.
 +                       */
 +                      len -= size;
 +                      if (len) {
 +                              /* Get next page. */
 +                              pl = pl->next;
 +                              BUG_ON(!pl);
 +                              BUG_ON(!pl->page);
 +                              page_addr = page_address(pl->page);
 +                              page_offset = 0;
 +                              bio_offset += size;
 +                              /* REMOVEME: statistics. */
 +                              atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
 +                              goto redo;
 +                      }
 +              }
 +
 +              __bio_kunmap_atomic(bio_addr, KM_USER0);
 +      }
 +}
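 +/*
 + * For illustration (hypothetical numbers, assuming 4 KiB pages): a bio_vec
 + * of 3072 bytes hitting the current chunk page at page_offset 3072 is
 + * copied in two steps above: 1024 bytes up to the page boundary, then,
 + * after advancing pl to the next chunk page, the remaining 2048 bytes at
 + * page_offset 0 via the redo loop.
 + */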
 +
 +/*
 + * Xor optimization macros.
 + */
 +/* Xor data pointer declaration and initialization macros. */
 +#define DECLARE_2     unsigned long *d0 = data[0], *d1 = data[1]
 +#define DECLARE_3     DECLARE_2, *d2 = data[2]
 +#define DECLARE_4     DECLARE_3, *d3 = data[3]
 +#define DECLARE_5     DECLARE_4, *d4 = data[4]
 +#define DECLARE_6     DECLARE_5, *d5 = data[5]
 +#define DECLARE_7     DECLARE_6, *d6 = data[6]
 +#define DECLARE_8     DECLARE_7, *d7 = data[7]
 +
 +/* Xor unroll macros. */
 +#define D2(n) d0[n] = d0[n] ^ d1[n]
 +#define D3(n) D2(n) ^ d2[n]
 +#define D4(n) D3(n) ^ d3[n]
 +#define D5(n) D4(n) ^ d4[n]
 +#define D6(n) D5(n) ^ d5[n]
 +#define D7(n) D6(n) ^ d6[n]
 +#define D8(n) D7(n) ^ d7[n]
 +
 +#define       X_2(macro, offset)      macro(offset); macro(offset + 1);
 +#define       X_4(macro, offset)      X_2(macro, offset); X_2(macro, offset + 2);
 +#define       X_8(macro, offset)      X_4(macro, offset); X_4(macro, offset + 4);
 +#define       X_16(macro, offset)     X_8(macro, offset); X_8(macro, offset + 8);
 +#define       X_32(macro, offset)     X_16(macro, offset); X_16(macro, offset + 16);
 +#define       X_64(macro, offset)     X_32(macro, offset); X_32(macro, offset + 32);
 +
 +/* Define a _xor_#chunks_#xors_per_run() function. */
 +#define       _XOR(chunks, xors_per_run) \
 +static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
 +{ \
 +      unsigned end = XOR_SIZE / sizeof(data[0]), i; \
 +      DECLARE_ ## chunks; \
 +\
 +      for (i = 0; i < end; i += xors_per_run) { \
 +              X_ ## xors_per_run(D ## chunks, i); \
 +      } \
 +}
 +
 +/* Define xor functions for 2 - 8 chunks and xors per run. */
 +#define       MAKE_XOR_PER_RUN(xors_per_run) \
 +      _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
 +      _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
 +      _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
 +      _XOR(8, xors_per_run);
 +
 +MAKE_XOR_PER_RUN(8)   /* Define _xor_*_8() functions. */
 +MAKE_XOR_PER_RUN(16)  /* Define _xor_*_16() functions. */
 +MAKE_XOR_PER_RUN(32)  /* Define _xor_*_32() functions. */
 +MAKE_XOR_PER_RUN(64)  /* Define _xor_*_64() functions. */
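 +/*
 + * For illustration, _XOR(3, 16) above expands (roughly) to:
 + *
 + *    static void _xor3_16(unsigned long **data)
 + *    {
 + *            unsigned end = XOR_SIZE / sizeof(data[0]), i;
 + *            unsigned long *d0 = data[0], *d1 = data[1], *d2 = data[2];
 + *
 + *            for (i = 0; i < end; i += 16) {
 + *                    d0[i] = d0[i] ^ d1[i] ^ d2[i];
 + *                    ...15 more unrolled statements for i + 1 .. i + 15...
 + *            }
 + *    }
 + *
 + * i.e. chunks d1 and d2 get xored into d0 in unrolled runs of 16 longs.
 + */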
 +
 +#define MAKE_XOR(xors_per_run) \
 +struct { \
 +      void (*f)(unsigned long **); \
 +} static xor_funcs ## xors_per_run[] = { \
 +      { NULL }, /* NULL pointers to optimize indexing in xor(). */ \
 +      { NULL }, \
 +      { _xor2_ ## xors_per_run }, \
 +      { _xor3_ ## xors_per_run }, \
 +      { _xor4_ ## xors_per_run }, \
 +      { _xor5_ ## xors_per_run }, \
 +      { _xor6_ ## xors_per_run }, \
 +      { _xor7_ ## xors_per_run }, \
 +      { _xor8_ ## xors_per_run }, \
 +}; \
 +\
 +static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
 +{ \
 +      /* Call the respective function for the number of chunks. */ \
 +      xor_funcs ## xors_per_run[n].f(data); \
 +}
 +
 +/* Define xor_8() - xor_64() functions. */
 +MAKE_XOR(8)
 +MAKE_XOR(16)
 +MAKE_XOR(32)
 +MAKE_XOR(64)
 +/*
 + * END xor optimization macros.
 + */
 +
 +/* Maximum number of chunks that can be xor'ed in one go. */
 +#define       XOR_CHUNKS_MAX  (ARRAY_SIZE(xor_funcs8) - 1)
 +
 +/* xor_blocks wrapper to allow for using that crypto library function. */
 +static void xor_blocks_wrapper(unsigned n, unsigned long **data)
 +{
 +      BUG_ON(n < 2 || n > MAX_XOR_BLOCKS + 1);
 +      xor_blocks(n - 1, XOR_SIZE, (void *) data[0], (void **) data + 1);
 +}
 +
 +struct xor_func {
 +      xor_function_t f;
 +      const char *name;
 +} static xor_funcs[] = {
 +      { xor_64,  "xor_64" },
 +      { xor_32,  "xor_32" },
 +      { xor_16,  "xor_16" },
 +      { xor_8,   "xor_8"  },
 +      { xor_blocks_wrapper, "xor_blocks" },
 +};
 +
 +/*
 + * Check if a chunk has to be xored in/out:
 + *
 + * o if writes are queued
 + * o if writes are merged
 + * o if stripe is to be reconstructed
 + * o if recovery stripe
 + */
 +static inline int chunk_must_xor(struct stripe_chunk *chunk)
 +{
 +      if (ChunkUptodate(chunk)) {
 +              BUG_ON(!bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)) &&
 +                     !bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED)));
 +
 +              if (!bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)) ||
 +                  !bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED)))
 +                      return 1;
 +
 +              if (StripeReconstruct(chunk->stripe) ||
 +                  StripeRecover(chunk->stripe))
 +                      return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Calculate parity.
 + *
 + * This indexes into the chunks of a stripe and their pages.
 + *
 + * All chunks will be xored into the indexed (@pi)
 + * chunk in maximum groups of xor.chunks.
 + *
 + */
 +static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned max_chunks = rs->xor.chunks, n = 1,
 +               o = sector / SECTORS_PER_PAGE, /* Offset into the page_list. */
 +               p = rs->set.raid_devs;
 +      unsigned long **d = rs->data;
 +      xor_function_t xor_f = rs->xor.f->f;
 +
 +      BUG_ON(sector > stripe->io.size);
 +
 +      /* Address of parity page to xor into. */
 +      d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
 +
 +      while (p--) {
 +              /* Preset pointers to data pages. */
 +              if (p != pi && chunk_must_xor(CHUNK(stripe, p)))
 +                      d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
 +
 +              /* If max chunks -> xor. */
 +              if (n == max_chunks) {
 +                      mutex_lock(&rs->io.xor_lock);
 +                      xor_f(n, d);
 +                      mutex_unlock(&rs->io.xor_lock);
 +                      n = 1;
 +              }
 +      }
 +
 +      /* If chunks -> xor. */
 +      if (n > 1) {
 +              mutex_lock(&rs->io.xor_lock);
 +              xor_f(n, d);
 +              mutex_unlock(&rs->io.xor_lock);
 +      }
 +}
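 +/*
 + * For illustration (hypothetical set): with raid_devs = 8, all non-parity
 + * chunks passing chunk_must_xor() and rs->xor.chunks = 4, the 7 data chunk
 + * pages get xored into d[0] in three xor_f() calls of 3 + 3 + 1 chunks,
 + * since n is reset to 1 (the accumulating parity page) after each run.
 + */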
 +
 +/* Common xor loop through all stripe page lists. */
 +static void common_xor(struct stripe *stripe, sector_t count,
 +                     unsigned off, unsigned pi)
 +{
 +      unsigned sector;
 +
 +      BUG_ON(!count);
 +      for (sector = off; sector < count; sector += SECTORS_PER_PAGE)
 +              xor(stripe, pi, sector);
 +
 +      /* Set parity page uptodate and clean. */
 +      chunk_set(CHUNK(stripe, pi), CLEAN);
 +      atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
 +}
 +
 +/*
 + * Calculate parity sectors on intact stripes.
 + *
 + * Need to calculate the raid address for the recovery stripe, because its
 + * chunk size differs and is typically larger than the io chunk size.
 + */
 +static void parity_xor(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int size_differs = stripe->io.size != rs->set.io_size;
 +      unsigned chunk_size = rs->set.chunk_size, io_size = stripe->io.size,
 +               xor_size = chunk_size > io_size ? io_size : chunk_size;
 +      sector_t off;
 +
 +      /* This can be the recover stripe with a larger io size. */
 +      for (off = 0; off < io_size; off += xor_size) {
 +              /*
 +               * Recover stripe is likely bigger than regular io
 +               * ones and has no precalculated parity disk index ->
 +               * need to calculate RAID address.
 +               */
 +              if (unlikely(size_differs)) {
 +                      struct raid_address addr;
 +
 +                      raid_address(rs, (stripe->key + off) *
 +                                       rs->set.data_devs, &addr);
 +                      stripe->idx.parity = addr.pi;
 +                      stripe_zero_pl_part(stripe, addr.pi, off, xor_size);
 +              }
 +
 +              common_xor(stripe, xor_size, off, stripe->idx.parity);
 +              chunk_set(CHUNK(stripe, stripe->idx.parity), DIRTY);
 +      }
 +}
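 +/*
 + * For illustration (hypothetical numbers): a recovery stripe with
 + * io.size = 64 sectors on a set with chunk_size = 8 runs the loop above
 + * eight times with xor_size = 8, recomputing the rotating parity index
 + * via raid_address() for each 8 sector piece before xoring it.
 + */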
 +
 +/* Reconstruct missing chunk. */
 +static void stripe_reconstruct(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      int p = rs->set.raid_devs, pr = stripe->idx.recover;
 +
 +      BUG_ON(pr < 0);
 +
 +      /* Check if all but the chunk to be reconstructed are uptodate. */
 +      while (p--)
 +              BUG_ON(p != pr && !ChunkUptodate(CHUNK(stripe, p)));
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (RSDegraded(rs) ? S_RECONSTRUCT_EI :
 +                                               S_RECONSTRUCT_DEV));
 +      /* Zero chunk to be reconstructed. */
 +      stripe_zero_chunk(stripe, pr);
 +      common_xor(stripe, stripe->io.size, 0, pr);
 +}
 +
 +/*
 + * Recovery io throttling
 + */
 +/* Conditionally reset io counters. */
 +static int recover_io_reset(struct raid_set *rs)
 +{
 +      unsigned long j = jiffies;
 +
 +      /* Pay attention to jiffies overflows. */
 +      if (j > rs->recover.last_jiffies + HZ ||
 +          j < rs->recover.last_jiffies) {
 +              atomic_set(rs->recover.io_count + IO_WORK, 0);
 +              atomic_set(rs->recover.io_count + IO_RECOVER, 0);
 +              rs->recover.last_jiffies = j;
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Count ios. */
 +static void recover_io_count(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      atomic_inc(rs->recover.io_count +
 +                 (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
 +}
 +
 +/* Try getting a stripe either from the hash or from the LRU list. */
 +static struct stripe *stripe_find(struct raid_set *rs,
 +                                struct raid_address *addr)
 +{
 +      int r;
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +
 +      /* Try stripe from hash. */
 +      stripe = stripe_lookup(sc, addr->key);
 +      if (stripe) {
 +              r = stripe_get(stripe);
 +              if (r)
 +                      goto get_lock_failed;
 +
 +              atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
 +      } else {
 +              /* Not in hash -> try to get an LRU stripe. */
 +              stripe = stripe_lru_pop(sc);
 +              if (stripe) {
 +                      /*
 +                       * An LRU stripe may not be referenced
 +                       * and may never have ios pending!
 +                       */
 +                      BUG_ON(stripe_ref(stripe));
 +                      BUG_ON(stripe_io_ref(stripe));
 +
 +                      /* Remove from hash, if on it, before reuse. */
 +                      stripe_hash_del(stripe);
 +
 +                      /* Invalidate before reinserting with changed key. */
 +                      stripe_invalidate(stripe);
 +
 +                      stripe->key = addr->key;
 +                      stripe->region = dm_rh_sector_to_region(rs->recover.rh,
 +                                                              addr->key);
 +                      stripe->idx.parity = addr->pi;
 +                      r = stripe_get(stripe);
 +                      if (r)
 +                              goto get_lock_failed;
 +
 +                      /* Insert stripe into the stripe hash. */
 +                      stripe_insert(&sc->hash, stripe);
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_INSCACHE);
 +              }
 +      }
 +
 +      return stripe;
 +
 +get_lock_failed:
 +      stripe_put(stripe);
 +      return NULL;
 +}
 +
 +/*
 + * Process end io
 + *
 + * This has to be done here, because it can't be done in interrupt context.
 + */
 +/* End io all bios on a bio list. */
 +static void bio_list_endio(struct stripe *stripe, struct bio_list *bl,
 +                         int p, int error)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct bio *bio;
 +      struct page_list *pl = PL(stripe, p);
 +      struct stripe_chunk *chunk = CHUNK(stripe, p);
 +
 +      /* Update region counters. */
 +      while ((bio = bio_list_pop(bl))) {
 +              if (bio_data_dir(bio) == WRITE)
 +                      /* Drop io pending count for any writes. */
 +                      dm_rh_dec(rs->recover.rh, stripe->region);
 +              else if (!error)
 +                      /* Copy data across. */
 +                      bio_copy_page_list(READ, stripe, pl, bio);
 +
 +              bio_endio(bio, error);
 +
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + (bio_data_dir(bio) == READ ?
 +                         S_BIOS_ENDIO_READ : S_BIOS_ENDIO_WRITE));
 +
 +              chunk_put(chunk);
 +              stripe_put(stripe);
 +              io_put(rs);     /* Wake any suspend waiters on last bio. */
 +      }
 +}
 +
 +/*
 + * End io all reads/writes on a stripe, copying
 + * read data across from stripe to bios and
 + * decrementing region counters for writes.
 + *
 + * Processing of ios depending on state:
 + * o no chunk error -> endio ok
 + * o degraded:
 + *   - chunk error and read -> ignore to be requeued
 + *   - chunk error and write -> endio ok
 + * o dead (more than parity_devs failed) and chunk error -> endio failed
 + */
 +static void stripe_endio(int rw, struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned p = rs->set.raid_devs;
 +      int write = (rw != READ);
 +
 +      while (p--) {
 +              struct stripe_chunk *chunk = CHUNK(stripe, p);
 +              struct bio_list *bl;
 +
 +              BUG_ON(ChunkLocked(chunk));
 +
 +              bl = BL_CHUNK(chunk, rw);
 +              if (bio_list_empty(bl))
 +                      continue;
 +
 +              if (unlikely(ChunkError(chunk) || !ChunkUptodate(chunk))) {
 +                      /* RAID set dead. */
 +                      if (unlikely(RSDead(rs)))
 +                              bio_list_endio(stripe, bl, p, -EIO);
 +                      /* RAID set degraded. */
 +                      else if (write)
 +                              bio_list_endio(stripe, bl, p, 0);
 +              } else {
 +                      BUG_ON(!RSDegraded(rs) && ChunkDirty(chunk));
 +                      bio_list_endio(stripe, bl, p, 0);
 +              }
 +      }
 +}
 +
 +/* Fail all ios hanging off all bio lists of a stripe. */
 +static void stripe_fail_io(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned p = rs->set.raid_devs;
 +
 +      while (p--) {
 +              struct stripe_chunk *chunk = CHUNK(stripe, p);
 +              int i = ARRAY_SIZE(chunk->bl);
 +
 +              /* Fail all bios on all bio lists of the stripe. */
 +              while (i--) {
 +                      struct bio_list *bl = chunk->bl + i;
 +
 +                      if (!bio_list_empty(bl))
 +                              bio_list_endio(stripe, bl, p, -EIO);
 +              }
 +      }
 +
 +      /* Put stripe on LRU list. */
 +      BUG_ON(stripe_io_ref(stripe));
 +      BUG_ON(stripe_ref(stripe));
 +}
 +
 +/* Unlock all required chunks. */
 +static void stripe_chunks_unlock(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +      struct stripe_chunk *chunk;
 +
 +      while (p--) {
 +              chunk = CHUNK(stripe, p);
 +
 +              if (TestClearChunkUnlock(chunk))
 +                      ClearChunkLocked(chunk);
 +      }
 +}
 +
 +/*
 + * Queue reads and writes to a stripe by hanging
 + * their bios off the stripe set's read/write lists.
 + */
 +static int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
 +                          struct bio_list *reject)
 +{
 +      struct raid_address addr;
 +      struct stripe *stripe;
 +
 +      stripe = stripe_find(rs, raid_address(rs, bio->bi_sector, &addr));
 +      if (stripe) {
 +              int r = 0, rw = bio_data_dir(bio);
 +
 +              /* Distinguish reads and writes. */
 +              bio_list_add(BL(stripe, addr.di, rw), bio);
 +
 +              if (rw == READ)
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_BIOS_ADDED_READ);
 +              else {
 +                      /* Increment pending write count on region. */
 +                      dm_rh_inc(rs->recover.rh, stripe->region);
 +                      r = 1;
 +
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_BIOS_ADDED_WRITE);
 +              }
 +
 +              /*
 +               * Put on io (flush) list in case of
 +               * initial bio queued to chunk.
 +               */
 +              if (chunk_get(CHUNK(stripe, addr.di)) == 1)
 +                      stripe_flush_add(stripe);
 +
 +              return r;
 +      }
 +
 +      /* Got no stripe from cache or failed to lock it -> reject bio. */
 +      bio_list_add(reject, bio);
 +      atomic_inc(rs->stats + S_IOS_POST); /* REMOVEME: statistics. */
 +      return 0;
 +}
 +
 +/*
 + * Handle all stripes by handing them to the daemon, because we can't
 + * map their chunk pages to copy the data in interrupt context.
 + *
 + * We don't want to handle them here either, while interrupts are disabled.
 + */
 +
 +/* Read/write endio function for dm-io (interrupt context). */
 +static void endio(unsigned long error, void *context)
 +{
 +      struct stripe_chunk *chunk = context;
 +
 +      if (unlikely(error)) {
 +              chunk_set(chunk, ERROR);
 +              /* REMOVEME: statistics. */
 +              atomic_inc(RS(chunk->stripe->sc)->stats + S_STRIPE_ERROR);
 +      } else
 +              chunk_set(chunk, CLEAN);
 +
 +      /*
 +       * For recovery stripes, we need to reset ChunkLocked
 +       * here, because those aren't processed in do_endios().
 +       */
 +      if (unlikely(StripeRecover(chunk->stripe)))
 +              ClearChunkLocked(chunk);
 +      else
 +              SetChunkUnlock(chunk);
 +
 +      /* Indirectly puts stripe on cache's endio list via stripe_io_put(). */
 +      stripe_put_references(chunk->stripe);
 +}
 +
 +/* Read/Write a chunk asynchronously. */
 +static void stripe_chunk_rw(struct stripe *stripe, unsigned p)
 +{
 +      struct stripe_cache *sc = stripe->sc;
 +      struct raid_set *rs = RS(sc);
 +      struct dm_mem_cache_object *obj = stripe->obj + p;
 +      struct page_list *pl = obj->pl;
 +      struct stripe_chunk *chunk = CHUNK(stripe, p);
 +      struct raid_dev *dev = rs->dev + p;
 +      struct dm_io_region io = {
 +              .bdev = dev->dev->bdev,
 +              .sector = stripe->key,
 +              .count = stripe->io.size,
 +      };
 +      struct dm_io_request control = {
 +              .bi_rw = ChunkDirty(chunk) ? WRITE : READ,
 +              .mem = {
 +                      .type = DM_IO_PAGE_LIST,
 +                      .ptr.pl = pl,
 +                      .offset = 0,
 +              },
 +              .notify = {
 +                      .fn = endio,
 +                      .context = chunk,
 +              },
 +              .client = StripeRecover(stripe) ? rs->recover.dm_io_client :
 +                                                sc->dm_io_client,
 +      };
 +
 +      BUG_ON(ChunkLocked(chunk));
 +      BUG_ON(!ChunkUptodate(chunk) && ChunkDirty(chunk));
 +      BUG_ON(ChunkUptodate(chunk) && !ChunkDirty(chunk));
 +
 +      /*
 +       * Don't rw past the end of the device, which can happen because
 +       * typically sectors_per_dev isn't divisible by io_size.
 +       */
 +      if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
 +              io.count = rs->set.sectors_per_dev - io.sector;
 +
 +      BUG_ON(!io.count);
 +      io.sector += dev->start;        /* Add <offset>. */
 +      if (RSRecover(rs))
 +              recover_io_count(stripe);       /* Recovery io accounting. */
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(rs->stats + (ChunkDirty(chunk) ? S_DM_IO_WRITE :
 +                                                  S_DM_IO_READ));
 +      SetChunkLocked(chunk);
 +      SetDevIoQueued(dev);
 +      BUG_ON(dm_io(&control, 1, &io, NULL));
 +}
 +
 +/*
 + * Write dirty or read not uptodate page lists of a stripe.
 + */
 +static int stripe_chunks_rw(struct stripe *stripe)
 +{
 +      int r;
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      /*
 +       * Increment the pending count on the stripe
 +       * first, so that we don't race in endio().
 +       *
 +       * An inc (IO) is needed for any chunk unless !ChunkIo(chunk):
 +       *
 +       * o not uptodate
 +       * o dirtied by writes merged
 +       * o dirtied by parity calculations
 +       */
 +      r = for_each_io_dev(stripe, stripe_get_references);
 +      if (r) {
 +              /* Io needed: chunks are either not uptodate or dirty. */
 +              int max;        /* REMOVEME: */
 +              struct stripe_cache *sc = &rs->sc;
 +
 +              /* Submit actual io. */
 +              for_each_io_dev(stripe, stripe_chunk_rw);
 +
 +              /* REMOVEME: statistics */
 +              max = sc_active(sc);
 +              if (atomic_read(&sc->active_stripes_max) < max)
 +                      atomic_set(&sc->active_stripes_max, max);
 +
 +              atomic_inc(rs->stats + S_FLUSHS);
 +              /* END REMOVEME: statistics */
 +      }
 +
 +      return r;
 +}
 +
 +/* Merge in all writes hence dirtying respective chunks. */
 +static void stripe_merge_writes(struct stripe *stripe)
 +{
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--) {
 +              struct stripe_chunk *chunk = CHUNK(stripe, p);
 +              struct bio_list *write = BL_CHUNK(chunk, WRITE_QUEUED);
 +
 +              if (!bio_list_empty(write)) {
 +                      struct bio *bio;
 +                      struct page_list *pl = stripe->obj[p].pl;
 +
 +                      /*
 +                       * We can play with the lists without holding a lock,
 +                       * because it is just us accessing them anyway.
 +                       */
 +                      bio_list_for_each(bio, write)
 +                              bio_copy_page_list(WRITE, stripe, pl, bio);
 +
 +                      bio_list_merge(BL_CHUNK(chunk, WRITE_MERGED), write);
 +                      bio_list_init(write);
 +                      chunk_set(chunk, DIRTY);
 +              }
 +      }
 +}
 +
 +/* Queue all writes to get merged. */
 +static int stripe_queue_writes(struct stripe *stripe)
 +{
 +      int r = 0;
 +      unsigned p = RS(stripe->sc)->set.raid_devs;
 +
 +      while (p--) {
 +              struct stripe_chunk *chunk = CHUNK(stripe, p);
 +              struct bio_list *write = BL_CHUNK(chunk, WRITE);
 +
 +              if (!bio_list_empty(write)) {
 +                      bio_list_merge(BL_CHUNK(chunk, WRITE_QUEUED), write);
 +                      bio_list_init(write);
 +                      SetChunkIo(chunk);
 +                      r = 1;
 +              }
 +      }
 +
 +      return r;
 +}
 +
 +/* Check if a chunk gets completely overwritten. */
 +static int stripe_check_chunk_overwrite(struct stripe *stripe, unsigned p)
 +{
 +      unsigned sectors = 0;
 +      struct bio *bio;
 +      struct bio_list *bl = BL(stripe, p, WRITE_QUEUED);
 +
 +      bio_list_for_each(bio, bl)
 +              sectors += bio_sectors(bio);
 +
 +      BUG_ON(sectors > RS(stripe->sc)->set.io_size);
 +      return sectors == RS(stripe->sc)->set.io_size;
 +}
 +
 +/*
 + * Avoid io on a broken/reconstructed drive in order to
 + * reconstruct data on endio.
 + *
 + * (*1*) We set StripeReconstruct() in here, so that _do_endios()
 + *     will trigger a reconstruct call before resetting it.
 + */
 +static int stripe_chunk_set_io_flags(struct stripe *stripe, int pr)
 +{
 +      struct stripe_chunk *chunk = CHUNK(stripe, pr);
 +
 +      /*
 +       * Allow io on all chunks but the indexed one,
 +       * because we're either degraded, or we prohibit io
 +       * on that one chunk for later reconstruction.
 +       */
 +      /* Includes ClearChunkIo(), ClearChunkUptodate(). */
 +      stripe_chunk_invalidate(chunk);
 +      stripe->idx.recover = pr;
 +      SetStripeReconstruct(stripe);
 +
 +      /* REMOVEME: statistics. */
 +      atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
 +      return -EPERM;
 +}
 +
 +/* Chunk locked/uptodate and device failed tests. */
 +static struct stripe_chunk *
 +stripe_chunk_check(struct stripe *stripe, unsigned p, unsigned *chunks_uptodate)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct stripe_chunk *chunk = CHUNK(stripe, p);
 +
 +      /* Can't access active chunks. */
 +      if (ChunkLocked(chunk)) {
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_CHUNK_LOCKED);
 +              return NULL;
 +      }
 +
 +      /* Can't access broken device. */
 +      if (ChunkError(chunk) || DevFailed(rs->dev + p))
 +              return NULL;
 +
 +      /* Can access uptodate chunks. */
 +      if (ChunkUptodate(chunk)) {
 +              (*chunks_uptodate)++;
 +              return NULL;
 +      }
 +
 +      return chunk;
 +}
 +
 +/*
 + * Degraded/reconstruction mode.
 + *
 + * Check stripe state to figure which chunks don't need IO.
 + *
 + * Returns 0 for fully operational, -EBUSY while a reconstruction is
 + * already set up and -EPERM for degraded/resynchronizing.
 + */
 +static int stripe_check_reconstruct(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      if (RSDead(rs)) {
 +              ClearStripeReconstruct(stripe);
 +              ClearStripeReconstructed(stripe);
 +              stripe_allow_io(stripe);
 +              return 0;
 +      }
 +
 +      /* Avoid further reconstruction setting, when already set. */
 +      if (StripeReconstruct(stripe)) {
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_RECONSTRUCT_SET);
 +              return -EBUSY;
 +      }
 +
 +      /* Initially allow io on all chunks. */
 +      stripe_allow_io(stripe);
 +
 +      /* Return if stripe is already reconstructed. */
 +      if (StripeReconstructed(stripe)) {
 +              atomic_inc(rs->stats + S_RECONSTRUCTED);
 +              return 0;
 +      }
 +
 +      /*
 +       * Degraded/reconstruction mode (device failed) ->
 +       * avoid io on the failed device.
 +       */
 +      if (unlikely(RSDegraded(rs))) {
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + S_DEGRADED);
 +              /* Allow IO on all devices but the dead one. */
 +              BUG_ON(rs->set.ei < 0);
 +              return stripe_chunk_set_io_flags(stripe, rs->set.ei);
 +      } else {
 +              int sync, pi = dev_for_parity(stripe, &sync);
 +
 +              /*
 +               * Reconstruction mode (ie. a particular (replaced) device or
 +               * some (rotating) parity chunk is being resynchronized) ->
 +               *   o make sure all needed chunks are read in
 +               *   o cope with 3/4 disk array special case where it
 +               *     doesn't make a difference to read in parity
 +               *     to xor data in/out
 +               */
 +              if (RSEnforceParityCreation(rs) || !sync) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_NOSYNC);
 +                      /* Allow IO on all devs but the one to reconstruct. */
 +                      return stripe_chunk_set_io_flags(stripe, pi);
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Check if a stripe is ready to merge writes,
 + * i.e. if all chunks are present to allow bios to be merged.
 + *
 + * We prohibit io on:
 + *
 + * o chunks without bios
 + * o chunks which get completely written over
 + */
 +static int stripe_merge_possible(struct stripe *stripe, int nosync)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned chunks_overwrite = 0, chunks_prohibited = 0,
 +               chunks_uptodate = 0, p = rs->set.raid_devs;
 +
 +      /* Walk all chunks. */
 +      while (p--) {
 +              struct stripe_chunk *chunk;
 +
 +              /* Prohibit io on broken devices. */
 +              if (DevFailed(rs->dev + p)) {
 +                      chunk = CHUNK(stripe, p);
 +                      goto prohibit_io;
 +              }
 +
 +              /* We can't optimize any further if no chunk. */
 +              chunk = stripe_chunk_check(stripe, p, &chunks_uptodate);
 +              if (!chunk || nosync)
 +                      continue;
 +
 +              /*
 +               * We have a chunk, which is not uptodate.
 +               *
 +               * If this is not parity and we don't have
 +               * reads queued, we can optimize further.
 +               */
 +              if (p != stripe->idx.parity &&
 +                  bio_list_empty(BL_CHUNK(chunk, READ)) &&
 +                  bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED))) {
 +                      if (bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)))
 +                              goto prohibit_io;
 +                      else if (RSCheckOverwrite(rs) &&
 +                               stripe_check_chunk_overwrite(stripe, p))
 +                              /* Completely overwritten chunk. */
 +                              chunks_overwrite++;
 +              }
 +
 +              /* Allow io for chunks with bios and overwritten ones. */
 +              SetChunkIo(chunk);
 +              continue;
 +
 +prohibit_io:
 +              /* No io for broken devices or for chunks w/o bios. */
 +              ClearChunkIo(chunk);
 +              chunks_prohibited++;
 +              /* REMOVEME: statistics. */
 +              atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
 +      }
 +
 +      /* All data chunks will get written over. */
 +      if (chunks_overwrite == rs->set.data_devs)
 +              atomic_inc(rs->stats + S_OVERWRITE); /* REMOVEME: statistics.*/
 +      else if (chunks_uptodate + chunks_prohibited < rs->set.raid_devs) {
 +              /* We don't have enough chunks to merge. */
 +              atomic_inc(rs->stats + S_CANT_MERGE); /* REMOVEME: statistics.*/
 +              return -EPERM;
 +      }
 +
 +      /*
 +       * If we have all chunks up to date or overwrite them, we
 +       * just zero the parity chunk and let stripe_rw() recreate it.
 +       */
 +      if (chunks_uptodate == rs->set.raid_devs ||
 +          chunks_overwrite == rs->set.data_devs) {
 +              stripe_zero_chunk(stripe, stripe->idx.parity);
 +              BUG_ON(StripeReconstruct(stripe));
 +              SetStripeReconstruct(stripe);   /* Enforce xor in caller. */
 +      } else {
 +              /*
 +               * With less chunks, we xor parity out.
 +               *
 +               * (*4*) We rely on !StripeReconstruct() in chunk_must_xor(),
 +               *       so that only chunks with queued or merged writes
 +               *       are being xored.
 +               */
 +              parity_xor(stripe);
 +      }
 +
 +      /*
 +       * We do have enough chunks to merge.
 +       * All chunks are uptodate or get written over.
 +       */
 +      atomic_inc(rs->stats + S_CAN_MERGE); /* REMOVEME: statistics. */
 +      return 0;
 +}
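 +/*
 + * For illustration (hypothetical 4 device set, 3 data + 1 parity): with
 + * only 2 chunks uptodate and 1 prohibited, 2 + 1 < 4 -> -EPERM, no merge.
 + * If all 3 data chunks get completely overwritten (chunks_overwrite ==
 + * data_devs) or all 4 chunks are uptodate, the parity chunk is zeroed and
 + * recreated by a full xor in the caller; otherwise (fewer chunks uptodate,
 + * the rest prohibited), parity_xor() xors just the chunks with queued or
 + * merged writes out of parity and back in.
 + */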
 +
 +/*
 + * Avoid reading chunks in case we're fully operational.
 + *
 + * We prohibit io on any chunks without bios but the parity chunk.
 + */
 +static void stripe_avoid_reads(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      unsigned dummy = 0, p = rs->set.raid_devs;
 +
 +      /* Walk all chunks. */
 +      while (p--) {
 +              struct stripe_chunk *chunk =
 +                      stripe_chunk_check(stripe, p, &dummy);
 +
 +              if (!chunk)
 +                      continue;
 +
 +              /* If parity or any bios pending -> allow io. */
 +              if (chunk_ref(chunk) || p == stripe->idx.parity)
 +                      SetChunkIo(chunk);
 +              else {
 +                      ClearChunkIo(chunk);
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
 +              }
 +      }
 +}
 +
 +/*
 + * Read/write a stripe.
 + *
 + * All stripe read/write activity goes through this function
 + * unless recovery, which has to call stripe_chunk_rw() directly.
 + *
 + * Make sure we don't try already merged stripes in order
 + * to avoid data corruption.
 + *
 + * Check the state of the RAID set and if degraded (or
 + * resynchronizing for reads), read in all other chunks but
 + * the one on the dead/resynchronizing device in order to be
 + * able to reconstruct the missing one in _do_endios().
 + *
 + * Can be called on active stripes in order
 + * to dispatch new io on inactive chunks.
 + *
 + * States to cover:
 + *   o stripe to read and/or write
 + *   o stripe with error to reconstruct
 + */
 +static int stripe_rw(struct stripe *stripe)
 +{
 +      int nosync, r;
 +      struct raid_set *rs = RS(stripe->sc);
 +
 +      /*
 +       * Check, if a chunk needs to be reconstructed
 +       * because of a degraded set or a region out of sync.
 +       */
 +      nosync = stripe_check_reconstruct(stripe);
 +      switch (nosync) {
 +      case -EBUSY:
 +              return 0; /* Wait for stripe reconstruction to finish. */
 +      case -EPERM:
 +              goto io;
 +      }
 +
 +      /*
 +       * If we don't have merged writes pending, we can schedule
 +       * queued writes to be merged next without corrupting data.
 +       */
 +      if (!StripeMerged(stripe)) {
 +              r = stripe_queue_writes(stripe);
 +              if (r)
 +                      /* Writes got queued -> flag RBW. */
 +                      SetStripeRBW(stripe);
 +      }
 +
 +      /*
 +       * Merge all writes hanging off uptodate/overwritten
 +       * chunks of the stripe.
 +       */
 +      if (StripeRBW(stripe)) {
 +              r = stripe_merge_possible(stripe, nosync);
 +              if (!r) { /* Merge possible. */
 +                      struct stripe_chunk *chunk;
 +
 +                      /*
 +                       * I rely on valid parity in order
 +                       * to xor a fraction of chunks out
 +                       * of parity and back in.
 +                       */
 +                      stripe_merge_writes(stripe);    /* Merge writes in. */
 +                      parity_xor(stripe);             /* Update parity. */
 +                      ClearStripeReconstruct(stripe); /* Reset xor enforce. */
 +                      SetStripeMerged(stripe);        /* Writes merged. */
 +                      ClearStripeRBW(stripe);         /* Disable RBW. */
 +
 +                      /*
 +                       * REMOVEME: sanity check on parity chunk
 +                       *           states after writes got merged.
 +                       */
 +                      chunk = CHUNK(stripe, stripe->idx.parity);
 +                      BUG_ON(ChunkLocked(chunk));
 +                      BUG_ON(!ChunkUptodate(chunk));
 +                      BUG_ON(!ChunkDirty(chunk));
 +                      BUG_ON(!ChunkIo(chunk));
 +              }
 +      } else if (!nosync && !StripeMerged(stripe))
 +              /* Read avoidance if not degraded/resynchronizing/merged. */
 +              stripe_avoid_reads(stripe);
 +
 +io:
 +      /* Now submit any reads/writes for non-uptodate or dirty chunks. */
 +      r = stripe_chunks_rw(stripe);
 +      if (!r) {
 +              /*
 +               * No io submitted, because chunk io is prohibited
 +               * or chunks are locked/devices have failed
 +               * -> push to end io list for processing.
 +               */
 +              stripe_endio_push(stripe);
 +              atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
 +      }
 +
 +      return r;
 +}
 +
 +/*
 + * Recovery functions
 + */
 +/* Read a stripe off a raid set for recovery. */
 +static int stripe_recover_read(struct stripe *stripe, int pi)
 +{
 +      BUG_ON(stripe_io_ref(stripe));
 +
 +      /* Invalidate all chunks so that they get read in. */
 +      stripe_chunks_invalidate(stripe);
 +      stripe_allow_io(stripe); /* Allow io on all recovery chunks. */
 +
 +      /*
 +       * If we are reconstructing a particular device, we can avoid
 +       * reading the respective chunk in, because we're going to
 +       * reconstruct it anyway.
 +       *
 +       * We can't do that for resynchronization of rotating parity,
 +       * because the recovery stripe chunk size is typically larger
 +       * than the set's chunk size.
 +       */
 +      if (pi > -1)
 +              ClearChunkIo(CHUNK(stripe, pi));
 +
 +      return stripe_chunks_rw(stripe);
 +}
 +
 +/* Write a stripe to a raid set for recovery. */
 +static int stripe_recover_write(struct stripe *stripe, int pi)
 +{
 +      BUG_ON(stripe_io_ref(stripe));
 +
 +      /*
 +       * If this is a reconstruct of a particular device, then
 +       * reconstruct the respective chunk, else create parity chunk.
 +       */
 +      if (pi > -1) {
 +              stripe_zero_chunk(stripe, pi);
 +              common_xor(stripe, stripe->io.size, 0, pi);
 +              chunk_set(CHUNK(stripe, pi), DIRTY);
 +      } else
 +              parity_xor(stripe);
 +
 +      return stripe_chunks_rw(stripe);
 +}
 +
 +/* Read/write a recovery stripe. */
 +static int stripe_recover_rw(struct stripe *stripe)
 +{
 +      int r = 0, sync = 0;
 +
 +      /* Read/write flip-flop. */
 +      if (TestClearStripeRBW(stripe)) {
 +              SetStripeMerged(stripe);
 +              stripe->key = stripe->recover->pos;
 +              r = stripe_recover_read(stripe, dev_for_parity(stripe, &sync));
 +              BUG_ON(!r);
 +      } else if (TestClearStripeMerged(stripe)) {
 +              r = stripe_recover_write(stripe, dev_for_parity(stripe, &sync));
 +              BUG_ON(!r);
 +      }
 +
 +      BUG_ON(sync);
 +      return r;
 +}
 +
 +/* Recovery bandwidth available? */
 +static int recover_bandwidth(struct raid_set *rs)
 +{
 +      int r, work;
 +
 +      /* On reset or when bios delayed -> allow recovery. */
 +      r = recover_io_reset(rs);
 +      if (r || RSBandwidth(rs))
 +              goto out;
 +
 +      work = atomic_read(rs->recover.io_count + IO_WORK);
 +      if (work) {
 +              /* Pay attention to larger recover stripe size. */
 +              int recover = atomic_read(rs->recover.io_count + IO_RECOVER) *
 +                                        rs->recover.io_size / rs->set.io_size;
 +
 +              /*
 +               * Don't use more than given bandwidth
 +               * of the work io for recovery.
 +               */
 +              if (recover > work / rs->recover.bandwidth_work) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_NO_BANDWIDTH);
 +                      return 0;
 +              }
 +      }
 +
 +out:
 +      atomic_inc(rs->stats + S_BANDWIDTH);    /* REMOVEME: statistics. */
 +      return 1;
 +}
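 +/*
 + * For illustration (hypothetical numbers): with recover.io_size = 64,
 + * set.io_size = 8 and bandwidth_work = 10, 5 recovery ios in the current
 + * second count as 5 * 64 / 8 = 40 work-sized ios; recovery is throttled
 + * (S_NO_BANDWIDTH) whenever 40 > work / 10, i.e. unless at least 400
 + * regular work ios happened in the same interval.
 + */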
 +
 +/* Try to get a region to recover. */
 +static int stripe_recover_get_region(struct stripe *stripe)
 +{
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct recover *rec = &rs->recover;
 +      struct recover_addr *addr = stripe->recover;
 +      struct dm_dirty_log *dl = rec->dl;
 +      struct dm_rh_client *rh = rec->rh;
 +
 +      BUG_ON(!dl);
 +      BUG_ON(!rh);
 +
 +      /* Report an existing region first, so it gets finished during suspension. */
 +      if (addr->reg)
 +              return 1;
 +
 +      if (RSSuspend(rs))
 +              return -EPERM;
 +
 +      if (dl->type->get_sync_count(dl) >= rec->nr_regions)
 +              return -ENOENT;
 +
 +      /* If we don't have enough bandwidth, we don't proceed recovering. */
 +      if (!recover_bandwidth(rs))
 +              return -EAGAIN;
 +
 +      /* Start quiescing a region. */
 +      dm_rh_recovery_prepare(rh);
 +      addr->reg = dm_rh_recovery_start(rh);
 +      if (!addr->reg)
 +              return -EAGAIN;
 +
 +      addr->pos = dm_rh_region_to_sector(rh, dm_rh_get_region_key(addr->reg));
 +      addr->end = addr->pos + dm_rh_get_region_size(rh);
 +
 +      /*
 +       * Take one global io reference out for the
 +       * whole region, which is going to be released
 +       * when the region is completely done with.
 +       */
 +      io_get(rs);
 +      return 0;
 +}
 +
 +/* Update region hash state. */
 +enum recover_type { REC_FAILURE = 0, REC_SUCCESS = 1 };
 +static void recover_rh_update(struct stripe *stripe, enum recover_type success)
 +{
 +      struct recover_addr *addr = stripe->recover;
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct recover *rec = &rs->recover;
 +
 +      if (!addr->reg) {
 +              DMERR("%s- Called w/o region", __func__);
 +              return;
 +      }
 +
 +      dm_rh_recovery_end(addr->reg, success);
 +      if (success)
 +              rec->nr_regions_recovered++;
 +
 +      addr->reg = NULL;
 +
 +      /*
 +       * Completely done with this region ->
 +       * release the 1st io reference.
 +       */
 +      io_put(rs);
 +}
 +
 +/* Set start of recovery state. */
 +static void set_start_recovery(struct raid_set *rs)
 +{
 +      /* Initialize recovery. */
 +      rs->recover.start_jiffies = jiffies;
 +      rs->recover.end_jiffies = 0;
 +}
 +
 +/* Set end of recovery state. */
 +static void set_end_recovery(struct raid_set *rs)
 +{
 +      ClearRSRecover(rs);
 +/* Attention: do not reset this any more -> 'i' stays in the status output and userspace could be relying on it disappearing!!!! */
 +      rs->set.dev_to_init = -1;
 +
 +      /* Check for jiffies overrun. */
 +      rs->recover.end_jiffies = jiffies;
 +      if (rs->recover.end_jiffies < rs->recover.start_jiffies)
 +              rs->recover.end_jiffies = ~0;
 +}
 +
 +/* Handle recovery on one recovery stripe. */
 +static int _do_recovery(struct stripe *stripe)
 +{
 +      int r;
 +      struct raid_set *rs = RS(stripe->sc);
 +      struct recover_addr *addr = stripe->recover;
 +
 +      /* If recovery is active -> return. */
 +      if (stripe_io_ref(stripe))
 +              return 1;
 +
 +      /* IO error is fatal for recovery -> stop it. */
 +      if (unlikely(StripeError(stripe)))
 +              goto err;
 +
 +      /* Recovery end required. */
 +      if (unlikely(RSDegraded(rs)))
 +              goto err;
 +
 +      /* Get a region to recover. */
 +      r = stripe_recover_get_region(stripe);
 +      switch (r) {
 +      case 0: /* Got a new region: flag initial read before write. */
 +              SetStripeRBW(stripe);
 +      case 1: /* Have a region in the works. */
 +              break;
 +      case -EAGAIN:
 +              /* No bandwidth/quiesced region yet, try later. */
 +              if (!io_ref(rs))
 +                      wake_do_raid_delayed(rs, HZ / 4);
 +      case -EPERM:
 +              /* Suspend. */
 +              return 1;
 +      case -ENOENT:   /* No more regions to recover. */
 +              schedule_work(&rs->io.ws_do_table_event);
 +              return 0;
 +      default:
 +              BUG();
 +      }
 +
 +      /* Read/write a recover stripe. */
 +      r = stripe_recover_rw(stripe);
 +      if (r)
 +              /* IO initiated. */
 +              return 1;
 +
 +      /* Read and write finished -> update recovery position within region. */
 +      addr->pos += stripe->io.size;
 +
 +      /* If we're at end of region, update region hash. */
 +      if (addr->pos >= addr->end ||
 +          addr->pos >= rs->set.sectors_per_dev)
 +              recover_rh_update(stripe, REC_SUCCESS);
 +      else
 +              /* Prepare to read next region segment. */
 +              SetStripeRBW(stripe);
 +
 +      /* Schedule myself for another round... */
 +      wake_do_raid(rs);
 +      return 1;
 +
 +err:
 +      /* FIXME: rather try recovering other regions on error? */
 +      rs_check_degrade(stripe);
 +      recover_rh_update(stripe, REC_FAILURE);
 +
 +      /* Check state of partially recovered array. */
 +      if (RSDegraded(rs) && !RSDead(rs) &&
 +          rs->set.dev_to_init != -1 &&
 +          rs->set.ei != rs->set.dev_to_init) {
 +              /* Broken drive != drive to recover -> FATAL. */
 +              SetRSDead(rs);
 +              DMERR("FATAL: failed device != device to initialize -> "
 +                    "RAID set broken");
 +      }
 +
 +      if (StripeError(stripe) || RSDegraded(rs)) {
 +              char buf[BDEVNAME_SIZE];
 +
 +              DMERR("stopping recovery due to "
 +                    "ERROR on /dev/%s, stripe at offset %llu",
 +                    bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
 +                    (unsigned long long) stripe->key);
 +      }
 +
 +      /* Make sure that all quiesced regions get released. */
 +      while (addr->reg) {
 +              dm_rh_recovery_end(addr->reg, -EIO);
 +              addr->reg = dm_rh_recovery_start(rs->recover.rh);
 +      }
 +
 +      return 0;
 +}
 +
 +/* Called by main io daemon to recover regions. */
 +static int do_recovery(struct raid_set *rs)
 +{
 +      if (RSRecover(rs)) {
 +              int r = 0;
 +              struct stripe *stripe;
 +
 +              list_for_each_entry(stripe, &rs->recover.stripes,
 +                                  lists[LIST_RECOVER])
 +                      r += _do_recovery(stripe);
 +
 +              if (r)
 +                      return r;
 +
 +              set_end_recovery(rs);
 +              stripe_recover_free(rs);
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * END recovery functions
 + */
 +
 +/* End io process all stripes handed in by endio() callback. */
 +static void _do_endios(struct raid_set *rs, struct stripe *stripe,
 +                     struct list_head *flush_list)
 +{
 +      /* First unlock all required chunks. */
 +      stripe_chunks_unlock(stripe);
 +
 +      /*
 +       * If an io error on a stripe occurred, degrade the RAID set
 +       * and try to endio as many bios as possible. If any bios can't
 +       * be endio processed, requeue the stripe (stripe_ref() != 0).
 +       */
 +      if (TestClearStripeError(stripe)) {
 +              /*
 +               * FIXME: if read, rewrite the failed chunk after reconstruction
 +               *        in order to trigger disk bad sector relocation.
 +               */
 +              rs_check_degrade(stripe); /* Resets ChunkError(). */
 +              ClearStripeReconstruct(stripe);
 +              ClearStripeReconstructed(stripe);
 +
 +              /*
 +               * FIXME: if write, don't endio writes in flight and don't
 +               *        allow for new writes until userspace has updated
 +               *        its metadata.
 +               */
 +      }
 +
 +      /* Got to reconstruct a missing chunk. */
 +      if (StripeReconstruct(stripe)) {
 +              /*
 +               * (*2*) We use StripeReconstruct() to allow for
 +               *       all chunks to be xored into the reconstructed
 +               *       one (see chunk_must_xor()).
 +               */
 +              stripe_reconstruct(stripe);
 +
 +              /*
 +               * (*3*) Now we reset StripeReconstruct() and flag
 +               *       StripeReconstructed() to show to stripe_rw(),
 +               *       that we have reconstructed a missing chunk.
 +               */
 +              ClearStripeReconstruct(stripe);
 +              SetStripeReconstructed(stripe);
 +
 +              /* FIXME: reschedule to be written in case of read. */
 +              /* if (!RSDead && RSDegraded(rs) !StripeRBW(stripe)) {
 +                      chunk_set(CHUNK(stripe, stripe->idx.recover), DIRTY);
 +                      stripe_chunks_rw(stripe);
 +              } */
 +
 +              stripe->idx.recover = -1;
 +      }
 +
 +      /*
 +       * Now that we eventually got a complete stripe, we
 +       * can process the rest of the end ios on reads.
 +       */
 +      stripe_endio(READ, stripe);
 +
 +      /* End io all merged writes if not prohibited. */
 +      if (!RSProhibitWrites(rs) && StripeMerged(stripe)) {
 +              ClearStripeMerged(stripe);
 +              stripe_endio(WRITE_MERGED, stripe);
 +      }
 +
 +      /* If RAID set is dead -> fail any ios to dead drives. */
 +      if (RSDead(rs)) {
 +              if (!TestSetRSDeadEndioMessage(rs))
 +                      DMERR("RAID set dead: failing ios to dead devices");
 +
 +              stripe_fail_io(stripe);
 +      }
 +
 +      /*
 +       * We still have stripe references,
 +       * because of reads before writes or IO errors ->
 +       * got to put on flush list for processing.
 +       */
 +      if (stripe_ref(stripe)) {
 +              BUG_ON(!list_empty(stripe->lists + LIST_LRU));
 +              list_add_tail(stripe->lists + LIST_FLUSH, flush_list);
 +              atomic_inc(rs->stats + S_REQUEUE); /* REMOVEME: statistics. */
 +      } else
 +              stripe_lru_add(stripe);
 +}
 +
 +/* Pop any endio stripes off the endio list and process them. */
 +static void do_endios(struct raid_set *rs)
 +{
 +      struct stripe_cache *sc = &rs->sc;
 +      struct stripe *stripe;
 +      /* IO flush list for sorted requeued stripes. */
 +      struct list_head flush_list;
 +
 +      INIT_LIST_HEAD(&flush_list);
 +
 +      while ((stripe = stripe_endio_pop(sc))) {
 +              /* Avoid endio on stripes with newly io'ed chunks. */
 +              if (!stripe_io_ref(stripe))
 +                      _do_endios(rs, stripe, &flush_list);
 +      }
 +
 +      /*
 +       * Insert any requeued stripes in the proper
 +       * order at the beginning of the io (flush) list.
 +       */
 +      list_splice(&flush_list, sc->lists + LIST_FLUSH);
 +}
 +
 +/* Flush any stripes on the io list. */
 +static int do_flush(struct raid_set *rs)
 +{
 +      int r = 0;
 +      struct stripe *stripe;
 +
 +      while ((stripe = stripe_io_pop(&rs->sc)))
 +              r += stripe_rw(stripe); /* Read/write stripe. */
 +
 +      return r;
 +}
 +
 +/* Stripe cache resizing. */
 +static void do_sc_resize(struct raid_set *rs)
 +{
 +      unsigned set = atomic_read(&rs->sc.stripes_to_set);
 +
 +      if (set) {
 +              unsigned cur = atomic_read(&rs->sc.stripes);
 +              int r = (set > cur) ? sc_grow(&rs->sc, set - cur, SC_GROW) :
 +                                    sc_shrink(&rs->sc, cur - set);
 +
 +              /* Flag end of resizing if ok. */
 +              if (!r)
 +                      atomic_set(&rs->sc.stripes_to_set, 0);
 +      }
 +}
 +
 +/*
 + * Process all ios
 + *
 + * We do different things with the io depending
 + * on the state of the region that it is in:
 + *
 + * o reads: hang off stripe cache or postpone if full
 + *
 + * o writes:
 + *
 + *  CLEAN/DIRTY/NOSYNC:       increment pending and hang io off stripe's stripe set.
 + *                    In case stripe cache is full or busy, postpone the io.
 + *
 + *  RECOVERING:               delay the io until recovery of the region completes.
 + *
 + */
 +static void do_ios(struct raid_set *rs, struct bio_list *ios)
 +{
 +      int r;
 +      unsigned flush = 0, delay = 0;
 +      sector_t sector;
 +      struct dm_rh_client *rh = rs->recover.rh;
 +      struct bio *bio;
 +      struct bio_list reject;
 +
 +      bio_list_init(&reject);
 +
 +      /*
 +       * Classify each io:
 +       *    o delay writes to recovering regions (let reads go through)
 +       *    o queue io to all other regions
 +       */
 +      while ((bio = bio_list_pop(ios))) {
 +              /*
 +               * In case we get a barrier bio, push it back onto
 +               * the input queue unless all work queues are empty
 +               * and the stripe cache is inactive.
 +               */
-               if (unlikely(bio_empty_barrier(bio))) {
++              if (bio->bi_rw & REQ_FLUSH) {
 +                      /* REMOVEME: statistics. */
 +                      atomic_inc(rs->stats + S_BARRIER);
 +                      if (delay ||
 +                          !list_empty(rs->sc.lists + LIST_FLUSH) ||
 +                          !bio_list_empty(&reject) ||
 +                          sc_active(&rs->sc)) {
 +                              bio_list_push(ios, bio);
 +                              break;
 +                      }
 +              }
 +
 +              /* If writes prohibited because of failures -> postpone. */
 +              if (RSProhibitWrites(rs) && bio_data_dir(bio) == WRITE) {
 +                      bio_list_add(&reject, bio);
 +                      continue;
 +              }
 +
 +              /* Check for recovering regions. */
 +              sector = _sector(rs, bio);
 +              r = region_state(rs, sector, DM_RH_RECOVERING);
 +              if (unlikely(r)) {
 +                      delay++;
 +                      /* Delay writes to recovering regions. */
 +                      dm_rh_delay_by_region(rh, bio,
 +                                            dm_rh_sector_to_region(rh,
 +                                                                   sector));
 +                      /* REMOVEME: statistics.*/
 +                      atomic_inc(rs->stats + S_DELAYED_BIOS);
 +                      atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
 +
 +                      /* Force bandwidth tests in recovery. */
 +                      SetRSBandwidth(rs);
 +              } else {
 +                      /*
 +                       * Process ios to non-recovering regions by queueing
 +                       * them to stripes (does dm_rh_inc() for writes).
 +                       */
 +                      flush += stripe_queue_bio(rs, bio, &reject);
 +              }
 +      }
 +
 +      if (flush) {
 +              /* FIXME: better error handling. */
 +              r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
 +              if (r)
 +                      DMERR_LIMIT("dirty log flush");
 +      }
 +
 +      /* Merge any rejected bios back to the head of the input list. */
 +      bio_list_merge_head(ios, &reject);
 +}
 +
 +/* Unplug: let any queued io roll on the set's devices. */
 +static void do_unplug(struct raid_set *rs)
 +{
 +      struct raid_dev *dev = rs->dev + rs->set.raid_devs;
 +
 +      while (dev-- > rs->dev) {
 +              /* Only call a device's unplug function if io got queued. */
 +              if (TestClearDevIoQueued(dev))
 +                      blk_unplug(bdev_get_queue(dev->dev->bdev));
 +      }
 +}
 +
 +/* Send an event in case we're getting too busy. */
 +static void do_busy_event(struct raid_set *rs)
 +{
 +      if (sc_busy(rs)) {
 +              if (!TestSetRSScBusy(rs))
 +                      schedule_work(&rs->io.ws_do_table_event);
 +      } else
 +              ClearRSScBusy(rs);
 +}
 +
 +/* Throw an event. */
 +static void do_table_event(struct work_struct *ws)
 +{
 +      struct raid_set *rs = container_of(ws, struct raid_set,
 +                                         io.ws_do_table_event);
 +      dm_table_event(rs->ti->table);
 +}
 +
 +
 +/*-----------------------------------------------------------------
 + * RAID daemon
 + *---------------------------------------------------------------*/
 +/*
 + * o process all end ios
 + * o update the region hash states
 + * o optionally shrink the stripe cache
 + * o optionally do recovery
 + * o unplug any component raid devices with queued bios
 + * o grab the input queue
 + * o work on all requeued or new ios and perform stripe cache flushes
 + * o unplug any component raid devices with queued bios
 + * o check if the stripe cache is getting too busy and throw an event if so
 + */
 +static void do_raid(struct work_struct *ws)
 +{
 +      int r;
 +      struct raid_set *rs = container_of(ws, struct raid_set,
 +                                         io.dws_do_raid.work);
 +      struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
 +
 +      /*
 +       * We always need to end io, so that ios can get errored in
 +       * case the set failed and the region counters get decremented
 +       * before we update region hash states and go any further.
 +       */
 +      do_endios(rs);
 +      dm_rh_update_states(rs->recover.rh, 1);
 +
 +      /*
 +       * Now that we've end io'd, which may have put stripes on the LRU list
 +       * to allow for shrinking, we resize the stripe cache if requested.
 +       */
 +      do_sc_resize(rs);
 +
 +      /* Try to recover regions. */
 +      r = do_recovery(rs);
 +      if (r)
 +              do_unplug(rs);  /* Unplug the sets device queues. */
 +
 +      /* Quickly grab all new ios queued and add them to the work list. */
 +      mutex_lock(&rs->io.in_lock);
 +      bio_list_merge(ios, ios_in);
 +      bio_list_init(ios_in);
 +      mutex_unlock(&rs->io.in_lock);
 +
 +      if (!bio_list_empty(ios))
 +              do_ios(rs, ios); /* Got ios to work into the cache. */
 +
 +      r = do_flush(rs);               /* Flush any stripes on io list. */
 +      if (r)
 +              do_unplug(rs);          /* Unplug the sets device queues. */
 +
 +      do_busy_event(rs);      /* Check if we got too busy. */
 +}
 +
 +/*
 + * Callback for region hash to dispatch
 + * delayed bios queued to recovered regions
 + * (gets called via dm_rh_update_states()).
 + */
 +static void dispatch_delayed_bios(void *context, struct bio_list *bl)
 +{
 +      struct raid_set *rs = context;
 +      struct bio *bio;
 +
 +      /* REMOVEME: statistics; decrement pending delayed bios counter. */
 +      bio_list_for_each(bio, bl)
 +              atomic_dec(rs->stats + S_DELAYED_BIOS);
 +
 +      /* Merge region hash private list to work list. */
 +      bio_list_merge_head(&rs->io.work, bl);
 +      bio_list_init(bl);
 +      ClearRSBandwidth(rs);
 +}
 +
 +/*************************************************************
 + * Constructor helpers
 + *************************************************************/
 +/* Calculate MB/sec. */
 +static unsigned mbpers(struct raid_set *rs, unsigned io_size)
 +{
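 +      /*
 +       * xor.speed is measured in io_size-sector xors per jiffy tick;
 +       * to_bytes() converts sectors to bytes and each >> 10 divides
 +       * by 1024, yielding MB/s.
 +       */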
 +      return to_bytes((rs->xor.speed * rs->set.data_devs *
 +                       io_size * HZ / XOR_SPEED_TICKS) >> 10) >> 10;
 +}
 +
 +/*
 + * Discover fastest xor algorithm and # of chunks combination.
 + */
 +/* Calculate speed of particular algorithm and # of chunks. */
 +static unsigned xor_speed(struct stripe *stripe)
 +{
 +      int ticks = XOR_SPEED_TICKS;
 +      unsigned p = RS(stripe->sc)->set.raid_devs, r = 0;
 +      unsigned long j;
 +
 +      /* Set uptodate so that common_xor()->xor() will process chunks. */
 +      while (p--)
 +              SetChunkUptodate(CHUNK(stripe, p));
 +
 +      /* Wait for next tick. */
 +      for (j = jiffies; j == jiffies; );
 +
 +      /* Do xors for a few ticks. */
 +      while (ticks--) {
 +              unsigned xors = 0;
 +
 +              for (j = jiffies; j == jiffies; ) {
 +                      mb();
 +                      common_xor(stripe, stripe->io.size, 0, 0);
 +                      mb();
 +                      xors++;
 +                      mb();
 +              }
 +
 +              if (xors > r)
 +                      r = xors;
 +      }
 +
 +      return r;
 +}
 +
 +/* Define to run the xor optimization over multiple recovery stripes. */
 +#define DMRAID45_XOR_TEST
 +
 +/* Optimize xor algorithm for this RAID set. */
 +static unsigned xor_optimize(struct raid_set *rs)
 +{
 +      unsigned chunks_max = 2, speed_max = 0;
 +      struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
 +      struct stripe *stripe;
 +      unsigned io_size = 0, speed_hm = 0, speed_min = ~0, speed_xor_blocks = 0;
 +
 +      BUG_ON(list_empty(&rs->recover.stripes));
 +#ifndef DMRAID45_XOR_TEST
 +      stripe = list_first_entry(&rs->recover.stripes, struct stripe,
 +                                lists[LIST_RECOVER]);
 +#endif
 +
 +      /* Try all xor functions. */
 +      while (f-- > xor_funcs) {
 +              unsigned speed;
 +
 +#ifdef DMRAID45_XOR_TEST
 +              list_for_each_entry(stripe, &rs->recover.stripes,
 +                                  lists[LIST_RECOVER]) {
 +                      io_size = stripe->io.size;
 +#endif
 +
 +                      /* Set actual xor function for common_xor(). */
 +                      rs->xor.f = f;
 +                      rs->xor.chunks = (f->f == xor_blocks_wrapper ?
 +                                        (MAX_XOR_BLOCKS + 1) :
 +                                        XOR_CHUNKS_MAX);
 +                      if (rs->xor.chunks > rs->set.raid_devs)
 +                              rs->xor.chunks = rs->set.raid_devs;
 +
 +                      for ( ; rs->xor.chunks > 1; rs->xor.chunks--) {
 +                              speed = xor_speed(stripe);
 +
 +#ifdef DMRAID45_XOR_TEST
 +                              if (f->f == xor_blocks_wrapper) {
 +                                      if (speed > speed_xor_blocks)
 +                                              speed_xor_blocks = speed;
 +                              } else if (speed > speed_hm)
 +                                      speed_hm = speed;
 +
 +                              if (speed < speed_min)
 +                                      speed_min = speed;
 +#endif
 +
 +                              if (speed > speed_max) {
 +                                      speed_max = speed;
 +                                      chunks_max = rs->xor.chunks;
 +                                      f_max = f;
 +                              }
 +                      }
 +#ifdef DMRAID45_XOR_TEST
 +              }
 +#endif
 +      }
 +
 +      /* Memorize optimal parameters. */
 +      rs->xor.f = f_max;
 +      rs->xor.chunks = chunks_max;
 +#ifdef DMRAID45_XOR_TEST
 +      DMINFO("%s stripes=%u/size=%u min=%u xor_blocks=%u hm=%u max=%u",
 +             speed_max == speed_hm ? "HM" : "NB",
 +             rs->recover.recovery_stripes, io_size, speed_min,
 +             speed_xor_blocks, speed_hm, speed_max);
 +#endif
 +      return speed_max;
 +}
 +
 +/*
 + * Allocate a RAID context (a RAID set)
 + */
 +/* Structure for variable RAID parameters. */
 +struct variable_parms {
 +      int bandwidth;
 +      int bandwidth_parm;
 +      int chunk_size;
 +      int chunk_size_parm;
 +      int io_size;
 +      int io_size_parm;
 +      int stripes;
 +      int stripes_parm;
 +      int recover_io_size;
 +      int recover_io_size_parm;
 +      int raid_parms;
 +      int recovery;
 +      int recovery_stripes;
 +      int recovery_stripes_parm;
 +};
 +
 +static struct raid_set *
 +context_alloc(struct raid_type *raid_type, struct variable_parms *p,
 +            unsigned raid_devs, sector_t sectors_per_dev,
 +            struct dm_target *ti, unsigned dl_parms, char **argv)
 +{
 +      int r;
 +      size_t len;
 +      sector_t region_size, ti_len;
 +      struct raid_set *rs = NULL;
 +      struct dm_dirty_log *dl;
 +      struct recover *rec;
 +
 +      /*
 +       * Create the dirty log
 +       *
 +       * We need to change length for the dirty log constructor,
 +       * because we want an amount of regions for all stripes derived
 +       * from the single device size, so that we can keep region
 +       * size = 2^^n independent of the number of devices
 +       */
 +      ti_len = ti->len;
 +      ti->len = sectors_per_dev;
 +      dl = dm_dirty_log_create(argv[0], ti, NULL, dl_parms, argv + 2);
 +      ti->len = ti_len;
 +      if (!dl)
 +              goto bad_dirty_log;
 +
 +      /* Chunk size *must* be smaller than region size. */
 +      region_size = dl->type->get_region_size(dl);
 +      if (p->chunk_size > region_size)
 +              goto bad_chunk_size;
 +
 +      /* Recover io size *must* be smaller than region size as well. */
 +      if (p->recover_io_size > region_size)
 +              goto bad_recover_io_size;
 +
 +      /* Size and allocate the RAID set structure. */
 +      len = sizeof(*rs->data) + sizeof(*rs->dev);
 +      if (dm_array_too_big(sizeof(*rs), len, raid_devs))
 +              goto bad_array;
 +
 +      len = sizeof(*rs) + raid_devs * len;
 +      rs = kzalloc(len, GFP_KERNEL);
 +      if (!rs)
 +              goto bad_alloc;
 +
 +      rec = &rs->recover;
 +      atomic_set(&rs->io.in_process, 0);
 +      atomic_set(&rs->io.in_process_max, 0);
 +      rec->io_size = p->recover_io_size;
 +
 +      /* Pointer to data array. */
 +      rs->data = (unsigned long **)
 +                 ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
 +      rec->dl = dl;
 +      rs->set.raid_devs = raid_devs;
 +      rs->set.data_devs = raid_devs - raid_type->parity_devs;
 +      rs->set.raid_type = raid_type;
 +
 +      rs->set.raid_parms = p->raid_parms;
 +      rs->set.chunk_size_parm = p->chunk_size_parm;
 +      rs->set.io_size_parm = p->io_size_parm;
 +      rs->sc.stripes_parm = p->stripes_parm;
 +      rec->io_size_parm = p->recover_io_size_parm;
 +      rec->bandwidth_parm = p->bandwidth_parm;
 +      rec->recovery = p->recovery;
 +      rec->recovery_stripes = p->recovery_stripes;
 +
 +      /*
 +       * Set chunk and io size and respective shifts
 +       * (used to avoid divisions)
 +       */
 +      rs->set.chunk_size = p->chunk_size;
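 +      /* chunk_size is a power of 2, so ffs() - 1 yields its log2 for shifts. */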
 +      rs->set.chunk_shift = ffs(p->chunk_size) - 1;
 +
 +      rs->set.io_size = p->io_size;
 +      rs->set.io_mask = p->io_size - 1;
 +      /* Mask to adjust address key in case io_size != chunk_size. */
 +      rs->set.io_inv_mask = (p->chunk_size - 1) & ~rs->set.io_mask;
 +
 +      rs->set.sectors_per_dev = sectors_per_dev;
 +
 +      rs->set.ei = -1;        /* Indicate no failed device. */
 +      atomic_set(&rs->set.failed_devs, 0);
 +
 +      rs->ti = ti;
 +
 +      atomic_set(rec->io_count + IO_WORK, 0);
 +      atomic_set(rec->io_count + IO_RECOVER, 0);
 +
 +      /* Initialize io lock and queues. */
 +      mutex_init(&rs->io.in_lock);
 +      mutex_init(&rs->io.xor_lock);
 +      bio_list_init(&rs->io.in);
 +      bio_list_init(&rs->io.work);
 +
 +      init_waitqueue_head(&rs->io.suspendq);  /* Suspend waiters (dm-io). */
 +
 +      rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
 +      rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios,
 +                      wake_dummy, wake_do_raid, 0, p->recovery_stripes,
 +                      dl, region_size, rec->nr_regions);
 +      if (IS_ERR(rec->rh))
 +              goto bad_rh;
 +
 +      /* Initialize stripe cache. */
 +      r = sc_init(rs, p->stripes);
 +      if (r)
 +              goto bad_sc;
 +
 +      /* REMOVEME: statistics. */
 +      stats_reset(rs);
 +      ClearRSDevelStats(rs);  /* Disable development statistics. */
 +      return rs;
 +
 +bad_dirty_log:
 +      TI_ERR_RET("Error creating dirty log", ERR_PTR(-ENOMEM));
 +
 +bad_chunk_size:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR_RET("Chunk size larger than region size", ERR_PTR(-EINVAL));
 +
 +bad_recover_io_size:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR_RET("Recover stripe io size larger than region size",
 +                      ERR_PTR(-EINVAL));
 +
 +bad_array:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR_RET("Array too big", ERR_PTR(-EINVAL));
 +
 +bad_alloc:
 +      dm_dirty_log_destroy(dl);
 +      TI_ERR_RET("Cannot allocate raid context", ERR_PTR(-ENOMEM));
 +
 +bad_rh:
 +      dm_dirty_log_destroy(dl);
 +      ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
 +      goto free_rs;
 +
 +bad_sc:
 +      dm_region_hash_destroy(rec->rh); /* Destroys dirty log too. */
 +      sc_exit(&rs->sc);
 +      ti->error = DM_MSG_PREFIX "Error creating stripe cache";
 +free_rs:
 +      kfree(rs);
 +      return ERR_PTR(-ENOMEM);
 +}
 +
 +/* Free a RAID context (a RAID set). */
 +static void context_free(struct raid_set *rs, unsigned p)
 +{
 +      while (p--)
 +              dm_put_device(rs->ti, rs->dev[p].dev);
 +
 +      sc_exit(&rs->sc);
 +      dm_region_hash_destroy(rs->recover.rh); /* Destroys dirty log too. */
 +      kfree(rs);
 +}
 +
 +/* Create work queue and initialize delayed work. */
 +static int rs_workqueue_init(struct raid_set *rs)
 +{
 +      struct dm_target *ti = rs->ti;
 +
 +      rs->io.wq = create_singlethread_workqueue(DAEMON);
 +      if (!rs->io.wq)
 +              TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
 +
 +      INIT_DELAYED_WORK(&rs->io.dws_do_raid, do_raid);
 +      INIT_WORK(&rs->io.ws_do_table_event, do_table_event);
 +      return 0;
 +}
 +
 +/* Return pointer to raid_type structure for raid name. */
 +static struct raid_type *get_raid_type(char *name)
 +{
 +      struct raid_type *r = ARRAY_END(raid_types);
 +
 +      while (r-- > raid_types) {
 +              if (!strcmp(r->name, name))
 +                      return r;
 +      }
 +
 +      return NULL;
 +}
 +
 +/* FIXME: factor out to dm core. */
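 +/* Store a/b in *n and return true if b divides a without remainder. */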
 +static int multiple(sector_t a, sector_t b, sector_t *n)
 +{
 +      sector_t r = a;
 +
 +      sector_div(r, b);
 +      *n = r;
 +      return a == r * b;
 +}
 +
 +/* Log RAID set information to kernel log. */
 +static void rs_log(struct raid_set *rs, unsigned io_size)
 +{
 +      unsigned p;
 +      char buf[BDEVNAME_SIZE];
 +
 +      for (p = 0; p < rs->set.raid_devs; p++)
 +              DMINFO("/dev/%s is raid disk %u%s",
 +                              bdevname(rs->dev[p].dev->bdev, buf), p,
 +                              (p == rs->set.pi) ? " (parity)" : "");
 +
 +      DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes\n"
 +             "algorithm \"%s\", %u chunks with %uMB/s\n"
 +             "%s set with net %u/%u devices",
 +             rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
 +             atomic_read(&rs->sc.stripes),
 +             rs->xor.f->name, rs->xor.chunks, mbpers(rs, io_size),
 +             rs->set.raid_type->descr, rs->set.data_devs, rs->set.raid_devs);
 +}
 +
 +/* Get all devices and offsets. */
 +static int dev_parms(struct raid_set *rs, char **argv, int *p)
 +{
 +      struct dm_target *ti = rs->ti;
 +
 +DMINFO("rs->set.sectors_per_dev=%llu", (unsigned long long) rs->set.sectors_per_dev);
 +      for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
 +              int r;
 +              unsigned long long tmp;
 +              struct raid_dev *dev = rs->dev + *p;
 +
 +              /* Get offset and device. */
 +              if (sscanf(argv[1], "%llu", &tmp) != 1 ||
 +                  tmp > rs->set.sectors_per_dev)
 +                      TI_ERR("Invalid RAID device offset parameter");
 +
 +              dev->start = tmp;
 +              r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 +                                &dev->dev);
 +              if (r)
 +                      TI_ERR_RET("RAID device lookup failure", r);
 +
 +              r = raid_dev_lookup(rs, dev);
 +              if (r != -ENODEV && r < *p) {
 +                      (*p)++; /* Ensure dm_put_device() on actual device. */
 +                      TI_ERR_RET("Duplicate RAID device", -ENXIO);
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +/* Set recovery bandwidth. */
 +static void
 +recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
 +{
 +      rs->recover.bandwidth = bandwidth;
 +      rs->recover.bandwidth_work = 100 / bandwidth;
 +}
 +
 +/* Handle variable number of RAID parameters. */
 +static int get_raid_variable_parms(struct dm_target *ti, char **argv,
 +                                 struct variable_parms *vp)
 +{
 +      int p, value;
 +      struct {
 +              int action; /* -1: skip, 0: no power2 check, 1: power2 check */
 +              char *errmsg;
 +              int min, max;
 +              int *var, *var2, *var3;
 +      } argctr[] = {
 +              { 1,
 +                "Invalid chunk size; must be -1 or 2^^n and <= 16384",
 +                IO_SIZE_MIN, CHUNK_SIZE_MAX,
 +                &vp->chunk_size_parm, &vp->chunk_size, &vp->io_size },
 +              { 0,
 +                "Invalid number of stripes: must be -1 or >= 8 and <= 16384",
 +                STRIPES_MIN, STRIPES_MAX,
 +                &vp->stripes_parm, &vp->stripes, NULL },
 +              { 1,
 +                "Invalid io size; must be -1 or >= 8, 2^^n and less equal "
 +                "min(BIO_MAX_SECTORS/2, chunk size)",
 +                IO_SIZE_MIN, 0, /* Needs to be updated in loop below. */
 +                &vp->io_size_parm, &vp->io_size, NULL },
 +              { 1,
 +                "Invalid recovery io size; must be -1 or "
 +                "2^^n and less equal BIO_MAX_SECTORS/2",
 +                RECOVER_IO_SIZE_MIN, BIO_MAX_SECTORS / 2,
 +                &vp->recover_io_size_parm, &vp->recover_io_size, NULL },
 +              { 0,
 +                "Invalid recovery bandwidth percentage; "
 +                "must be -1 or > 0 and <= 100",
 +                BANDWIDTH_MIN, BANDWIDTH_MAX,
 +                &vp->bandwidth_parm, &vp->bandwidth, NULL },
 +              /* Handle sync argument separately in loop. */
 +              { -1,
 +                "Invalid recovery switch; must be \"sync\" or \"nosync\"" },
 +              { 0,
 +                "Invalid number of recovery stripes; "
 +                "must be -1, > 0 and <= 64",
 +                RECOVERY_STRIPES_MIN, RECOVERY_STRIPES_MAX,
 +                &vp->recovery_stripes_parm, &vp->recovery_stripes, NULL },
 +      }, *varp;
 +
 +      /* Fetch # of variable raid parameters. */
 +      if (sscanf(*(argv++), "%d", &vp->raid_parms) != 1 ||
 +          !range_ok(vp->raid_parms, 0, 7))
 +              TI_ERR("Bad variable raid parameters number");
 +
 +      /* Preset variable RAID parameters. */
 +      vp->chunk_size = CHUNK_SIZE_DEFAULT;
 +      vp->io_size = IO_SIZE_DEFAULT;
 +      vp->stripes = STRIPES_DEFAULT;
 +      vp->recover_io_size = RECOVER_IO_SIZE_DEFAULT;
 +      vp->bandwidth = BANDWIDTH_DEFAULT;
 +      vp->recovery = 1;
 +      vp->recovery_stripes = RECOVERY_STRIPES_DEFAULT;
 +
 +      /* Walk the array of argument constraints for all given ones. */
 +      for (p = 0, varp = argctr; p < vp->raid_parms; p++, varp++) {
 +              BUG_ON(varp >= ARRAY_END(argctr));
 +
 +              /* Special case for "[no]sync" string argument. */
 +              if (varp->action < 0) {
 +                      if (!strcmp(*argv, "sync"))
 +                              ;
 +                      else if (!strcmp(*argv, "nosync"))
 +                              vp->recovery = 0;
 +                      else
 +                              TI_ERR(varp->errmsg);
 +
 +                      argv++;
 +                      continue;
 +              }
 +
 +              /*
 +               * Special case for io_size depending
 +               * on previously set chunk size.
 +               */
 +              if (p == 2)
 +                      varp->max = min(BIO_MAX_SECTORS / 2, vp->chunk_size);
 +
 +              if (sscanf(*(argv++), "%d", &value) != 1 ||
 +                  (value != -1 &&
 +                   ((varp->action && !is_power_of_2(value)) ||
 +                    !range_ok(value, varp->min, varp->max))))
 +                      TI_ERR(varp->errmsg);
 +
 +              *varp->var = value;
 +              if (value != -1) {
 +                      if (varp->var2)
 +                              *varp->var2 = value;
 +                      if (varp->var3)
 +                              *varp->var3 = value;
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +/* Parse optional locking parameters. */
 +static int get_raid_locking_parms(struct dm_target *ti, char **argv,
 +                                int *locking_parms,
 +                                struct dm_raid45_locking_type **locking_type)
 +{
 +      if (!strnicmp(argv[0], "locking", strlen(argv[0]))) {
 +              char *lckstr = argv[1];
 +              size_t lcksz = strlen(lckstr);
 +
 +              if (!strnicmp(lckstr, "none", lcksz)) {
 +                      *locking_type = &locking_none;
 +                      *locking_parms = 2;
 +                      return 0;
 +              } else if (!strnicmp(lckstr, "cluster", lcksz)) {
 +                      DMERR("locking type \"%s\" not yet implemented",
 +                            lckstr);
 +                      return -EINVAL;
 +              } else {
 +                      DMERR("unknown locking type \"%s\"", lckstr);
 +                      return -EINVAL;
 +              }
 +      }
 +
 +      *locking_parms = 0;
 +      *locking_type = &locking_none;
 +      return 0;
 +}
 +
 +/* Set backing device read ahead properties of RAID set. */
 +static void rs_set_read_ahead(struct raid_set *rs,
 +                            unsigned sectors, unsigned stripes)
 +{
 +      unsigned ra_pages = dm_div_up(sectors, SECTORS_PER_PAGE);
 +      struct mapped_device *md = dm_table_get_md(rs->ti->table);
 +      struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
 +
 +      /* Set read-ahead for the RAID set and the component devices. */
 +      if (ra_pages) {
 +              unsigned p = rs->set.raid_devs;
 +
 +              bdi->ra_pages = stripes * ra_pages * rs->set.data_devs;
 +
 +              while (p--) {
 +                      struct request_queue *q =
 +                              bdev_get_queue(rs->dev[p].dev->bdev);
 +
 +                      q->backing_dev_info.ra_pages = ra_pages;
 +              }
 +      }
 +}
 +
 +/* Set congested function. */
 +static void rs_set_congested_fn(struct raid_set *rs)
 +{
 +      struct mapped_device *md = dm_table_get_md(rs->ti->table);
 +      struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
 +
 +      /* Set congested function and data. */
 +      bdi->congested_fn = rs_congested;
 +      bdi->congested_data = rs;
 +}
 +
 +/*
 + * Construct a RAID4/5 mapping:
 + *
 + * log_type #log_params <log_params> \
 + * raid_type [#parity_dev] #raid_variable_params <raid_params> \
 + * [locking "none"/"cluster"]
 + * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
 + *
 + * log_type = "core"/"disk",
 + * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
 + * log_params = [dirty_log_path] region_size [[no]sync])
 + *
 + * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
 + *
 + * #parity_dev = N if raid_type = "raid4"
 + * o N = -1: pick default = last device
 + * o N >= 0 and < #raid_devs: parity device index
 + *
 + * #raid_variable_params = 0-7; raid_params (-1 = default):
 + *   [chunk_size [#stripes [io_size [recover_io_size \
 + *    [%recovery_bandwidth [recovery_switch [#recovery_stripes]]]]]]]
 + *   o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
 + *     and <= CHUNK_SIZE_MAX)
 + *   o #stripes is number of stripes allocated to stripe cache
 + *     (must be > 1 and < STRIPES_MAX)
 + *   o io_size (io unit size per device in sectors; must be 2^^n and > 8)
 + *   o recover_io_size (io unit size per device for recovery in sectors;
 + *     must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
 + *   o %recovery_bandwidth is the maximum share of io bandwidth spent
 + *     on recovery during application io (1-100%)
 + *   o recovery switch = [sync|nosync]
 + *   o #recovery_stripes is the number of recovery stripes used for
 + *     parallel recovery of the RAID set
 + * If raid_variable_params = 0, defaults will be used.
 + * Any raid_variable_param can be set to -1 to apply a default
 + *
 + * #raid_devs = N (N >= 3)
 + *
 + * #dev_to_initialize = N
 + * -1: initialize parity on all devices
 + * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
 + * of a failed device's content after replacement
 + *
 + * <dev_path> = device_path (eg, /dev/sdd1)
 + * <offset>   = begin at offset on <dev_path>
 + *
 + */
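 +/*
 + * Example table line (hypothetical devices and device name; core dirty
 + * log with 8192 sector regions, all variable RAID parameters defaulted):
 + *
 + *    echo "0 2097152 raid45 core 2 8192 nosync raid5_la 0 3 -1 \
 + *          /dev/sda 0 /dev/sdb 0 /dev/sdc 0" | dmsetup create r5
 + */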
 +#define       MIN_PARMS       13
 +static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 +{
 +      int dev_to_init, dl_parms, i, locking_parms,
 +          parity_parm, pi = -1, r, raid_devs;
 +      sector_t tmp, sectors_per_dev;
 +      struct dm_raid45_locking_type *locking;
 +      struct raid_set *rs;
 +      struct raid_type *raid_type;
 +      struct variable_parms parms;
 +
 +      /* Ensure minimum number of parameters. */
 +      if (argc < MIN_PARMS)
 +              TI_ERR("Not enough parameters");
 +
 +      /* Fetch # of dirty log parameters. */
 +      if (sscanf(argv[1], "%d", &dl_parms) != 1 ||
 +          !range_ok(dl_parms, 1, 4711)) /* ;-) */
 +              TI_ERR("Bad dirty log parameters number");
 +
 +      /* Check raid_type. */
 +      raid_type = get_raid_type(argv[dl_parms + 2]);
 +      if (!raid_type)
 +              TI_ERR("Bad raid type");
 +
 +      /* In case of RAID4, parity drive is selectable. */
 +      parity_parm = !!(raid_type->level == raid4);
 +
 +      /* Handle variable number of RAID parameters. */
 +      r = get_raid_variable_parms(ti, argv + dl_parms + parity_parm + 3,
 +                                  &parms);
 +      if (r)
 +              return r;
 +
 +      /* Handle any locking parameters. */
 +      r = get_raid_locking_parms(ti,
 +                                 argv + dl_parms + parity_parm +
 +                                 parms.raid_parms + 4,
 +                                 &locking_parms, &locking);
 +      if (r)
 +              return r;
 +
 +      /* # of raid devices. */
 +      i = dl_parms + parity_parm + parms.raid_parms + locking_parms + 4;
 +      if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
 +          raid_devs < raid_type->minimal_devs)
 +              TI_ERR("Invalid number of raid devices");
 +
 +      /* In case of RAID4, check parity drive index is in limits. */
 +      if (raid_type->level == raid4) {
 +              /* Fetch index of parity device. */
 +              if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
 +                  (pi != -1 && !range_ok(pi, 0, raid_devs - 1)))
 +                      TI_ERR("Invalid RAID4 parity device index");
 +      }
 +
 +      /*
 +       * Index of device to initialize starts at 0
 +       *
 +       * o -1 -> don't initialize a selected device;
 +       *         initialize parity conforming to algorithm
 +       * o 0..raid_devs-1 -> initialize respective device
 +       *   (used for reconstruction of a replaced device)
 +       */
 +      if (sscanf(argv[dl_parms + parity_parm + parms.raid_parms +
 +                 locking_parms + 5], "%d", &dev_to_init) != 1 ||
 +          !range_ok(dev_to_init, -1, raid_devs - 1))
 +              TI_ERR("Invalid number for raid device to initialize");
 +
 +      /* Check # of raid device arguments. */
 +      if (argc - dl_parms - parity_parm - parms.raid_parms - 6 !=
 +          2 * raid_devs)
 +              TI_ERR("Wrong number of raid device/offset arguments");
 +
 +      /*
 +       * Check that the table length is divisible
 +       * w/o remainder by (raid_devs - parity_devs)
 +       */
 +      if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
 +                    &sectors_per_dev))
 +              TI_ERR("Target length not divisible by number of data devices");
 +
 +      /*
 +       * Check that the device size is
 +       * divisible w/o remainder by chunk size
 +       */
 +      if (!multiple(sectors_per_dev, parms.chunk_size, &tmp))
 +              TI_ERR("Device length not divisible by chunk_size");
 +
 +      /****************************************************************
 +       * Now that we checked the constructor arguments ->
 +       * let's allocate the RAID set
 +       ****************************************************************/
 +      rs = context_alloc(raid_type, &parms, raid_devs, sectors_per_dev,
 +                         ti, dl_parms, argv);
 +      if (IS_ERR(rs))
 +              return PTR_ERR(rs);
 +
 +
 +      rs->set.dev_to_init = rs->set.dev_to_init_parm = dev_to_init;
 +      rs->set.pi = rs->set.pi_parm = pi;
 +
 +      /* Set RAID4 parity drive index. */
 +      if (raid_type->level == raid4)
 +              rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
 +
 +      recover_set_bandwidth(rs, parms.bandwidth);
 +
 +      /* Use locking type to lock stripe access. */
 +      rs->locking = locking;
 +
 +      /* Get the device/offset tupels. */
 +      argv += dl_parms + 6 + parity_parm + parms.raid_parms;
 +      r = dev_parms(rs, argv, &i);
 +      if (r)
 +              goto err;
 +
 +      /* Set backing device information (eg. read ahead). */
 +      rs_set_read_ahead(rs, 2 * rs->set.chunk_size /* sectors per device */,
 +                            2 /* # of stripes */);
 +      rs_set_congested_fn(rs); /* Set congested function. */
 +      SetRSCheckOverwrite(rs); /* Allow chunk overwrite checks. */
 +      rs->xor.speed = xor_optimize(rs); /* Select best xor algorithm. */
 +
 +      /* Set for recovery of any nosync regions. */
 +      if (parms.recovery)
 +              SetRSRecover(rs);
 +      else {
 +              /*
 +               * Need to free recovery stripe(s) here in case
 +               * of nosync, because xor_optimize uses one.
 +               */
 +              set_start_recovery(rs);
 +              set_end_recovery(rs);
 +              stripe_recover_free(rs);
 +      }
 +
 +      /*
 +       * Enforce parity chunk creation for small numbers of array
 +       * members, where it doesn't gain us performance to xor parity
 +       * out and back in as it does with larger member counts.
 +       */
 +      if (rs->set.raid_devs <= rs->set.raid_type->minimal_devs + 1)
 +              SetRSEnforceParityCreation(rs);
 +
 +      /*
 +       * Make sure that dm core only hands maximum io size
 +       * length down and pays attention to io boundaries.
 +       */
 +      ti->split_io = rs->set.io_size;
 +      ti->private = rs;
 +
 +      /* Initialize work queue to handle this RAID set's io. */
 +      r = rs_workqueue_init(rs);
 +      if (r)
 +              goto err;
 +
 +      rs_log(rs, rs->recover.io_size); /* Log information about RAID set. */
 +      return 0;
 +
 +err:
 +      context_free(rs, i);
 +      return r;
 +}
 +
 +/*
 + * Destruct a raid mapping
 + */
 +static void raid_dtr(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +
 +      destroy_workqueue(rs->io.wq);
 +      context_free(rs, rs->set.raid_devs);
 +}
 +
 +/* Raid mapping function. */
 +static int raid_map(struct dm_target *ti, struct bio *bio,
 +                  union map_info *map_context)
 +{
 +      /* I don't want to waste stripe cache capacity. */
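 +      /* (Readahead is advisory, so erroring READA bios is harmless.) */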
 +      if (bio_rw(bio) == READA)
 +              return -EIO;
 +      else {
 +              struct raid_set *rs = ti->private;
 +
 +              /*
 +               * Get an io reference which we wait on to drop
 +               * to zero on device suspension/destruction.
 +               */
 +              io_get(rs);
 +              bio->bi_sector -= ti->begin;    /* Remap sector. */
 +
 +              /* Queue io to RAID set. */
 +              mutex_lock(&rs->io.in_lock);
 +              bio_list_add(&rs->io.in, bio);
 +              mutex_unlock(&rs->io.in_lock);
 +
 +              /* Wake daemon to process input list. */
 +              wake_do_raid(rs);
 +
 +              /* REMOVEME: statistics. */
 +              atomic_inc(rs->stats + (bio_data_dir(bio) == READ ?
 +                                      S_BIOS_READ : S_BIOS_WRITE));
 +              return DM_MAPIO_SUBMITTED;      /* Handle later. */
 +      }
 +}
 +
 +/* Device suspend. */
 +static void raid_presuspend(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +      struct dm_dirty_log *dl = rs->recover.dl;
 +
 +      SetRSSuspend(rs);
 +
 +      if (RSRecover(rs))
 +              dm_rh_stop_recovery(rs->recover.rh);
 +
 +      cancel_delayed_work(&rs->io.dws_do_raid);
 +      flush_workqueue(rs->io.wq);
 +      wait_ios(rs);   /* Wait for completion of all ios being processed. */
 +
 +      if (dl->type->presuspend && dl->type->presuspend(dl))
 +              /* FIXME: need better error handling. */
 +              DMWARN("log presuspend failed");
 +}
 +
 +static void raid_postsuspend(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +      struct dm_dirty_log *dl = rs->recover.dl;
 +
 +      if (dl->type->postsuspend && dl->type->postsuspend(dl))
 +              /* FIXME: need better error handling. */
 +              DMWARN("log postsuspend failed");
 +
 +}
 +
 +/* Device resume. */
 +static void raid_resume(struct dm_target *ti)
 +{
 +      struct raid_set *rs = ti->private;
 +      struct recover *rec = &rs->recover;
 +      struct dm_dirty_log *dl = rec->dl;
 +
 +DMINFO("%s...", __func__);
 +      if (dl->type->resume && dl->type->resume(dl))
 +              /* Resume dirty log. */
 +              /* FIXME: need better error handling. */
 +              DMWARN("log resume failed");
 +
 +      rec->nr_regions_to_recover =
 +              rec->nr_regions - dl->type->get_sync_count(dl);
 +
 +      /* Restart any unfinished recovery. */
 +      if (RSRecover(rs)) {
 +              set_start_recovery(rs);
 +              dm_rh_start_recovery(rec->rh);
 +      }
 +
 +      ClearRSSuspend(rs);
 +}
 +
 +/* Return the approximate stripe cache size in sectors. */
 +static unsigned sc_size(struct raid_set *rs)
 +{
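 +      /*
 +       * Approximate footprint: per-stripe struct, chunk and page list
 +       * overhead plus page memory across all devices; recovery stripe
 +       * memory is only counted while recovery is still running.
 +       */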
 +      return to_sector(atomic_read(&rs->sc.stripes) *
 +                       (sizeof(struct stripe) +
 +                        (sizeof(struct stripe_chunk) +
 +                         (sizeof(struct page_list) +
 +                          to_bytes(rs->set.io_size) *
 +                          rs->set.raid_devs)) +
 +                        (rs->recover.end_jiffies ?
 +                         0 : rs->recover.recovery_stripes *
 +                         to_bytes(rs->set.raid_devs * rs->recover.io_size))));
 +}
 +
 +/* REMOVEME: status output for development. */
 +static void raid_devel_stats(struct dm_target *ti, char *result,
 +                           unsigned *size, unsigned maxlen)
 +{
 +      unsigned sz = *size;
 +      unsigned long j;
 +      char buf[BDEVNAME_SIZE], *p;
 +      struct stats_map *sm;
 +      struct raid_set *rs = ti->private;
 +      struct recover *rec = &rs->recover;
 +      struct timespec ts;
 +
 +      DMEMIT("%s %s=%u bw=%u\n",
 +             version, rs->xor.f->name, rs->xor.chunks, rs->recover.bandwidth);
 +      DMEMIT("act_ios=%d ", io_ref(rs));
 +      DMEMIT("act_ios_max=%d\n", atomic_read(&rs->io.in_process_max));
 +      DMEMIT("act_stripes=%d ", sc_active(&rs->sc));
 +      DMEMIT("act_stripes_max=%d\n",
 +             atomic_read(&rs->sc.active_stripes_max));
 +
 +      for (sm = stats_map; sm < ARRAY_END(stats_map); sm++)
 +              DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
 +
 +      DMEMIT(" checkovr=%s\n", RSCheckOverwrite(rs) ? "on" : "off");
 +      DMEMIT("sc=%u/%u/%u/%u/%u/%u/%u\n", rs->set.chunk_size,
 +             atomic_read(&rs->sc.stripes), rs->set.io_size,
 +             rec->recovery_stripes, rec->io_size, rs->sc.hash.buckets,
 +             sc_size(rs));
 +
 +      j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
 +          rec->start_jiffies;
 +      jiffies_to_timespec(j, &ts);
 +      sprintf(buf, "%ld.%09ld", ts.tv_sec, ts.tv_nsec);
 +      p = strchr(buf, '.');
 +      p[3] = 0;
 +
 +      DMEMIT("rg=%llu/%llu/%llu/%u %s\n",
 +             (unsigned long long) rec->nr_regions_recovered,
 +             (unsigned long long) rec->nr_regions_to_recover,
 +             (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
 +
 +      *size = sz;
 +}
 +
 +static int raid_status(struct dm_target *ti, status_type_t type,
 +                     char *result, unsigned maxlen)
 +{
 +      unsigned p, sz = 0;
 +      char buf[BDEVNAME_SIZE];
 +      struct raid_set *rs = ti->private;
 +      struct dm_dirty_log *dl = rs->recover.dl;
 +      int raid_parms[] = {
 +              rs->set.chunk_size_parm,
 +              rs->sc.stripes_parm,
 +              rs->set.io_size_parm,
 +              rs->recover.io_size_parm,
 +              rs->recover.bandwidth_parm,
 +              -2,
 +              rs->recover.recovery_stripes,
 +      };
 +
 +      switch (type) {
 +      case STATUSTYPE_INFO:
 +              /* REMOVEME: statistics. */
 +              if (RSDevelStats(rs))
 +                      raid_devel_stats(ti, result, &sz, maxlen);
 +
 +              DMEMIT("%u ", rs->set.raid_devs);
 +
 +              for (p = 0; p < rs->set.raid_devs; p++)
 +                      DMEMIT("%s ",
 +                             format_dev_t(buf, rs->dev[p].dev->bdev->bd_dev));
 +
 +              DMEMIT("2 ");
 +              for (p = 0; p < rs->set.raid_devs; p++) {
 +                      DMEMIT("%c", !DevFailed(rs->dev + p) ? 'A' : 'D');
 +
 +                      if (p == rs->set.pi)
 +                              DMEMIT("p");
 +
 +                      if (p == rs->set.dev_to_init)
 +                              DMEMIT("i");
 +              }
 +
 +              DMEMIT(" %llu/%llu ",
 +                    (unsigned long long) dl->type->get_sync_count(dl),
 +                    (unsigned long long) rs->recover.nr_regions);
 +
 +              sz += dl->type->status(dl, type, result+sz, maxlen-sz);
 +              break;
 +      case STATUSTYPE_TABLE:
 +              sz = rs->recover.dl->type->status(rs->recover.dl, type,
 +                                                result, maxlen);
 +              DMEMIT("%s %u ", rs->set.raid_type->name, rs->set.raid_parms);
 +
 +              for (p = 0; p < rs->set.raid_parms; p++) {
 +                      if (raid_parms[p] > -2)
 +                              DMEMIT("%d ", raid_parms[p]);
 +                      else
 +                              DMEMIT("%s ", rs->recover.recovery ?
 +                                            "sync" : "nosync");
 +              }
 +
 +              DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
 +
 +              for (p = 0; p < rs->set.raid_devs; p++)
 +                      DMEMIT("%s %llu ",
 +                             format_dev_t(buf, rs->dev[p].dev->bdev->bd_dev),
 +                             (unsigned long long) rs->dev[p].start);
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Message interface
 + */
 +/* Turn a delta into an absolute value. */
 +static int _absolute(char *action, int act, int r)
 +{
 +      size_t len = strlen(action);
 +
 +      if (len < 2)
 +              len = 2;
 +
 +      /* Make delta absolute. */
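 +      /* ('grow' adds the delta to the current value act; 'shrink' subtracts it.) */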
 +      if (!strncmp("set", action, len))
 +              ;
 +      else if (!strncmp("grow", action, len))
 +              r += act;
 +      else if (!strncmp("shrink", action, len))
 +              r = act - r;
 +      else
 +              r = -EINVAL;
 +
 +      return r;
 +}
 +
 + /* Change recovery io bandwidth. */
 +static int bandwidth_change(struct raid_set *rs, int argc, char **argv,
 +                          enum raid_set_flags flag)
 +{
 +      int act = rs->recover.bandwidth, bandwidth;
 +
 +      if (argc != 2)
 +              return -EINVAL;
 +
 +      if (sscanf(argv[1], "%d", &bandwidth) == 1 &&
 +          range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
 +              /* Make delta bandwidth absolute. */
 +              bandwidth = _absolute(argv[0], act, bandwidth);
 +
 +              /* Check range. */
 +              if (range_ok(bandwidth, BANDWIDTH_MIN, BANDWIDTH_MAX)) {
 +                      recover_set_bandwidth(rs, bandwidth);
 +                      return 0;
 +              }
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +/* Set/reset development feature flags. */
 +static int devel_flags(struct raid_set *rs, int argc, char **argv,
 +                     enum raid_set_flags flag)
 +{
 +      size_t len;
 +
 +      if (argc != 1)
 +              return -EINVAL;
 +
 +      len = strlen(argv[0]);
 +      if (len < 2)
 +              len = 2;
 +
 +      if (!strncmp(argv[0], "on", len))
 +              return test_and_set_bit(flag, &rs->io.flags) ? -EPERM : 0;
 +      else if (!strncmp(argv[0], "off", len))
 +              return test_and_clear_bit(flag, &rs->io.flags) ? 0 : -EPERM;
 +      else if (!strncmp(argv[0], "reset", len)) {
 +              if (flag == RS_DEVEL_STATS) {
 +                      if  (test_bit(flag, &rs->io.flags)) {
 +                              stats_reset(rs);
 +                              return 0;
 +                      } else
 +                              return -EPERM;
 +              } else  {
 +                      set_bit(flag, &rs->io.flags);
 +                      return 0;
 +              }
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +/* Resize the stripe cache. */
 +static int sc_resize(struct raid_set *rs, int argc, char **argv,
 +                   enum raid_set_flags flag)
 +{
 +      int act, stripes;
 +
 +      if (argc != 2)
 +              return -EINVAL;
 +
 +      /* Deny permission in case the daemon is still resizing. */
 +      if (atomic_read(&rs->sc.stripes_to_set))
 +              return -EPERM;
 +
 +      if (sscanf(argv[1], "%d", &stripes) == 1 &&
 +          stripes > 0) {
 +              act = atomic_read(&rs->sc.stripes);
 +
 +              /* Make delta stripes absolute. */
 +              stripes = _absolute(argv[0], act, stripes);
 +
 +              /*
 +               * Check range and that the # of stripes changes.
 +               * We leave the resizing to the worker.
 +               */
 +              if (range_ok(stripes, STRIPES_MIN, STRIPES_MAX) &&
 +                  stripes != atomic_read(&rs->sc.stripes)) {
 +                      atomic_set(&rs->sc.stripes_to_set, stripes);
 +                      wake_do_raid(rs);
 +                      return 0;
 +              }
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +/* Change xor algorithm and number of chunks. */
 +static int xor_set(struct raid_set *rs, int argc, char **argv,
 +                 enum raid_set_flags flag)
 +{
 +      if (argc == 2) {
 +              int chunks;
 +              char *algorithm = argv[0];
 +              struct xor_func *f = ARRAY_END(xor_funcs);
 +
 +              if (sscanf(argv[1], "%d", &chunks) == 1 &&
 +                  range_ok(chunks, 2, XOR_CHUNKS_MAX) &&
 +                  chunks <= rs->set.raid_devs) {
 +                      while (f-- > xor_funcs) {
 +                              if (!strcmp(algorithm, f->name)) {
 +                                      unsigned io_size = 0;
 +                                      struct stripe *stripe = stripe_alloc(&rs->sc, rs->sc.mem_cache_client, SC_GROW);
 +
 +                                      DMINFO("xor: %s", f->name);
 +                                      if (f->f == xor_blocks_wrapper &&
 +                                          chunks > MAX_XOR_BLOCKS + 1) {
 +                                              DMERR("chunks > MAX_XOR_BLOCKS"
 +                                                    " + 1");
 +                                              break;
 +                                      }
 +
 +                                      mutex_lock(&rs->io.xor_lock);
 +                                      rs->xor.f = f;
 +                                      rs->xor.chunks = chunks;
 +                                      rs->xor.speed = 0;
 +                                      mutex_unlock(&rs->io.xor_lock);
 +
 +                                      if (stripe) {
 +                                              rs->xor.speed = xor_speed(stripe);
 +                                              io_size = stripe->io.size;
 +                                              stripe_free(stripe, rs->sc.mem_cache_client);
 +                                      }
 +
 +                                      rs_log(rs, io_size);
 +                                      return 0;
 +                              }
 +                      }
 +              }
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +/*
 + * Allow writes after they got prohibited because of a device failure.
 + *
 + * This needs to be called after userspace updated metadata state
 + * based on an event being thrown during device failure processing.
 + */
 +static int allow_writes(struct raid_set *rs, int argc, char **argv,
 +                      enum raid_set_flags flag)
 +{
 +      if (TestClearRSProhibitWrites(rs)) {
 +DMINFO("%s waking", __func__);
 +              wake_do_raid(rs);
 +              return 0;
 +      }
 +
 +      return -EPERM;
 +}
 +
 +/* Parse the RAID message. */
 +/*
 + * 'all[ow_writes]'
 + * 'ba[ndwidth] {se[t],g[row],sh[rink]} #'    # e.g. 'ba se 50'
 + * 'o[verwrite]  {on,of[f],r[eset]}'          # e.g. 'o of'
 + * 'sta[tistics] {on,of[f],r[eset]}'          # e.g. 'stat of'
 + * 'str[ipecache] {se[t],g[row],sh[rink]} #'  # e.g. 'stripe set 1024'
 + * 'xor algorithm #chunks'                    # e.g. 'xor xor_8 5'
 + *
 + */
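 +/*
 + * Example (hypothetical mapped device name):
 + *
 + *    dmsetup message my_raid45 0 bandwidth set 50
 + */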
 +static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
 +{
 +      if (argc) {
 +              size_t len = strlen(argv[0]);
 +              struct raid_set *rs = ti->private;
 +              struct {
 +                      const char *name;
 +                      int (*f) (struct raid_set *rs, int argc, char **argv,
 +                                enum raid_set_flags flag);
 +                      enum raid_set_flags flag;
 +              } msg_descr[] = {
 +                      { "allow_writes", allow_writes, 0 },
 +                      { "bandwidth", bandwidth_change, 0 },
 +                      { "overwrite", devel_flags, RS_CHECK_OVERWRITE },
 +                      { "statistics", devel_flags, RS_DEVEL_STATS },
 +                      { "stripe_cache", sc_resize, 0 },
 +                      { "xor", xor_set, 0 },
 +              }, *m = ARRAY_END(msg_descr);
 +
 +              if (len < 3)
 +                      len = 3;
 +
 +              while (m-- > msg_descr) {
 +                      if (!strncmp(argv[0], m->name, len))
 +                              return m->f(rs, argc - 1, argv + 1, m->flag);
 +              }
 +
 +      }
 +
 +      return -EINVAL;
 +}
 +/*
 + * END message interface
 + */
 +
 +/* Provide io hints. */
 +static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 +{
 +      struct raid_set *rs = ti->private;
 +
 +      blk_limits_io_min(limits, rs->set.chunk_size);
 +      blk_limits_io_opt(limits, rs->set.chunk_size * rs->set.data_devs);
 +}
 +
 +static struct target_type raid_target = {
 +      .name = "raid45",
 +      .version = {1, 0, 0},
 +      .module = THIS_MODULE,
 +      .ctr = raid_ctr,
 +      .dtr = raid_dtr,
 +      .map = raid_map,
 +      .presuspend = raid_presuspend,
 +      .postsuspend = raid_postsuspend,
 +      .resume = raid_resume,
 +      .status = raid_status,
 +      .message = raid_message,
 +      .io_hints = raid_io_hints,
 +};
 +
 +static void init_exit(const char *bad_msg, const char *good_msg, int r)
 +{
 +      if (r)
 +              DMERR("Failed to %sregister target [%d]", bad_msg, r);
 +      else
 +              DMINFO("%s %s", good_msg, version);
 +}
 +
 +static int __init dm_raid_init(void)
 +{
 +      int r = dm_register_target(&raid_target);
 +
 +      init_exit("", "initialized", r);
 +      return r;
 +}
 +
 +static void __exit dm_raid_exit(void)
 +{
 +      dm_unregister_target(&raid_target);
 +      init_exit("un", "exit", 0);
 +}
 +
 +/* Module hooks. */
 +module_init(dm_raid_init);
 +module_exit(dm_raid_exit);
 +
 +MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
 +MODULE_AUTHOR("Heinz Mauelshagen <heinzm@redhat.com>");
 +MODULE_LICENSE("GPL");
 +MODULE_ALIAS("dm-raid4");
 +MODULE_ALIAS("dm-raid5");
Simple merge
Simple merge
index 0000000,428f4fe..38ebfc6
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,2121 +1,2130 @@@
+ /*
+  * n_tty.c --- implements the N_TTY line discipline.
+  *
+  * This code used to be in tty_io.c, but things are getting hairy
+  * enough that it made sense to split things off.  (The N_TTY
+  * processing has changed so much that it's hardly recognizable,
+  * anyway...)
+  *
+  * Note that the open routine for N_TTY is guaranteed never to return
+  * an error.  This is because Linux will fall back to setting a line
+  * to N_TTY if it can not switch to any other line discipline.
+  *
+  * Written by Theodore Ts'o, Copyright 1994.
+  *
+  * This file also contains code originally written by Linus Torvalds,
+  * Copyright 1991, 1992, 1993, and by Julian Cowley, Copyright 1994.
+  *
+  * This file may be redistributed under the terms of the GNU General Public
+  * License.
+  *
+  * Reduced memory usage for older ARM systems  - Russell King.
+  *
+  * 2000/01/20   Fixed SMP locking on put_tty_queue using bits of
+  *            the patch by Andrew J. Kroll <ag784@freenet.buffalo.edu>
+  *            who actually finally proved there really was a race.
+  *
+  * 2002/03/18   Implemented n_tty_wakeup to send SIGIO POLL_OUTs to
+  *            waiting writing processes-Sapan Bhatia <sapan@corewars.org>.
+  *            Also fixed a bug in BLOCKING mode where n_tty_write returns
+  *            EAGAIN
+  */
+ #include <linux/types.h>
+ #include <linux/major.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/fcntl.h>
+ #include <linux/sched.h>
+ #include <linux/interrupt.h>
+ #include <linux/tty.h>
+ #include <linux/timer.h>
+ #include <linux/ctype.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/bitops.h>
+ #include <linux/audit.h>
+ #include <linux/file.h>
+ #include <linux/uaccess.h>
+ #include <linux/module.h>
+ #include <asm/system.h>
+ /* number of characters left in xmit buffer before select says we have room */
+ #define WAKEUP_CHARS 256
+ /*
+  * This defines the low- and high-watermarks for throttling and
+  * unthrottling the TTY driver.  These watermarks are used for
+  * controlling the space in the read buffer.
+  */
+ #define TTY_THRESHOLD_THROTTLE                128 /* now based on remaining room */
+ #define TTY_THRESHOLD_UNTHROTTLE      128
+ /*
+  * Special byte codes used in the echo buffer to represent operations
+  * or special handling of characters.  Bytes in the echo buffer that
+  * are not part of such special blocks are treated as normal character
+  * codes.
+  */
+ #define ECHO_OP_START 0xff
+ #define ECHO_OP_MOVE_BACK_COL 0x80
+ #define ECHO_OP_SET_CANON_COL 0x81
+ #define ECHO_OP_ERASE_TAB 0x82
+ static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
+                              unsigned char __user *ptr)
+ {
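+       /* Record the byte with the tty audit code before copying it to userspace. */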
+       tty_audit_add_data(tty, &x, 1);
+       return put_user(x, ptr);
+ }
+ /**
+  *    n_tty_set_room  -       receive space
+  *    @tty: terminal
+  *
+  *    Called by the driver to find out how much data it is
+  *    permitted to feed to the line discipline without any being lost
+  *    and thus to manage flow control. Not serialized. Answers for the
+  *    "instant".
+  */
+ static void n_tty_set_room(struct tty_struct *tty)
+ {
+       /* tty->read_cnt is not read locked ? */
+       int     left = N_TTY_BUF_SIZE - tty->read_cnt - 1;
+       /*
+        * If we are doing input canonicalization, and there are no
+        * pending newlines, let characters through without limit, so
+        * that erase characters will be handled.  Other excess
+        * characters will be beeped.
+        */
+       if (left <= 0)
+               left = tty->icanon && !tty->canon_data;
+       tty->receive_room = left;
+ }
+ static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty)
+ {
+       if (tty->read_cnt < N_TTY_BUF_SIZE) {
+               tty->read_buf[tty->read_head] = c;
+               tty->read_head = (tty->read_head + 1) & (N_TTY_BUF_SIZE-1);
+               tty->read_cnt++;
+       }
+ }
+ /**
+  *    put_tty_queue           -       add character to tty
+  *    @c: character
+  *    @tty: tty device
+  *
+  *    Add a character to the tty read_buf queue. This is done under the
+  *    read_lock to serialize character addition and also to protect us
+  *    against parallel reads or flushes
+  */
+ static void put_tty_queue(unsigned char c, struct tty_struct *tty)
+ {
+       unsigned long flags;
+       /*
+        *      The problem of stomping on the buffers ends here.
+        *      Why didn't anyone see this one coming? --AJK
+       */
+       spin_lock_irqsave(&tty->read_lock, flags);
+       put_tty_queue_nolock(c, tty);
+       spin_unlock_irqrestore(&tty->read_lock, flags);
+ }
+ /**
+  *    check_unthrottle        -       allow new receive data
+  *    @tty: tty device
+  *
+  *    Check whether to call the driver unthrottle functions
+  *
+  *    Can sleep, may be called under the atomic_read_lock mutex but
+  *    this is not guaranteed.
+  */
+ static void check_unthrottle(struct tty_struct *tty)
+ {
+       if (tty->count)
+               tty_unthrottle(tty);
+ }
+ /**
+  *    reset_buffer_flags      -       reset buffer state
+  *    @tty: terminal to reset
+  *
+  *    Reset the read buffer counters, clear the flags,
+  *    and make sure the driver is unthrottled. Called
+  *    from n_tty_open() and n_tty_flush_buffer().
+  *
+  *    Locking: tty_read_lock for read fields.
+  */
+ static void reset_buffer_flags(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&tty->read_lock, flags);
+       tty->read_head = tty->read_tail = tty->read_cnt = 0;
+       spin_unlock_irqrestore(&tty->read_lock, flags);
+       mutex_lock(&tty->echo_lock);
+       tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+       mutex_unlock(&tty->echo_lock);
+       tty->canon_head = tty->canon_data = tty->erasing = 0;
+       memset(&tty->read_flags, 0, sizeof tty->read_flags);
+       n_tty_set_room(tty);
+       check_unthrottle(tty);
+ }
+ /**
+  *    n_tty_flush_buffer      -       clean input queue
+  *    @tty:   terminal device
+  *
+  *    Flush the input buffer. Called when the line discipline is
+  *    being closed, when the tty layer wants the buffer flushed (eg
+  *    at hangup) or when the N_TTY line discipline internally has to
+  *    clean the pending queue (for example some signals).
+  *
+  *    Locking: ctrl_lock, read_lock.
+  */
+ static void n_tty_flush_buffer(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       /* clear everything and unthrottle the driver */
+       reset_buffer_flags(tty);
+       if (!tty->link)
+               return;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       if (tty->link->packet) {
+               tty->ctrl_status |= TIOCPKT_FLUSHREAD;
+               wake_up_interruptible(&tty->link->read_wait);
+       }
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ }
+ /**
+  *    n_tty_chars_in_buffer   -       report available bytes
+  *    @tty: tty device
+  *
+  *    Report the number of characters buffered to be delivered to user
+  *    at this instant in time.
+  *
+  *    Locking: read_lock
+  */
+ static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       ssize_t n = 0;
+       spin_lock_irqsave(&tty->read_lock, flags);
+       if (!tty->icanon) {
+               n = tty->read_cnt;
+       } else if (tty->canon_data) {
+               n = (tty->canon_head > tty->read_tail) ?
+                       tty->canon_head - tty->read_tail :
+                       tty->canon_head + (N_TTY_BUF_SIZE - tty->read_tail);
+       }
+       spin_unlock_irqrestore(&tty->read_lock, flags);
+       return n;
+ }
+ /**
+  *    is_utf8_continuation    -       utf8 multibyte check
+  *    @c: byte to check
+  *
+  *    Returns true if the utf8 character 'c' is a multibyte continuation
+  *    character. We use this to correctly compute the on screen size
+  *    of the character when printing
+  */
+ static inline int is_utf8_continuation(unsigned char c)
+ {
+       return (c & 0xc0) == 0x80;
+ }
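+ /*
+  * Example: continuation bytes have the bit pattern 10xxxxxx, so for
+  * U+00E9 (e-acute), encoded as 0xc3 0xa9, only the leading 0xc3 byte
+  * counts toward the on-screen width.
+  */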
+ /**
+  *    is_continuation         -       multibyte check
+  *    @c: byte to check
+  *
+  *    Returns true if the utf8 character 'c' is a multibyte continuation
+  *    character and the terminal is in unicode mode.
+  */
+ static inline int is_continuation(unsigned char c, struct tty_struct *tty)
+ {
+       return I_IUTF8(tty) && is_utf8_continuation(c);
+ }
+ /**
+  *    do_output_char                  -       output one character
+  *    @c: character (or partial unicode symbol)
+  *    @tty: terminal device
+  *    @space: space available in tty driver write buffer
+  *
+  *    This is a helper function that handles one output character
+  *    (including special characters like TAB, CR, LF, etc.),
+  *    doing OPOST processing and putting the results in the
+  *    tty driver's write buffer.
+  *
+  *    Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY
+  *    and NLDLY.  They simply aren't relevant in the world today.
+  *    If you ever need them, add them here.
+  *
+  *    Returns the number of bytes of buffer space used or -1 if
+  *    no space left.
+  *
+  *    Locking: should be called under the output_lock to protect
+  *             the column state and space left in the buffer
+  */
+ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
+ {
+       int     spaces;
+       if (!space)
+               return -1;
+       switch (c) {
+       case '\n':
+               if (O_ONLRET(tty))
+                       tty->column = 0;
+               if (O_ONLCR(tty)) {
+                       if (space < 2)
+                               return -1;
+                       tty->canon_column = tty->column = 0;
+                       tty->ops->write(tty, "\r\n", 2);
+                       return 2;
+               }
+               tty->canon_column = tty->column;
+               break;
+       case '\r':
+               if (O_ONOCR(tty) && tty->column == 0)
+                       return 0;
+               if (O_OCRNL(tty)) {
+                       c = '\n';
+                       if (O_ONLRET(tty))
+                               tty->canon_column = tty->column = 0;
+                       break;
+               }
+               tty->canon_column = tty->column = 0;
+               break;
+       case '\t':
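+               /* distance to the next tab stop (tab stops every 8 columns) */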
+               spaces = 8 - (tty->column & 7);
+               if (O_TABDLY(tty) == XTABS) {
+                       if (space < spaces)
+                               return -1;
+                       tty->column += spaces;
+                       tty->ops->write(tty, "        ", spaces);
+                       return spaces;
+               }
+               tty->column += spaces;
+               break;
+       case '\b':
+               if (tty->column > 0)
+                       tty->column--;
+               break;
+       default:
+               if (!iscntrl(c)) {
+                       if (O_OLCUC(tty))
+                               c = toupper(c);
+                       if (!is_continuation(c, tty))
+                               tty->column++;
+               }
+               break;
+       }
+       tty_put_char(tty, c);
+       return 1;
+ }
+ /**
+  *    process_output                  -       output post processor
+  *    @c: character (or partial unicode symbol)
+  *    @tty: terminal device
+  *
+  *    Output one character with OPOST processing.
+  *    Returns -1 when the output device is full and the character
+  *    must be retried.
+  *
+  *    Locking: output_lock to protect column state and space left
+  *             (also, this is called from n_tty_write under the
+  *              tty layer write lock)
+  */
+ static int process_output(unsigned char c, struct tty_struct *tty)
+ {
+       int     space, retval;
+       mutex_lock(&tty->output_lock);
+       space = tty_write_room(tty);
+       retval = do_output_char(c, tty, space);
+       mutex_unlock(&tty->output_lock);
+       if (retval < 0)
+               return -1;
+       else
+               return 0;
+ }
+ /**
+  *    process_output_block            -       block post processor
+  *    @tty: terminal device
+  *    @buf: character buffer
+  *    @nr: number of bytes to output
+  *
+  *    Output a block of characters with OPOST processing.
+  *    Returns the number of characters output.
+  *
+  *    This path is used to speed up block console writes, among other
+  *    things when processing blocks of output data. It handles only
+  *    the simple cases normally found and helps to generate blocks of
+  *    symbols for the console driver and thus improve performance.
+  *
+  *    Locking: output_lock to protect column state and space left
+  *             (also, this is called from n_tty_write under the
+  *              tty layer write lock)
+  */
+ static ssize_t process_output_block(struct tty_struct *tty,
+                                   const unsigned char *buf, unsigned int nr)
+ {
+       int     space;
+       int     i;
+       const unsigned char *cp;
+       mutex_lock(&tty->output_lock);
+       space = tty_write_room(tty);
+       if (!space) {
+               mutex_unlock(&tty->output_lock);
+               return 0;
+       }
+       if (nr > space)
+               nr = space;
+       for (i = 0, cp = buf; i < nr; i++, cp++) {
+               unsigned char c = *cp;
+               switch (c) {
+               case '\n':
+                       if (O_ONLRET(tty))
+                               tty->column = 0;
+                       if (O_ONLCR(tty))
+                               goto break_out;
+                       tty->canon_column = tty->column;
+                       break;
+               case '\r':
+                       if (O_ONOCR(tty) && tty->column == 0)
+                               goto break_out;
+                       if (O_OCRNL(tty))
+                               goto break_out;
+                       tty->canon_column = tty->column = 0;
+                       break;
+               case '\t':
+                       goto break_out;
+               case '\b':
+                       if (tty->column > 0)
+                               tty->column--;
+                       break;
+               default:
+                       if (!iscntrl(c)) {
+                               if (O_OLCUC(tty))
+                                       goto break_out;
+                               if (!is_continuation(c, tty))
+                                       tty->column++;
+                       }
+                       break;
+               }
+       }
+ break_out:
+       i = tty->ops->write(tty, buf, i);
+       mutex_unlock(&tty->output_lock);
+       return i;
+ }
+ /**
+  *    process_echoes  -       write pending echo characters
+  *    @tty: terminal device
+  *
+  *    Write previously buffered echo (and other ldisc-generated)
+  *    characters to the tty.
+  *
+  *    Characters generated by the ldisc (including echoes) need to
+  *    be buffered because the driver's write buffer can fill during
+  *    heavy program output.  Echoing straight to the driver will
+  *    often fail under these conditions, causing lost characters and
+  *    resulting mismatches of ldisc state information.
+  *
+  *    Since the ldisc state must represent the characters actually sent
+  *    to the driver at the time of the write, operations like certain
+  *    changes in column state are also saved in the buffer and executed
+  *    here.
+  *
+  *    A circular fifo buffer is used so that the most recent characters
+  *    are prioritized.  Also, when control characters are echoed with a
+  *    prefixed "^", the pair is treated atomically and thus not separated.
+  *
+  *    Locking: output_lock to protect column state and space left,
+  *             echo_lock to protect the echo buffer
+  */
+ static void process_echoes(struct tty_struct *tty)
+ {
+       int     space, nr;
+       unsigned char c;
+       unsigned char *cp, *buf_end;
+       if (!tty->echo_cnt)
+               return;
+       mutex_lock(&tty->output_lock);
+       mutex_lock(&tty->echo_lock);
+       space = tty_write_room(tty);
+       buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
+       cp = tty->echo_buf + tty->echo_pos;
+       nr = tty->echo_cnt;
+       while (nr > 0) {
+               c = *cp;
+               if (c == ECHO_OP_START) {
+                       unsigned char op;
+                       unsigned char *opp;
+                       int no_space_left = 0;
+                       /*
+                        * If the buffer byte is the start of a multi-byte
+                        * operation, get the next byte, which is either the
+                        * op code or a control character value.
+                        */
+                       opp = cp + 1;
+                       if (opp == buf_end)
+                               opp -= N_TTY_BUF_SIZE;
+                       op = *opp;
+                       switch (op) {
+                               unsigned int num_chars, num_bs;
+                       case ECHO_OP_ERASE_TAB:
+                               if (++opp == buf_end)
+                                       opp -= N_TTY_BUF_SIZE;
+                               num_chars = *opp;
+                               /*
+                                * Determine how many columns to go back
+                                * in order to erase the tab.
+                                * This depends on the number of columns
+                                * used by other characters within the tab
+                                * area.  If this (modulo 8) count is from
+                                * the start of input rather than from a
+                                * previous tab, we offset by canon column.
+                                * Otherwise, tab spacing is normal.
+                                */
+                               if (!(num_chars & 0x80))
+                                       num_chars += tty->canon_column;
+                               num_bs = 8 - (num_chars & 7);
+                               if (num_bs > space) {
+                                       no_space_left = 1;
+                                       break;
+                               }
+                               space -= num_bs;
+                               while (num_bs--) {
+                                       tty_put_char(tty, '\b');
+                                       if (tty->column > 0)
+                                               tty->column--;
+                               }
+                               cp += 3;
+                               nr -= 3;
+                               break;
+                       case ECHO_OP_SET_CANON_COL:
+                               tty->canon_column = tty->column;
+                               cp += 2;
+                               nr -= 2;
+                               break;
+                       case ECHO_OP_MOVE_BACK_COL:
+                               if (tty->column > 0)
+                                       tty->column--;
+                               cp += 2;
+                               nr -= 2;
+                               break;
+                       case ECHO_OP_START:
+                               /* This is an escaped echo op start code */
+                               if (!space) {
+                                       no_space_left = 1;
+                                       break;
+                               }
+                               tty_put_char(tty, ECHO_OP_START);
+                               tty->column++;
+                               space--;
+                               cp += 2;
+                               nr -= 2;
+                               break;
+                       default:
+                               /*
+                                * If the op is not a special byte code,
+                                * it is a ctrl char tagged to be echoed
+                                * as "^X" (where X is the letter
+                                * representing the control char).
+                                * Note that we must ensure there is
+                                * enough space for the whole ctrl pair.
+                                *
+                                */
+                               if (space < 2) {
+                                       no_space_left = 1;
+                                       break;
+                               }
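+                               /* e.g. op == 0x03 (^C): 0x03 ^ 0100 == 'C' */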
+                               tty_put_char(tty, '^');
+                               tty_put_char(tty, op ^ 0100);
+                               tty->column += 2;
+                               space -= 2;
+                               cp += 2;
+                               nr -= 2;
+                       }
+                       if (no_space_left)
+                               break;
+               } else {
+                       if (O_OPOST(tty) &&
+                           !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
+                               int retval = do_output_char(c, tty, space);
+                               if (retval < 0)
+                                       break;
+                               space -= retval;
+                       } else {
+                               if (!space)
+                                       break;
+                               tty_put_char(tty, c);
+                               space -= 1;
+                       }
+                       cp += 1;
+                       nr -= 1;
+               }
+               /* When end of circular buffer reached, wrap around */
+               if (cp >= buf_end)
+                       cp -= N_TTY_BUF_SIZE;
+       }
+       if (nr == 0) {
+               tty->echo_pos = 0;
+               tty->echo_cnt = 0;
+               tty->echo_overrun = 0;
+       } else {
+               int num_processed = tty->echo_cnt - nr;
+               tty->echo_pos += num_processed;
+               tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+               tty->echo_cnt = nr;
+               if (num_processed > 0)
+                       tty->echo_overrun = 0;
+       }
+       mutex_unlock(&tty->echo_lock);
+       mutex_unlock(&tty->output_lock);
+       if (tty->ops->flush_chars)
+               tty->ops->flush_chars(tty);
+ }
+ /**
+  *    add_echo_byte   -       add a byte to the echo buffer
+  *    @c: unicode byte to echo
+  *    @tty: terminal device
+  *
+  *    Add a character or operation byte to the echo buffer.
+  *
+  *    Should be called under the echo lock to protect the echo buffer.
+  */
+ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
+ {
+       int     new_byte_pos;
+       if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+               /* Circular buffer is already at capacity */
+               new_byte_pos = tty->echo_pos;
+               /*
+                * Since the buffer start position needs to be advanced,
+                * be sure to step by a whole operation byte group.
+                */
+               if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) {
+                       if (tty->echo_buf[(tty->echo_pos + 1) &
+                                         (N_TTY_BUF_SIZE - 1)] ==
+                                               ECHO_OP_ERASE_TAB) {
+                               tty->echo_pos += 3;
+                               tty->echo_cnt -= 2;
+                       } else {
+                               tty->echo_pos += 2;
+                               tty->echo_cnt -= 1;
+                       }
+               } else {
+                       tty->echo_pos++;
+               }
+               tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+               tty->echo_overrun = 1;
+       } else {
+               new_byte_pos = tty->echo_pos + tty->echo_cnt;
+               new_byte_pos &= N_TTY_BUF_SIZE - 1;
+               tty->echo_cnt++;
+       }
+       tty->echo_buf[new_byte_pos] = c;
+ }
+ /**
+  *    echo_move_back_col      -       add operation to move back a column
+  *    @tty: terminal device
+  *
+  *    Add an operation to the echo buffer to move back one column.
+  *
+  *    Locking: echo_lock to protect the echo buffer
+  */
+ static void echo_move_back_col(struct tty_struct *tty)
+ {
+       mutex_lock(&tty->echo_lock);
+       add_echo_byte(ECHO_OP_START, tty);
+       add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
+       mutex_unlock(&tty->echo_lock);
+ }
+ /**
+  *    echo_set_canon_col      -       add operation to set the canon column
+  *    @tty: terminal device
+  *
+  *    Add an operation to the echo buffer to set the canon column
+  *    to the current column.
+  *
+  *    Locking: echo_lock to protect the echo buffer
+  */
+ static void echo_set_canon_col(struct tty_struct *tty)
+ {
+       mutex_lock(&tty->echo_lock);
+       add_echo_byte(ECHO_OP_START, tty);
+       add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
+       mutex_unlock(&tty->echo_lock);
+ }
+ /**
+  *    echo_erase_tab  -       add operation to erase a tab
+  *    @num_chars: number of character columns already used
+  *    @after_tab: true if num_chars starts after a previous tab
+  *    @tty: terminal device
+  *
+  *    Add an operation to the echo buffer to erase a tab.
+  *
+  *    Called by the eraser function, which knows how many character
+  *    columns have been used since either a previous tab or the start
+  *    of input.  This information will be used later, along with
+  *    canon column (if applicable), to go back the correct number
+  *    of columns.
+  *
+  *    Locking: echo_lock to protect the echo buffer
+  */
+ static void echo_erase_tab(unsigned int num_chars, int after_tab,
+                          struct tty_struct *tty)
+ {
+       mutex_lock(&tty->echo_lock);
+       add_echo_byte(ECHO_OP_START, tty);
+       add_echo_byte(ECHO_OP_ERASE_TAB, tty);
+       /* We only need to know this modulo 8 (tab spacing) */
+       num_chars &= 7;
+       /* Set the high bit as a flag if num_chars is after a previous tab */
+       if (after_tab)
+               num_chars |= 0x80;
+       add_echo_byte(num_chars, tty);
+       mutex_unlock(&tty->echo_lock);
+ }
+ /**
+  *    echo_char_raw   -       echo a character raw
+  *    @c: unicode byte to echo
+  *    @tty: terminal device
+  *
+  *    Echo user input back onto the screen. This must be called only when
+  *    L_ECHO(tty) is true. Called from the driver receive_buf path.
+  *
+  *    This variant does not treat control characters specially.
+  *
+  *    Locking: echo_lock to protect the echo buffer
+  */
+ static void echo_char_raw(unsigned char c, struct tty_struct *tty)
+ {
+       mutex_lock(&tty->echo_lock);
+       if (c == ECHO_OP_START) {
+               add_echo_byte(ECHO_OP_START, tty);
+               add_echo_byte(ECHO_OP_START, tty);
+       } else {
+               add_echo_byte(c, tty);
+       }
+       mutex_unlock(&tty->echo_lock);
+ }
+ /**
+  *    echo_char       -       echo a character
+  *    @c: unicode byte to echo
+  *    @tty: terminal device
+  *
+  *    Echo user input back onto the screen. This must be called only when
+  *    L_ECHO(tty) is true. Called from the driver receive_buf path.
+  *
+  *    This variant tags control characters to be echoed as "^X"
+  *    (where X is the letter representing the control char).
+  *
+  *    Locking: echo_lock to protect the echo buffer
+  */
+ static void echo_char(unsigned char c, struct tty_struct *tty)
+ {
+       mutex_lock(&tty->echo_lock);
+       if (c == ECHO_OP_START) {
+               add_echo_byte(ECHO_OP_START, tty);
+               add_echo_byte(ECHO_OP_START, tty);
+       } else {
+               if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t')
+                       add_echo_byte(ECHO_OP_START, tty);
+               add_echo_byte(c, tty);
+       }
+       mutex_unlock(&tty->echo_lock);
+ }
+ /**
+  *    finish_erasing          -       complete erase
+  *    @tty: tty doing the erase
+  */
+ static inline void finish_erasing(struct tty_struct *tty)
+ {
+       if (tty->erasing) {
+               echo_char_raw('/', tty);
+               tty->erasing = 0;
+       }
+ }
+ /**
+  *    eraser          -       handle erase function
+  *    @c: character input
+  *    @tty: terminal device
+  *
+  *    Perform erase and necessary output when an erase character is
+  *    present in the stream from the driver layer. Handles the complexities
+  *    of UTF-8 multibyte symbols.
+  *
+  *    Locking: read_lock for tty buffers
+  */
+ static void eraser(unsigned char c, struct tty_struct *tty)
+ {
+       enum { ERASE, WERASE, KILL } kill_type;
+       int head, seen_alnums, cnt;
+       unsigned long flags;
+       /* FIXME: locking needed ? */
+       if (tty->read_head == tty->canon_head) {
+               /* process_output('\a', tty); */ /* what do you think? */
+               return;
+       }
+       if (c == ERASE_CHAR(tty))
+               kill_type = ERASE;
+       else if (c == WERASE_CHAR(tty))
+               kill_type = WERASE;
+       else {
+               if (!L_ECHO(tty)) {
+                       spin_lock_irqsave(&tty->read_lock, flags);
+                       tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+                                         (N_TTY_BUF_SIZE - 1));
+                       tty->read_head = tty->canon_head;
+                       spin_unlock_irqrestore(&tty->read_lock, flags);
+                       return;
+               }
+               if (!L_ECHOK(tty) || !L_ECHOKE(tty) || !L_ECHOE(tty)) {
+                       spin_lock_irqsave(&tty->read_lock, flags);
+                       tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+                                         (N_TTY_BUF_SIZE - 1));
+                       tty->read_head = tty->canon_head;
+                       spin_unlock_irqrestore(&tty->read_lock, flags);
+                       finish_erasing(tty);
+                       echo_char(KILL_CHAR(tty), tty);
+                       /* Add a newline if ECHOK is on and ECHOKE is off. */
+                       if (L_ECHOK(tty))
+                               echo_char_raw('\n', tty);
+                       return;
+               }
+               kill_type = KILL;
+       }
+       seen_alnums = 0;
+       /* FIXME: Locking ?? */
+       while (tty->read_head != tty->canon_head) {
+               head = tty->read_head;
+               /* erase a single possibly multibyte character */
+               do {
+                       head = (head - 1) & (N_TTY_BUF_SIZE-1);
+                       c = tty->read_buf[head];
+               } while (is_continuation(c, tty) && head != tty->canon_head);
+               /* do not partially erase */
+               if (is_continuation(c, tty))
+                       break;
+               if (kill_type == WERASE) {
+                       /* Equivalent to BSD's ALTWERASE. */
+                       if (isalnum(c) || c == '_')
+                               seen_alnums++;
+                       else if (seen_alnums)
+                               break;
+               }
+               cnt = (tty->read_head - head) & (N_TTY_BUF_SIZE-1);
+               spin_lock_irqsave(&tty->read_lock, flags);
+               tty->read_head = head;
+               tty->read_cnt -= cnt;
+               spin_unlock_irqrestore(&tty->read_lock, flags);
+               if (L_ECHO(tty)) {
+                       if (L_ECHOPRT(tty)) {
+                               if (!tty->erasing) {
+                                       echo_char_raw('\\', tty);
+                                       tty->erasing = 1;
+                               }
+                               /* if cnt > 1, output a multi-byte character */
+                               echo_char(c, tty);
+                               while (--cnt > 0) {
+                                       head = (head+1) & (N_TTY_BUF_SIZE-1);
+                                       echo_char_raw(tty->read_buf[head], tty);
+                                       echo_move_back_col(tty);
+                               }
+                       } else if (kill_type == ERASE && !L_ECHOE(tty)) {
+                               echo_char(ERASE_CHAR(tty), tty);
+                       } else if (c == '\t') {
+                               unsigned int num_chars = 0;
+                               int after_tab = 0;
+                               unsigned long tail = tty->read_head;
+                               /*
+                                * Count the columns used for characters
+                                * since the start of input or after a
+                                * previous tab.
+                                * This info is used to go back the correct
+                                * number of columns.
+                                */
+                               while (tail != tty->canon_head) {
+                                       tail = (tail-1) & (N_TTY_BUF_SIZE-1);
+                                       c = tty->read_buf[tail];
+                                       if (c == '\t') {
+                                               after_tab = 1;
+                                               break;
+                                       } else if (iscntrl(c)) {
+                                               if (L_ECHOCTL(tty))
+                                                       num_chars += 2;
+                                       } else if (!is_continuation(c, tty)) {
+                                               num_chars++;
+                                       }
+                               }
+                               echo_erase_tab(num_chars, after_tab, tty);
+                       } else {
+                               if (iscntrl(c) && L_ECHOCTL(tty)) {
+                                       echo_char_raw('\b', tty);
+                                       echo_char_raw(' ', tty);
+                                       echo_char_raw('\b', tty);
+                               }
+                               if (!iscntrl(c) || L_ECHOCTL(tty)) {
+                                       echo_char_raw('\b', tty);
+                                       echo_char_raw(' ', tty);
+                                       echo_char_raw('\b', tty);
+                               }
+                       }
+               }
+               if (kill_type == ERASE)
+                       break;
+       }
+       if (tty->read_head == tty->canon_head && L_ECHO(tty))
+               finish_erasing(tty);
+ }
+ /**
+  *    isig            -       handle the ISIG option
+  *    @sig: signal
+  *    @tty: terminal
+  *    @flush: force flush
+  *
+  *    Called when a signal is being sent due to terminal input. This
+  *    may cause terminal flushing to take place according to the termios
+  *    settings and character used. Called from the driver receive_buf
+  *    path so serialized.
+  *
+  *    Locking: ctrl_lock, read_lock (both via flush buffer)
+  */
+ static inline void isig(int sig, struct tty_struct *tty, int flush)
+ {
+       if (tty->pgrp)
+               kill_pgrp(tty->pgrp, sig, 1);
+       if (flush || !L_NOFLSH(tty)) {
+               n_tty_flush_buffer(tty);
+               tty_driver_flush_buffer(tty);
+       }
+ }
+ /**
+  *    n_tty_receive_break     -       handle break
+  *    @tty: terminal
+  *
+  *    An RS232 break event has been hit in the incoming bitstream. This
+  *    can cause a variety of events depending upon the termios settings.
+  *
+  *    Called from the receive_buf path so single threaded.
+  */
+ static inline void n_tty_receive_break(struct tty_struct *tty)
+ {
+       if (I_IGNBRK(tty))
+               return;
+       if (I_BRKINT(tty)) {
+               isig(SIGINT, tty, 1);
+               return;
+       }
+       if (I_PARMRK(tty)) {
+               put_tty_queue('\377', tty);
+               put_tty_queue('\0', tty);
+       }
+       put_tty_queue('\0', tty);
+       wake_up_interruptible(&tty->read_wait);
+ }
+ /**
+  *    n_tty_receive_overrun   -       handle overrun reporting
+  *    @tty: terminal
+  *
+  *    Data arrived faster than we could process it. While the tty
+  *    driver has flagged this, the bits that were missed are gone
+  *    forever.
+  *
+  *    Called from the receive_buf path so single threaded. Does not
+  *    need locking as num_overrun and overrun_time are function
+  *    private.
+  */
+ static inline void n_tty_receive_overrun(struct tty_struct *tty)
+ {
+       char buf[64];
+       tty->num_overrun++;
+       if (time_before(tty->overrun_time, jiffies - HZ) ||
+                       time_after(tty->overrun_time, jiffies)) {
+               printk(KERN_WARNING "%s: %d input overrun(s)\n",
+                       tty_name(tty, buf),
+                       tty->num_overrun);
+               tty->overrun_time = jiffies;
+               tty->num_overrun = 0;
+       }
+ }
+ /**
+  *    n_tty_receive_parity_error      -       error notifier
+  *    @tty: terminal device
+  *    @c: character
+  *
+  *    Process a parity error and queue the right data to indicate
+  *    the error case if necessary. Locking as per n_tty_receive_buf.
+  */
+ static inline void n_tty_receive_parity_error(struct tty_struct *tty,
+                                             unsigned char c)
+ {
+       if (I_IGNPAR(tty))
+               return;
+       if (I_PARMRK(tty)) {
+               put_tty_queue('\377', tty);
+               put_tty_queue('\0', tty);
+               put_tty_queue(c, tty);
+       } else  if (I_INPCK(tty))
+               put_tty_queue('\0', tty);
+       else
+               put_tty_queue(c, tty);
+       wake_up_interruptible(&tty->read_wait);
+ }
+ /**
+  *    n_tty_receive_char      -       perform processing
+  *    @tty: terminal device
+  *    @c: character
+  *
+  *    Process an individual character of input received from the driver.
+  *    This is serialized with respect to itself by the rules for the
+  *    driver above.
+  */
+ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
+ {
+       unsigned long flags;
+       int parmrk;
+       if (tty->raw) {
+               put_tty_queue(c, tty);
+               return;
+       }
+       if (I_ISTRIP(tty))
+               c &= 0x7f;
+       if (I_IUCLC(tty) && L_IEXTEN(tty))
+               c = tolower(c);
+       if (L_EXTPROC(tty)) {
+               put_tty_queue(c, tty);
+               return;
+       }
+       if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
+           I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
+           c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) {
+               start_tty(tty);
+               process_echoes(tty);
+       }
+       if (tty->closing) {
+               if (I_IXON(tty)) {
+                       if (c == START_CHAR(tty)) {
+                               start_tty(tty);
+                               process_echoes(tty);
+                       } else if (c == STOP_CHAR(tty))
+                               stop_tty(tty);
+               }
+               return;
+       }
+       /*
+        * If the previous character was LNEXT, or we know that this
+        * character is not one of the characters that we'll have to
+        * handle specially, do shortcut processing to speed things
+        * up.
+        */
+       if (!test_bit(c, tty->process_char_map) || tty->lnext) {
+               tty->lnext = 0;
+               parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+               if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+                       /* beep if no space */
+                       if (L_ECHO(tty))
+                               process_output('\a', tty);
+                       return;
+               }
+               if (L_ECHO(tty)) {
+                       finish_erasing(tty);
+                       /* Record the column of first canon char. */
+                       if (tty->canon_head == tty->read_head)
+                               echo_set_canon_col(tty);
+                       echo_char(c, tty);
+                       process_echoes(tty);
+               }
+               if (parmrk)
+                       put_tty_queue(c, tty);
+               put_tty_queue(c, tty);
+               return;
+       }
+       if (I_IXON(tty)) {
+               if (c == START_CHAR(tty)) {
+                       start_tty(tty);
+                       process_echoes(tty);
+                       return;
+               }
+               if (c == STOP_CHAR(tty)) {
+                       stop_tty(tty);
+                       return;
+               }
+       }
+       if (L_ISIG(tty)) {
+               int signal;
+               signal = SIGINT;
+               if (c == INTR_CHAR(tty))
+                       goto send_signal;
+               signal = SIGQUIT;
+               if (c == QUIT_CHAR(tty))
+                       goto send_signal;
+               signal = SIGTSTP;
+               if (c == SUSP_CHAR(tty)) {
+ send_signal:
+                       /*
+                        * Note that we do not use isig() here because we want
+                        * the order to be:
+                        * 1) flush, 2) echo, 3) signal
+                        */
+                       if (!L_NOFLSH(tty)) {
+                               n_tty_flush_buffer(tty);
+                               tty_driver_flush_buffer(tty);
+                       }
+                       if (I_IXON(tty))
+                               start_tty(tty);
+                       if (L_ECHO(tty)) {
+                               echo_char(c, tty);
+                               process_echoes(tty);
+                       }
+                       if (tty->pgrp)
+                               kill_pgrp(tty->pgrp, signal, 1);
+                       return;
+               }
+       }
+       if (c == '\r') {
+               if (I_IGNCR(tty))
+                       return;
+               if (I_ICRNL(tty))
+                       c = '\n';
+       } else if (c == '\n' && I_INLCR(tty))
+               c = '\r';
+       if (tty->icanon) {
+               if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
+                   (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
+                       eraser(c, tty);
+                       process_echoes(tty);
+                       return;
+               }
+               if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
+                       tty->lnext = 1;
+                       if (L_ECHO(tty)) {
+                               finish_erasing(tty);
+                               if (L_ECHOCTL(tty)) {
+                                       echo_char_raw('^', tty);
+                                       echo_char_raw('\b', tty);
+                                       process_echoes(tty);
+                               }
+                       }
+                       return;
+               }
+               if (c == REPRINT_CHAR(tty) && L_ECHO(tty) &&
+                   L_IEXTEN(tty)) {
+                       unsigned long tail = tty->canon_head;
+                       finish_erasing(tty);
+                       echo_char(c, tty);
+                       echo_char_raw('\n', tty);
+                       while (tail != tty->read_head) {
+                               echo_char(tty->read_buf[tail], tty);
+                               tail = (tail+1) & (N_TTY_BUF_SIZE-1);
+                       }
+                       process_echoes(tty);
+                       return;
+               }
+               if (c == '\n') {
+                       if (tty->read_cnt >= N_TTY_BUF_SIZE) {
+                               if (L_ECHO(tty))
+                                       process_output('\a', tty);
+                               return;
+                       }
+                       if (L_ECHO(tty) || L_ECHONL(tty)) {
+                               echo_char_raw('\n', tty);
+                               process_echoes(tty);
+                       }
+                       goto handle_newline;
+               }
+               if (c == EOF_CHAR(tty)) {
+                       if (tty->read_cnt >= N_TTY_BUF_SIZE)
+                               return;
+                       if (tty->canon_head != tty->read_head)
+                               set_bit(TTY_PUSH, &tty->flags);
+                       c = __DISABLED_CHAR;
+                       goto handle_newline;
+               }
+               if ((c == EOL_CHAR(tty)) ||
+                   (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) {
+                       parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
+                                ? 1 : 0;
+                       if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
+                               if (L_ECHO(tty))
+                                       process_output('\a', tty);
+                               return;
+                       }
+                       /*
+                        * XXX are EOL_CHAR and EOL2_CHAR echoed?!?
+                        */
+                       if (L_ECHO(tty)) {
+                               /* Record the column of first canon char. */
+                               if (tty->canon_head == tty->read_head)
+                                       echo_set_canon_col(tty);
+                               echo_char(c, tty);
+                               process_echoes(tty);
+                       }
+                       /*
+                        * XXX does PARMRK doubling happen for
+                        * EOL_CHAR and EOL2_CHAR?
+                        */
+                       if (parmrk)
+                               put_tty_queue(c, tty);
+ handle_newline:
+                       spin_lock_irqsave(&tty->read_lock, flags);
+                       set_bit(tty->read_head, tty->read_flags);
+                       put_tty_queue_nolock(c, tty);
+                       tty->canon_head = tty->read_head;
+                       tty->canon_data++;
+                       spin_unlock_irqrestore(&tty->read_lock, flags);
+                       kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+                       if (waitqueue_active(&tty->read_wait))
+                               wake_up_interruptible(&tty->read_wait);
+                       return;
+               }
+       }
+       parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+       if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+               /* beep if no space */
+               if (L_ECHO(tty))
+                       process_output('\a', tty);
+               return;
+       }
+       if (L_ECHO(tty)) {
+               finish_erasing(tty);
+               if (c == '\n')
+                       echo_char_raw('\n', tty);
+               else {
+                       /* Record the column of first canon char. */
+                       if (tty->canon_head == tty->read_head)
+                               echo_set_canon_col(tty);
+                       echo_char(c, tty);
+               }
+               process_echoes(tty);
+       }
+       if (parmrk)
+               put_tty_queue(c, tty);
+       put_tty_queue(c, tty);
+ }
+ /**
+  *    n_tty_write_wakeup      -       asynchronous I/O notifier
+  *    @tty: tty device
+  *
+  *    Required for the ptys, serial driver etc. since processes
+  *    that attach themselves to the master and rely on ASYNC
+  *    IO must be woken up
+  */
+ static void n_tty_write_wakeup(struct tty_struct *tty)
+ {
+       if (tty->fasync && test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags))
+               kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
+ }
+ /**
+  *    n_tty_receive_buf       -       data receive
+  *    @tty: terminal device
+  *    @cp: buffer
+  *    @fp: flag buffer
+  *    @count: characters
+  *
+  *    Called by the terminal driver when a block of characters has
+  *    been received. This function must be called from soft contexts
+  *    not from interrupt context. The driver is responsible for making
+  *    calls one at a time and in order (or using flush_to_ldisc)
+  */
+ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
+                             char *fp, int count)
+ {
+       const unsigned char *p;
+       char *f, flags = TTY_NORMAL;
+       int     i;
+       char    buf[64];
+       unsigned long cpuflags;
+       if (!tty->read_buf)
+               return;
+       if (tty->real_raw) {
+               spin_lock_irqsave(&tty->read_lock, cpuflags);
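+               /*
+                * Copy in at most two chunks: up to the physical end of
+                * the circular buffer, then from its start once the head
+                * index wraps.
+                */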
+               i = min(N_TTY_BUF_SIZE - tty->read_cnt,
+                       N_TTY_BUF_SIZE - tty->read_head);
+               i = min(count, i);
+               memcpy(tty->read_buf + tty->read_head, cp, i);
+               tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
+               tty->read_cnt += i;
+               cp += i;
+               count -= i;
+               i = min(N_TTY_BUF_SIZE - tty->read_cnt,
+                       N_TTY_BUF_SIZE - tty->read_head);
+               i = min(count, i);
+               memcpy(tty->read_buf + tty->read_head, cp, i);
+               tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
+               tty->read_cnt += i;
+               spin_unlock_irqrestore(&tty->read_lock, cpuflags);
+       } else {
+               for (i = count, p = cp, f = fp; i; i--, p++) {
+                       if (f)
+                               flags = *f++;
+                       switch (flags) {
+                       case TTY_NORMAL:
+                               n_tty_receive_char(tty, *p);
+                               break;
+                       case TTY_BREAK:
+                               n_tty_receive_break(tty);
+                               break;
+                       case TTY_PARITY:
+                       case TTY_FRAME:
+                               n_tty_receive_parity_error(tty, *p);
+                               break;
+                       case TTY_OVERRUN:
+                               n_tty_receive_overrun(tty);
+                               break;
+                       default:
+                               printk(KERN_ERR "%s: unknown flag %d\n",
+                                      tty_name(tty, buf), flags);
+                               break;
+                       }
+               }
+               if (tty->ops->flush_chars)
+                       tty->ops->flush_chars(tty);
+       }
+       n_tty_set_room(tty);
+       if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) ||
+               L_EXTPROC(tty)) {
+               kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+               if (waitqueue_active(&tty->read_wait))
+                       wake_up_interruptible(&tty->read_wait);
+       }
+       /*
+        * Check the remaining room for the input canonicalization
+        * mode.  We don't want to throttle the driver if we're in
+        * canonical mode and don't have a newline yet!
+        */
+       if (tty->receive_room < TTY_THRESHOLD_THROTTLE)
+               tty_throttle(tty);
+ }
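+ /**
+  *    is_ignored      -       signal disposition check
+  *    @sig: signal number
+  *
+  *    Returns true if @sig is currently blocked or ignored by the
+  *    calling task.
+  */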
+ int is_ignored(int sig)
+ {
+       return (sigismember(&current->blocked, sig) ||
+               current->sighand->action[sig-1].sa.sa_handler == SIG_IGN);
+ }
+ /**
+  *    n_tty_set_termios       -       termios data changed
+  *    @tty: terminal
+  *    @old: previous data
+  *
+  *    Called by the tty layer when the user changes termios flags so
+  *    that the line discipline can plan ahead. This function cannot sleep
+  *    and is protected from re-entry by the tty layer. The user is
+  *    guaranteed that this function will not be re-entered or in progress
+  *    when the ldisc is closed.
+  *
+  *    Locking: Caller holds tty->termios_mutex
+  */
+ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
+ {
+       int canon_change = 1;
+       BUG_ON(!tty);
+       if (old)
+               canon_change = (old->c_lflag ^ tty->termios->c_lflag) & ICANON;
+       if (canon_change) {
+               memset(&tty->read_flags, 0, sizeof tty->read_flags);
+               tty->canon_head = tty->read_tail;
+               tty->canon_data = 0;
+               tty->erasing = 0;
+       }
+       if (canon_change && !L_ICANON(tty) && tty->read_cnt)
+               wake_up_interruptible(&tty->read_wait);
+       tty->icanon = (L_ICANON(tty) != 0);
+       if (test_bit(TTY_HW_COOK_IN, &tty->flags)) {
+               tty->raw = 1;
+               tty->real_raw = 1;
+               n_tty_set_room(tty);
+               return;
+       }
+       if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) ||
+           I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) ||
+           I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) ||
+           I_PARMRK(tty)) {
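+               /* one bit for each possible byte value */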
+               memset(tty->process_char_map, 0, 256/8);
+               if (I_IGNCR(tty) || I_ICRNL(tty))
+                       set_bit('\r', tty->process_char_map);
+               if (I_INLCR(tty))
+                       set_bit('\n', tty->process_char_map);
+               if (L_ICANON(tty)) {
+                       set_bit(ERASE_CHAR(tty), tty->process_char_map);
+                       set_bit(KILL_CHAR(tty), tty->process_char_map);
+                       set_bit(EOF_CHAR(tty), tty->process_char_map);
+                       set_bit('\n', tty->process_char_map);
+                       set_bit(EOL_CHAR(tty), tty->process_char_map);
+                       if (L_IEXTEN(tty)) {
+                               set_bit(WERASE_CHAR(tty),
+                                       tty->process_char_map);
+                               set_bit(LNEXT_CHAR(tty),
+                                       tty->process_char_map);
+                               set_bit(EOL2_CHAR(tty),
+                                       tty->process_char_map);
+                               if (L_ECHO(tty))
+                                       set_bit(REPRINT_CHAR(tty),
+                                               tty->process_char_map);
+                       }
+               }
+               if (I_IXON(tty)) {
+                       set_bit(START_CHAR(tty), tty->process_char_map);
+                       set_bit(STOP_CHAR(tty), tty->process_char_map);
+               }
+               if (L_ISIG(tty)) {
+                       set_bit(INTR_CHAR(tty), tty->process_char_map);
+                       set_bit(QUIT_CHAR(tty), tty->process_char_map);
+                       set_bit(SUSP_CHAR(tty), tty->process_char_map);
+               }
+               clear_bit(__DISABLED_CHAR, tty->process_char_map);
+               tty->raw = 0;
+               tty->real_raw = 0;
+       } else {
+               tty->raw = 1;
+               if ((I_IGNBRK(tty) || (!I_BRKINT(tty) && !I_PARMRK(tty))) &&
+                   (I_IGNPAR(tty) || !I_INPCK(tty)) &&
+                   (tty->driver->flags & TTY_DRIVER_REAL_RAW))
+                       tty->real_raw = 1;
+               else
+                       tty->real_raw = 0;
+       }
+       n_tty_set_room(tty);
+       /* The termios change makes the tty ready for I/O */
+       wake_up_interruptible(&tty->write_wait);
+       wake_up_interruptible(&tty->read_wait);
+ }
+ /**
+  *    n_tty_close             -       close the ldisc for this tty
+  *    @tty: device
+  *
+  *    Called from the terminal layer when this line discipline is
+  *    being shut down, either because of a close or because of a
+  *    discipline change. The function will not be called while other
+  *    ldisc methods are in progress.
+  */
+ static void n_tty_close(struct tty_struct *tty)
+ {
+       n_tty_flush_buffer(tty);
+       if (tty->read_buf) {
+               kfree(tty->read_buf);
+               tty->read_buf = NULL;
+       }
+       if (tty->echo_buf) {
+               kfree(tty->echo_buf);
+               tty->echo_buf = NULL;
+       }
+ }
+ /**
+  *    n_tty_open              -       open an ldisc
+  *    @tty: terminal to open
+  *
+  *    Called when this line discipline is being attached to the
+  *    terminal device. Can sleep. Called serialized so that no
+  *    other events will occur in parallel. No further open will occur
+  *    until a close.
+  */
+ static int n_tty_open(struct tty_struct *tty)
+ {
+       if (!tty)
+               return -EINVAL;
+       /* These are ugly. Currently a malloc failure here can panic */
+       if (!tty->read_buf) {
+               tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+               if (!tty->read_buf)
+                       return -ENOMEM;
+       }
+       if (!tty->echo_buf) {
+               tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+               if (!tty->echo_buf)
+                       return -ENOMEM;
+       }
+       reset_buffer_flags(tty);
+       tty->column = 0;
+       n_tty_set_termios(tty, NULL);
+       tty->minimum_to_wake = 1;
+       tty->closing = 0;
+       return 0;
+ }
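+ /**
+  *    input_available_p       -       check for readable input
+  *    @tty: terminal
+  *    @amt: minimum character count
+  *
+  *    Push any pending driver input through to the line discipline and
+  *    report whether a read can be satisfied: in canonical mode (without
+  *    EXTPROC) a complete line must be pending; otherwise at least @amt
+  *    characters (minimum one) must be buffered.
+  */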
+ static inline int input_available_p(struct tty_struct *tty, int amt)
+ {
+       tty_flush_to_ldisc(tty);
+       if (tty->icanon && !L_EXTPROC(tty)) {
+               if (tty->canon_data)
+                       return 1;
+       } else if (tty->read_cnt >= (amt ? amt : 1))
+               return 1;
+       return 0;
+ }
+ /**
+  *    copy_from_read_buf      -       copy read data directly
+  *    @tty: terminal device
+  *    @b: user data
+  *    @nr: size of data
+  *
+  *    Helper function to speed up n_tty_read.  It is only called when
+  *    ICANON is off; it copies characters straight from the tty queue to
+  *    user space directly.  It can be profitably called twice; once to
+  *    drain the space from the tail pointer to the (physical) end of the
+  *    buffer, and once to drain the space from the (physical) beginning of
+  *    the buffer to head pointer.
+  *
+  *    Called under the tty->atomic_read_lock mutex
+  *
+  */
+ static int copy_from_read_buf(struct tty_struct *tty,
+                                     unsigned char __user **b,
+                                     size_t *nr)
+ {
+       int retval;
+       size_t n;
+       unsigned long flags;
+       retval = 0;
+       spin_lock_irqsave(&tty->read_lock, flags);
+       n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+       n = min(*nr, n);
+       spin_unlock_irqrestore(&tty->read_lock, flags);
+       if (n) {
+               retval = copy_to_user(*b, &tty->read_buf[tty->read_tail], n);
+               n -= retval;
+               tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n);
+               spin_lock_irqsave(&tty->read_lock, flags);
+               tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
+               tty->read_cnt -= n;
+               /* Turn single EOF into zero-length read */
+               if (L_EXTPROC(tty) && tty->icanon && n == 1) {
+                       if (!tty->read_cnt && (*b)[n-1] == EOF_CHAR(tty))
+                               n--;
+               }
+               spin_unlock_irqrestore(&tty->read_lock, flags);
+               *b += n;
+               *nr -= n;
+       }
+       return retval;
+ }
+ extern ssize_t redirected_tty_write(struct file *, const char __user *,
+                                                       size_t, loff_t *);
+ /**
+  *    job_control             -       check job control
+  *    @tty: tty
+  *    @file: file handle
+  *
+  *    Perform job control management checks on this file/tty descriptor
+  *    and if appropriate send any needed signals and return a negative
+  *    error code if action should be taken.
+  *
+  *    FIXME:
+  *    Locking: None - redirected write test is safe, testing
+  *    current->signal should possibly lock current->sighand
+  *    pgrp locking ?
+  */
+ static int job_control(struct tty_struct *tty, struct file *file)
+ {
+       /* Job control check -- must be done at start and after
+          every sleep (POSIX.1 7.1.1.4). */
+       /* NOTE: not yet done after every sleep pending a thorough
+          check of the logic of this change. -- jlc */
+       /* don't stop on /dev/console */
+       if (file->f_op->write != redirected_tty_write &&
+           current->signal->tty == tty) {
+               if (!tty->pgrp)
+                       printk(KERN_ERR "n_tty_read: no tty->pgrp!\n");
+               else if (task_pgrp(current) != tty->pgrp) {
+                       if (is_ignored(SIGTTIN) ||
+                           is_current_pgrp_orphaned())
+                               return -EIO;
+                       kill_pgrp(task_pgrp(current), SIGTTIN, 1);
+                       set_thread_flag(TIF_SIGPENDING);
+                       return -ERESTARTSYS;
+               }
+       }
+       return 0;
+ }
+ /**
+  *    n_tty_read              -       read function for tty
+  *    @tty: tty device
+  *    @file: file object
+  *    @buf: userspace buffer pointer
+  *    @nr: size of I/O
+  *
+  *    Perform reads for the line discipline. We are guaranteed that the
+  *    line discipline will not be closed under us but we may get multiple
+  *    parallel readers and must handle this ourselves. We may also get
+  *    a hangup. Always called in user context, may sleep.
+  *
+  *    This code must be sure never to sleep through a hangup.
+  */
+ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+                        unsigned char __user *buf, size_t nr)
+ {
+       unsigned char __user *b = buf;
+       DECLARE_WAITQUEUE(wait, current);
+       int c;
+       int minimum, time;
+       ssize_t retval = 0;
+       ssize_t size;
+       long timeout;
+       unsigned long flags;
+       int packet;
+ do_it_again:
+       BUG_ON(!tty->read_buf);
+       c = job_control(tty, file);
+       if (c < 0)
+               return c;
+       minimum = time = 0;
+       timeout = MAX_SCHEDULE_TIMEOUT;
+       if (!tty->icanon) {
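+               /* non-canonical read: VTIME is given in tenths of a second */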
+               time = (HZ / 10) * TIME_CHAR(tty);
+               minimum = MIN_CHAR(tty);
+               if (minimum) {
+                       if (time)
+                               tty->minimum_to_wake = 1;
+                       else if (!waitqueue_active(&tty->read_wait) ||
+                                (tty->minimum_to_wake > minimum))
+                               tty->minimum_to_wake = minimum;
+               } else {
+                       timeout = 0;
+                       if (time) {
+                               timeout = time;
+                               time = 0;
+                       }
+                       tty->minimum_to_wake = minimum = 1;
+               }
+       }
+       /*
+        *      Internal serialization of reads.
+        */
+       if (file->f_flags & O_NONBLOCK) {
+               if (!mutex_trylock(&tty->atomic_read_lock))
+                       return -EAGAIN;
+       } else {
+               if (mutex_lock_interruptible(&tty->atomic_read_lock))
+                       return -ERESTARTSYS;
+       }
+       packet = tty->packet;
+       add_wait_queue(&tty->read_wait, &wait);
+       while (nr) {
+               /* First test for status change. */
+               if (packet && tty->link->ctrl_status) {
+                       unsigned char cs;
+                       if (b != buf)
+                               break;
+                       spin_lock_irqsave(&tty->link->ctrl_lock, flags);
+                       cs = tty->link->ctrl_status;
+                       tty->link->ctrl_status = 0;
+                       spin_unlock_irqrestore(&tty->link->ctrl_lock, flags);
+                       if (tty_put_user(tty, cs, b++)) {
+                               retval = -EFAULT;
+                               b--;
+                               break;
+                       }
+                       nr--;
+                       break;
+               }
+               /* This statement must be first before checking for input
+                  so that any interrupt will set the state back to
+                  TASK_RUNNING. */
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (((minimum - (b - buf)) < tty->minimum_to_wake) &&
+                   ((minimum - (b - buf)) >= 1))
+                       tty->minimum_to_wake = (minimum - (b - buf));
+               if (!input_available_p(tty, 0)) {
++#ifdef CONFIG_BOOTSPLASH
++                      if (file->f_dentry->d_inode->i_rdev == MKDEV(TTY_MAJOR,0) ||
++                          file->f_dentry->d_inode->i_rdev == MKDEV(TTY_MAJOR,1) ||
++                          file->f_dentry->d_inode->i_rdev == MKDEV(TTYAUX_MAJOR,0) ||
++                          file->f_dentry->d_inode->i_rdev == MKDEV(TTYAUX_MAJOR,1)) {
++                              extern int splash_verbose(void);
++                              (void)splash_verbose();
++                      }
++#endif
+                       if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
+                               retval = -EIO;
+                               break;
+                       }
+                       if (tty_hung_up_p(file))
+                               break;
+                       if (!timeout)
+                               break;
+                       if (file->f_flags & O_NONBLOCK) {
+                               retval = -EAGAIN;
+                               break;
+                       }
+                       if (signal_pending(current)) {
+                               retval = -ERESTARTSYS;
+                               break;
+                       }
+                       /* FIXME: does n_tty_set_room need locking ? */
+                       n_tty_set_room(tty);
+                       timeout = schedule_timeout(timeout);
+                       continue;
+               }
+               __set_current_state(TASK_RUNNING);
+               /* Deal with packet mode. */
+               if (packet && b == buf) {
+                       if (tty_put_user(tty, TIOCPKT_DATA, b++)) {
+                               retval = -EFAULT;
+                               b--;
+                               break;
+                       }
+                       nr--;
+               }
+               if (tty->icanon && !L_EXTPROC(tty)) {
+                       /* N.B. avoid overrun if nr == 0 */
+                       while (nr && tty->read_cnt) {
+                               int eol;
+                               eol = test_and_clear_bit(tty->read_tail,
+                                               tty->read_flags);
+                               c = tty->read_buf[tty->read_tail];
+                               spin_lock_irqsave(&tty->read_lock, flags);
+                               tty->read_tail = ((tty->read_tail+1) &
+                                                 (N_TTY_BUF_SIZE-1));
+                               tty->read_cnt--;
+                               if (eol) {
+                                       /* this test should be redundant:
+                                        * we shouldn't be reading data if
+                                        * canon_data is 0
+                                        */
+                                       if (--tty->canon_data < 0)
+                                               tty->canon_data = 0;
+                               }
+                               spin_unlock_irqrestore(&tty->read_lock, flags);
+                               if (!eol || (c != __DISABLED_CHAR)) {
+                                       if (tty_put_user(tty, c, b++)) {
+                                               retval = -EFAULT;
+                                               b--;
+                                               break;
+                                       }
+                                       nr--;
+                               }
+                               if (eol) {
+                                       tty_audit_push(tty);
+                                       break;
+                               }
+                       }
+                       if (retval)
+                               break;
+               } else {
+                       int uncopied;
+                       /* The copy function takes the read lock and handles
+                          locking internally for this case */
+                       uncopied = copy_from_read_buf(tty, &b, &nr);
+                       uncopied += copy_from_read_buf(tty, &b, &nr);
+                       if (uncopied) {
+                               retval = -EFAULT;
+                               break;
+                       }
+               }
+               /* If there is enough space in the read buffer now, let the
+                * low-level driver know. We use n_tty_chars_in_buffer() to
+                * check the buffer, as it now knows about canonical mode.
+                * Otherwise, if the driver is throttled and the line is
+                * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode,
+                * we won't get any more characters.
+                */
+               if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) {
+                       n_tty_set_room(tty);
+                       check_unthrottle(tty);
+               }
+               if (b - buf >= minimum)
+                       break;
+               if (time)
+                       timeout = time;
+       }
+       mutex_unlock(&tty->atomic_read_lock);
+       remove_wait_queue(&tty->read_wait, &wait);
+       if (!waitqueue_active(&tty->read_wait))
+               tty->minimum_to_wake = minimum;
+       __set_current_state(TASK_RUNNING);
+       size = b - buf;
+       if (size) {
+               retval = size;
+               if (nr)
+                       clear_bit(TTY_PUSH, &tty->flags);
+       } else if (test_and_clear_bit(TTY_PUSH, &tty->flags))
+                goto do_it_again;
+       n_tty_set_room(tty);
+       return retval;
+ }
+ /**
+  *    n_tty_write             -       write function for tty
+  *    @tty: tty device
+  *    @file: file object
+  *    @buf: userspace buffer pointer
+  *    @nr: size of I/O
+  *
+  *    Write function of the terminal device.  This is serialized with
+  *    respect to other write callers but not to termios changes, reads
+  *    and other such events.  Since the receive code will echo characters,
+  *    thus calling driver write methods, the output_lock is used in
+  *    the output processing functions called here as well as in the
+  *    echo processing function to protect the column state and space
+  *    left in the buffer.
+  *
+  *    This code must be sure never to sleep through a hangup.
+  *
+  *    Locking: output_lock to protect column state and space left
+  *             (note that the process_output*() functions take this
+  *              lock themselves)
+  */
+ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
+                          const unsigned char *buf, size_t nr)
+ {
+       const unsigned char *b = buf;
+       DECLARE_WAITQUEUE(wait, current);
+       int c;
+       ssize_t retval = 0;
+       /* Job control check -- must be done at start (POSIX.1 7.1.1.4). */
+       if (L_TOSTOP(tty) && file->f_op->write != redirected_tty_write) {
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+       }
+       /* Write out any echoed characters that are still pending */
+       process_echoes(tty);
+       add_wait_queue(&tty->write_wait, &wait);
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (signal_pending(current)) {
+                       retval = -ERESTARTSYS;
+                       break;
+               }
+               if (tty_hung_up_p(file) || (tty->link && !tty->link->count)) {
+                       retval = -EIO;
+                       break;
+               }
+               if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
+                       while (nr > 0) {
+                               ssize_t num = process_output_block(tty, b, nr);
+                               if (num < 0) {
+                                       if (num == -EAGAIN)
+                                               break;
+                                       retval = num;
+                                       goto break_out;
+                               }
+                               b += num;
+                               nr -= num;
+                               if (nr == 0)
+                                       break;
+                               c = *b;
+                               if (process_output(c, tty) < 0)
+                                       break;
+                               b++; nr--;
+                       }
+                       if (tty->ops->flush_chars)
+                               tty->ops->flush_chars(tty);
+               } else {
+                       while (nr > 0) {
+                               c = tty->ops->write(tty, b, nr);
+                               if (c < 0) {
+                                       retval = c;
+                                       goto break_out;
+                               }
+                               if (!c)
+                                       break;
+                               b += c;
+                               nr -= c;
+                       }
+               }
+               if (!nr)
+                       break;
+               if (file->f_flags & O_NONBLOCK) {
+                       retval = -EAGAIN;
+                       break;
+               }
+               schedule();
+       }
+ break_out:
+       __set_current_state(TASK_RUNNING);
+       remove_wait_queue(&tty->write_wait, &wait);
+       if (b - buf != nr && tty->fasync)
+               set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+       return (b - buf) ? b - buf : retval;
+ }
+ /**
+  *    n_tty_poll              -       poll method for N_TTY
+  *    @tty: terminal device
+  *    @file: file accessing it
+  *    @wait: poll table
+  *
+  *    Called when the line discipline is asked to poll() for data or
+  *    for special events. This code is not serialized with respect to
+  *    other events save open/close.
+  *
+  *    This code must be sure never to sleep through a hangup.
+  *    Called without the kernel lock held - fine
+  */
+ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
+                                                       poll_table *wait)
+ {
+       unsigned int mask = 0;
+       poll_wait(file, &tty->read_wait, wait);
+       poll_wait(file, &tty->write_wait, wait);
+       if (input_available_p(tty, TIME_CHAR(tty) ? 0 : MIN_CHAR(tty)))
+               mask |= POLLIN | POLLRDNORM;
+       if (tty->packet && tty->link->ctrl_status)
+               mask |= POLLPRI | POLLIN | POLLRDNORM;
+       if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
+               mask |= POLLHUP;
+       if (tty_hung_up_p(file))
+               mask |= POLLHUP;
+       if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) {
+               if (MIN_CHAR(tty) && !TIME_CHAR(tty))
+                       tty->minimum_to_wake = MIN_CHAR(tty);
+               else
+                       tty->minimum_to_wake = 1;
+       }
+       if (tty->ops->write && !tty_is_writelocked(tty) &&
+                       tty_chars_in_buffer(tty) < WAKEUP_CHARS &&
+                       tty_write_room(tty) > 0)
+               mask |= POLLOUT | POLLWRNORM;
+       return mask;
+ }
+ static unsigned long inq_canon(struct tty_struct *tty)
+ {
+       int nr, head, tail;
+       if (!tty->canon_data)
+               return 0;
+       head = tty->canon_head;
+       tail = tty->read_tail;
+       nr = (head - tail) & (N_TTY_BUF_SIZE-1);
+       /* Skip EOF-chars.. */
+       while (head != tail) {
+               if (test_bit(tail, tty->read_flags) &&
+                   tty->read_buf[tail] == __DISABLED_CHAR)
+                       nr--;
+               tail = (tail+1) & (N_TTY_BUF_SIZE-1);
+       }
+       return nr;
+ }
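+ /*
+  * Worked example for the masked distance above (illustrative, not part
+  * of this patch): with N_TTY_BUF_SIZE = 4096, head = 2 and tail = 4094
+  * the buffer has wrapped, and (2 - 4094) & 4095 = 4 characters are
+  * pending before the EOF-skipping pass.
+  */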
+ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
+                      unsigned int cmd, unsigned long arg)
+ {
+       int retval;
+       switch (cmd) {
+       case TIOCOUTQ:
+               return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
+       case TIOCINQ:
+               /* FIXME: Locking */
+               retval = tty->read_cnt;
+               if (L_ICANON(tty))
+                       retval = inq_canon(tty);
+               return put_user(retval, (unsigned int __user *) arg);
+       default:
+               return n_tty_ioctl_helper(tty, file, cmd, arg);
+       }
+ }
+ struct tty_ldisc_ops tty_ldisc_N_TTY = {
+       .magic           = TTY_LDISC_MAGIC,
+       .name            = "n_tty",
+       .open            = n_tty_open,
+       .close           = n_tty_close,
+       .flush_buffer    = n_tty_flush_buffer,
+       .chars_in_buffer = n_tty_chars_in_buffer,
+       .read            = n_tty_read,
+       .write           = n_tty_write,
+       .ioctl           = n_tty_ioctl,
+       .set_termios     = n_tty_set_termios,
+       .poll            = n_tty_poll,
+       .receive_buf     = n_tty_receive_buf,
+       .write_wakeup    = n_tty_write_wakeup
+ };
+ /**
+  *    n_tty_inherit_ops       -       inherit N_TTY methods
+  *    @ops: struct tty_ldisc_ops where to save N_TTY methods
+  *
+  *    Used by a generic struct tty_ldisc_ops to easily inherit N_TTY
+  *    methods.
+  */
+ void n_tty_inherit_ops(struct tty_ldisc_ops *ops)
+ {
+       *ops = tty_ldisc_N_TTY;
+       ops->owner = NULL;
+       ops->refcount = ops->flags = 0;
+ }
+ EXPORT_SYMBOL_GPL(n_tty_inherit_ops);
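+ /*
+  * Usage sketch (illustrative, not part of this patch): a derived line
+  * discipline inherits the N_TTY methods and overrides only what it
+  * needs.  The "n_example" name and the N_MOUSE slot are hypothetical
+  * placeholders.
+  *
+  *    static struct tty_ldisc_ops example_ldisc_ops;
+  *
+  *    static int __init example_init(void)
+  *    {
+  *            n_tty_inherit_ops(&example_ldisc_ops);
+  *            example_ldisc_ops.owner = THIS_MODULE;
+  *            example_ldisc_ops.name  = "n_example";
+  *            return tty_register_ldisc(N_MOUSE, &example_ldisc_ops);
+  *    }
+  */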
index 0000000,c05c5af..0c3a30a
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,3263 +1,3278 @@@
+ /*
+  *  linux/drivers/char/tty_io.c
+  *
+  *  Copyright (C) 1991, 1992  Linus Torvalds
+  */
+ /*
+  * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+  * or rs-channels. It also implements echoing, cooked mode etc.
+  *
+  * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+  *
+  * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+  * tty_struct and tty_queue structures.  Previously there was an array
+  * of 256 tty_struct's which was statically allocated, and the
+  * tty_queue structures were allocated at boot time.  Both are now
+  * dynamically allocated only when the tty is open.
+  *
+  * Also restructured routines so that there is more of a separation
+  * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+  * the low-level tty routines (serial.c, pty.c, console.c).  This
+  * makes for cleaner and more compact code.  -TYT, 9/17/92
+  *
+  * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+  * which can be dynamically activated and de-activated by the line
+  * discipline handling modules (like SLIP).
+  *
+  * NOTE: pay no attention to the line discipline code (yet); its
+  * interface is still subject to change in this version...
+  * -- TYT, 1/31/92
+  *
+  * Added functionality to the OPOST tty handling.  No delays, but all
+  * other bits should be there.
+  *    -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+  *
+  * Rewrote canonical mode and added more termios flags.
+  *    -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+  *
+  * Reorganized FASYNC support so mouse code can share it.
+  *    -- ctm@ardi.com, 9Sep95
+  *
+  * New TIOCLINUX variants added.
+  *    -- mj@k332.feld.cvut.cz, 19-Nov-95
+  *
+  * Restrict vt switching via ioctl()
+  *      -- grif@cs.ucr.edu, 5-Dec-95
+  *
+  * Move console and virtual terminal code to more appropriate files,
+  * implement CONFIG_VT and generalize console device interface.
+  *    -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+  *
+  * Rewrote tty_init_dev and tty_release_dev to eliminate races.
+  *    -- Bill Hawes <whawes@star.net>, June 97
+  *
+  * Added devfs support.
+  *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+  *
+  * Added support for a Unix98-style ptmx device.
+  *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+  *
+  * Reduced memory usage for older ARM systems
+  *      -- Russell King <rmk@arm.linux.org.uk>
+  *
+  * Move do_SAK() into process context.  Less stack use in devfs functions.
+  * alloc_tty_struct() always uses kmalloc()
+  *                     -- Andrew Morton <andrewm@uow.edu.au> 17Mar01
+  */
+ #include <linux/types.h>
+ #include <linux/major.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/fcntl.h>
+ #include <linux/sched.h>
+ #include <linux/interrupt.h>
+ #include <linux/tty.h>
+ #include <linux/tty_driver.h>
+ #include <linux/tty_flip.h>
+ #include <linux/devpts_fs.h>
+ #include <linux/file.h>
+ #include <linux/fdtable.h>
+ #include <linux/console.h>
+ #include <linux/timer.h>
+ #include <linux/ctype.h>
+ #include <linux/kd.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/proc_fs.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+ #include <linux/device.h>
+ #include <linux/wait.h>
+ #include <linux/bitops.h>
+ #include <linux/delay.h>
+ #include <linux/seq_file.h>
+ #include <linux/serial.h>
+ #include <linux/uaccess.h>
+ #include <asm/system.h>
+ #include <linux/kbd_kern.h>
+ #include <linux/vt_kern.h>
+ #include <linux/selection.h>
+ #include <linux/kmod.h>
+ #include <linux/nsproxy.h>
+ #undef TTY_DEBUG_HANGUP
+ #define TTY_PARANOIA_CHECK 1
+ #define CHECK_TTY_COUNT 1
+ struct ktermios tty_std_termios = {   /* for the benefit of tty drivers  */
+       .c_iflag = ICRNL | IXON,
+       .c_oflag = OPOST | ONLCR,
+       .c_cflag = B38400 | CS8 | CREAD | HUPCL,
+       .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+                  ECHOCTL | ECHOKE | IEXTEN,
+       .c_cc = INIT_C_CC,
+       .c_ispeed = 38400,
+       .c_ospeed = 38400
+ };
+ EXPORT_SYMBOL(tty_std_termios);
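+ /*
+  * Typical driver-side use (sketch, not part of this patch): copy
+  * tty_std_termios into init_termios and override the line settings;
+  * the 9600 8N1 values are illustrative.
+  *
+  *    driver->init_termios = tty_std_termios;
+  *    driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+  *    driver->init_termios.c_ispeed = 9600;
+  *    driver->init_termios.c_ospeed = 9600;
+  */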
+ /* This list gets poked at by procfs and various bits of boot up code. This
+    could do with some rationalisation such as pulling the tty proc function
+    into this file */
+ LIST_HEAD(tty_drivers);                       /* linked list of tty drivers */
+ /* Mutex to protect creating and releasing a tty. This is shared with
+    vt.c for deeply disgusting hack reasons */
+ DEFINE_MUTEX(tty_mutex);
+ EXPORT_SYMBOL(tty_mutex);
+ /* Spinlock to protect the tty->tty_files list */
+ DEFINE_SPINLOCK(tty_files_lock);
+ static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
+ static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
+ ssize_t redirected_tty_write(struct file *, const char __user *,
+                                                       size_t, loff_t *);
+ static unsigned int tty_poll(struct file *, poll_table *);
+ static int tty_open(struct inode *, struct file *);
+ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+ #ifdef CONFIG_COMPAT
+ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
+                               unsigned long arg);
+ #else
+ #define tty_compat_ioctl NULL
+ #endif
+ static int __tty_fasync(int fd, struct file *filp, int on);
+ static int tty_fasync(int fd, struct file *filp, int on);
+ static void release_tty(struct tty_struct *tty, int idx);
+ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+ /**
+  *    alloc_tty_struct        -       allocate a tty object
+  *
+  *    Return a new empty tty structure. The data fields have not
+  *    been initialized in any way, but the structure has been zeroed
+  *
+  *    Locking: none
+  */
+ struct tty_struct *alloc_tty_struct(void)
+ {
+       return kzalloc(sizeof(struct tty_struct), GFP_KERNEL);
+ }
+ /**
+  *    free_tty_struct         -       free a disused tty
+  *    @tty: tty struct to free
+  *
+  *    Free the write buffers, tty queue and tty memory itself.
+  *
+  *    Locking: none. Must be called after tty is definitely unused
+  */
+ void free_tty_struct(struct tty_struct *tty)
+ {
+       if (tty->dev)
+               put_device(tty->dev);
+       kfree(tty->write_buf);
+       tty_buffer_free_all(tty);
+       kfree(tty);
+ }
+ static inline struct tty_struct *file_tty(struct file *file)
+ {
+       return ((struct tty_file_private *)file->private_data)->tty;
+ }
+ /* Associate a new file with the tty structure */
+ int tty_add_file(struct tty_struct *tty, struct file *file)
+ {
+       struct tty_file_private *priv;
+       priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       priv->tty = tty;
+       priv->file = file;
+       file->private_data = priv;
+       spin_lock(&tty_files_lock);
+       list_add(&priv->list, &tty->tty_files);
+       spin_unlock(&tty_files_lock);
+       return 0;
+ }
+ /* Delete file from its tty */
+ void tty_del_file(struct file *file)
+ {
+       struct tty_file_private *priv = file->private_data;
+       spin_lock(&tty_files_lock);
+       list_del(&priv->list);
+       spin_unlock(&tty_files_lock);
+       file->private_data = NULL;
+       kfree(priv);
+ }
+ #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
+ /**
+  *    tty_name        -       return tty naming
+  *    @tty: tty structure
+  *    @buf: buffer for output
+  *
+  *    Convert a tty structure into a name. The name reflects the kernel
+  *    naming policy and if udev is in use may not reflect user space
+  *
+  *    Locking: none
+  */
+ char *tty_name(struct tty_struct *tty, char *buf)
+ {
+       if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+               strcpy(buf, "NULL tty");
+       else
+               strcpy(buf, tty->name);
+       return buf;
+ }
+ EXPORT_SYMBOL(tty_name);
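+ /*
+  * Example call (sketch): callers pass a 64-byte scratch buffer, as the
+  * TTY_DEBUG_HANGUP paths in this file do.
+  *
+  *    char buf[64];
+  *    printk(KERN_DEBUG "%s: opened\n", tty_name(tty, buf));
+  */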
+ int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+                             const char *routine)
+ {
+ #ifdef TTY_PARANOIA_CHECK
+       if (!tty) {
+               printk(KERN_WARNING
+                       "null TTY for (%d:%d) in %s\n",
+                       imajor(inode), iminor(inode), routine);
+               return 1;
+       }
+       if (tty->magic != TTY_MAGIC) {
+               printk(KERN_WARNING
+                       "bad magic number for tty struct (%d:%d) in %s\n",
+                       imajor(inode), iminor(inode), routine);
+               return 1;
+       }
+ #endif
+       return 0;
+ }
+ static int check_tty_count(struct tty_struct *tty, const char *routine)
+ {
+ #ifdef CHECK_TTY_COUNT
+       struct list_head *p;
+       int count = 0;
+       spin_lock(&tty_files_lock);
+       list_for_each(p, &tty->tty_files) {
+               count++;
+       }
+       spin_unlock(&tty_files_lock);
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_SLAVE &&
+           tty->link && tty->link->count)
+               count++;
+       if (tty->count != count) {
+               printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+                                   "!= #fd's(%d) in %s\n",
+                      tty->name, tty->count, count, routine);
+               return count;
+       }
+ #endif
+       return 0;
+ }
+ /**
+  *    get_tty_driver          -       find device of a tty
+  *    @device: device identifier
+  *    @index: returns the index of the tty
+  *
+  *    This routine returns a tty driver structure, given a device number
+  *    and also passes back the index number.
+  *
+  *    Locking: caller must hold tty_mutex
+  */
+ static struct tty_driver *get_tty_driver(dev_t device, int *index)
+ {
+       struct tty_driver *p;
+       list_for_each_entry(p, &tty_drivers, tty_drivers) {
+               dev_t base = MKDEV(p->major, p->minor_start);
+               if (device < base || device >= base + p->num)
+                       continue;
+               *index = device - base;
+               return tty_driver_kref_get(p);
+       }
+       return NULL;
+ }
+ #ifdef CONFIG_CONSOLE_POLL
+ /**
+  *    tty_find_polling_driver -       find device of a polled tty
+  *    @name: name string to match
+  *    @line: pointer to resulting tty line nr
+  *
+  *    This routine returns a tty driver structure, given a name
+  *    and the condition that the tty driver is capable of polled
+  *    operation.
+  */
+ struct tty_driver *tty_find_polling_driver(char *name, int *line)
+ {
+       struct tty_driver *p, *res = NULL;
+       int tty_line = 0;
+       int len;
+       char *str, *stp;
+       for (str = name; *str; str++)
+               if ((*str >= '0' && *str <= '9') || *str == ',')
+                       break;
+       if (!*str)
+               return NULL;
+       len = str - name;
+       tty_line = simple_strtoul(str, &str, 10);
+       mutex_lock(&tty_mutex);
+       /* Search through the tty devices to look for a match */
+       list_for_each_entry(p, &tty_drivers, tty_drivers) {
+               if (strncmp(name, p->name, len) != 0)
+                       continue;
+               stp = str;
+               if (*stp == ',')
+                       stp++;
+               if (*stp == '\0')
+                       stp = NULL;
+               if (tty_line >= 0 && tty_line < p->num && p->ops &&
+                   p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
+                       res = tty_driver_kref_get(p);
+                       *line = tty_line;
+                       break;
+               }
+       }
+       mutex_unlock(&tty_mutex);
+       return res;
+ }
+ EXPORT_SYMBOL_GPL(tty_find_polling_driver);
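+ /*
+  * Example (sketch, not part of this patch): a polled-console user such
+  * as kgdboc resolves "ttyS0,115200" into a driver and line number:
+  *
+  *    int line;
+  *    struct tty_driver *drv = tty_find_polling_driver("ttyS0,115200",
+  *                                                     &line);
+  */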
+ #endif
+ /**
+  *    tty_check_change        -       check for POSIX terminal changes
+  *    @tty: tty to check
+  *
+  *    If we try to write to, or set the state of, a terminal and we're
+  *    not in the foreground, send a SIGTTOU.  If the signal is blocked or
+  *    ignored, go ahead and perform the operation.  (POSIX 7.2)
+  *
+  *    Locking: ctrl_lock
+  */
+ int tty_check_change(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       int ret = 0;
+       if (current->signal->tty != tty)
+               return 0;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       if (!tty->pgrp) {
+               printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n");
+               goto out_unlock;
+       }
+       if (task_pgrp(current) == tty->pgrp)
+               goto out_unlock;
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       if (is_ignored(SIGTTOU))
+               goto out;
+       if (is_current_pgrp_orphaned()) {
+               ret = -EIO;
+               goto out;
+       }
+       kill_pgrp(task_pgrp(current), SIGTTOU, 1);
+       set_thread_flag(TIF_SIGPENDING);
+       ret = -ERESTARTSYS;
+ out:
+       return ret;
+ out_unlock:
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       return ret;
+ }
+ EXPORT_SYMBOL(tty_check_change);
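+ /*
+  * Usage sketch (illustrative): every path that changes terminal state
+  * makes this check first and bails out on failure, as n_tty_write()
+  * does for TOSTOP:
+  *
+  *    retval = tty_check_change(tty);
+  *    if (retval)
+  *            return retval;
+  */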
+ static ssize_t hung_up_tty_read(struct file *file, char __user *buf,
+                               size_t count, loff_t *ppos)
+ {
+       return 0;
+ }
+ static ssize_t hung_up_tty_write(struct file *file, const char __user *buf,
+                                size_t count, loff_t *ppos)
+ {
+       return -EIO;
+ }
+ /* No kernel lock held - none needed ;) */
+ static unsigned int hung_up_tty_poll(struct file *filp, poll_table *wait)
+ {
+       return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+ }
+ static long hung_up_tty_ioctl(struct file *file, unsigned int cmd,
+               unsigned long arg)
+ {
+       return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+ }
+ static long hung_up_tty_compat_ioctl(struct file *file,
+                                    unsigned int cmd, unsigned long arg)
+ {
+       return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+ }
+ static const struct file_operations tty_fops = {
+       .llseek         = no_llseek,
+       .read           = tty_read,
+       .write          = tty_write,
+       .poll           = tty_poll,
+       .unlocked_ioctl = tty_ioctl,
+       .compat_ioctl   = tty_compat_ioctl,
+       .open           = tty_open,
+       .release        = tty_release,
+       .fasync         = tty_fasync,
+ };
+ static const struct file_operations console_fops = {
+       .llseek         = no_llseek,
+       .read           = tty_read,
+       .write          = redirected_tty_write,
+       .poll           = tty_poll,
+       .unlocked_ioctl = tty_ioctl,
+       .compat_ioctl   = tty_compat_ioctl,
+       .open           = tty_open,
+       .release        = tty_release,
+       .fasync         = tty_fasync,
+ };
+ static const struct file_operations hung_up_tty_fops = {
+       .llseek         = no_llseek,
+       .read           = hung_up_tty_read,
+       .write          = hung_up_tty_write,
+       .poll           = hung_up_tty_poll,
+       .unlocked_ioctl = hung_up_tty_ioctl,
+       .compat_ioctl   = hung_up_tty_compat_ioctl,
+       .release        = tty_release,
+ };
+ static DEFINE_SPINLOCK(redirect_lock);
+ static struct file *redirect;
+ /**
+  *    tty_wakeup      -       request more data
+  *    @tty: terminal
+  *
+  *    Internal and external helper for wakeups of tty. This function
+  *    informs the line discipline if present that the driver is ready
+  *    to receive more output data.
+  */
+ void tty_wakeup(struct tty_struct *tty)
+ {
+       struct tty_ldisc *ld;
+       if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
+               ld = tty_ldisc_ref(tty);
+               if (ld) {
+                       if (ld->ops->write_wakeup)
+                               ld->ops->write_wakeup(tty);
+                       tty_ldisc_deref(ld);
+               }
+       }
+       wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ }
+ EXPORT_SYMBOL_GPL(tty_wakeup);
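+ /*
+  * Driver-side sketch (illustrative only): a serial driver calls this
+  * from its transmit path once room frees up, so a write-blocked line
+  * discipline is rewoken.  The fifo_has_room() helper is hypothetical.
+  *
+  *    if (fifo_has_room(port))
+  *            tty_wakeup(port->tty);
+  */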
+ /**
+  *    __tty_hangup            -       actual handler for hangup events
+  *    @work: tty device
+  *
+  *    This can be called by the "eventd" kernel thread.  That is process
+  *    synchronous but doesn't hold any locks, so we need to make sure we
+  *    have the appropriate locks for what we're doing.
+  *
+  *    The hangup event clears any pending redirections onto the hung up
+  *    device. It ensures future writes will error and it does the needed
+  *    line discipline hangup and signal delivery. The tty object itself
+  *    remains intact.
+  *
+  *    Locking:
+  *            BTM
+  *              redirect lock for undoing redirection
+  *              file list lock for manipulating list of ttys
+  *              tty_ldisc_lock from called functions
+  *              termios_mutex resetting termios data
+  *              tasklist_lock to walk task list for hangup event
+  *                ->siglock to protect ->signal/->sighand
+  */
+ void __tty_hangup(struct tty_struct *tty)
+ {
+       struct file *cons_filp = NULL;
+       struct file *filp, *f = NULL;
+       struct task_struct *p;
+       struct tty_file_private *priv;
+       int    closecount = 0, n;
+       unsigned long flags;
+       int refs = 0;
+       if (!tty)
+               return;
+       spin_lock(&redirect_lock);
+       if (redirect && file_tty(redirect) == tty) {
+               f = redirect;
+               redirect = NULL;
+       }
+       spin_unlock(&redirect_lock);
+       tty_lock();
+       /* inuse_filps is protected by the single tty lock;
+          this really needs to change if we want to flush the
+          workqueue with the lock held */
+       check_tty_count(tty, "tty_hangup");
+       spin_lock(&tty_files_lock);
+       /* This breaks for file handles being sent over AF_UNIX sockets ? */
+       list_for_each_entry(priv, &tty->tty_files, list) {
+               filp = priv->file;
+               if (filp->f_op->write == redirected_tty_write)
+                       cons_filp = filp;
+               if (filp->f_op->write != tty_write)
+                       continue;
+               closecount++;
+               __tty_fasync(-1, filp, 0);      /* can't block */
+               filp->f_op = &hung_up_tty_fops;
+       }
+       spin_unlock(&tty_files_lock);
+       tty_ldisc_hangup(tty);
+       read_lock(&tasklist_lock);
+       if (tty->session) {
+               do_each_pid_task(tty->session, PIDTYPE_SID, p) {
+                       spin_lock_irq(&p->sighand->siglock);
+                       if (p->signal->tty == tty) {
+                               p->signal->tty = NULL;
+                               /* We defer the dereferences outside of
+                                  the tasklist lock */
+                               refs++;
+                       }
+                       if (!p->signal->leader) {
+                               spin_unlock_irq(&p->sighand->siglock);
+                               continue;
+                       }
+                       __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+                       __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+                       put_pid(p->signal->tty_old_pgrp);  /* A noop */
+                       spin_lock_irqsave(&tty->ctrl_lock, flags);
+                       if (tty->pgrp)
+                               p->signal->tty_old_pgrp = get_pid(tty->pgrp);
+                       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+                       spin_unlock_irq(&p->sighand->siglock);
+               } while_each_pid_task(tty->session, PIDTYPE_SID, p);
+       }
+       read_unlock(&tasklist_lock);
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       clear_bit(TTY_THROTTLED, &tty->flags);
+       clear_bit(TTY_PUSH, &tty->flags);
+       clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+       put_pid(tty->session);
+       put_pid(tty->pgrp);
+       tty->session = NULL;
+       tty->pgrp = NULL;
+       tty->ctrl_status = 0;
+       set_bit(TTY_HUPPED, &tty->flags);
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       /* Account for the p->signal references we killed */
+       while (refs--)
+               tty_kref_put(tty);
+       /*
+        * If one of the devices matches a console pointer, we
+        * cannot just call hangup() because that will cause
+        * tty->count and state->count to go out of sync.
+        * So we just call close() the right number of times.
+        */
+       if (cons_filp) {
+               if (tty->ops->close)
+                       for (n = 0; n < closecount; n++)
+                               tty->ops->close(tty, cons_filp);
+       } else if (tty->ops->hangup)
+               (tty->ops->hangup)(tty);
+       /*
+        * We don't want to have driver/ldisc interactions beyond
+        * the ones we did here. The driver layer expects no
+        * calls after ->hangup() from the ldisc side. However we
+        * can't yet guarantee all that.
+        */
+       set_bit(TTY_HUPPED, &tty->flags);
+       tty_ldisc_enable(tty);
+       tty_unlock();
+       if (f)
+               fput(f);
+ }
+ static void do_tty_hangup(struct work_struct *work)
+ {
+       struct tty_struct *tty =
+               container_of(work, struct tty_struct, hangup_work);
+       __tty_hangup(tty);
+ }
+ /**
+  *    tty_hangup              -       trigger a hangup event
+  *    @tty: tty to hangup
+  *
+  *    A carrier loss (virtual or otherwise) has occurred on this line;
+  *    schedule a hangup sequence to run after this event.
+  */
+ void tty_hangup(struct tty_struct *tty)
+ {
+ #ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+       printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+ #endif
+       schedule_work(&tty->hangup_work);
+ }
+ EXPORT_SYMBOL(tty_hangup);
+ /**
+  *    tty_vhangup             -       process vhangup
+  *    @tty: tty to hangup
+  *
+  *    The user has asked via system call for the terminal to be hung up.
+  *    We do this synchronously so that when the syscall returns the process
+  *    is complete. That guarantee is necessary for security reasons.
+  */
+ void tty_vhangup(struct tty_struct *tty)
+ {
+ #ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+       printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+ #endif
+       __tty_hangup(tty);
+ }
+ EXPORT_SYMBOL(tty_vhangup);
+ /**
+  *    tty_vhangup_self        -       process vhangup for own ctty
+  *
+  *    Perform a vhangup on the current controlling tty
+  */
+ void tty_vhangup_self(void)
+ {
+       struct tty_struct *tty;
+       tty = get_current_tty();
+       if (tty) {
+               tty_vhangup(tty);
+               tty_kref_put(tty);
+       }
+ }
+ /**
+  *    tty_hung_up_p           -       was tty hung up
+  *    @filp: file pointer of tty
+  *
+  *    Return true if the tty has been subject to a vhangup or a carrier
+  *    loss
+  */
+ int tty_hung_up_p(struct file *filp)
+ {
+       return (filp->f_op == &hung_up_tty_fops);
+ }
+ EXPORT_SYMBOL(tty_hung_up_p);
+ static void session_clear_tty(struct pid *session)
+ {
+       struct task_struct *p;
+       do_each_pid_task(session, PIDTYPE_SID, p) {
+               proc_clear_tty(p);
+       } while_each_pid_task(session, PIDTYPE_SID, p);
+ }
+ /**
+  *    disassociate_ctty       -       disconnect controlling tty
+  *    @on_exit: true if exiting, so the session needs to be "hung up"
+  *
+  *    This function is typically called only by the session leader, when
+  *    it wants to disassociate itself from its controlling tty.
+  *
+  *    It performs the following functions:
+  *    (1)  Sends a SIGHUP and SIGCONT to the foreground process group
+  *    (2)  Clears the tty from being controlling the session
+  *    (3)  Clears the controlling tty for all processes in the
+  *            session group.
+  *
+  *    The argument on_exit is set to 1 if called when a process is
+  *    exiting; it is 0 if called by the ioctl TIOCNOTTY.
+  *
+  *    Locking:
+  *            BTM is taken for hysterical raisins, and held when
+  *              called from no_tty().
+  *              tty_mutex is taken to protect tty
+  *              ->siglock is taken to protect ->signal/->sighand
+  *              tasklist_lock is taken to walk process list for sessions
+  *                ->siglock is taken to protect ->signal/->sighand
+  */
+ void disassociate_ctty(int on_exit)
+ {
+       struct tty_struct *tty;
+       struct pid *tty_pgrp = NULL;
+       if (!current->signal->leader)
+               return;
+       tty = get_current_tty();
+       if (tty) {
+               tty_pgrp = get_pid(tty->pgrp);
+               if (on_exit) {
+                       if (tty->driver->type != TTY_DRIVER_TYPE_PTY)
+                               tty_vhangup(tty);
+               }
+               tty_kref_put(tty);
+       } else if (on_exit) {
+               struct pid *old_pgrp;
+               spin_lock_irq(&current->sighand->siglock);
+               old_pgrp = current->signal->tty_old_pgrp;
+               current->signal->tty_old_pgrp = NULL;
+               spin_unlock_irq(&current->sighand->siglock);
+               if (old_pgrp) {
+                       kill_pgrp(old_pgrp, SIGHUP, on_exit);
+                       kill_pgrp(old_pgrp, SIGCONT, on_exit);
+                       put_pid(old_pgrp);
+               }
+               return;
+       }
+       if (tty_pgrp) {
+               kill_pgrp(tty_pgrp, SIGHUP, on_exit);
+               if (!on_exit)
+                       kill_pgrp(tty_pgrp, SIGCONT, on_exit);
+               put_pid(tty_pgrp);
+       }
+       spin_lock_irq(&current->sighand->siglock);
+       put_pid(current->signal->tty_old_pgrp);
+       current->signal->tty_old_pgrp = NULL;
+       spin_unlock_irq(&current->sighand->siglock);
+       tty = get_current_tty();
+       if (tty) {
+               unsigned long flags;
+               spin_lock_irqsave(&tty->ctrl_lock, flags);
+               put_pid(tty->session);
+               put_pid(tty->pgrp);
+               tty->session = NULL;
+               tty->pgrp = NULL;
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               tty_kref_put(tty);
+       } else {
+ #ifdef TTY_DEBUG_HANGUP
+               printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+                      " = NULL", tty);
+ #endif
+       }
+       /* Now clear signal->tty under the lock */
+       read_lock(&tasklist_lock);
+       session_clear_tty(task_session(current));
+       read_unlock(&tasklist_lock);
+ }
+ /**
+  *
+  *    no_tty  - Ensure the current process does not have a controlling tty
+  */
+ void no_tty(void)
+ {
+       struct task_struct *tsk = current;
+       tty_lock();
+       disassociate_ctty(0);
+       tty_unlock();
+       proc_clear_tty(tsk);
+ }
+ /**
+  *    stop_tty        -       propagate flow control
+  *    @tty: tty to stop
+  *
+  *    Perform flow control to the driver. For PTY/TTY pairs we
+  *    must also propagate the TIOCPKT status. May be called
+  *    on an already stopped device and will not re-call the driver
+  *    method.
+  *
+  *    This functionality is used by both the line disciplines for
+  *    halting incoming flow and by the driver. It may therefore be
+  *    called from any context, may be under the tty atomic_write_lock
+  *    but not always.
+  *
+  *    Locking:
+  *            Uses the tty control lock internally
+  */
+ void stop_tty(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       if (tty->stopped) {
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               return;
+       }
+       tty->stopped = 1;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_START;
+               tty->ctrl_status |= TIOCPKT_STOP;
+               wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+       }
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       if (tty->ops->stop)
+               (tty->ops->stop)(tty);
+ }
+ EXPORT_SYMBOL(stop_tty);
+ /**
+  *    start_tty       -       propagate flow control
+  *    @tty: tty to start
+  *
+  *    Start a tty that has been stopped if at all possible. Perform
+  *    any necessary wakeups and propagate the TIOCPKT status. If the
+  *    tty was previously stopped and is being started then the
+  *    driver start method is invoked and the line discipline woken.
+  *
+  *    Locking:
+  *            ctrl_lock
+  */
+ void start_tty(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       if (!tty->stopped || tty->flow_stopped) {
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               return;
+       }
+       tty->stopped = 0;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_STOP;
+               tty->ctrl_status |= TIOCPKT_START;
+               wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+       }
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       if (tty->ops->start)
+               (tty->ops->start)(tty);
+       /* If we have a running line discipline it may need kicking */
+       tty_wakeup(tty);
+ }
+ EXPORT_SYMBOL(start_tty);
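+ /*
+  * Sketch of the classic callers (illustrative): with IXON set, the
+  * line discipline maps the stop/start characters onto these helpers.
+  *
+  *    if (c == STOP_CHAR(tty))
+  *            stop_tty(tty);
+  *    else if (c == START_CHAR(tty))
+  *            start_tty(tty);
+  */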
+ /**
+  *    tty_read        -       read method for tty device files
+  *    @file: pointer to tty file
+  *    @buf: user buffer
+  *    @count: size of user buffer
+  *    @ppos: unused
+  *
+  *    Perform the read system call function on this terminal device. Checks
+  *    for hung up devices before calling the line discipline method.
+  *
+  *    Locking:
+  *            Locks the line discipline internally while needed. Multiple
+  *    read calls may be outstanding in parallel.
+  */
+ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
+                       loff_t *ppos)
+ {
+       int i;
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct tty_struct *tty = file_tty(file);
+       struct tty_ldisc *ld;
+       if (tty_paranoia_check(tty, inode, "tty_read"))
+               return -EIO;
+       if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+               return -EIO;
+       /* We want to wait for the line discipline to sort out in this
+          situation */
+       ld = tty_ldisc_ref_wait(tty);
+       if (ld->ops->read)
+               i = (ld->ops->read)(tty, file, buf, count);
+       else
+               i = -EIO;
+       tty_ldisc_deref(ld);
+       if (i > 0)
+               inode->i_atime = current_fs_time(inode->i_sb);
+       return i;
+ }
+ void tty_write_unlock(struct tty_struct *tty)
+ {
+       mutex_unlock(&tty->atomic_write_lock);
+       wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ }
+ int tty_write_lock(struct tty_struct *tty, int ndelay)
+ {
+       if (!mutex_trylock(&tty->atomic_write_lock)) {
+               if (ndelay)
+                       return -EAGAIN;
+               if (mutex_lock_interruptible(&tty->atomic_write_lock))
+                       return -ERESTARTSYS;
+       }
+       return 0;
+ }
+ /*
+  * Split writes up in sane blocksizes to avoid
+  * denial-of-service type attacks
+  */
+ static inline ssize_t do_tty_write(
+       ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+       struct tty_struct *tty,
+       struct file *file,
+       const char __user *buf,
+       size_t count)
+ {
+       ssize_t ret, written = 0;
+       unsigned int chunk;
+       ret = tty_write_lock(tty, file->f_flags & O_NDELAY);
+       if (ret < 0)
+               return ret;
+       /*
+        * We chunk up writes into a temporary buffer. This
+        * simplifies low-level drivers immensely, since they
+        * don't have locking issues and user mode accesses.
+        *
+        * But if TTY_NO_WRITE_SPLIT is set, we should use a
+        * big chunk-size..
+        *
+        * The default chunk-size is 2kB, because the NTTY
+        * layer has problems with bigger chunks. It will
+        * claim to be able to handle more characters than
+        * it actually does.
+        *
+        * FIXME: This can probably go away now except that 64K chunks
+        * are too likely to fail unless switched to vmalloc...
+        */
+       chunk = 2048;
+       if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags))
+               chunk = 65536;
+       if (count < chunk)
+               chunk = count;
+       /* write_buf/write_cnt is protected by the atomic_write_lock mutex */
+       if (tty->write_cnt < chunk) {
+               unsigned char *buf_chunk;
+               if (chunk < 1024)
+                       chunk = 1024;
+               buf_chunk = kmalloc(chunk, GFP_KERNEL);
+               if (!buf_chunk) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               kfree(tty->write_buf);
+               tty->write_cnt = chunk;
+               tty->write_buf = buf_chunk;
+       }
+       /* Do the write .. */
+       for (;;) {
+               size_t size = count;
+               if (size > chunk)
+                       size = chunk;
+               ret = -EFAULT;
+               if (copy_from_user(tty->write_buf, buf, size))
+                       break;
+               ret = write(tty, file, tty->write_buf, size);
+               if (ret <= 0)
+                       break;
+               written += ret;
+               buf += ret;
+               count -= ret;
+               if (!count)
+                       break;
+               ret = -ERESTARTSYS;
+               if (signal_pending(current))
+                       break;
+               cond_resched();
+       }
+       if (written) {
+               struct inode *inode = file->f_path.dentry->d_inode;
+               inode->i_mtime = current_fs_time(inode->i_sb);
+               ret = written;
+       }
+ out:
+       tty_write_unlock(tty);
+       return ret;
+ }
+ /**
+  * tty_write_message - write a message to a certain tty, not just the console.
+  * @tty: the destination tty_struct
+  * @msg: the message to write
+  *
+  * This is used for messages that need to be redirected to a specific tty.
+  * We don't put it into the syslog queue right now; maybe in the future
+  * if really needed.
+  *
+  * We must still hold the BTM and test the CLOSING flag for the moment.
+  */
+ void tty_write_message(struct tty_struct *tty, char *msg)
+ {
+       if (tty) {
+               mutex_lock(&tty->atomic_write_lock);
+               tty_lock();
+               if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags)) {
+                       tty_unlock();
+                       tty->ops->write(tty, msg, strlen(msg));
+               } else
+                       tty_unlock();
+               tty_write_unlock(tty);
+       }
+       return;
+ }
+ /**
+  *    tty_write               -       write method for tty device file
+  *    @file: tty file pointer
+  *    @buf: user data to write
+  *    @count: bytes to write
+  *    @ppos: unused
+  *
+  *    Write data to a tty device via the line discipline.
+  *
+  *    Locking:
+  *            Locks the line discipline as required
+  *            Writes to the tty driver are serialized by the atomic_write_lock
+  *    and are then processed in chunks to the device. The line discipline
+  *    write method will not be invoked in parallel for each device.
+  */
+ static ssize_t tty_write(struct file *file, const char __user *buf,
+                                               size_t count, loff_t *ppos)
+ {
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct tty_struct *tty = file_tty(file);
+       struct tty_ldisc *ld;
+       ssize_t ret;
+       if (tty_paranoia_check(tty, inode, "tty_write"))
+               return -EIO;
+       if (!tty || !tty->ops->write ||
+               (test_bit(TTY_IO_ERROR, &tty->flags)))
+                       return -EIO;
+       /* Short term debug to catch buggy drivers */
+       if (tty->ops->write_room == NULL)
+               printk(KERN_ERR "tty driver %s lacks a write_room method.\n",
+                       tty->driver->name);
+       ld = tty_ldisc_ref_wait(tty);
+       if (!ld->ops->write)
+               ret = -EIO;
+       else
+               ret = do_tty_write(ld->ops->write, tty, file, buf, count);
+       tty_ldisc_deref(ld);
+       return ret;
+ }
+ ssize_t redirected_tty_write(struct file *file, const char __user *buf,
+                                               size_t count, loff_t *ppos)
+ {
+       struct file *p = NULL;
+       spin_lock(&redirect_lock);
+       if (redirect) {
+               get_file(redirect);
+               p = redirect;
+       }
+       spin_unlock(&redirect_lock);
+       if (p) {
+               ssize_t res;
+               res = vfs_write(p, buf, count, &p->f_pos);
+               fput(p);
+               return res;
+       }
+       return tty_write(file, buf, count, ppos);
+ }
+ static char ptychar[] = "pqrstuvwxyzabcde";
+ /**
+  *    pty_line_name   -       generate name for a pty
+  *    @driver: the tty driver in use
+  *    @index: the minor number
+  *    @p: output buffer of at least 6 bytes
+  *
+  *    Generate a name from a driver reference and write it to the output
+  *    buffer.
+  *
+  *    Locking: None
+  */
+ static void pty_line_name(struct tty_driver *driver, int index, char *p)
+ {
+       int i = index + driver->name_base;
+       /* ->name is initialized to "ttyp", but "tty" is expected */
+       sprintf(p, "%s%c%x",
+               driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name,
+               ptychar[i >> 4 & 0xf], i & 0xf);
+ }
+ /**
+  *    tty_line_name   -       generate name for a tty
+  *    @driver: the tty driver in use
+  *    @index: the minor number
+  *    @p: output buffer of at least 7 bytes
+  *
+  *    Generate a name from a driver reference and write it to the output
+  *    buffer.
+  *
+  *    Locking: None
+  */
+ static void tty_line_name(struct tty_driver *driver, int index, char *p)
+ {
+       sprintf(p, "%s%d", driver->name, index + driver->name_base);
+ }
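+ /*
+  * Examples of the generated names (assuming name_base == 0):
+  * tty_line_name() turns ("ttyS", 0) into "ttyS0", while
+  * pty_line_name() turns a legacy pty slave at index 16 into "ttyq0"
+  * via the ptychar table above.
+  */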
+ /**
+  *    tty_driver_lookup_tty() - find an existing tty, if any
+  *    @driver: the driver for the tty
+  *    @idx:    the minor number
+  *
+  *    Return the tty, if found or ERR_PTR() otherwise.
+  *
+  *    Locking: tty_mutex must be held. If tty is found, the mutex must
+  *    be held until the 'fast-open' is also done. Will change once we
+  *    have refcounting in the driver and per driver locking
+  */
+ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
+               struct inode *inode, int idx)
+ {
+       struct tty_struct *tty;
+       if (driver->ops->lookup)
+               return driver->ops->lookup(driver, inode, idx);
+       tty = driver->ttys[idx];
+       return tty;
+ }
+ /**
+  *    tty_init_termios        -  helper for termios setup
+  *    @tty: the tty to set up
+  *
+  *    Initialise the termios structures for this tty. This runs under
+  *    the tty_mutex currently so we can be relaxed about ordering.
+  */
+ int tty_init_termios(struct tty_struct *tty)
+ {
+       struct ktermios *tp;
+       int idx = tty->index;
+       tp = tty->driver->termios[idx];
+       if (tp == NULL) {
+               tp = kzalloc(sizeof(struct ktermios[2]), GFP_KERNEL);
+               if (tp == NULL)
+                       return -ENOMEM;
+               memcpy(tp, &tty->driver->init_termios,
+                                               sizeof(struct ktermios));
+               tty->driver->termios[idx] = tp;
+       }
+       tty->termios = tp;
+       tty->termios_locked = tp + 1;
+       /* Compatibility until drivers always set this */
+       tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+       tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(tty_init_termios);
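+ /*
+  * Sketch (illustrative, not part of this patch): a driver supplying
+  * its own ->install() typically still calls this helper before its
+  * extra setup, mirroring the default path below.
+  *
+  *    static int example_install(struct tty_driver *driver,
+  *                               struct tty_struct *tty)
+  *    {
+  *            int ret = tty_init_termios(tty);
+  *            if (ret == 0) {
+  *                    tty_driver_kref_get(driver);
+  *                    tty->count++;
+  *            }
+  *            return ret;
+  *    }
+  */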
+ /**
+  *    tty_driver_install_tty() - install a tty entry in the driver
+  *    @driver: the driver for the tty
+  *    @tty: the tty
+  *
+  *    Install a tty object into the driver tables. The tty->index field
+  *    will be set by the time this is called. This method is responsible
+  *    for ensuring any needed additional structures are allocated and
+  *    configured.
+  *
+  *    Locking: tty_mutex for now
+  */
+ static int tty_driver_install_tty(struct tty_driver *driver,
+                                               struct tty_struct *tty)
+ {
+       int idx = tty->index;
+       int ret;
+       if (driver->ops->install) {
+               ret = driver->ops->install(driver, tty);
+               return ret;
+       }
+       if (tty_init_termios(tty) == 0) {
+               tty_driver_kref_get(driver);
+               tty->count++;
+               driver->ttys[idx] = tty;
+               return 0;
+       }
+       return -ENOMEM;
+ }
+ /**
+  *    tty_driver_remove_tty() - remove a tty from the driver tables
+  *    @driver: the driver for the tty
+  *    @idx:    the minor number
+  *
+  *    Remove a tty object from the driver tables. The tty->index field
+  *    will be set by the time this is called.
+  *
+  *    Locking: tty_mutex for now
+  */
+ static void tty_driver_remove_tty(struct tty_driver *driver,
+                                               struct tty_struct *tty)
+ {
+       if (driver->ops->remove)
+               driver->ops->remove(driver, tty);
+       else
+               driver->ttys[tty->index] = NULL;
+ }
+ /*
+  *    tty_reopen()    - fast re-open of an open tty
+  *    @tty: the tty to open
+  *
+  *    Return 0 on success, -errno on error.
+  *
+  *    Locking: tty_mutex must be held from the time the tty was found
+  *             till this open completes.
+  */
+ static int tty_reopen(struct tty_struct *tty)
+ {
+       struct tty_driver *driver = tty->driver;
+       if (test_bit(TTY_CLOSING, &tty->flags))
+               return -EIO;
+       if (driver->type == TTY_DRIVER_TYPE_PTY &&
+           driver->subtype == PTY_TYPE_MASTER) {
+               /*
+                * special case for PTY masters: only one open permitted,
+                * and the slave side open count is incremented as well.
+                */
+               if (tty->count)
+                       return -EIO;
+               tty->link->count++;
+       }
+       tty->count++;
+       tty->driver = driver; /* N.B. why do this every time?? */
+       mutex_lock(&tty->ldisc_mutex);
+       WARN_ON(!test_bit(TTY_LDISC, &tty->flags));
+       mutex_unlock(&tty->ldisc_mutex);
+       return 0;
+ }
+ /**
+  *    tty_init_dev            -       initialise a tty device
+  *    @driver: tty driver we are opening a device on
+  *    @idx: device index
+  *    @ret_tty: returned tty structure
+  *    @first_ok: ok to open a new device (used by ptmx)
+  *
+  *    Prepare a tty device. This may not be a "new" clean device but
+  *    could also be an active device. The pty drivers require special
+  *    handling because of this.
+  *
+  *    Locking:
+  *            The function is called under the tty_mutex, which
+  *    protects us from the tty struct or driver itself going away.
+  *
+  *    On exit the tty device has the line discipline attached and
+  *    a reference count of 1. If a pair was created for pty/tty use
+  *    and the other was a pty master then it too has a reference count of 1.
+  *
+  * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+  * failed open.  The new code protects the open with a mutex, so it's
+  * really quite straightforward.  The mutex locking can probably be
+  * relaxed for the (most common) case of reopening a tty.
+  */
+ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
+                                                               int first_ok)
+ {
+       struct tty_struct *tty;
+       int retval;
+       /* Check if pty master is being opened multiple times */
+       if (driver->subtype == PTY_TYPE_MASTER &&
+               (driver->flags & TTY_DRIVER_DEVPTS_MEM) && !first_ok) {
+               return ERR_PTR(-EIO);
+       }
+       /*
+        * First time open is complex, especially for PTY devices.
+        * This code guarantees that either everything succeeds and the
+        * TTY is ready for operation, or else the table slots are vacated
+        * and the allocated memory released.  (Except that the termios
+        * and locked termios may be retained.)
+        */
+       if (!try_module_get(driver->owner))
+               return ERR_PTR(-ENODEV);
+       tty = alloc_tty_struct();
+       if (!tty)
+               goto fail_no_mem;
+       initialize_tty_struct(tty, driver, idx);
+       retval = tty_driver_install_tty(driver, tty);
+       if (retval < 0) {
+               free_tty_struct(tty);
+               module_put(driver->owner);
+               return ERR_PTR(retval);
+       }
+       /*
+        * Structures all installed ... call the ldisc open routines.
+        * If we fail here just call release_tty to clean up.  No need
+        * to decrement the use counts, as release_tty doesn't care.
+        */
+       retval = tty_ldisc_setup(tty, tty->link);
+       if (retval)
+               goto release_mem_out;
+       return tty;
+ fail_no_mem:
+       module_put(driver->owner);
+       return ERR_PTR(-ENOMEM);
+       /* call the tty release_tty routine to clean out this slot */
+ release_mem_out:
+       if (printk_ratelimit())
+               printk(KERN_INFO "tty_init_dev: ldisc open failed, "
+                                "clearing slot %d\n", idx);
+       release_tty(tty, idx);
+       return ERR_PTR(retval);
+ }
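+ /*
+  * Illustrative call pattern (editor's sketch; mirrors tty_open()
+  * further down): callers hold tty_mutex and check the result with
+  * IS_ERR():
+  *
+  *     tty = tty_init_dev(driver, index, 0);
+  *     if (IS_ERR(tty))
+  *             return PTR_ERR(tty);
+  */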
+ void tty_free_termios(struct tty_struct *tty)
+ {
+       struct ktermios *tp;
+       int idx = tty->index;
+       /* Kill this flag and push into drivers for locking etc */
+       if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+               /* FIXME: Locking on ->termios array */
+               tp = tty->termios;
+               tty->driver->termios[idx] = NULL;
+               kfree(tp);
+       }
+ }
+ EXPORT_SYMBOL(tty_free_termios);
+ void tty_shutdown(struct tty_struct *tty)
+ {
+       tty_driver_remove_tty(tty->driver, tty);
+       tty_free_termios(tty);
+ }
+ EXPORT_SYMBOL(tty_shutdown);
+ /**
+  *    release_one_tty         -       release tty structure memory
+  *    @kref: kref of tty we are obliterating
+  *
+  *    Releases memory associated with a tty structure, and clears out the
+  *    driver table slots. This function is called when a device is no longer
+  *    in use. It also gets called when setup of a device fails.
+  *
+  *    Locking:
+  *            tty_mutex - sometimes only
+  *            takes the file list lock internally when working on the list
+  *    of ttys that the driver keeps.
+  *
+  *    This method gets called from a work queue so that the driver private
+  *    cleanup ops can sleep (needed for USB at least)
+  */
+ static void release_one_tty(struct work_struct *work)
+ {
+       struct tty_struct *tty =
+               container_of(work, struct tty_struct, hangup_work);
+       struct tty_driver *driver = tty->driver;
+       if (tty->ops->cleanup)
+               tty->ops->cleanup(tty);
+       tty->magic = 0;
+       tty_driver_kref_put(driver);
+       module_put(driver->owner);
+       spin_lock(&tty_files_lock);
+       list_del_init(&tty->tty_files);
+       spin_unlock(&tty_files_lock);
+       put_pid(tty->pgrp);
+       put_pid(tty->session);
+       free_tty_struct(tty);
+ }
+ static void queue_release_one_tty(struct kref *kref)
+ {
+       struct tty_struct *tty = container_of(kref, struct tty_struct, kref);
+       if (tty->ops->shutdown)
+               tty->ops->shutdown(tty);
+       else
+               tty_shutdown(tty);
+       /* The hangup queue is now free so we can reuse it rather than
+          waste a chunk of memory for each port */
+       INIT_WORK(&tty->hangup_work, release_one_tty);
+       schedule_work(&tty->hangup_work);
+ }
+ /**
+  *    tty_kref_put            -       release a tty kref
+  *    @tty: tty device
+  *
+  *    Release a reference to a tty device and if need be let the kref
+  *    layer destruct the object for us
+  */
+ void tty_kref_put(struct tty_struct *tty)
+ {
+       if (tty)
+               kref_put(&tty->kref, queue_release_one_tty);
+ }
+ EXPORT_SYMBOL(tty_kref_put);
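+ /*
+  * Illustrative sketch (editor's example; port_tty is a hypothetical
+  * driver field): a borrowed reference taken with tty_kref_get() is
+  * paired with tty_kref_put() when done:
+  *
+  *     struct tty_struct *tty = tty_kref_get(port_tty);
+  *     if (tty) {
+  *             tty_wakeup(tty);
+  *             tty_kref_put(tty);
+  *     }
+  */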
+ /**
+  *    release_tty             -       release tty structure memory
+  *    @tty: tty device to release
+  *    @idx: index of the tty device
+  *
+  *    Release both @tty and a possible linked partner (think pty pair),
+  *    and decrement the refcount of the backing module.
+  *
+  *    Locking:
+  *            tty_mutex - sometimes only
+  *            takes the file list lock internally when working on the list
+  *    of ttys that the driver keeps.
+  *            FIXME: should we require tty_mutex is held here ??
+  *
+  */
+ static void release_tty(struct tty_struct *tty, int idx)
+ {
+       /* This should always be true but check for the moment */
+       WARN_ON(tty->index != idx);
+       if (tty->link)
+               tty_kref_put(tty->link);
+       tty_kref_put(tty);
+ }
+ /**
+  *    tty_release             -       vfs callback for close
+  *    @inode: inode of tty
+  *    @filp: file pointer for handle to tty
+  *
+  *    Called when a file handle that references this tty is closed for
+  *    the last time. There may, however, be several such handles.
+  *
+  *    Locking:
+  *            Takes bkl. See tty_release_dev
+  *
+  * Even releasing the tty structures is a tricky business.. We have
+  * to be very careful that the structures are all released at the
+  * same time, as interrupts might otherwise get the wrong pointers.
+  *
+  * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+  * lead to double frees or releasing memory still in use.
+  */
+ int tty_release(struct inode *inode, struct file *filp)
+ {
+       struct tty_struct *tty = file_tty(filp);
+       struct tty_struct *o_tty;
+       int     pty_master, tty_closing, o_tty_closing, do_sleep;
+       int     devpts;
+       int     idx;
+       char    buf[64];
+       if (tty_paranoia_check(tty, inode, "tty_release_dev"))
+               return 0;
+       tty_lock();
+       check_tty_count(tty, "tty_release_dev");
+       __tty_fasync(-1, filp, 0);
+       idx = tty->index;
+       pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+                     tty->driver->subtype == PTY_TYPE_MASTER);
+       devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
+       o_tty = tty->link;
+ #ifdef TTY_PARANOIA_CHECK
+       if (idx < 0 || idx >= tty->driver->num) {
+               printk(KERN_DEBUG "tty_release_dev: bad idx when trying to "
+                                 "free (%s)\n", tty->name);
+               tty_unlock();
+               return 0;
+       }
+       if (!devpts) {
+               if (tty != tty->driver->ttys[idx]) {
+                       tty_unlock();
+                       printk(KERN_DEBUG "tty_release_dev: driver.table[%d] not tty "
+                              "for (%s)\n", idx, tty->name);
+                       return 0;
+               }
+               if (tty->termios != tty->driver->termios[idx]) {
+                       tty_unlock();
+                       printk(KERN_DEBUG "tty_release_dev: driver.termios[%d] not termios "
+                              "for (%s)\n",
+                              idx, tty->name);
+                       return 0;
+               }
+       }
+ #endif
+ #ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "tty_release_dev of %s (tty count=%d)...",
+              tty_name(tty, buf), tty->count);
+ #endif
+ #ifdef TTY_PARANOIA_CHECK
+       if (tty->driver->other &&
+            !(tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+               if (o_tty != tty->driver->other->ttys[idx]) {
+                       tty_unlock();
+                       printk(KERN_DEBUG "tty_release_dev: other->table[%d] "
+                                         "not o_tty for (%s)\n",
+                              idx, tty->name);
+                       return 0;
+               }
+               if (o_tty->termios != tty->driver->other->termios[idx]) {
+                       tty_unlock();
+                       printk(KERN_DEBUG "tty_release_dev: other->termios[%d] "
+                                         "not o_termios for (%s)\n",
+                              idx, tty->name);
+                       return 0;
+               }
+               if (o_tty->link != tty) {
+                       tty_unlock();
+                       printk(KERN_DEBUG "tty_release_dev: bad pty pointers\n");
+                       return 0;
+               }
+       }
+ #endif
+       if (tty->ops->close)
+               tty->ops->close(tty, filp);
+       tty_unlock();
+       /*
+        * Sanity check: if tty->count is going to zero, there shouldn't be
+        * any waiters on tty->read_wait or tty->write_wait.  We test the
+        * wait queues and kick everyone out _before_ actually starting to
+        * close.  This ensures that we won't block while releasing the tty
+        * structure.
+        *
+        * The test for the o_tty closing is necessary, since the master and
+        * slave sides may close in any order.  If the slave side closes out
+        * first, its count will be one, since the master side holds an open.
+        * Thus this test wouldn't be triggered at the time the slave closes,
+        * so we do it now.
+        *
+        * Note that it's possible for the tty to be opened again while we're
+        * flushing out waiters.  By recalculating the closing flags before
+        * each iteration we avoid any problems.
+        */
+       while (1) {
+               /* Guard against races with tty->count changes elsewhere and
+                  opens on /dev/tty */
+               mutex_lock(&tty_mutex);
+               tty_lock();
+               tty_closing = tty->count <= 1;
+               o_tty_closing = o_tty &&
+                       (o_tty->count <= (pty_master ? 1 : 0));
+               do_sleep = 0;
+               if (tty_closing) {
+                       if (waitqueue_active(&tty->read_wait)) {
+                               wake_up_poll(&tty->read_wait, POLLIN);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&tty->write_wait)) {
+                               wake_up_poll(&tty->write_wait, POLLOUT);
+                               do_sleep++;
+                       }
+               }
+               if (o_tty_closing) {
+                       if (waitqueue_active(&o_tty->read_wait)) {
+                               wake_up_poll(&o_tty->read_wait, POLLIN);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&o_tty->write_wait)) {
+                               wake_up_poll(&o_tty->write_wait, POLLOUT);
+                               do_sleep++;
+                       }
+               }
+               if (!do_sleep)
+                       break;
+               printk(KERN_WARNING "tty_release_dev: %s: read/write wait queue "
+                                   "active!\n", tty_name(tty, buf));
+               tty_unlock();
+               mutex_unlock(&tty_mutex);
+               schedule();
+       }
+       /*
+        * The closing flags are now consistent with the open counts on
+        * both sides, and we've completed the last operation that could
+        * block, so it's safe to proceed with closing.
+        */
+       if (pty_master) {
+               if (--o_tty->count < 0) {
+                       printk(KERN_WARNING "tty_release_dev: bad pty slave count "
+                                           "(%d) for %s\n",
+                              o_tty->count, tty_name(o_tty, buf));
+                       o_tty->count = 0;
+               }
+       }
+       if (--tty->count < 0) {
+               printk(KERN_WARNING "tty_release_dev: bad tty->count (%d) for %s\n",
+                      tty->count, tty_name(tty, buf));
+               tty->count = 0;
+       }
+       /*
+        * We've decremented tty->count, so we need to remove this file
+        * descriptor off the tty->tty_files list; this serves two
+        * purposes:
+        *  - check_tty_count sees the correct number of file descriptors
+        *    associated with this tty.
+        *  - do_tty_hangup no longer sees this file descriptor as
+        *    something that needs to be handled for hangups.
+        */
+       tty_del_file(filp);
+       /*
+        * Perform some housekeeping before deciding whether to return.
+        *
+        * Set the TTY_CLOSING flag if this was the last open.  In the
+        * case of a pty we may have to wait around for the other side
+        * to close, and TTY_CLOSING makes sure we can't be reopened.
+        */
+       if (tty_closing)
+               set_bit(TTY_CLOSING, &tty->flags);
+       if (o_tty_closing)
+               set_bit(TTY_CLOSING, &o_tty->flags);
+       /*
+        * If _either_ side is closing, make sure there aren't any
+        * processes that still think tty or o_tty is their controlling
+        * tty.
+        */
+       if (tty_closing || o_tty_closing) {
+               read_lock(&tasklist_lock);
+               session_clear_tty(tty->session);
+               if (o_tty)
+                       session_clear_tty(o_tty->session);
+               read_unlock(&tasklist_lock);
+       }
+       mutex_unlock(&tty_mutex);
+       /* check whether both sides are closing ... */
+       if (!tty_closing || (o_tty && !o_tty_closing)) {
+               tty_unlock();
+               return 0;
+       }
+ #ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "freeing tty structure...");
+ #endif
+       /*
+        * Ask the line discipline code to release its structures
+        */
+       tty_ldisc_release(tty, o_tty);
+       /*
+        * The release_tty function takes care of the details of clearing
+        * the slots and preserving the termios structure.
+        */
+       release_tty(tty, idx);
+       /* Make this pty number available for reallocation */
+       if (devpts)
+               devpts_kill_index(inode, idx);
+       tty_unlock();
+       return 0;
+ }
+ /**
+  *    tty_open                -       open a tty device
+  *    @inode: inode of device file
+  *    @filp: file pointer to tty
+  *
+  *    tty_open and tty_release maintain the tty count, which holds the
+  *    number of opens done on a tty. We cannot use the inode count, as
+  *    different inodes might point to the same tty.
+  *
+  *    Open-counting is needed for pty masters, as well as for keeping
+  *    track of serial lines: DTR is dropped when the last close happens.
+  *    (This is not done solely through tty->count, now.  - Ted 1/27/92)
+  *
+  *    The termios state of a pty is reset on first open so that
+  *    settings don't persist across reuse.
+  *
+  *    Locking: tty_mutex protects tty, get_tty_driver and tty_init_dev work.
+  *             tty->count should protect the rest.
+  *             ->siglock protects ->signal/->sighand
+  */
+ static int tty_open(struct inode *inode, struct file *filp)
+ {
+       struct tty_struct *tty = NULL;
+       int noctty, retval;
+       struct tty_driver *driver;
+       int index;
+       dev_t device = inode->i_rdev;
+       unsigned saved_flags = filp->f_flags;
+       nonseekable_open(inode, filp);
+ retry_open:
+       noctty = filp->f_flags & O_NOCTTY;
+       index  = -1;
+       retval = 0;
+       mutex_lock(&tty_mutex);
+       tty_lock();
+       if (device == MKDEV(TTYAUX_MAJOR, 0)) {
+               tty = get_current_tty();
+               if (!tty) {
+                       tty_unlock();
+                       mutex_unlock(&tty_mutex);
+                       return -ENXIO;
+               }
+               driver = tty_driver_kref_get(tty->driver);
+               index = tty->index;
+               filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+               /* noctty = 1; */
+               /* FIXME: Should we take a driver reference ? */
+               tty_kref_put(tty);
+               goto got_driver;
+       }
+ #ifdef CONFIG_VT
+       if (device == MKDEV(TTY_MAJOR, 0)) {
+               extern struct tty_driver *console_driver;
+               driver = tty_driver_kref_get(console_driver);
+               index = fg_console;
+               noctty = 1;
+               goto got_driver;
+       }
+ #endif
+       if (device == MKDEV(TTYAUX_MAJOR, 1)) {
+               struct tty_driver *console_driver = console_device(&index);
+               if (console_driver) {
+                       driver = tty_driver_kref_get(console_driver);
+                       if (driver) {
+                               /* Don't let /dev/console block */
+                               filp->f_flags |= O_NONBLOCK;
+                               noctty = 1;
+                               goto got_driver;
+                       }
+               }
+               tty_unlock();
+               mutex_unlock(&tty_mutex);
+               return -ENODEV;
+       }
+       driver = get_tty_driver(device, &index);
+       if (!driver) {
+               tty_unlock();
+               mutex_unlock(&tty_mutex);
+               return -ENODEV;
+       }
+ got_driver:
+       if (!tty) {
+               /* check whether we're reopening an existing tty */
+               tty = tty_driver_lookup_tty(driver, inode, index);
+               if (IS_ERR(tty)) {
+                       tty_unlock();
+                       mutex_unlock(&tty_mutex);
+                       return PTR_ERR(tty);
+               }
+       }
+       if (tty) {
+               retval = tty_reopen(tty);
+               if (retval)
+                       tty = ERR_PTR(retval);
+       } else
+               tty = tty_init_dev(driver, index, 0);
+       mutex_unlock(&tty_mutex);
+       tty_driver_kref_put(driver);
+       if (IS_ERR(tty)) {
+               tty_unlock();
+               return PTR_ERR(tty);
+       }
+       retval = tty_add_file(tty, filp);
+       if (retval) {
+               tty_unlock();
+               return retval;
+       }
+       check_tty_count(tty, "tty_open");
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_MASTER)
+               noctty = 1;
+ #ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "opening %s...", tty->name);
+ #endif
+       if (!retval) {
+               if (tty->ops->open)
+                       retval = tty->ops->open(tty, filp);
+               else
+                       retval = -ENODEV;
+       }
+       filp->f_flags = saved_flags;
+       if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) &&
+                                               !capable(CAP_SYS_ADMIN))
+               retval = -EBUSY;
+       if (retval) {
+ #ifdef TTY_DEBUG_HANGUP
+               printk(KERN_DEBUG "error %d in opening %s...", retval,
+                      tty->name);
+ #endif
+               tty_unlock(); /* need to call tty_release without BTM */
+               tty_release(inode, filp);
+               if (retval != -ERESTARTSYS)
+                       return retval;
+               if (signal_pending(current))
+                       return retval;
+               schedule();
+               /*
+                * Need to reset f_op in case a hangup happened.
+                */
+               tty_lock();
+               if (filp->f_op == &hung_up_tty_fops)
+                       filp->f_op = &tty_fops;
+               tty_unlock();
+               goto retry_open;
+       }
+       tty_unlock();
+       mutex_lock(&tty_mutex);
+       tty_lock();
+       spin_lock_irq(&current->sighand->siglock);
+       if (!noctty &&
+           current->signal->leader &&
+           !current->signal->tty &&
+           tty->session == NULL)
+               __proc_set_tty(current, tty);
+       spin_unlock_irq(&current->sighand->siglock);
+       tty_unlock();
+       mutex_unlock(&tty_mutex);
+       return 0;
+ }
+ /**
+  *    tty_poll        -       check tty status
+  *    @filp: file being polled
+  *    @wait: poll wait structures to update
+  *
+  *    Call the line discipline polling method to obtain the poll
+  *    status of the device.
+  *
+  *    Locking: locks called line discipline but ldisc poll method
+  *    may be re-entered freely by other callers.
+  */
+ static unsigned int tty_poll(struct file *filp, poll_table *wait)
+ {
+       struct tty_struct *tty = file_tty(filp);
+       struct tty_ldisc *ld;
+       int ret = 0;
+       if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
+               return 0;
+       ld = tty_ldisc_ref_wait(tty);
+       if (ld->ops->poll)
+               ret = (ld->ops->poll)(tty, filp, wait);
+       tty_ldisc_deref(ld);
+       return ret;
+ }
+ static int __tty_fasync(int fd, struct file *filp, int on)
+ {
+       struct tty_struct *tty = file_tty(filp);
+       unsigned long flags;
+       int retval = 0;
+       if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
+               goto out;
+       retval = fasync_helper(fd, filp, on, &tty->fasync);
+       if (retval <= 0)
+               goto out;
+       if (on) {
+               enum pid_type type;
+               struct pid *pid;
+               if (!waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = 1;
+               spin_lock_irqsave(&tty->ctrl_lock, flags);
+               if (tty->pgrp) {
+                       pid = tty->pgrp;
+                       type = PIDTYPE_PGID;
+               } else {
+                       pid = task_pid(current);
+                       type = PIDTYPE_PID;
+               }
+               get_pid(pid);
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               retval = __f_setown(filp, pid, type, 0);
+               put_pid(pid);
+               if (retval)
+                       goto out;
+       } else {
+               if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = N_TTY_BUF_SIZE;
+       }
+       retval = 0;
+ out:
+       return retval;
+ }
+ static int tty_fasync(int fd, struct file *filp, int on)
+ {
+       int retval;
+       tty_lock();
+       retval = __tty_fasync(fd, filp, on);
+       tty_unlock();
+       return retval;
+ }
+ /**
+  *    tiocsti                 -       fake input character
+  *    @tty: tty to fake input into
+  *    @p: pointer to character
+  *
+  *    Fake input to a tty device. Does the necessary locking and
+  *    input management.
+  *
+  *    FIXME: does not honour flow control ??
+  *
+  *    Locking:
+  *            Called functions take tty_ldisc_lock
+  *            current->signal->tty check is safe without locks
+  *
+  *    FIXME: may race normal receive processing
+  */
+ static int tiocsti(struct tty_struct *tty, char __user *p)
+ {
+       char ch, mbz = 0;
+       struct tty_ldisc *ld;
+       if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (get_user(ch, p))
+               return -EFAULT;
+       tty_audit_tiocsti(tty, ch);
+       ld = tty_ldisc_ref_wait(tty);
+       ld->ops->receive_buf(tty, &ch, &mbz, 1);
+       tty_ldisc_deref(ld);
+       return 0;
+ }
+ /**
+  *    tiocgwinsz              -       implement window query ioctl
+  *    @tty: tty
+  *    @arg: user buffer for result
+  *
+  *    Copies the kernel idea of the window size into the user buffer.
+  *
+  *    Locking: tty->termios_mutex is taken to ensure the winsize data
+  *            is consistent.
+  */
+ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
+ {
+       int err;
+       mutex_lock(&tty->termios_mutex);
+       err = copy_to_user(arg, &tty->winsize, sizeof(*arg));
+       mutex_unlock(&tty->termios_mutex);
+       return err ? -EFAULT : 0;
+ }
+ /**
+  *    tty_do_resize           -       resize event
+  *    @tty: tty being resized
+  *    @ws: new window size (rows and columns, in characters)
+  *
+  *    Update the termios variables and send the necessary signals to
+  *    perform a terminal resize correctly.
+  */
+ int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
+ {
+       struct pid *pgrp;
+       unsigned long flags;
+       /* Lock the tty */
+       mutex_lock(&tty->termios_mutex);
+       if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
+               goto done;
+       /* Get the PID values and reference them so we can
+          avoid holding the tty ctrl lock while sending signals */
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       pgrp = get_pid(tty->pgrp);
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       if (pgrp)
+               kill_pgrp(pgrp, SIGWINCH, 1);
+       put_pid(pgrp);
+       tty->winsize = *ws;
+ done:
+       mutex_unlock(&tty->termios_mutex);
+       return 0;
+ }
+ /**
+  *    tiocswinsz              -       implement window size set ioctl
+  *    @tty: tty side of tty
+  *    @arg: user buffer for result
+  *
+  *    Copies the user idea of the window size to the kernel. Traditionally
+  *    this is just advisory information but for the Linux console it
+  *    actually has driver level meaning and triggers a VC resize.
+  *
+  *    Locking:
+  *            Driver dependent. The default do_resize method takes the
+  *    tty termios mutex and ctrl_lock. The console takes its own lock
+  *    then calls into the default method.
+  */
+ static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
+ {
+       struct winsize tmp_ws;
+       if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+               return -EFAULT;
+       if (tty->ops->resize)
+               return tty->ops->resize(tty, &tmp_ws);
+       else
+               return tty_do_resize(tty, &tmp_ws);
+ }
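+ /*
+  * Userspace sketch (editor's example): a TIOCSWINSZ resize; the
+  * foreground process group then receives SIGWINCH via tty_do_resize():
+  *
+  *     struct winsize ws = { .ws_row = 50, .ws_col = 132 };
+  *     ioctl(fd, TIOCSWINSZ, &ws);
+  */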
+ /**
+  *    tioccons        -       allow admin to move logical console
+  *    @file: the file to become console
+  *
+  *    Allow the administrator to move the redirected console device
+  *
+  *    Locking: uses redirect_lock to guard the redirect information
+  */
+ static int tioccons(struct file *file)
+ {
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (file->f_op->write == redirected_tty_write) {
+               struct file *f;
+               spin_lock(&redirect_lock);
+               f = redirect;
+               redirect = NULL;
+               spin_unlock(&redirect_lock);
+               if (f)
+                       fput(f);
+               return 0;
+       }
+       spin_lock(&redirect_lock);
+       if (redirect) {
+               spin_unlock(&redirect_lock);
+               return -EBUSY;
+       }
+       get_file(file);
+       redirect = file;
+       spin_unlock(&redirect_lock);
+       return 0;
+ }
+ /**
+  *    fionbio         -       non blocking ioctl
+  *    @file: file to set blocking value
+  *    @p: user parameter
+  *
+  *    Historical tty interfaces had a blocking control ioctl before
+  *    the generic functionality existed. This piece of history is preserved
+  *    in the expected tty API of POSIX OSes.
+  *
+  *    Locking: none, the open file handle ensures it won't go away.
+  */
+ static int fionbio(struct file *file, int __user *p)
+ {
+       int nonblock;
+       if (get_user(nonblock, p))
+               return -EFAULT;
+       spin_lock(&file->f_lock);
+       if (nonblock)
+               file->f_flags |= O_NONBLOCK;
+       else
+               file->f_flags &= ~O_NONBLOCK;
+       spin_unlock(&file->f_lock);
+       return 0;
+ }
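+ /*
+  * Userspace sketch (editor's example): FIONBIO toggles O_NONBLOCK much
+  * like fcntl(F_SETFL) would:
+  *
+  *     int on = 1;
+  *     ioctl(fd, FIONBIO, &on);        set non-blocking
+  *     on = 0;
+  *     ioctl(fd, FIONBIO, &on);        back to blocking
+  */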
+ /**
+  *    tiocsctty       -       set controlling tty
+  *    @tty: tty structure
+  *    @arg: user argument
+  *
+  *    This ioctl is used to manage job control. It permits a session
+  *    leader to set this tty as the controlling tty for the session.
+  *
+  *    Locking:
+  *            Takes tty_mutex() to protect tty instance
+  *            Takes tasklist_lock internally to walk sessions
+  *            Takes ->siglock() when updating signal->tty
+  */
+ static int tiocsctty(struct tty_struct *tty, int arg)
+ {
+       int ret = 0;
+       if (current->signal->leader && (task_session(current) == tty->session))
+               return ret;
+       mutex_lock(&tty_mutex);
+       /*
+        * The process must be a session leader and
+        * not have a controlling tty already.
+        */
+       if (!current->signal->leader || current->signal->tty) {
+               ret = -EPERM;
+               goto unlock;
+       }
+       if (tty->session) {
+               /*
+                * This tty is already the controlling
+                * tty for another session group!
+                */
+               if (arg == 1 && capable(CAP_SYS_ADMIN)) {
+                       /*
+                        * Steal it away
+                        */
+                       read_lock(&tasklist_lock);
+                       session_clear_tty(tty->session);
+                       read_unlock(&tasklist_lock);
+               } else {
+                       ret = -EPERM;
+                       goto unlock;
+               }
+       }
+       proc_set_tty(current, tty);
+ unlock:
+       mutex_unlock(&tty_mutex);
+       return ret;
+ }
+ /**
+  *    tty_get_pgrp    -       return a ref counted pgrp pid
+  *    @tty: tty to read
+  *
+  *    Returns a refcounted instance of the pid struct for the process
+  *    group controlling the tty.
+  */
+ struct pid *tty_get_pgrp(struct tty_struct *tty)
+ {
+       unsigned long flags;
+       struct pid *pgrp;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       pgrp = get_pid(tty->pgrp);
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       return pgrp;
+ }
+ EXPORT_SYMBOL_GPL(tty_get_pgrp);
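+ /*
+  * Illustrative sketch (editor's example; this is what tty_do_resize()
+  * above does): take a pgrp reference so signals can be sent without
+  * holding the ctrl lock:
+  *
+  *     struct pid *pgrp = tty_get_pgrp(tty);
+  *     if (pgrp)
+  *             kill_pgrp(pgrp, SIGWINCH, 1);
+  *     put_pid(pgrp);
+  */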
+ /**
+  *    tiocgpgrp               -       get process group
+  *    @tty: tty passed by user
+  *    @real_tty: tty side of the tty passed by the user if a pty, else the tty
+  *    @p: returned pid
+  *
+  *    Obtain the process group of the tty. If there is no process group
+  *    return an error.
+  *
+  *    Locking: none. Reference to current->signal->tty is safe.
+  */
+ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+       struct pid *pid;
+       int ret;
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+        */
+       if (tty == real_tty && current->signal->tty != real_tty)
+               return -ENOTTY;
+       pid = tty_get_pgrp(real_tty);
+       ret =  put_user(pid_vnr(pid), p);
+       put_pid(pid);
+       return ret;
+ }
+ /**
+  *    tiocspgrp               -       attempt to set process group
+  *    @tty: tty passed by user
+  *    @real_tty: tty side device matching tty passed by user
+  *    @p: pid pointer
+  *
+  *    Set the process group of the tty to the session passed. Only
+  *    permitted where the tty session is our session.
+  *
+  *    Locking: RCU, ctrl lock
+  */
+ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+       struct pid *pgrp;
+       pid_t pgrp_nr;
+       int retval = tty_check_change(real_tty);
+       unsigned long flags;
+       if (retval == -EIO)
+               return -ENOTTY;
+       if (retval)
+               return retval;
+       if (!current->signal->tty ||
+           (current->signal->tty != real_tty) ||
+           (real_tty->session != task_session(current)))
+               return -ENOTTY;
+       if (get_user(pgrp_nr, p))
+               return -EFAULT;
+       if (pgrp_nr < 0)
+               return -EINVAL;
+       rcu_read_lock();
+       pgrp = find_vpid(pgrp_nr);
+       retval = -ESRCH;
+       if (!pgrp)
+               goto out_unlock;
+       retval = -EPERM;
+       if (session_of_pgrp(pgrp) != task_session(current))
+               goto out_unlock;
+       retval = 0;
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       put_pid(real_tty->pgrp);
+       real_tty->pgrp = get_pid(pgrp);
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ out_unlock:
+       rcu_read_unlock();
+       return retval;
+ }
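+ /*
+  * Userspace sketch (editor's example): tcsetpgrp(fd, pgid) is
+  * implemented in terms of this ioctl:
+  *
+  *     pid_t pgid = getpgrp();
+  *     ioctl(fd, TIOCSPGRP, &pgid);
+  */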
+ /**
+  *    tiocgsid                -       get session id
+  *    @tty: tty passed by user
+  *    @real_tty: tty side of the tty passed by the user if a pty, else the tty
+  *    @p: pointer to returned session id
+  *
+  *    Obtain the session id of the tty. If there is no session
+  *    return an error.
+  *
+  *    Locking: none. Reference to current->signal->tty is safe.
+  */
+ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+       */
+       if (tty == real_tty && current->signal->tty != real_tty)
+               return -ENOTTY;
+       if (!real_tty->session)
+               return -ENOTTY;
+       return put_user(pid_vnr(real_tty->session), p);
+ }
+ /**
+  *    tiocsetd        -       set line discipline
+  *    @tty: tty device
+  *    @p: pointer to user data
+  *
+  *    Set the line discipline according to user request.
+  *
+  *    Locking: see tty_set_ldisc, this function is just a helper
+  */
+ static int tiocsetd(struct tty_struct *tty, int __user *p)
+ {
+       int ldisc;
+       int ret;
+       if (get_user(ldisc, p))
+               return -EFAULT;
+       ret = tty_set_ldisc(tty, ldisc);
+       return ret;
+ }
+ /**
+  *    send_break      -       perform a timed break
+  *    @tty: device to break on
+  *    @duration: timeout in milliseconds
+  *
+  *    Perform a timed break on hardware that lacks its own driver level
+  *    timed break functionality.
+  *
+  *    Locking:
+  *            atomic_write_lock serializes
+  *
+  */
+ static int send_break(struct tty_struct *tty, unsigned int duration)
+ {
+       int retval;
+       if (tty->ops->break_ctl == NULL)
+               return 0;
+       if (tty->driver->flags & TTY_DRIVER_HARDWARE_BREAK)
+               retval = tty->ops->break_ctl(tty, duration);
+       else {
+               /* Do the work ourselves */
+               if (tty_write_lock(tty, 0) < 0)
+                       return -EINTR;
+               retval = tty->ops->break_ctl(tty, -1);
+               if (retval)
+                       goto out;
+               if (!signal_pending(current))
+                       msleep_interruptible(duration);
+               retval = tty->ops->break_ctl(tty, 0);
+ out:
+               tty_write_unlock(tty);
+               if (signal_pending(current))
+                       retval = -EINTR;
+       }
+       return retval;
+ }
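+ /*
+  * Userspace sketch (editor's example): both break ioctls funnel into
+  * send_break() for drivers without hardware break support:
+  *
+  *     tcsendbreak(fd, 0);             roughly a 250ms break (TCSBRK)
+  *     ioctl(fd, TCSBRKP, 5);          500ms break (arg * 100 ms)
+  */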
+ /**
+  *    tty_tiocmget            -       get modem status
+  *    @tty: tty device
+  *    @file: user file pointer
+  *    @p: pointer to result
+  *
+  *    Obtain the modem status bits from the tty driver if the feature
+  *    is supported. Return -EINVAL if it is not available.
+  *
+  *    Locking: none (up to the driver)
+  */
+ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
+ {
+       int retval = -EINVAL;
+       if (tty->ops->tiocmget) {
+               retval = tty->ops->tiocmget(tty, file);
+               if (retval >= 0)
+                       retval = put_user(retval, p);
+       }
+       return retval;
+ }
+ /**
+  *    tty_tiocmset            -       set modem status
+  *    @tty: tty device
+  *    @file: user file pointer
+  *    @cmd: command - clear bits, set bits or set all
+  *    @p: pointer to desired bits
+  *
+  *    Set the modem status bits from the tty driver if the feature
+  *    is supported. Return -EINVAL if it is not available.
+  *
+  *    Locking: none (up to the driver)
+  */
+ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
+            unsigned __user *p)
+ {
+       int retval;
+       unsigned int set, clear, val;
+       if (tty->ops->tiocmset == NULL)
+               return -EINVAL;
+       retval = get_user(val, p);
+       if (retval)
+               return retval;
+       set = clear = 0;
+       switch (cmd) {
+       case TIOCMBIS:
+               set = val;
+               break;
+       case TIOCMBIC:
+               clear = val;
+               break;
+       case TIOCMSET:
+               set = val;
+               clear = ~val;
+               break;
+       }
+       set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP;
+       clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP;
+       return tty->ops->tiocmset(tty, file, set, clear);
+ }
+ static int tty_tiocgicount(struct tty_struct *tty, void __user *arg)
+ {
+       int retval = -EINVAL;
+       struct serial_icounter_struct icount;
+       memset(&icount, 0, sizeof(icount));
+       if (tty->ops->get_icount)
+               retval = tty->ops->get_icount(tty, &icount);
+       if (retval != 0)
+               return retval;
+       if (copy_to_user(arg, &icount, sizeof(icount)))
+               return -EFAULT;
+       return 0;
+ }
+ struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+ {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_MASTER)
+               tty = tty->link;
+       return tty;
+ }
+ EXPORT_SYMBOL(tty_pair_get_tty);
+ struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+ {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_MASTER)
+               return tty;
+       return tty->link;
+ }
+ EXPORT_SYMBOL(tty_pair_get_pty);
+ /*
+  * Split this up, as gcc can choke on it otherwise..
+  */
+ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ {
+       struct tty_struct *tty = file_tty(file);
+       struct tty_struct *real_tty;
+       void __user *p = (void __user *)arg;
+       int retval;
+       struct tty_ldisc *ld;
+       struct inode *inode = file->f_dentry->d_inode;
+       if (tty_paranoia_check(tty, inode, "tty_ioctl"))
+               return -EINVAL;
+       real_tty = tty_pair_get_tty(tty);
+       /*
+        * Factor out some common prep work
+        */
+       switch (cmd) {
+       case TIOCSETD:
+       case TIOCSBRK:
+       case TIOCCBRK:
+       case TCSBRK:
+       case TCSBRKP:
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               if (cmd != TIOCCBRK) {
+                       tty_wait_until_sent(tty, 0);
+                       if (signal_pending(current))
+                               return -EINTR;
+               }
+               break;
+       }
+       /*
+        *      Now do the stuff.
+        */
+       switch (cmd) {
+       case TIOCSTI:
+               return tiocsti(tty, p);
+       case TIOCGWINSZ:
+               return tiocgwinsz(real_tty, p);
+       case TIOCSWINSZ:
+               return tiocswinsz(real_tty, p);
+       case TIOCCONS:
+               return real_tty != tty ? -EINVAL : tioccons(file);
+       case FIONBIO:
+               return fionbio(file, p);
+       case TIOCEXCL:
+               set_bit(TTY_EXCLUSIVE, &tty->flags);
+               return 0;
+       case TIOCNXCL:
+               clear_bit(TTY_EXCLUSIVE, &tty->flags);
+               return 0;
+       case TIOCNOTTY:
+               if (current->signal->tty != tty)
+                       return -ENOTTY;
+               no_tty();
+               return 0;
+       case TIOCSCTTY:
+               return tiocsctty(tty, arg);
+       case TIOCGPGRP:
+               return tiocgpgrp(tty, real_tty, p);
+       case TIOCSPGRP:
+               return tiocspgrp(tty, real_tty, p);
+       case TIOCGSID:
+               return tiocgsid(tty, real_tty, p);
+       case TIOCGETD:
+               return put_user(tty->ldisc->ops->num, (int __user *)p);
+       case TIOCSETD:
+               return tiocsetd(tty, p);
+       /*
++       * Without the real device to which /dev/console is connected,
++       * blogd cannot work.
++       *      blogd spawns a pty/tty pair,
++       *      sets /dev/console to the tty of that pair (ioctl TIOCCONS),
++       *      then reads all input from the current /dev/console,
++       *      buffers the data read or writes it to /var/log/boot.msg
++       *      _and_ to the original real device.
++       */
++      case TIOCGDEV:
++      {
++              unsigned int ret = new_encode_dev(tty_devnum(real_tty));
++              return put_user(ret, (unsigned int __user *)p);
++      }
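++      /*
++       * Userspace sketch (editor's example): blogd-style lookup of the
++       * device number behind this tty:
++       *
++       *      unsigned int dev;
++       *      ioctl(fd, TIOCGDEV, &dev);      dev from new_encode_dev()
++       */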
++
++      /*
+        * Break handling
+        */
+       case TIOCSBRK:  /* Turn break on, unconditionally */
+               if (tty->ops->break_ctl)
+                       return tty->ops->break_ctl(tty, -1);
+               return 0;
+       case TIOCCBRK:  /* Turn break off, unconditionally */
+               if (tty->ops->break_ctl)
+                       return tty->ops->break_ctl(tty, 0);
+               return 0;
+       case TCSBRK:   /* SVID version: non-zero arg --> no break */
+               /* non-zero arg means wait for all output data
+                * to be sent (performed above) but don't send break.
+                * This is used by the tcdrain() termios function.
+                */
+               if (!arg)
+                       return send_break(tty, 250);
+               return 0;
+       case TCSBRKP:   /* support for POSIX tcsendbreak() */
+               return send_break(tty, arg ? arg*100 : 250);
+       case TIOCMGET:
+               return tty_tiocmget(tty, file, p);
+       case TIOCMSET:
+       case TIOCMBIC:
+       case TIOCMBIS:
+               return tty_tiocmset(tty, file, cmd, p);
+       case TIOCGICOUNT:
+               retval = tty_tiocgicount(tty, p);
+               /* For the moment allow fall through to the old method */
+               if (retval != -EINVAL)
+                       return retval;
+               break;
+       case TCFLSH:
+               switch (arg) {
+               case TCIFLUSH:
+               case TCIOFLUSH:
+               /* flush tty buffer and allow ldisc to process ioctl */
+                       tty_buffer_flush(tty);
+                       break;
+               }
+               break;
+       }
+       if (tty->ops->ioctl) {
+               retval = (tty->ops->ioctl)(tty, file, cmd, arg);
+               if (retval != -ENOIOCTLCMD)
+                       return retval;
+       }
+       ld = tty_ldisc_ref_wait(tty);
+       retval = -EINVAL;
+       if (ld->ops->ioctl) {
+               retval = ld->ops->ioctl(tty, file, cmd, arg);
+               if (retval == -ENOIOCTLCMD)
+                       retval = -EINVAL;
+       }
+       tty_ldisc_deref(ld);
+       return retval;
+ }
+ #ifdef CONFIG_COMPAT
+ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
+                               unsigned long arg)
+ {
+       struct inode *inode = file->f_dentry->d_inode;
+       struct tty_struct *tty = file_tty(file);
+       struct tty_ldisc *ld;
+       int retval = -ENOIOCTLCMD;
+       if (tty_paranoia_check(tty, inode, "tty_ioctl"))
+               return -EINVAL;
+       if (tty->ops->compat_ioctl) {
+               retval = (tty->ops->compat_ioctl)(tty, file, cmd, arg);
+               if (retval != -ENOIOCTLCMD)
+                       return retval;
+       }
+       ld = tty_ldisc_ref_wait(tty);
+       if (ld->ops->compat_ioctl)
+               retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
+       tty_ldisc_deref(ld);
+       return retval;
+ }
+ #endif
+ /*
+  * This implements the "Secure Attention Key" ---  the idea is to
+  * prevent trojan horses by killing all processes associated with this
+  * tty when the user hits the "Secure Attention Key".  Required for
+  * super-paranoid applications --- see the Orange Book for more details.
+  *
+  * This code could be nicer; ideally it should send a HUP, wait a few
+  * seconds, then send a INT, and then a KILL signal.  But you then
+  * have to coordinate with the init process, since all processes associated
+  * with the current tty must be dead before the new getty is allowed
+  * to spawn.
+  *
+  * Now, if it would be correct ;-/ The current code has a nasty hole -
+  * it doesn't catch files in flight. We may send the descriptor to ourselves
+  * via AF_UNIX socket, close it and later fetch from socket. FIXME.
+  *
+  * Nasty bug: do_SAK is being called in interrupt context.  This can
+  * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+  */
+ void __do_SAK(struct tty_struct *tty)
+ {
+ #ifdef TTY_SOFT_SAK
+       tty_hangup(tty);
+ #else
+       struct task_struct *g, *p;
+       struct pid *session;
+       int             i;
+       struct file     *filp;
+       struct fdtable *fdt;
+       if (!tty)
+               return;
+       session = tty->session;
+       tty_ldisc_flush(tty);
+       tty_driver_flush_buffer(tty);
+       read_lock(&tasklist_lock);
+       /* Kill the entire session */
+       do_each_pid_task(session, PIDTYPE_SID, p) {
+               printk(KERN_NOTICE "SAK: killed process %d"
+                       " (%s): task_session(p)==tty->session\n",
+                       task_pid_nr(p), p->comm);
+               send_sig(SIGKILL, p, 1);
+       } while_each_pid_task(session, PIDTYPE_SID, p);
+       /* Now kill any processes that happen to have the
+        * tty open.
+        */
+       do_each_thread(g, p) {
+               if (p->signal->tty == tty) {
+                       printk(KERN_NOTICE "SAK: killed process %d"
+                           " (%s): task_session(p)==tty->session\n",
+                           task_pid_nr(p), p->comm);
+                       send_sig(SIGKILL, p, 1);
+                       continue;
+               }
+               task_lock(p);
+               if (p->files) {
+                       /*
+                        * We don't take a ref to the file, so we must
+                        * hold ->file_lock instead.
+                        */
+                       spin_lock(&p->files->file_lock);
+                       fdt = files_fdtable(p->files);
+                       for (i = 0; i < fdt->max_fds; i++) {
+                               filp = fcheck_files(p->files, i);
+                               if (!filp)
+                                       continue;
+                               if (filp->f_op->read == tty_read &&
+                                   file_tty(filp) == tty) {
+                                       printk(KERN_NOTICE "SAK: killed process %d"
+                                           " (%s): fd#%d opened to the tty\n",
+                                           task_pid_nr(p), p->comm, i);
+                                       force_sig(SIGKILL, p);
+                                       break;
+                               }
+                       }
+                       spin_unlock(&p->files->file_lock);
+               }
+               task_unlock(p);
+       } while_each_thread(g, p);
+       read_unlock(&tasklist_lock);
+ #endif
+ }
+ static void do_SAK_work(struct work_struct *work)
+ {
+       struct tty_struct *tty =
+               container_of(work, struct tty_struct, SAK_work);
+       __do_SAK(tty);
+ }
+ /*
+  * The tq handling here is a little racy - tty->SAK_work may already be queued.
+  * Fortunately we don't need to worry, because if ->SAK_work is already queued,
+  * the values which we write to it will be identical to the values which it
+  * already has. --akpm
+  */
+ void do_SAK(struct tty_struct *tty)
+ {
+       if (!tty)
+               return;
+       schedule_work(&tty->SAK_work);
+ }
+ EXPORT_SYMBOL(do_SAK);
+ static int dev_match_devt(struct device *dev, void *data)
+ {
+       dev_t *devt = data;
+       return dev->devt == *devt;
+ }
+ /* Must put_device() after it's unused! */
+ static struct device *tty_get_device(struct tty_struct *tty)
+ {
+       dev_t devt = tty_devnum(tty);
+       return class_find_device(tty_class, NULL, &devt, dev_match_devt);
+ }
+ /**
+  *    initialize_tty_struct   -       initialise a newly allocated tty
+  *    @tty: tty to initialize
+  *    @driver: tty driver the tty will be bound to
+  *    @idx: line index of the tty within the driver
+  *
+  *    This subroutine initializes a tty structure that has been newly
+  *    allocated.
+  *
+  *    Locking: none - tty in question must not be exposed at this point
+  */
+ void initialize_tty_struct(struct tty_struct *tty,
+               struct tty_driver *driver, int idx)
+ {
+       memset(tty, 0, sizeof(struct tty_struct));
+       kref_init(&tty->kref);
+       tty->magic = TTY_MAGIC;
+       tty_ldisc_init(tty);
+       tty->session = NULL;
+       tty->pgrp = NULL;
+       tty->overrun_time = jiffies;
+       tty->buf.head = tty->buf.tail = NULL;
+       tty_buffer_init(tty);
+       mutex_init(&tty->termios_mutex);
+       mutex_init(&tty->ldisc_mutex);
+       init_waitqueue_head(&tty->write_wait);
+       init_waitqueue_head(&tty->read_wait);
+       INIT_WORK(&tty->hangup_work, do_tty_hangup);
+       mutex_init(&tty->atomic_read_lock);
+       mutex_init(&tty->atomic_write_lock);
+       mutex_init(&tty->output_lock);
+       mutex_init(&tty->echo_lock);
+       spin_lock_init(&tty->read_lock);
+       spin_lock_init(&tty->ctrl_lock);
+       INIT_LIST_HEAD(&tty->tty_files);
+       INIT_WORK(&tty->SAK_work, do_SAK_work);
+       tty->driver = driver;
+       tty->ops = driver->ops;
+       tty->index = idx;
+       tty_line_name(driver, idx, tty->name);
+       tty->dev = tty_get_device(tty);
+ }
+ /**
+  *    tty_put_char    -       write one character to a tty
+  *    @tty: tty
+  *    @ch: character
+  *
+  *    Write one byte to the tty using the provided put_char method
+  *    if present. Returns the number of characters successfully output.
+  *
+  *    Note: the specific put_char operation in the driver layer may go
+  *    away soon. Don't call it directly, use this method
+  */
+ int tty_put_char(struct tty_struct *tty, unsigned char ch)
+ {
+       if (tty->ops->put_char)
+               return tty->ops->put_char(tty, ch);
+       return tty->ops->write(tty, &ch, 1);
+ }
+ EXPORT_SYMBOL_GPL(tty_put_char);
+ struct class *tty_class;
+ /**
+  *    tty_register_device - register a tty device
+  *    @driver: the tty driver that describes the tty device
+  *    @index: the index in the tty driver for this tty device
+  *    @device: a struct device that is associated with this tty device.
+  *            This field is optional, if there is no known struct device
+  *            for this tty device it can be set to NULL safely.
+  *
+  *    Returns a pointer to the struct device for this tty device
+  *    (or ERR_PTR(-EFOO) on error).
+  *
+  *    This call is required to be made to register an individual tty device
+  *    if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If
+  *    that bit is not set, this function should not be called by a tty
+  *    driver.
+  *
+  *    Locking: ??
+  */
+ struct device *tty_register_device(struct tty_driver *driver, unsigned index,
+                                  struct device *device)
+ {
+       char name[64];
+       dev_t dev = MKDEV(driver->major, driver->minor_start) + index;
+       if (index >= driver->num) {
+               printk(KERN_ERR "Attempt to register invalid tty line number "
+                      " (%d).\n", index);
+               return ERR_PTR(-EINVAL);
+       }
+       if (driver->type == TTY_DRIVER_TYPE_PTY)
+               pty_line_name(driver, index, name);
+       else
+               tty_line_name(driver, index, name);
+       return device_create(tty_class, device, dev, NULL, name);
+ }
+ EXPORT_SYMBOL(tty_register_device);
+ /**
+  *    tty_unregister_device - unregister a tty device
+  *    @driver: the tty driver that describes the tty device
+  *    @index: the index in the tty driver for this tty device
+  *
+  *    If a tty device is registered with a call to tty_register_device() then
+  *    this function must be called when the tty device is gone.
+  *
+  *    Locking: ??
+  */
+ void tty_unregister_device(struct tty_driver *driver, unsigned index)
+ {
+       device_destroy(tty_class,
+               MKDEV(driver->major, driver->minor_start) + index);
+ }
+ EXPORT_SYMBOL(tty_unregister_device);
+ struct tty_driver *alloc_tty_driver(int lines)
+ {
+       struct tty_driver *driver;
+       driver = kzalloc(sizeof(struct tty_driver), GFP_KERNEL);
+       if (driver) {
+               kref_init(&driver->kref);
+               driver->magic = TTY_DRIVER_MAGIC;
+               driver->num = lines;
+               /* later we'll move allocation of tables here */
+       }
+       return driver;
+ }
+ EXPORT_SYMBOL(alloc_tty_driver);
+ static void destruct_tty_driver(struct kref *kref)
+ {
+       struct tty_driver *driver = container_of(kref, struct tty_driver, kref);
+       int i;
+       struct ktermios *tp;
+       void *p;
+       if (driver->flags & TTY_DRIVER_INSTALLED) {
+               /*
+                * Free the termios and termios_locked structures because
+                * we don't want to get memory leaks when modular tty
+                * drivers are removed from the kernel.
+                */
+               for (i = 0; i < driver->num; i++) {
+                       tp = driver->termios[i];
+                       if (tp) {
+                               driver->termios[i] = NULL;
+                               kfree(tp);
+                       }
+                       if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV))
+                               tty_unregister_device(driver, i);
+               }
+               p = driver->ttys;
+               proc_tty_unregister_driver(driver);
+               driver->ttys = NULL;
+               driver->termios = NULL;
+               kfree(p);
+               cdev_del(&driver->cdev);
+       }
+       kfree(driver);
+ }
+ void tty_driver_kref_put(struct tty_driver *driver)
+ {
+       kref_put(&driver->kref, destruct_tty_driver);
+ }
+ EXPORT_SYMBOL(tty_driver_kref_put);
+ void tty_set_operations(struct tty_driver *driver,
+                       const struct tty_operations *op)
+ {
+       driver->ops = op;
+ };
+ EXPORT_SYMBOL(tty_set_operations);
+ void put_tty_driver(struct tty_driver *d)
+ {
+       tty_driver_kref_put(d);
+ }
+ EXPORT_SYMBOL(put_tty_driver);
+ /*
+  * Called by a tty driver to register itself.
+  */
+ int tty_register_driver(struct tty_driver *driver)
+ {
+       int error;
+       int i;
+       dev_t dev;
+       void **p = NULL;
+       struct device *d;
+       if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM) && driver->num) {
+               p = kzalloc(driver->num * 2 * sizeof(void *), GFP_KERNEL);
+               if (!p)
+                       return -ENOMEM;
+       }
+       if (!driver->major) {
+               error = alloc_chrdev_region(&dev, driver->minor_start,
+                                               driver->num, driver->name);
+               if (!error) {
+                       driver->major = MAJOR(dev);
+                       driver->minor_start = MINOR(dev);
+               }
+       } else {
+               dev = MKDEV(driver->major, driver->minor_start);
+               error = register_chrdev_region(dev, driver->num, driver->name);
+       }
+       if (error < 0) {
+               kfree(p);
+               return error;
+       }
+       if (p) {
+               driver->ttys = (struct tty_struct **)p;
+               driver->termios = (struct ktermios **)(p + driver->num);
+       } else {
+               driver->ttys = NULL;
+               driver->termios = NULL;
+       }
+       cdev_init(&driver->cdev, &tty_fops);
+       driver->cdev.owner = driver->owner;
+       error = cdev_add(&driver->cdev, dev, driver->num);
+       if (error) {
+               unregister_chrdev_region(dev, driver->num);
+               driver->ttys = NULL;
+               driver->termios = NULL;
+               kfree(p);
+               return error;
+       }
+       mutex_lock(&tty_mutex);
+       list_add(&driver->tty_drivers, &tty_drivers);
+       mutex_unlock(&tty_mutex);
+       if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) {
+               for (i = 0; i < driver->num; i++) {
+                       d = tty_register_device(driver, i, NULL);
+                       if (IS_ERR(d)) {
+                               error = PTR_ERR(d);
+                               goto err;
+                       }
+               }
+       }
+       proc_tty_register_driver(driver);
+       driver->flags |= TTY_DRIVER_INSTALLED;
+       return 0;
+ err:
+       for (i--; i >= 0; i--)
+               tty_unregister_device(driver, i);
+       mutex_lock(&tty_mutex);
+       list_del(&driver->tty_drivers);
+       mutex_unlock(&tty_mutex);
+       unregister_chrdev_region(dev, driver->num);
+       driver->ttys = NULL;
+       driver->termios = NULL;
+       kfree(p);
+       return error;
+ }
+ EXPORT_SYMBOL(tty_register_driver);
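+ /*
+  * A sketch of the usual call sequence from a driver's module init
+  * (all "foo" names are hypothetical):
+  *
+  *    struct tty_driver *drv = alloc_tty_driver(FOO_NR_PORTS);
+  *    if (!drv)
+  *            return -ENOMEM;
+  *    drv->driver_name = "foo";
+  *    drv->name = "ttyFOO";
+  *    drv->type = TTY_DRIVER_TYPE_SERIAL;
+  *    drv->subtype = SERIAL_TYPE_NORMAL;
+  *    drv->init_termios = tty_std_termios;
+  *    tty_set_operations(drv, &foo_ops);
+  *    ret = tty_register_driver(drv);
+  *    if (ret)
+  *            put_tty_driver(drv);
+  */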
+ /*
+  * Called by a tty driver to unregister itself.
+  */
+ int tty_unregister_driver(struct tty_driver *driver)
+ {
+ #if 0
+       /* FIXME */
+       if (driver->refcount)
+               return -EBUSY;
+ #endif
+       unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
+                               driver->num);
+       mutex_lock(&tty_mutex);
+       list_del(&driver->tty_drivers);
+       mutex_unlock(&tty_mutex);
+       return 0;
+ }
+ EXPORT_SYMBOL(tty_unregister_driver);
+ dev_t tty_devnum(struct tty_struct *tty)
+ {
+       return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
+ }
+ EXPORT_SYMBOL(tty_devnum);
+ void proc_clear_tty(struct task_struct *p)
+ {
+       unsigned long flags;
+       struct tty_struct *tty;
+       spin_lock_irqsave(&p->sighand->siglock, flags);
+       tty = p->signal->tty;
+       p->signal->tty = NULL;
+       spin_unlock_irqrestore(&p->sighand->siglock, flags);
+       tty_kref_put(tty);
+ }
+ /* Called under the sighand lock */
+ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+ {
+       if (tty) {
+               unsigned long flags;
+               /* We should not have a session or pgrp to put here but.... */
+               spin_lock_irqsave(&tty->ctrl_lock, flags);
+               put_pid(tty->session);
+               put_pid(tty->pgrp);
+               tty->pgrp = get_pid(task_pgrp(tsk));
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               tty->session = get_pid(task_session(tsk));
+               if (tsk->signal->tty) {
+                       printk(KERN_DEBUG "tty not NULL!!\n");
+                       tty_kref_put(tsk->signal->tty);
+               }
+       }
+       put_pid(tsk->signal->tty_old_pgrp);
+       tsk->signal->tty = tty_kref_get(tty);
+       tsk->signal->tty_old_pgrp = NULL;
+ }
+ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+ {
+       spin_lock_irq(&tsk->sighand->siglock);
+       __proc_set_tty(tsk, tty);
+       spin_unlock_irq(&tsk->sighand->siglock);
+ }
+ struct tty_struct *get_current_tty(void)
+ {
+       struct tty_struct *tty;
+       unsigned long flags;
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       tty = tty_kref_get(current->signal->tty);
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+       return tty;
+ }
+ EXPORT_SYMBOL_GPL(get_current_tty);
+ void tty_default_fops(struct file_operations *fops)
+ {
+       *fops = tty_fops;
+ }
+ /*
+  * Initialize the console device. This is called *early*, so
+  * we can't necessarily depend on lots of kernel help here.
+  * Just do some early initializations, and do the complex setup
+  * later.
+  */
+ void __init console_init(void)
+ {
+       initcall_t *call;
+       /* Setup the default TTY line discipline. */
+       tty_ldisc_begin();
+       /*
+        * set up the console device so that later boot sequences can
+        * inform about problems etc..
+        */
+       call = __con_initcall_start;
+       while (call < __con_initcall_end) {
+               (*call)();
+               call++;
+       }
+ }
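+ /*
+  * Console drivers enter the loop above via console_initcall(); a
+  * sketch with a hypothetical driver:
+  *
+  *    static int __init foo_console_init(void)
+  *    {
+  *            register_console(&foo_console);
+  *            return 0;
+  *    }
+  *    console_initcall(foo_console_init);
+  */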
+ static char *tty_devnode(struct device *dev, mode_t *mode)
+ {
+       if (!mode)
+               return NULL;
+       if (dev->devt == MKDEV(TTYAUX_MAJOR, 0) ||
+           dev->devt == MKDEV(TTYAUX_MAJOR, 2))
+               *mode = 0666;
+       return NULL;
+ }
+ static int __init tty_class_init(void)
+ {
+       tty_class = class_create(THIS_MODULE, "tty");
+       if (IS_ERR(tty_class))
+               return PTR_ERR(tty_class);
+       tty_class->devnode = tty_devnode;
+       return 0;
+ }
+ postcore_initcall(tty_class_init);
+ /* 3/2004 jmc: why do these devices exist? */
+ static struct cdev tty_cdev, console_cdev;
+ /*
+  * Ok, now we can initialize the rest of the tty devices and can count
+  * on memory allocations, interrupts etc..
+  */
+ int __init tty_init(void)
+ {
+       cdev_init(&tty_cdev, &tty_fops);
+       if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
+           register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
+               panic("Couldn't register /dev/tty driver\n");
+       device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL,
+                             "tty");
+       cdev_init(&console_cdev, &console_fops);
+       if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
+           register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
+               panic("Couldn't register /dev/console driver\n");
+       device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL,
+                             "console");
+ #ifdef CONFIG_VT
+       vty_init(&console_fops);
+ #endif
+       return 0;
+ }
index 0000000,e95d787..f0c9032
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,1454 +1,1463 @@@
+ /*
+  * linux/drivers/char/keyboard.c
+  *
+  * Written for linux by Johan Myreen as a translation from
+  * the assembly version by Linus (with diacriticals added)
+  *
+  * Some additional features added by Christoph Niemann (ChN), March 1993
+  *
+  * Loadable keymaps by Risto Kankkunen, May 1993
+  *
+  * Diacriticals redone & other small changes, aeb@cwi.nl, June 1993
+  * Added decr/incr_console, dynamic keymaps, Unicode support,
+  * dynamic function/string keys, led setting,  Sept 1994
+  * `Sticky' modifier keys, 951006.
+  *
+  * 11-11-96: SAK should now work in the raw mode (Martin Mares)
+  *
+  * Modified to provide 'generic' keyboard support by Hamish Macdonald
+  * Merge with the m68k keyboard driver and split-off of the PC low-level
+  * parts by Geert Uytterhoeven, May 1997
+  *
+  * 27-05-97: Added support for the Magic SysRq Key (Martin Mares)
+  * 30-07-98: Dead keys redone, aeb@cwi.nl.
+  * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik)
+  */
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ #include <linux/consolemap.h>
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/irq.h>
+ #include <linux/kbd_kern.h>
+ #include <linux/kbd_diacr.h>
+ #include <linux/vt_kern.h>
+ #include <linux/input.h>
+ #include <linux/reboot.h>
+ #include <linux/notifier.h>
+ #include <linux/jiffies.h>
+ extern void ctrl_alt_del(void);
+ /*
+  * Exported functions/variables
+  */
+ #define KBD_DEFMODE ((1 << VC_REPEAT) | (1 << VC_META))
+ /*
+  * Some laptops take the 789uiojklm,. keys as number pad when NumLock is on.
+  * This seems a good reason to start with NumLock off. On the HIL keyboards
+  * of PARISC machines, however, there is no NumLock key and everyone expects
+  * the keypad to be used for numbers.
+  */
+ #if defined(CONFIG_PARISC) && (defined(CONFIG_KEYBOARD_HIL) || defined(CONFIG_KEYBOARD_HIL_OLD))
+ #define KBD_DEFLEDS (1 << VC_NUMLOCK)
+ #else
+ #define KBD_DEFLEDS 0
+ #endif
+ #define KBD_DEFLOCK 0
+ void compute_shiftstate(void);
+ /*
+  * Handler Tables.
+  */
+ #define K_HANDLERS\
+       k_self,         k_fn,           k_spec,         k_pad,\
+       k_dead,         k_cons,         k_cur,          k_shift,\
+       k_meta,         k_ascii,        k_lock,         k_lowercase,\
+       k_slock,        k_dead2,        k_brl,          k_ignore
+ typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
+                           char up_flag);
+ static k_handler_fn K_HANDLERS;
+ static k_handler_fn *k_handler[16] = { K_HANDLERS };
+ #define FN_HANDLERS\
+       fn_null,        fn_enter,       fn_show_ptregs, fn_show_mem,\
+       fn_show_state,  fn_send_intr,   fn_lastcons,    fn_caps_toggle,\
+       fn_num,         fn_hold,        fn_scroll_forw, fn_scroll_back,\
+       fn_boot_it,     fn_caps_on,     fn_compose,     fn_SAK,\
+       fn_dec_console, fn_inc_console, fn_spawn_con,   fn_bare_num
+ typedef void (fn_handler_fn)(struct vc_data *vc);
+ static fn_handler_fn FN_HANDLERS;
+ static fn_handler_fn *fn_handler[] = { FN_HANDLERS };
+ /*
+  * Variables exported for vt_ioctl.c
+  */
+ /* maximum values each key_handler can handle */
+ const int max_vals[] = {
+       255, ARRAY_SIZE(func_table) - 1, ARRAY_SIZE(fn_handler) - 1, NR_PAD - 1,
+       NR_DEAD - 1, 255, 3, NR_SHIFT - 1, 255, NR_ASCII - 1, NR_LOCK - 1,
+       255, NR_LOCK - 1, 255, NR_BRL - 1
+ };
+ const int NR_TYPES = ARRAY_SIZE(max_vals);
+ struct kbd_struct kbd_table[MAX_NR_CONSOLES];
+ EXPORT_SYMBOL_GPL(kbd_table);
+ static struct kbd_struct *kbd = kbd_table;
+ struct vt_spawn_console vt_spawn_con = {
+       .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock),
+       .pid  = NULL,
+       .sig  = 0,
+ };
+ /*
+  * Variables exported for vt.c
+  */
+ int shift_state = 0;
+ /*
+  * Internal Data.
+  */
+ static struct input_handler kbd_handler;
+ static DEFINE_SPINLOCK(kbd_event_lock);
+ static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)];        /* keyboard key bitmap */
+ static unsigned char shift_down[NR_SHIFT];            /* shift state counters.. */
+ static bool dead_key_next;
+ static int npadch = -1;                                       /* -1 or number assembled on pad */
+ static unsigned int diacr;
+ static char rep;                                      /* flag telling character repeat */
+ static unsigned char ledstate = 0xff;                 /* undefined */
+ static unsigned char ledioctl;
+ static struct ledptr {
+       unsigned int *addr;
+       unsigned int mask;
+       unsigned char valid:1;
+ } ledptrs[3];
+ /*
+  * Notifier list for console keyboard events
+  */
+ static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
+ int register_keyboard_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_keyboard_notifier);
+ int unregister_keyboard_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
+ /*
+  * Translation of scancodes to keycodes. We set them on only the first
+  * keyboard in the list that accepts the scancode and keycode.
+  * Explanation for not choosing the first attached keyboard anymore:
+  *  USB keyboards for example have two event devices: one for all "normal"
+  *  keys and one for extra function keys (like "volume up", "make coffee",
+  *  etc.). So this means that scancodes for the extra function keys won't
+  *  be valid for the first event device, but will be for the second.
+  */
+ struct getset_keycode_data {
+       struct input_keymap_entry ke;
+       int error;
+ };
+ static int getkeycode_helper(struct input_handle *handle, void *data)
+ {
+       struct getset_keycode_data *d = data;
+       d->error = input_get_keycode(handle->dev, &d->ke);
+       return d->error == 0; /* stop as soon as we successfully get one */
+ }
+ int getkeycode(unsigned int scancode)
+ {
+       struct getset_keycode_data d = {
+               .ke     = {
+                       .flags          = 0,
+                       .len            = sizeof(scancode),
+                       .keycode        = 0,
+               },
+               .error  = -ENODEV,
+       };
+       memcpy(d.ke.scancode, &scancode, sizeof(scancode));
+       input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper);
+       return d.error ?: d.ke.keycode;
+ }
+ static int setkeycode_helper(struct input_handle *handle, void *data)
+ {
+       struct getset_keycode_data *d = data;
+       d->error = input_set_keycode(handle->dev, &d->ke);
+       return d->error == 0; /* stop as soon as we successfully set one */
+ }
+ int setkeycode(unsigned int scancode, unsigned int keycode)
+ {
+       struct getset_keycode_data d = {
+               .ke     = {
+                       .flags          = 0,
+                       .len            = sizeof(scancode),
+                       .keycode        = keycode,
+               },
+               .error  = -ENODEV,
+       };
+       memcpy(d.ke.scancode, &scancode, sizeof(scancode));
+       input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper);
+       return d.error;
+ }
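+ /*
+  * getkeycode()/setkeycode() back the KDGETKEYCODE/KDSETKEYCODE console
+  * ioctls; a userspace sketch (fd open on the console, errors ignored):
+  *
+  *    struct kbkeycode kbkc = { .scancode = 0x1e };
+  *    ioctl(fd, KDGETKEYCODE, &kbkc);         // fills in kbkc.keycode
+  *    kbkc.keycode = KEY_A;
+  *    ioctl(fd, KDSETKEYCODE, &kbkc);         // remap scancode 0x1e
+  */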
+ /*
+  * Making beeps and bells. Note that we prefer beeps to bells, but when
+  * shutting the sound off we do both.
+  */
+ static int kd_sound_helper(struct input_handle *handle, void *data)
+ {
+       unsigned int *hz = data;
+       struct input_dev *dev = handle->dev;
+       if (test_bit(EV_SND, dev->evbit)) {
+               if (test_bit(SND_TONE, dev->sndbit)) {
+                       input_inject_event(handle, EV_SND, SND_TONE, *hz);
+                       if (*hz)
+                               return 0;
+               }
+               if (test_bit(SND_BELL, dev->sndbit))
+                       input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0);
+       }
+       return 0;
+ }
+ static void kd_nosound(unsigned long ignored)
+ {
+       static unsigned int zero;
+       input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper);
+ }
+ static DEFINE_TIMER(kd_mksound_timer, kd_nosound, 0, 0);
+ void kd_mksound(unsigned int hz, unsigned int ticks)
+ {
+       del_timer_sync(&kd_mksound_timer);
+       input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper);
+       if (hz && ticks)
+               mod_timer(&kd_mksound_timer, jiffies + ticks);
+ }
+ EXPORT_SYMBOL(kd_mksound);
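+ /*
+  * Userspace reaches kd_mksound() through the KDMKTONE ioctl, whose
+  * argument packs the duration in ms into the high 16 bits and the
+  * i8253 counter value (1193180 / hz) into the low 16 bits; a sketch:
+  *
+  *    // ~440 Hz beep for 250 ms on a console fd
+  *    ioctl(fd, KDMKTONE, (250 << 16) | (1193180 / 440));
+  */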
+ /*
+  * Setting the keyboard rate.
+  */
+ static int kbd_rate_helper(struct input_handle *handle, void *data)
+ {
+       struct input_dev *dev = handle->dev;
+       struct kbd_repeat *rep = data;
+       if (test_bit(EV_REP, dev->evbit)) {
+               if (rep[0].delay > 0)
+                       input_inject_event(handle,
+                                          EV_REP, REP_DELAY, rep[0].delay);
+               if (rep[0].period > 0)
+                       input_inject_event(handle,
+                                          EV_REP, REP_PERIOD, rep[0].period);
+               rep[1].delay = dev->rep[REP_DELAY];
+               rep[1].period = dev->rep[REP_PERIOD];
+       }
+       return 0;
+ }
+ int kbd_rate(struct kbd_repeat *rep)
+ {
+       struct kbd_repeat data[2] = { *rep };
+       input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper);
+       *rep = data[1]; /* Copy currently used settings */
+       return 0;
+ }
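+ /*
+  * kbd_rate() services the KDKBDREP ioctl; a userspace sketch:
+  *
+  *    struct kbd_repeat kr = { .delay = 250, .period = 33 };
+  *    ioctl(fd, KDKBDREP, &kr);       // kr now holds the values in use
+  */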
+ /*
+  * Helper Functions.
+  */
+ static void put_queue(struct vc_data *vc, int ch)
+ {
+       struct tty_struct *tty = vc->port.tty;
+       if (tty) {
+               tty_insert_flip_char(tty, ch, 0);
+               con_schedule_flip(tty);
+       }
+ }
+ static void puts_queue(struct vc_data *vc, char *cp)
+ {
+       struct tty_struct *tty = vc->port.tty;
+       if (!tty)
+               return;
+       while (*cp) {
+               tty_insert_flip_char(tty, *cp, 0);
+               cp++;
+       }
+       con_schedule_flip(tty);
+ }
+ static void applkey(struct vc_data *vc, int key, char mode)
+ {
+       static char buf[] = { 0x1b, 'O', 0x00, 0x00 };
+       buf[1] = (mode ? 'O' : '[');
+       buf[2] = key;
+       puts_queue(vc, buf);
+ }
+ /*
+  * Many other routines do put_queue, but I think either
+  * they produce ASCII, or they produce some user-assigned
+  * string, and in both cases we might assume that it is
+  * in utf-8 already.
+  */
+ static void to_utf8(struct vc_data *vc, uint c)
+ {
+       if (c < 0x80)
+               /*  0******* */
+               put_queue(vc, c);
+       else if (c < 0x800) {
+               /* 110***** 10****** */
+               put_queue(vc, 0xc0 | (c >> 6));
+               put_queue(vc, 0x80 | (c & 0x3f));
+       } else if (c < 0x10000) {
+               if (c >= 0xD800 && c < 0xE000)
+                       return;
+               if (c == 0xFFFF)
+                       return;
+               /* 1110**** 10****** 10****** */
+               put_queue(vc, 0xe0 | (c >> 12));
+               put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+               put_queue(vc, 0x80 | (c & 0x3f));
+       } else if (c < 0x110000) {
+               /* 11110*** 10****** 10****** 10****** */
+               put_queue(vc, 0xf0 | (c >> 18));
+               put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
+               put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+               put_queue(vc, 0x80 | (c & 0x3f));
+       }
+ }
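+ /*
+  * Worked example: U+00E9 takes the two-byte branch and is queued as
+  * 0xc0 | (0xe9 >> 6), 0x80 | (0xe9 & 0x3f) = 0xc3 0xa9, while U+20AC
+  * takes the three-byte branch and becomes 0xe2 0x82 0xac.
+  */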
+ /*
+  * Called after returning from RAW mode or when changing consoles - recompute
+  * shift_down[] and shift_state from key_down[]. May be called when the keymap
+  * is undefined, so that a shift key release is still seen.
+  */
+ void compute_shiftstate(void)
+ {
+       unsigned int i, j, k, sym, val;
+       shift_state = 0;
+       memset(shift_down, 0, sizeof(shift_down));
+       for (i = 0; i < ARRAY_SIZE(key_down); i++) {
+               if (!key_down[i])
+                       continue;
+               k = i * BITS_PER_LONG;
+               for (j = 0; j < BITS_PER_LONG; j++, k++) {
+                       if (!test_bit(k, key_down))
+                               continue;
+                       sym = U(key_maps[0][k]);
+                       if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
+                               continue;
+                       val = KVAL(sym);
+                       if (val == KVAL(K_CAPSSHIFT))
+                               val = KVAL(K_SHIFT);
+                       shift_down[val]++;
+                       shift_state |= (1 << val);
+               }
+       }
+ }
+ /*
+  * We have a combining character DIACR here, followed by the character CH.
+  * If the combination occurs in the table, return the corresponding value.
+  * Otherwise, if CH is a space or equals DIACR, return DIACR.
+  * Otherwise, conclude that DIACR was not combining after all,
+  * queue it and return CH.
+  */
+ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch)
+ {
+       unsigned int d = diacr;
+       unsigned int i;
+       diacr = 0;
+       if ((d & ~0xff) == BRL_UC_ROW) {
+               if ((ch & ~0xff) == BRL_UC_ROW)
+                       return d | ch;
+       } else {
+               for (i = 0; i < accent_table_size; i++)
+                       if (accent_table[i].diacr == d && accent_table[i].base == ch)
+                               return accent_table[i].result;
+       }
+       if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d)
+               return d;
+       if (kbd->kbdmode == VC_UNICODE)
+               to_utf8(vc, d);
+       else {
+               int c = conv_uni_to_8bit(d);
+               if (c != -1)
+                       put_queue(vc, c);
+       }
+       return ch;
+ }
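+ /*
+  * Example: with a typical accent table, an acute-accent diacr followed
+  * by 'e' returns the combined 'e'-acute; followed by a space it returns
+  * the accent itself; followed by 'x' the accent is queued uncombined
+  * and 'x' is returned.
+  */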
+ /*
+  * Special function handlers
+  */
+ static void fn_enter(struct vc_data *vc)
+ {
+       if (diacr) {
+               if (kbd->kbdmode == VC_UNICODE)
+                       to_utf8(vc, diacr);
+               else {
+                       int c = conv_uni_to_8bit(diacr);
+                       if (c != -1)
+                               put_queue(vc, c);
+               }
+               diacr = 0;
+       }
+       put_queue(vc, 13);
+       if (vc_kbd_mode(kbd, VC_CRLF))
+               put_queue(vc, 10);
+ }
+ static void fn_caps_toggle(struct vc_data *vc)
+ {
+       if (rep)
+               return;
+       chg_vc_kbd_led(kbd, VC_CAPSLOCK);
+ }
+ static void fn_caps_on(struct vc_data *vc)
+ {
+       if (rep)
+               return;
+       set_vc_kbd_led(kbd, VC_CAPSLOCK);
+ }
+ static void fn_show_ptregs(struct vc_data *vc)
+ {
+       struct pt_regs *regs = get_irq_regs();
+       if (regs)
+               show_regs(regs);
+ }
+ static void fn_hold(struct vc_data *vc)
+ {
+       struct tty_struct *tty = vc->port.tty;
+       if (rep || !tty)
+               return;
+       /*
+        * Note: SCROLLOCK will be set (cleared) by stop_tty (start_tty);
+        * these routines are also activated by ^S/^Q.
+        * (And SCROLLOCK can also be set by the ioctl KDSKBLED.)
+        */
+       if (tty->stopped)
+               start_tty(tty);
+       else
+               stop_tty(tty);
+ }
+ static void fn_num(struct vc_data *vc)
+ {
+       if (vc_kbd_mode(kbd, VC_APPLIC))
+               applkey(vc, 'P', 1);
+       else
+               fn_bare_num(vc);
+ }
+ /*
+  * Bind this to Shift-NumLock if you work in application keypad mode
+  * but want to be able to change the NumLock flag.
+  * Bind this to NumLock if you prefer that the NumLock key always
+  * changes the NumLock flag.
+  */
+ static void fn_bare_num(struct vc_data *vc)
+ {
+       if (!rep)
+               chg_vc_kbd_led(kbd, VC_NUMLOCK);
+ }
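+ /*
+  * In loadkeys(1) keymaps this handler is the Bare_Num_Lock keysym; a
+  * keymap sketch (assuming NumLock is keycode 69, as on AT keyboards):
+  *
+  *    keycode 69 = Num_Lock
+  *    shift keycode 69 = Bare_Num_Lock
+  */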
+ static void fn_lastcons(struct vc_data *vc)
+ {
+       /* switch to the last used console, ChN */
+       set_console(last_console);
+ }
+ static void fn_dec_console(struct vc_data *vc)
+ {
+       int i, cur = fg_console;
+       /* Currently switching?  Queue this next switch relative to that. */
+       if (want_console != -1)
+               cur = want_console;
+       for (i = cur - 1; i != cur; i--) {
+               if (i == -1)
+                       i = MAX_NR_CONSOLES - 1;
+               if (vc_cons_allocated(i))
+                       break;
+       }
+       set_console(i);
+ }
+ static void fn_inc_console(struct vc_data *vc)
+ {
+       int i, cur = fg_console;
+       /* Currently switching?  Queue this next switch relative to that. */
+       if (want_console != -1)
+               cur = want_console;
+       for (i = cur+1; i != cur; i++) {
+               if (i == MAX_NR_CONSOLES)
+                       i = 0;
+               if (vc_cons_allocated(i))
+                       break;
+       }
+       set_console(i);
+ }
+ static void fn_send_intr(struct vc_data *vc)
+ {
+       struct tty_struct *tty = vc->port.tty;
+       if (!tty)
+               return;
+       tty_insert_flip_char(tty, 0, TTY_BREAK);
+       con_schedule_flip(tty);
+ }
+ static void fn_scroll_forw(struct vc_data *vc)
+ {
+       scrollfront(vc, 0);
+ }
+ static void fn_scroll_back(struct vc_data *vc)
+ {
+       scrollback(vc, 0);
+ }
+ static void fn_show_mem(struct vc_data *vc)
+ {
+       show_mem();
+ }
+ static void fn_show_state(struct vc_data *vc)
+ {
+       show_state();
+ }
+ static void fn_boot_it(struct vc_data *vc)
+ {
+       ctrl_alt_del();
+ }
+ static void fn_compose(struct vc_data *vc)
+ {
+       dead_key_next = true;
+ }
+ static void fn_spawn_con(struct vc_data *vc)
+ {
+       spin_lock(&vt_spawn_con.lock);
+       if (vt_spawn_con.pid)
+               if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) {
+                       put_pid(vt_spawn_con.pid);
+                       vt_spawn_con.pid = NULL;
+               }
+       spin_unlock(&vt_spawn_con.lock);
+ }
+ static void fn_SAK(struct vc_data *vc)
+ {
+       struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
+       schedule_work(SAK_work);
+ }
+ static void fn_null(struct vc_data *vc)
+ {
+       compute_shiftstate();
+ }
+ /*
+  * Special key handlers
+  */
+ static void k_ignore(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ }
+ static void k_spec(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       if (up_flag)
+               return;
+       if (value >= ARRAY_SIZE(fn_handler))
+               return;
+       if ((kbd->kbdmode == VC_RAW ||
+            kbd->kbdmode == VC_MEDIUMRAW) &&
+            value != KVAL(K_SAK))
+               return;         /* SAK is allowed even in raw mode */
+       fn_handler[value](vc);
+ }
+ static void k_lowercase(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       pr_err("k_lowercase was called - impossible\n");
+ }
+ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag)
+ {
+       if (up_flag)
+               return;         /* no action, if this is a key release */
+       if (diacr)
+               value = handle_diacr(vc, value);
+       if (dead_key_next) {
+               dead_key_next = false;
+               diacr = value;
+               return;
+       }
+       if (kbd->kbdmode == VC_UNICODE)
+               to_utf8(vc, value);
+       else {
+               int c = conv_uni_to_8bit(value);
+               if (c != -1)
+                       put_queue(vc, c);
+       }
+ }
+ /*
+  * Handle dead key. Note that we now may have several
+  * dead keys modifying the same character. Very useful
+  * for Vietnamese.
+  */
+ static void k_deadunicode(struct vc_data *vc, unsigned int value, char up_flag)
+ {
+       if (up_flag)
+               return;
+       diacr = (diacr ? handle_diacr(vc, value) : value);
+ }
+ static void k_self(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       k_unicode(vc, conv_8bit_to_uni(value), up_flag);
+ }
+ static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       k_deadunicode(vc, value, up_flag);
+ }
+ /*
+  * Obsolete - for backwards compatibility only
+  */
+ static void k_dead(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       static const unsigned char ret_diacr[NR_DEAD] = {'`', '\'', '^', '~', '"', ',' };
+       k_deadunicode(vc, ret_diacr[value], up_flag);
+ }
+ static void k_cons(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       if (up_flag)
+               return;
+       set_console(value);
+ }
+ static void k_fn(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       if (up_flag)
+               return;
+       if ((unsigned)value < ARRAY_SIZE(func_table)) {
+               if (func_table[value])
+                       puts_queue(vc, func_table[value]);
+       } else
+               pr_err("k_fn called with value=%d\n", value);
+ }
+ static void k_cur(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       static const char cur_chars[] = "BDCA";
+       if (up_flag)
+               return;
+       applkey(vc, cur_chars[value], vc_kbd_mode(kbd, VC_CKMODE));
+ }
+ static void k_pad(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       static const char pad_chars[] = "0123456789+-*/\015,.?()#";
+       static const char app_map[] = "pqrstuvwxylSRQMnnmPQS";
+       if (up_flag)
+               return;         /* no action, if this is a key release */
+       /* kludge... shift forces cursor/number keys */
+       if (vc_kbd_mode(kbd, VC_APPLIC) && !shift_down[KG_SHIFT]) {
+               applkey(vc, app_map[value], 1);
+               return;
+       }
+       if (!vc_kbd_led(kbd, VC_NUMLOCK)) {
+               switch (value) {
+               case KVAL(K_PCOMMA):
+               case KVAL(K_PDOT):
+                       k_fn(vc, KVAL(K_REMOVE), 0);
+                       return;
+               case KVAL(K_P0):
+                       k_fn(vc, KVAL(K_INSERT), 0);
+                       return;
+               case KVAL(K_P1):
+                       k_fn(vc, KVAL(K_SELECT), 0);
+                       return;
+               case KVAL(K_P2):
+                       k_cur(vc, KVAL(K_DOWN), 0);
+                       return;
+               case KVAL(K_P3):
+                       k_fn(vc, KVAL(K_PGDN), 0);
+                       return;
+               case KVAL(K_P4):
+                       k_cur(vc, KVAL(K_LEFT), 0);
+                       return;
+               case KVAL(K_P6):
+                       k_cur(vc, KVAL(K_RIGHT), 0);
+                       return;
+               case KVAL(K_P7):
+                       k_fn(vc, KVAL(K_FIND), 0);
+                       return;
+               case KVAL(K_P8):
+                       k_cur(vc, KVAL(K_UP), 0);
+                       return;
+               case KVAL(K_P9):
+                       k_fn(vc, KVAL(K_PGUP), 0);
+                       return;
+               case KVAL(K_P5):
+                       applkey(vc, 'G', vc_kbd_mode(kbd, VC_APPLIC));
+                       return;
+               }
+       }
+       put_queue(vc, pad_chars[value]);
+       if (value == KVAL(K_PENTER) && vc_kbd_mode(kbd, VC_CRLF))
+               put_queue(vc, 10);
+ }
+ static void k_shift(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       int old_state = shift_state;
+       if (rep)
+               return;
+       /*
+        * Mimic typewriter:
+        * a CapsShift key acts like Shift but undoes CapsLock
+        */
+       if (value == KVAL(K_CAPSSHIFT)) {
+               value = KVAL(K_SHIFT);
+               if (!up_flag)
+                       clr_vc_kbd_led(kbd, VC_CAPSLOCK);
+       }
+       if (up_flag) {
+               /*
+                * handle the case that two shift or control
+                * keys are depressed simultaneously
+                */
+               if (shift_down[value])
+                       shift_down[value]--;
+       } else
+               shift_down[value]++;
+       if (shift_down[value])
+               shift_state |= (1 << value);
+       else
+               shift_state &= ~(1 << value);
+       /* kludge */
+       if (up_flag && shift_state != old_state && npadch != -1) {
+               if (kbd->kbdmode == VC_UNICODE)
+                       to_utf8(vc, npadch);
+               else
+                       put_queue(vc, npadch & 0xff);
+               npadch = -1;
+       }
+ }
+ static void k_meta(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       if (up_flag)
+               return;
+       if (vc_kbd_mode(kbd, VC_META)) {
+               put_queue(vc, '\033');
+               put_queue(vc, value);
+       } else
+               put_queue(vc, value | 0x80);
+ }
+ static void k_ascii(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       int base;
+       if (up_flag)
+               return;
+       if (value < 10) {
+               /* decimal input of code, while Alt depressed */
+               base = 10;
+       } else {
+               /* hexadecimal input of code, while AltGr depressed */
+               value -= 10;
+               base = 16;
+       }
+       if (npadch == -1)
+               npadch = value;
+       else
+               npadch = npadch * base + value;
+ }
+ static void k_lock(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       if (up_flag || rep)
+               return;
+       chg_vc_kbd_lock(kbd, value);
+ }
+ static void k_slock(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       k_shift(vc, value, up_flag);
+       if (up_flag || rep)
+               return;
+       chg_vc_kbd_slock(kbd, value);
+       /* try to make Alt, oops, AltGr and such work */
+       if (!key_maps[kbd->lockstate ^ kbd->slockstate]) {
+               kbd->slockstate = 0;
+               chg_vc_kbd_slock(kbd, value);
+       }
+ }
+ /* by default, 300ms interval for combination release */
+ static unsigned brl_timeout = 300;
+ MODULE_PARM_DESC(brl_timeout, "Braille keys release delay in ms (0 for commit on first key release)");
+ module_param(brl_timeout, uint, 0644);
+ static unsigned brl_nbchords = 1;
+ MODULE_PARM_DESC(brl_nbchords, "Number of chords that produce a braille pattern (0 for dead chords)");
+ module_param(brl_nbchords, uint, 0644);
+ static void k_brlcommit(struct vc_data *vc, unsigned int pattern, char up_flag)
+ {
+       static unsigned long chords;
+       static unsigned committed;
+       if (!brl_nbchords)
+               k_deadunicode(vc, BRL_UC_ROW | pattern, up_flag);
+       else {
+               committed |= pattern;
+               chords++;
+               if (chords == brl_nbchords) {
+                       k_unicode(vc, BRL_UC_ROW | committed, up_flag);
+                       chords = 0;
+                       committed = 0;
+               }
+       }
+ }
+ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+       static unsigned pressed, committing;
+       static unsigned long releasestart;
+       if (kbd->kbdmode != VC_UNICODE) {
+               if (!up_flag)
+                       pr_warning("keyboard mode must be unicode for braille patterns\n");
+               return;
+       }
+       if (!value) {
+               k_unicode(vc, BRL_UC_ROW, up_flag);
+               return;
+       }
+       if (value > 8)
+               return;
+       if (!up_flag) {
+               pressed |= 1 << (value - 1);
+               if (!brl_timeout)
+                       committing = pressed;
+       } else if (brl_timeout) {
+               if (!committing ||
+                   time_after(jiffies,
+                              releasestart + msecs_to_jiffies(brl_timeout))) {
+                       committing = pressed;
+                       releasestart = jiffies;
+               }
+               pressed &= ~(1 << (value - 1));
+               if (!pressed && committing) {
+                       k_brlcommit(vc, committing, 0);
+                       committing = 0;
+               }
+       } else {
+               if (committing) {
+                       k_brlcommit(vc, committing, 0);
+                       committing = 0;
+               }
+               pressed &= ~(1 << (value - 1));
+       }
+ }
+ /*
+  * The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
+  * or (ii) whatever pattern of lights people want to show using KDSETLED,
+  * or (iii) specified bits of specified words in kernel memory.
+  */
+ unsigned char getledstate(void)
+ {
+       return ledstate;
+ }
+ void setledstate(struct kbd_struct *kbd, unsigned int led)
+ {
+       if (!(led & ~7)) {
+               ledioctl = led;
+               kbd->ledmode = LED_SHOW_IOCTL;
+       } else
+               kbd->ledmode = LED_SHOW_FLAGS;
+       set_leds();
+ }
+ static inline unsigned char getleds(void)
+ {
+       struct kbd_struct *kbd = kbd_table + fg_console;
+       unsigned char leds;
+       int i;
+       if (kbd->ledmode == LED_SHOW_IOCTL)
+               return ledioctl;
+       leds = kbd->ledflagstate;
+       if (kbd->ledmode == LED_SHOW_MEM) {
+               for (i = 0; i < 3; i++)
+                       if (ledptrs[i].valid) {
+                               if (*ledptrs[i].addr & ledptrs[i].mask)
+                                       leds |= (1 << i);
+                               else
+                                       leds &= ~(1 << i);
+                       }
+       }
+       return leds;
+ }
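+ /*
+  * Mode (ii) above is driven from userspace through KDSETLED; a sketch
+  * using the LED_* bits from <linux/kd.h>:
+  *
+  *    ioctl(fd, KDSETLED, LED_NUM | LED_CAP); // force two LEDs on
+  *    ioctl(fd, KDSETLED, 0x08);              // any value > 7: show flags again
+  */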
+ static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+ {
+       unsigned char leds = *(unsigned char *)data;
+       if (test_bit(EV_LED, handle->dev->evbit)) {
+               input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
+               input_inject_event(handle, EV_LED, LED_NUML,    !!(leds & 0x02));
+               input_inject_event(handle, EV_LED, LED_CAPSL,   !!(leds & 0x04));
+               input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
+       }
+       return 0;
+ }
+ /*
+  * This is the tasklet that updates LED state on all keyboards
+  * attached to the box. The reason we use a tasklet is that we
+  * need to handle the case where the keyboard handler is not
+  * registered yet but we are already getting updates from the VT
+  * to update LED state.
+  */
+ static void kbd_bh(unsigned long dummy)
+ {
+       unsigned char leds = getleds();
+       if (leds != ledstate) {
+               input_handler_for_each_handle(&kbd_handler, &leds,
+                                             kbd_update_leds_helper);
+               ledstate = leds;
+       }
+ }
+ DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
+ #if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_ALPHA) ||\
+     defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\
+     defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\
+     (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) ||\
+     defined(CONFIG_AVR32)
+ #define HW_RAW(dev) (test_bit(EV_MSC, dev->evbit) && test_bit(MSC_RAW, dev->mscbit) &&\
+                       ((dev)->id.bustype == BUS_I8042) && ((dev)->id.vendor == 0x0001) && ((dev)->id.product == 0x0001))
+ static const unsigned short x86_keycodes[256] =
+       { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+        80, 81, 82, 83, 84,118, 86, 87, 88,115,120,119,121,112,123, 92,
+       284,285,309,  0,312, 91,327,328,329,331,333,335,336,337,338,339,
+       367,288,302,304,350, 89,334,326,267,126,268,269,125,347,348,349,
+       360,261,262,263,268,376,100,101,321,316,373,286,289,102,351,355,
+       103,104,105,275,287,279,258,106,274,107,294,364,358,363,362,361,
+       291,108,381,281,290,272,292,305,280, 99,112,257,306,359,113,114,
+       264,117,271,374,379,265,266, 93, 94, 95, 85,259,375,260, 90,116,
+       377,109,111,277,278,282,283,295,296,297,299,300,301,293,303,307,
+       308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330,
+       332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 };
+ #ifdef CONFIG_SPARC
+ static int sparc_l1_a_state;
+ extern void sun_do_break(void);
+ #endif
+ static int emulate_raw(struct vc_data *vc, unsigned int keycode,
+                      unsigned char up_flag)
+ {
+       int code;
+       switch (keycode) {
+       case KEY_PAUSE:
+               put_queue(vc, 0xe1);
+               put_queue(vc, 0x1d | up_flag);
+               put_queue(vc, 0x45 | up_flag);
+               break;
+       case KEY_HANGEUL:
+               if (!up_flag)
+                       put_queue(vc, 0xf2);
+               break;
+       case KEY_HANJA:
+               if (!up_flag)
+                       put_queue(vc, 0xf1);
+               break;
+       case KEY_SYSRQ:
+               /*
+                * Real AT keyboards (that's what we're trying
+                * to emulate here) emit 0xe0 0x2a 0xe0 0x37 when
+                * pressing PrtSc/SysRq alone, but simply 0x54
+                * when pressing Alt+PrtSc/SysRq.
+                */
+               if (test_bit(KEY_LEFTALT, key_down) ||
+                   test_bit(KEY_RIGHTALT, key_down)) {
+                       put_queue(vc, 0x54 | up_flag);
+               } else {
+                       put_queue(vc, 0xe0);
+                       put_queue(vc, 0x2a | up_flag);
+                       put_queue(vc, 0xe0);
+                       put_queue(vc, 0x37 | up_flag);
+               }
+               break;
+       default:
+               if (keycode > 255)
+                       return -1;
+               code = x86_keycodes[keycode];
+               if (!code)
+                       return -1;
+               if (code & 0x100)
+                       put_queue(vc, 0xe0);
+               put_queue(vc, (code & 0x7f) | up_flag);
+               break;
+       }
+       return 0;
+ }
+ #else
+ #define HW_RAW(dev)   0
+ static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag)
+ {
+       if (keycode > 127)
+               return -1;
+       put_queue(vc, keycode | up_flag);
+       return 0;
+ }
+ #endif
+ static void kbd_rawcode(unsigned char data)
+ {
+       struct vc_data *vc = vc_cons[fg_console].d;
+       kbd = kbd_table + vc->vc_num;
+       if (kbd->kbdmode == VC_RAW)
+               put_queue(vc, data);
+ }
+ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
+ {
+       struct vc_data *vc = vc_cons[fg_console].d;
+       unsigned short keysym, *key_map;
+       unsigned char type;
+       bool raw_mode;
+       struct tty_struct *tty;
+       int shift_final;
+       struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
+       int rc;
+       tty = vc->port.tty;
+       if (tty && (!tty->driver_data)) {
+               /* No driver data? Strange. Okay we fix it then. */
+               tty->driver_data = vc;
+       }
+       kbd = kbd_table + vc->vc_num;
+ #ifdef CONFIG_SPARC
+       if (keycode == KEY_STOP)
+               sparc_l1_a_state = down;
+ #endif
+       rep = (down == 2);
+       raw_mode = (kbd->kbdmode == VC_RAW);
+       if (raw_mode && !hw_raw)
+               if (emulate_raw(vc, keycode, !down << 7))
+                       if (keycode < BTN_MISC && printk_ratelimit())
+                               pr_warning("can't emulate rawmode for keycode %d\n",
+                                          keycode);
++#ifdef CONFIG_BOOTSPLASH
++      /* This code has to be redone for some non-x86 platforms */
++      if (down == 1 && (keycode == 0x3c || keycode == 0x01)) {        /* F2 and ESC on PC keyboard */
++              extern int splash_verbose(void);
++              if (splash_verbose())
++                      return;
++      }
++#endif
++
+ #ifdef CONFIG_SPARC
+       if (keycode == KEY_A && sparc_l1_a_state) {
+               sparc_l1_a_state = false;
+               sun_do_break();
+       }
+ #endif
+       if (kbd->kbdmode == VC_MEDIUMRAW) {
+               /*
+                * This is extended medium raw mode, with keys above 127
+                * encoded as 0, high 7 bits, low 7 bits, with the 0 bearing
+                * the 'up' flag if needed. 0 is reserved, so this shouldn't
+                * interfere with anything else. The two bytes after 0 will
+                * always have the up flag set so as not to interfere with older
+                * applications. This allows for 16384 different keycodes,
+                * which should be enough.
+                */
+               if (keycode < 128) {
+                       put_queue(vc, keycode | (!down << 7));
+               } else {
+                       put_queue(vc, !down << 7);
+                       put_queue(vc, (keycode >> 7) | 0x80);
+                       put_queue(vc, keycode | 0x80);
+               }
+               raw_mode = true;
+       }
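+       /*
+        * Worked example: keycode 300 goes out as 0x00 (plus the up flag),
+        * then (300 >> 7) | 0x80 = 0x82 and (300 | 0x80) truncated to a
+        * byte = 0xac; a reader reassembles ((0x82 & 0x7f) << 7) |
+        * (0xac & 0x7f) = 300.
+        */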
+       if (down)
+               set_bit(keycode, key_down);
+       else
+               clear_bit(keycode, key_down);
+       if (rep &&
+           (!vc_kbd_mode(kbd, VC_REPEAT) ||
+            (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) {
+               /*
+                * Don't repeat a key if the input buffers are not empty and the
+                * characters aren't echoed locally. This makes key repeat
+                * usable with slow applications and under heavy loads.
+                */
+               return;
+       }
+       param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
+       param.ledstate = kbd->ledflagstate;
+       key_map = key_maps[shift_final];
+       rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+                                       KBD_KEYCODE, &param);
+       if (rc == NOTIFY_STOP || !key_map) {
+               atomic_notifier_call_chain(&keyboard_notifier_list,
+                                          KBD_UNBOUND_KEYCODE, &param);
+               compute_shiftstate();
+               kbd->slockstate = 0;
+               return;
+       }
+       if (keycode < NR_KEYS)
+               keysym = key_map[keycode];
+       else if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8)
+               keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1));
+       else
+               return;
+       type = KTYP(keysym);
+       if (type < 0xf0) {
+               param.value = keysym;
+               rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+                                               KBD_UNICODE, &param);
+               if (rc != NOTIFY_STOP)
+                       if (down && !raw_mode)
+                               to_utf8(vc, keysym);
+               return;
+       }
+       type -= 0xf0;
+       if (type == KT_LETTER) {
+               type = KT_LATIN;
+               if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
+                       key_map = key_maps[shift_final ^ (1 << KG_SHIFT)];
+                       if (key_map)
+                               keysym = key_map[keycode];
+               }
+       }
+       param.value = keysym;
+       rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+                                       KBD_KEYSYM, &param);
+       if (rc == NOTIFY_STOP)
+               return;
+       if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
+               return;
+       (*k_handler[type])(vc, keysym & 0xff, !down);
+       param.ledstate = kbd->ledflagstate;
+       atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
+       if (type != KT_SLOCK)
+               kbd->slockstate = 0;
+ }
+ static void kbd_event(struct input_handle *handle, unsigned int event_type,
+                     unsigned int event_code, int value)
+ {
+       /* We are called with interrupts disabled, just take the lock */
+       spin_lock(&kbd_event_lock);
+       if (event_type == EV_MSC && event_code == MSC_RAW && HW_RAW(handle->dev))
+               kbd_rawcode(value);
+       if (event_type == EV_KEY)
+               kbd_keycode(event_code, value, HW_RAW(handle->dev));
+       spin_unlock(&kbd_event_lock);
+       tasklet_schedule(&keyboard_tasklet);
+       do_poke_blanked_console = 1;
+       schedule_console_callback();
+ }
+ static bool kbd_match(struct input_handler *handler, struct input_dev *dev)
+ {
+       int i;
+       if (test_bit(EV_SND, dev->evbit))
+               return true;
+       if (test_bit(EV_KEY, dev->evbit)) {
+               for (i = KEY_RESERVED; i < BTN_MISC; i++)
+                       if (test_bit(i, dev->keybit))
+                               return true;
+               for (i = KEY_BRL_DOT1; i <= KEY_BRL_DOT10; i++)
+                       if (test_bit(i, dev->keybit))
+                               return true;
+       }
+       return false;
+ }
+ /*
+  * When a keyboard (or other input device) is found, the kbd_connect
+  * function is called. The function then looks at the device, and if it
+  * likes it, it can open it and get events from it. In this (kbd_connect)
+  * function, we should decide which VT to bind that keyboard to initially.
+  */
+ static int kbd_connect(struct input_handler *handler, struct input_dev *dev,
+                       const struct input_device_id *id)
+ {
+       struct input_handle *handle;
+       int error;
+       handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
+       if (!handle)
+               return -ENOMEM;
+       handle->dev = dev;
+       handle->handler = handler;
+       handle->name = "kbd";
+       error = input_register_handle(handle);
+       if (error)
+               goto err_free_handle;
+       error = input_open_device(handle);
+       if (error)
+               goto err_unregister_handle;
+       return 0;
+  err_unregister_handle:
+       input_unregister_handle(handle);
+  err_free_handle:
+       kfree(handle);
+       return error;
+ }
+ static void kbd_disconnect(struct input_handle *handle)
+ {
+       input_close_device(handle);
+       input_unregister_handle(handle);
+       kfree(handle);
+ }
+ /*
+  * Start keyboard handler on the new keyboard by refreshing LED state to
+  * match the rest of the system.
+  */
+ static void kbd_start(struct input_handle *handle)
+ {
+       tasklet_disable(&keyboard_tasklet);
+       if (ledstate != 0xff)
+               kbd_update_leds_helper(handle, &ledstate);
+       tasklet_enable(&keyboard_tasklet);
+ }
+ static const struct input_device_id kbd_ids[] = {
+       {
+                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+                 .evbit = { BIT_MASK(EV_KEY) },
+         },
+       {
+                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+                 .evbit = { BIT_MASK(EV_SND) },
+         },
+       { },    /* Terminating entry */
+ };
+ MODULE_DEVICE_TABLE(input, kbd_ids);
+ static struct input_handler kbd_handler = {
+       .event          = kbd_event,
+       .match          = kbd_match,
+       .connect        = kbd_connect,
+       .disconnect     = kbd_disconnect,
+       .start          = kbd_start,
+       .name           = "kbd",
+       .id_table       = kbd_ids,
+ };
+ int __init kbd_init(void)
+ {
+       int i;
+       int error;
+       for (i = 0; i < MAX_NR_CONSOLES; i++) {
+               kbd_table[i].ledflagstate = KBD_DEFLEDS;
+               kbd_table[i].default_ledflagstate = KBD_DEFLEDS;
+               kbd_table[i].ledmode = LED_SHOW_FLAGS;
+               kbd_table[i].lockstate = KBD_DEFLOCK;
+               kbd_table[i].slockstate = 0;
+               kbd_table[i].modeflags = KBD_DEFMODE;
+               kbd_table[i].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE;
+       }
+       error = input_register_handler(&kbd_handler);
+       if (error)
+               return error;
+       tasklet_enable(&keyboard_tasklet);
+       tasklet_schedule(&keyboard_tasklet);
+       return 0;
+ }
index 0000000,a8ec48e..15915d8
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,4209 +1,4236 @@@
+ /*
+  *  linux/drivers/char/vt.c
+  *
+  *  Copyright (C) 1991, 1992  Linus Torvalds
+  */
+ /*
+  * Hopefully this will be a rather complete VT102 implementation.
+  *
+  * Beeping thanks to John T Kohl.
+  *
+  * Virtual Consoles, Screen Blanking, Screen Dumping, Color, Graphics
+  *   Chars, and VT100 enhancements by Peter MacDonald.
+  *
+  * Copy and paste function by Andrew Haylett,
+  *   some enhancements by Alessandro Rubini.
+  *
+  * Code to check for different video-cards mostly by Galen Hunt,
+  * <g-hunt@ee.utah.edu>
+  *
+  * Rudimentary ISO 10646/Unicode/UTF-8 character set support by
+  * Markus Kuhn, <mskuhn@immd4.informatik.uni-erlangen.de>.
+  *
+  * Dynamic allocation of consoles, aeb@cwi.nl, May 1994
+  * Resizing of consoles, aeb, 940926
+  *
+  * Code for xterm like mouse click reporting by Peter Orbaek 20-Jul-94
+  * <poe@daimi.aau.dk>
+  *
+  * User-defined bell sound, new setterm control sequences and printk
+  * redirection by Martin Mares <mj@k332.feld.cvut.cz> 19-Nov-95
+  *
+  * APM screenblank bug fixed Takashi Manabe <manabe@roy.dsl.tutics.tut.jp>
+  *
+  * Merge with the abstract console driver by Geert Uytterhoeven
+  * <geert@linux-m68k.org>, Jan 1997.
+  *
+  *   Original m68k console driver modifications by
+  *
+  *     - Arno Griffioen <arno@usn.nl>
+  *     - David Carter <carter@cs.bris.ac.uk>
+  * 
+  *   The abstract console driver provides a generic interface for a text
+  *   console. It supports VGA text mode, frame buffer based graphical consoles
+  *   and special graphics processors that are only accessible through some
+  *   registers (e.g. a TMS340x0 GSP).
+  *
+  *   The interface to the hardware is specified using a special structure
+  *   (struct consw) which contains function pointers to console operations
+  *   (see <linux/console.h> for more information).
+  *
+  * Support for changeable cursor shape
+  * by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>, August 1997
+  *
+  * Ported to i386 and con_scrolldelta fixed
+  * by Emmanuel Marty <core@ggi-project.org>, April 1998
+  *
+  * Resurrected character buffers in videoram plus lots of other trickery
+  * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998
+  *
+  * Removed old-style timers, introduced console_timer, made timer
+  * deletion SMP-safe.  17Jun00, Andrew Morton
+  *
+  * Removed console_lock, enabled interrupts across all console operations
+  * 13 March 2001, Andrew Morton
+  *
+  * Fixed UTF-8 mode so alternate charset modes always work according
+  * to control sequences interpreted in do_con_trol function
+  * preserving backward VT100 semigraphics compatibility,
+  * malformed UTF sequences represented as sequences of replacement glyphs,
+  * original codes or '?' as a last resort if replacement glyph is undefined
+  * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
+  */
+ #include <linux/module.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/kernel.h>
+ #include <linux/string.h>
+ #include <linux/errno.h>
+ #include <linux/kd.h>
+ #include <linux/slab.h>
+ #include <linux/major.h>
+ #include <linux/mm.h>
+ #include <linux/console.h>
+ #include <linux/init.h>
+ #include <linux/mutex.h>
+ #include <linux/vt_kern.h>
+ #include <linux/selection.h>
+ #include <linux/smp_lock.h>
+ #include <linux/tiocl.h>
+ #include <linux/kbd_kern.h>
+ #include <linux/consolemap.h>
+ #include <linux/timer.h>
+ #include <linux/interrupt.h>
+ #include <linux/workqueue.h>
+ #include <linux/pm.h>
+ #include <linux/font.h>
+ #include <linux/bitops.h>
+ #include <linux/notifier.h>
+ #include <linux/device.h>
+ #include <linux/io.h>
+ #include <asm/system.h>
+ #include <linux/uaccess.h>
+ #include <linux/kdb.h>
+ #include <linux/ctype.h>
+ #define MAX_NR_CON_DRIVER 16
+ #define CON_DRIVER_FLAG_MODULE 1
+ #define CON_DRIVER_FLAG_INIT   2
+ #define CON_DRIVER_FLAG_ATTR   4
+ struct con_driver {
+       const struct consw *con;
+       const char *desc;
+       struct device *dev;
+       int node;
+       int first;
+       int last;
+       int flag;
+ };
+ static struct con_driver registered_con_driver[MAX_NR_CON_DRIVER];
+ const struct consw *conswitchp;
+ /* A bitmap for codes <32. A bit of 1 indicates that the code
+  * corresponding to that bit number invokes some special action
+  * (such as cursor movement) and should not be displayed as a
+  * glyph unless the disp_ctrl mode is explicitly enabled.
+  */
+ #define CTRL_ACTION 0x0d00ff81
+ #define CTRL_ALWAYS 0x0800f501        /* Cannot be overridden by disp_ctrl */
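+ /*
+  * Example: the interpreter decides whether a code c < 32 acts or
+  * prints by testing the bitmap, roughly ((CTRL_ACTION >> c) & 1);
+  * e.g. bit 8 of 0x0d00ff81 is set, so BS (0x08) moves the cursor
+  * instead of producing a glyph.
+  */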
+ /*
+  * Here are the default bell parameters: 750 Hz, 1/8th of a second
+  */
+ #define DEFAULT_BELL_PITCH    750
+ #define DEFAULT_BELL_DURATION (HZ/8)
+ struct vc vc_cons [MAX_NR_CONSOLES];
+ #ifndef VT_SINGLE_DRIVER
+ static const struct consw *con_driver_map[MAX_NR_CONSOLES];
+ #endif
+ static int con_open(struct tty_struct *, struct file *);
+ static void vc_init(struct vc_data *vc, unsigned int rows,
+                   unsigned int cols, int do_clear);
+ static void gotoxy(struct vc_data *vc, int new_x, int new_y);
+ static void save_cur(struct vc_data *vc);
+ static void reset_terminal(struct vc_data *vc, int do_clear);
+ static void con_flush_chars(struct tty_struct *tty);
+ static int set_vesa_blanking(char __user *p);
+ static void set_cursor(struct vc_data *vc);
+ static void hide_cursor(struct vc_data *vc);
+ static void console_callback(struct work_struct *ignored);
+ static void blank_screen_t(unsigned long dummy);
+ static void set_palette(struct vc_data *vc);
+ static int printable;         /* Is console ready for printing? */
+ int default_utf8 = true;
+ module_param(default_utf8, int, S_IRUGO | S_IWUSR);
+ int global_cursor_default = -1;
+ module_param(global_cursor_default, int, S_IRUGO | S_IWUSR);
+ static int cur_default = CUR_DEFAULT;
+ module_param(cur_default, int, S_IRUGO | S_IWUSR);
+ /*
+  * ignore_poke: don't unblank the screen when things are typed.  This is
+  * mainly for the privacy of braille terminal users.
+  */
+ static int ignore_poke;
+ int do_poke_blanked_console;
+ int console_blanked;
+ static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */
+ static int vesa_off_interval;
+ static int blankinterval = 10*60;
+ core_param(consoleblank, blankinterval, int, 0444);
+ static DECLARE_WORK(console_work, console_callback);
+ /*
+  * fg_console is the current virtual console,
+  * last_console is the last used one,
+  * want_console is the console we want to switch to,
+  * saved_* variants are for save/restore around kernel debugger enter/leave
+  */
+ int fg_console;
+ int last_console;
+ int want_console = -1;
+ static int saved_fg_console;
+ static int saved_last_console;
+ static int saved_want_console;
+ static int saved_vc_mode;
+ static int saved_console_blanked;
+ /*
+  * For each existing display, we have a pointer to console currently visible
+  * on that display, allowing consoles other than fg_console to be refreshed
+  * appropriately. Unless the low-level driver supplies its own display_fg
+  * variable, we use this one for the "master display".
+  */
+ static struct vc_data *master_display_fg;
+ /*
+  * Unfortunately, we need to delay tty echo when we're currently writing to the
+  * console since the code is (and always was) not re-entrant, so we schedule
+  * all flip requests to process context with schedule_work() and run it from
+  * console_callback().
+  */
+ /*
+  * For the same reason, we defer scrollback to the console callback.
+  */
+ static int scrollback_delta;
+ /*
+  * Hook so that the power management routines can (un)blank
+  * the console on our behalf.
+  */
+ int (*console_blank_hook)(int);
+ static DEFINE_TIMER(console_timer, blank_screen_t, 0, 0);
+ static int blank_state;
+ static int blank_timer_expired;
+ enum {
+       blank_off = 0,
+       blank_normal_wait,
+       blank_vesa_wait,
+ };
+ /*
+  * Notifier list for console events.
+  */
+ static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
+ int register_vt_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_register(&vt_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_vt_notifier);
+ int unregister_vt_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(unregister_vt_notifier);
+ static void notify_write(struct vc_data *vc, unsigned int unicode)
+ {
+       struct vt_notifier_param param = { .vc = vc, .c = unicode };
+       atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
+ }
+ static void notify_update(struct vc_data *vc)
+ {
+       struct vt_notifier_param param = { .vc = vc };
+       atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
+ }
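+ /*
+  * Example (minimal sketch; the my_* names are hypothetical): an
+  * external module can observe console output through this chain:
+  *
+  *     static int my_vt_cb(struct notifier_block *nb,
+  *                         unsigned long action, void *data)
+  *     {
+  *             struct vt_notifier_param *param = data;
+  *
+  *             if (action == VT_WRITE)
+  *                     pr_debug("vc%d: U+%04X\n",
+  *                              param->vc->vc_num, param->c);
+  *             return NOTIFY_OK;
+  *     }
+  *
+  *     static struct notifier_block my_vt_nb = {
+  *             .notifier_call = my_vt_cb,
+  *     };
+  *     ...
+  *     register_vt_notifier(&my_vt_nb);
+  */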
+ /*
+  *    Low-Level Functions
+  */
+ #define IS_FG(vc)     ((vc)->vc_num == fg_console)
+ #ifdef VT_BUF_VRAM_ONLY
+ #define DO_UPDATE(vc) 0
+ #else
+ #define DO_UPDATE(vc) (CON_IS_VISIBLE(vc) && !console_blanked)
+ #endif
+ static inline unsigned short *screenpos(struct vc_data *vc, int offset, int viewed)
+ {
+       unsigned short *p;
+       
+       if (!viewed)
+               p = (unsigned short *)(vc->vc_origin + offset);
+       else if (!vc->vc_sw->con_screen_pos)
+               p = (unsigned short *)(vc->vc_visible_origin + offset);
+       else
+               p = vc->vc_sw->con_screen_pos(vc, offset);
+       return p;
+ }
+ /* Called from the keyboard IRQ path. */
+ static inline void scrolldelta(int lines)
+ {
+       /* FIXME */
+       /* scrolldelta needs some kind of consistency lock, but the BKL was
+          not, and still is not, protecting against the scheduled callback */
+       scrollback_delta += lines;
+       schedule_console_callback();
+ }
+ void schedule_console_callback(void)
+ {
+       schedule_work(&console_work);
+ }
+ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+ {
+       unsigned short *d, *s;
+       if (t+nr >= b)
+               nr = b - t - 1;
+       if (b > vc->vc_rows || t >= b || nr < 1)
+               return;
+       if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_UP, nr))
+               return;
+       d = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
+       s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * (t + nr));
+       scr_memmovew(d, s, (b - t - nr) * vc->vc_size_row);
+       scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_video_erase_char,
+                   vc->vc_size_row * nr);
+ }
+ static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+ {
+       unsigned short *s;
+       unsigned int step;
+       if (t+nr >= b)
+               nr = b - t - 1;
+       if (b > vc->vc_rows || t >= b || nr < 1)
+               return;
+       if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_DOWN, nr))
+               return;
+       s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
+       step = vc->vc_cols * nr;
+       scr_memmovew(s + step, s, (b - t - nr) * vc->vc_size_row);
+       scr_memsetw(s, vc->vc_video_erase_char, 2 * step);
+ }
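+ /*
+  * Example: lf() below calls scrup(vc, vc->vc_top, vc->vc_bottom, 1)
+  * when the cursor sits on the last line of the scrolling region:
+  * rows t+1..b-1 move up one row and the freed bottom row is filled
+  * with vc_video_erase_char.
+  */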
+ static void do_update_region(struct vc_data *vc, unsigned long start, int count)
+ {
+ #ifndef VT_BUF_VRAM_ONLY
+       unsigned int xx, yy, offset;
+       u16 *p;
+       p = (u16 *) start;
+       if (!vc->vc_sw->con_getxy) {
+               offset = (start - vc->vc_origin) / 2;
+               xx = offset % vc->vc_cols;
+               yy = offset / vc->vc_cols;
+       } else {
+               int nxx, nyy;
+               start = vc->vc_sw->con_getxy(vc, start, &nxx, &nyy);
+               xx = nxx; yy = nyy;
+       }
+       for(;;) {
+               u16 attrib = scr_readw(p) & 0xff00;
+               int startx = xx;
+               u16 *q = p;
+               while (xx < vc->vc_cols && count) {
+                       if (attrib != (scr_readw(p) & 0xff00)) {
+                               if (p > q)
+                                       vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
+                               startx = xx;
+                               q = p;
+                               attrib = scr_readw(p) & 0xff00;
+                       }
+                       p++;
+                       xx++;
+                       count--;
+               }
+               if (p > q)
+                       vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
+               if (!count)
+                       break;
+               xx = 0;
+               yy++;
+               if (vc->vc_sw->con_getxy) {
+                       p = (u16 *)start;
+                       start = vc->vc_sw->con_getxy(vc, start, NULL, NULL);
+               }
+       }
+ #endif
+ }
+ void update_region(struct vc_data *vc, unsigned long start, int count)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (DO_UPDATE(vc)) {
+               hide_cursor(vc);
+               do_update_region(vc, start, count);
+               set_cursor(vc);
+       }
+ }
+ /* Structure of attributes is hardware-dependent */
+ static u8 build_attr(struct vc_data *vc, u8 _color, u8 _intensity, u8 _blink,
+     u8 _underline, u8 _reverse, u8 _italic)
+ {
+       if (vc->vc_sw->con_build_attr)
+               return vc->vc_sw->con_build_attr(vc, _color, _intensity,
+                      _blink, _underline, _reverse, _italic);
+ #ifndef VT_BUF_VRAM_ONLY
+ /*
+  * ++roman: I completely changed the attribute format for monochrome
+  * mode (!can_do_color). The formerly used MDA (monochrome display
+  * adapter) format didn't allow the combination of certain effects.
+  * Now the attribute is just a bit vector:
+  *  Bit 0..1: intensity (0..2)
+  *  Bit 2   : underline
+  *  Bit 3   : reverse
+  *  Bit 7   : blink
+  */
+       {
+       u8 a = _color;
+       if (!vc->vc_can_do_color)
+               return _intensity |
+                      (_italic ? 2 : 0) |
+                      (_underline ? 4 : 0) |
+                      (_reverse ? 8 : 0) |
+                      (_blink ? 0x80 : 0);
+       if (_italic)
+               a = (a & 0xF0) | vc->vc_itcolor;
+       else if (_underline)
+               a = (a & 0xf0) | vc->vc_ulcolor;
+       else if (_intensity == 0)
+               a = (a & 0xf0) | vc->vc_ulcolor;
+       if (_reverse)
+               a = ((a) & 0x88) | ((((a) >> 4) | ((a) << 4)) & 0x77);
+       if (_blink)
+               a ^= 0x80;
+       if (_intensity == 2)
+               a ^= 0x08;
+       if (vc->vc_hi_font_mask == 0x100)
+               a <<= 1;
+       return a;
+       }
+ #else
+       return 0;
+ #endif
+ }
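+ /*
+  * Worked example for the monochrome encoding above: a blinking,
+  * underlined character of normal intensity gets 0x80 | 0x04 | 0x01,
+  * i.e. attribute byte 0x85.
+  */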
+ static void update_attr(struct vc_data *vc)
+ {
+       vc->vc_attr = build_attr(vc, vc->vc_color, vc->vc_intensity,
+                     vc->vc_blink, vc->vc_underline,
+                     vc->vc_reverse ^ vc->vc_decscnm, vc->vc_italic);
+       vc->vc_video_erase_char = (build_attr(vc, vc->vc_color, 1, vc->vc_blink, 0, vc->vc_decscnm, 0) << 8) | ' ';
+ }
+ /* Note: inverting the screen twice should revert to the original state */
+ void invert_screen(struct vc_data *vc, int offset, int count, int viewed)
+ {
+       unsigned short *p;
+       WARN_CONSOLE_UNLOCKED();
+       count /= 2;
+       p = screenpos(vc, offset, viewed);
+       if (vc->vc_sw->con_invert_region)
+               vc->vc_sw->con_invert_region(vc, p, count);
+ #ifndef VT_BUF_VRAM_ONLY
+       else {
+               u16 *q = p;
+               int cnt = count;
+               u16 a;
+               if (!vc->vc_can_do_color) {
+                       while (cnt--) {
+                           a = scr_readw(q);
+                           a ^= 0x0800;
+                           scr_writew(a, q);
+                           q++;
+                       }
+               } else if (vc->vc_hi_font_mask == 0x100) {
+                       while (cnt--) {
+                               a = scr_readw(q);
+                               a = ((a) & 0x11ff) | (((a) & 0xe000) >> 4) | (((a) & 0x0e00) << 4);
+                               scr_writew(a, q);
+                               q++;
+                       }
+               } else {
+                       while (cnt--) {
+                               a = scr_readw(q);
+                               a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4);
+                               scr_writew(a, q);
+                               q++;
+                       }
+               }
+       }
+ #endif
+       if (DO_UPDATE(vc))
+               do_update_region(vc, (unsigned long) p, count);
+ }
+ /* used by selection: complement pointer position */
+ void complement_pos(struct vc_data *vc, int offset)
+ {
+       static int old_offset = -1;
+       static unsigned short old;
+       static unsigned short oldx, oldy;
+       WARN_CONSOLE_UNLOCKED();
+       if (old_offset != -1 && old_offset >= 0 &&
+           old_offset < vc->vc_screenbuf_size) {
+               scr_writew(old, screenpos(vc, old_offset, 1));
+               if (DO_UPDATE(vc))
+                       vc->vc_sw->con_putc(vc, old, oldy, oldx);
+       }
+       old_offset = offset;
+       if (offset != -1 && offset >= 0 &&
+           offset < vc->vc_screenbuf_size) {
+               unsigned short new;
+               unsigned short *p;
+               p = screenpos(vc, offset, 1);
+               old = scr_readw(p);
+               new = old ^ vc->vc_complement_mask;
+               scr_writew(new, p);
+               if (DO_UPDATE(vc)) {
+                       oldx = (offset >> 1) % vc->vc_cols;
+                       oldy = (offset >> 1) / vc->vc_cols;
+                       vc->vc_sw->con_putc(vc, new, oldy, oldx);
+               }
+       }
+ }
+ static void insert_char(struct vc_data *vc, unsigned int nr)
+ {
+       unsigned short *p, *q = (unsigned short *)vc->vc_pos;
+       p = q + vc->vc_cols - nr - vc->vc_x;
+       while (--p >= q)
+               scr_writew(scr_readw(p), p + nr);
+       scr_memsetw(q, vc->vc_video_erase_char, nr * 2);
+       vc->vc_need_wrap = 0;
+       if (DO_UPDATE(vc)) {
+               unsigned short oldattr = vc->vc_attr;
+               vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x, vc->vc_y, vc->vc_x + nr, 1,
+                                    vc->vc_cols - vc->vc_x - nr);
+               vc->vc_attr = vc->vc_video_erase_char >> 8;
+               while (nr--)
+                       vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y, vc->vc_x + nr);
+               vc->vc_attr = oldattr;
+       }
+ }
+ static void delete_char(struct vc_data *vc, unsigned int nr)
+ {
+       unsigned int i = vc->vc_x;
+       unsigned short *p = (unsigned short *)vc->vc_pos;
+       while (++i <= vc->vc_cols - nr) {
+               scr_writew(scr_readw(p+nr), p);
+               p++;
+       }
+       scr_memsetw(p, vc->vc_video_erase_char, nr * 2);
+       vc->vc_need_wrap = 0;
+       if (DO_UPDATE(vc)) {
+               unsigned short oldattr = vc->vc_attr;
+               vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x + nr, vc->vc_y, vc->vc_x, 1,
+                                    vc->vc_cols - vc->vc_x - nr);
+               vc->vc_attr = vc->vc_video_erase_char >> 8;
+               while (nr--)
+                       vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y,
+                                    vc->vc_cols - 1 - nr);
+               vc->vc_attr = oldattr;
+       }
+ }
+ static int softcursor_original;
+ static void add_softcursor(struct vc_data *vc)
+ {
+       int i = scr_readw((u16 *) vc->vc_pos);
+       u32 type = vc->vc_cursor_type;
+       if (! (type & 0x10)) return;
+       if (softcursor_original != -1) return;
+       softcursor_original = i;
+       i |= ((type >> 8) & 0xff00 );
+       i ^= ((type) & 0xff00 );
+       if ((type & 0x20) && ((softcursor_original & 0x7000) == (i & 0x7000))) i ^= 0x7000;
+       if ((type & 0x40) && ((i & 0x700) == ((i & 0x7000) >> 4))) i ^= 0x0700;
+       scr_writew(i, (u16 *) vc->vc_pos);
+       if (DO_UPDATE(vc))
+               vc->vc_sw->con_putc(vc, i, vc->vc_y, vc->vc_x);
+ }
+ static void hide_softcursor(struct vc_data *vc)
+ {
+       if (softcursor_original != -1) {
+               scr_writew(softcursor_original, (u16 *)vc->vc_pos);
+               if (DO_UPDATE(vc))
+                       vc->vc_sw->con_putc(vc, softcursor_original,
+                                       vc->vc_y, vc->vc_x);
+               softcursor_original = -1;
+       }
+ }
+ static void hide_cursor(struct vc_data *vc)
+ {
+       if (vc == sel_cons)
+               clear_selection();
+       vc->vc_sw->con_cursor(vc, CM_ERASE);
+       hide_softcursor(vc);
+ }
+ static void set_cursor(struct vc_data *vc)
+ {
+       if (!IS_FG(vc) || console_blanked ||
+           vc->vc_mode == KD_GRAPHICS)
+               return;
+       if (vc->vc_deccm) {
+               if (vc == sel_cons)
+                       clear_selection();
+               add_softcursor(vc);
+               if ((vc->vc_cursor_type & 0x0f) != 1)
+                       vc->vc_sw->con_cursor(vc, CM_DRAW);
+       } else
+               hide_cursor(vc);
+ }
+ static void set_origin(struct vc_data *vc)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (!CON_IS_VISIBLE(vc) ||
+           !vc->vc_sw->con_set_origin ||
+           !vc->vc_sw->con_set_origin(vc))
+               vc->vc_origin = (unsigned long)vc->vc_screenbuf;
+       vc->vc_visible_origin = vc->vc_origin;
+       vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size;
+       vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->vc_y + 2 * vc->vc_x;
+ }
+ static inline void save_screen(struct vc_data *vc)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (vc->vc_sw->con_save_screen)
+               vc->vc_sw->con_save_screen(vc);
+ }
+ /*
+  *    Redrawing of screen
+  */
+ static void clear_buffer_attributes(struct vc_data *vc)
+ {
+       unsigned short *p = (unsigned short *)vc->vc_origin;
+       int count = vc->vc_screenbuf_size / 2;
+       int mask = vc->vc_hi_font_mask | 0xff;
+       for (; count > 0; count--, p++) {
+               scr_writew((scr_readw(p)&mask) | (vc->vc_video_erase_char & ~mask), p);
+       }
+ }
+ void redraw_screen(struct vc_data *vc, int is_switch)
+ {
+       int redraw = 0;
+       WARN_CONSOLE_UNLOCKED();
+       if (!vc) {
+               /* strange ... */
+               /* printk("redraw_screen: tty %d not allocated ??\n", new_console+1); */
+               return;
+       }
+       if (is_switch) {
+               struct vc_data *old_vc = vc_cons[fg_console].d;
+               if (old_vc == vc)
+                       return;
+               if (!CON_IS_VISIBLE(vc))
+                       redraw = 1;
+               *vc->vc_display_fg = vc;
+               fg_console = vc->vc_num;
+               hide_cursor(old_vc);
+               if (!CON_IS_VISIBLE(old_vc)) {
+                       save_screen(old_vc);
+                       set_origin(old_vc);
+               }
+       } else {
+               hide_cursor(vc);
+               redraw = 1;
+       }
+       if (redraw) {
+               int update;
+               int old_was_color = vc->vc_can_do_color;
+               set_origin(vc);
+               update = vc->vc_sw->con_switch(vc);
+               set_palette(vc);
+               /*
+                * If console changed from mono<->color, the best we can do
+                * is to clear the buffer attributes. As it currently stands,
+                * rebuilding new attributes from the old buffer is not doable
+                * without overly complex code.
+                */
+               if (old_was_color != vc->vc_can_do_color) {
+                       update_attr(vc);
+                       clear_buffer_attributes(vc);
+               }
+               /* Forcibly update if we're panicking */
+               if ((update && vc->vc_mode != KD_GRAPHICS) ||
+                   vt_force_oops_output(vc))
+                       do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
+       }
+       set_cursor(vc);
+       if (is_switch) {
+               set_leds();
+               compute_shiftstate();
+               notify_update(vc);
+       }
+ }
+ /*
+  *    Allocation, freeing and resizing of VTs.
+  */
+ int vc_cons_allocated(unsigned int i)
+ {
+       return (i < MAX_NR_CONSOLES && vc_cons[i].d);
+ }
+ static void visual_init(struct vc_data *vc, int num, int init)
+ {
+       /* ++Geert: vc->vc_sw->con_init determines console size */
+       if (vc->vc_sw)
+               module_put(vc->vc_sw->owner);
+       vc->vc_sw = conswitchp;
+ #ifndef VT_SINGLE_DRIVER
+       if (con_driver_map[num])
+               vc->vc_sw = con_driver_map[num];
+ #endif
+       __module_get(vc->vc_sw->owner);
+       vc->vc_num = num;
+       vc->vc_display_fg = &master_display_fg;
+       vc->vc_uni_pagedir_loc = &vc->vc_uni_pagedir;
+       vc->vc_uni_pagedir = 0;
+       vc->vc_hi_font_mask = 0;
+       vc->vc_complement_mask = 0;
+       vc->vc_can_do_color = 0;
+       vc->vc_panic_force_write = false;
+       vc->vc_sw->con_init(vc, init);
+       if (!vc->vc_complement_mask)
+               vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
+       vc->vc_s_complement_mask = vc->vc_complement_mask;
+       vc->vc_size_row = vc->vc_cols << 1;
+       vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;
+ }
+ int vc_allocate(unsigned int currcons)        /* return 0 on success */
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (currcons >= MAX_NR_CONSOLES)
+               return -ENXIO;
+       if (!vc_cons[currcons].d) {
+           struct vc_data *vc;
+           struct vt_notifier_param param;
+           /* prevent users from taking too much memory */
+           if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
+             return -EPERM;
+           /* due to the granularity of kmalloc, we waste some memory here */
+           /* the alloc is done in two steps, to optimize the common situation
+              of a 25x80 console (structsize=216, screenbuf_size=4000) */
+           /* although the numbers above stopped being valid long ago, the
+              point is still up-to-date and the comment still has its value
+              even if only as a historical artifact.  --mj, July 1998 */
+           param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
+           if (!vc)
+               return -ENOMEM;
+           vc_cons[currcons].d = vc;
+           tty_port_init(&vc->port);
+           INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
+           visual_init(vc, currcons, 1);
+           if (!*vc->vc_uni_pagedir_loc)
+               con_set_default_unimap(vc);
+           vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
+           if (!vc->vc_screenbuf) {
+               kfree(vc);
+               vc_cons[currcons].d = NULL;
+               return -ENOMEM;
+           }
+           /* If no drivers have overridden us and the user didn't pass a
+              boot option, default to displaying the cursor */
+           if (global_cursor_default == -1)
+                   global_cursor_default = 1;
+           vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
+           vcs_make_sysfs(currcons);
+           atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
+       }
+       return 0;
+ }
+ static inline int resize_screen(struct vc_data *vc, int width, int height,
+                               int user)
+ {
+       /* Resizes the resolution of the display adapter */
+       int err = 0;
+       if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
+               err = vc->vc_sw->con_resize(vc, width, height, user);
+       return err;
+ }
+ /*
+  * Change # of rows and columns (0 means unchanged/the size of fg_console)
+  * [this is to be used together with some user program
+  * like resize that changes the hardware videomode]
+  */
+ #define VC_RESIZE_MAXCOL (32767)
+ #define VC_RESIZE_MAXROW (32767)
+ /**
+  *    vc_do_resize    -       resizing method for the tty
+  *    @tty: tty being resized
+  *    @vc: virtual console private data
+  *    @cols: columns
+  *    @lines: lines
+  *
+  *    Resize a virtual console, clipping according to the actual constraints.
+  *    If the caller passes a tty structure then update the termios winsize
+  *    information and perform any necessary signal handling.
+  *
+  *    Caller must hold the console semaphore. Takes the termios mutex and
+  *    ctrl_lock of the tty IFF a tty is passed.
+  */
+ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
+                               unsigned int cols, unsigned int lines)
+ {
+       unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
+       unsigned long end;
+       unsigned int old_cols, old_rows, old_row_size, old_screen_size;
+       unsigned int new_cols, new_rows, new_row_size, new_screen_size;
+       unsigned int user;
+       unsigned short *newscreen;
+       WARN_CONSOLE_UNLOCKED();
+       if (!vc)
+               return -ENXIO;
+       user = vc->vc_resize_user;
+       vc->vc_resize_user = 0;
+       if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW)
+               return -EINVAL;
+       new_cols = (cols ? cols : vc->vc_cols);
+       new_rows = (lines ? lines : vc->vc_rows);
+       new_row_size = new_cols << 1;
+       new_screen_size = new_row_size * new_rows;
+       if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
+               return 0;
+       newscreen = kmalloc(new_screen_size, GFP_USER);
+       if (!newscreen)
+               return -ENOMEM;
+       old_rows = vc->vc_rows;
+       old_cols = vc->vc_cols;
+       old_row_size = vc->vc_size_row;
+       old_screen_size = vc->vc_screenbuf_size;
+       err = resize_screen(vc, new_cols, new_rows, user);
+       if (err) {
+               kfree(newscreen);
+               return err;
+       }
+       vc->vc_rows = new_rows;
+       vc->vc_cols = new_cols;
+       vc->vc_size_row = new_row_size;
+       vc->vc_screenbuf_size = new_screen_size;
+       rlth = min(old_row_size, new_row_size);
+       rrem = new_row_size - rlth;
+       old_origin = vc->vc_origin;
+       new_origin = (long) newscreen;
+       new_scr_end = new_origin + new_screen_size;
+       if (vc->vc_y > new_rows) {
+               if (old_rows - vc->vc_y < new_rows) {
+                       /*
+                        * Cursor near the bottom, copy contents from the
+                        * bottom of buffer
+                        */
+                       old_origin += (old_rows - new_rows) * old_row_size;
+               } else {
+                       /*
+                        * Cursor is in no man's land, copy 1/2 screenful
+                        * from the top and bottom of cursor position
+                        */
+                       old_origin += (vc->vc_y - new_rows/2) * old_row_size;
+               }
+       }
+       end = old_origin + old_row_size * min(old_rows, new_rows);
+       update_attr(vc);
+       while (old_origin < end) {
+               scr_memcpyw((unsigned short *) new_origin,
+                           (unsigned short *) old_origin, rlth);
+               if (rrem)
+                       scr_memsetw((void *)(new_origin + rlth),
+                                   vc->vc_video_erase_char, rrem);
+               old_origin += old_row_size;
+               new_origin += new_row_size;
+       }
+       if (new_scr_end > new_origin)
+               scr_memsetw((void *)new_origin, vc->vc_video_erase_char,
+                           new_scr_end - new_origin);
+       kfree(vc->vc_screenbuf);
+       vc->vc_screenbuf = newscreen;
+       vc->vc_screenbuf_size = new_screen_size;
+       set_origin(vc);
+       /* do part of a reset_terminal() */
+       vc->vc_top = 0;
+       vc->vc_bottom = vc->vc_rows;
+       gotoxy(vc, vc->vc_x, vc->vc_y);
+       save_cur(vc);
+       if (tty) {
+               /* Rewrite the requested winsize data with the actual
+                  resulting sizes */
+               struct winsize ws;
+               memset(&ws, 0, sizeof(ws));
+               ws.ws_row = vc->vc_rows;
+               ws.ws_col = vc->vc_cols;
+               ws.ws_ypixel = vc->vc_scan_lines;
+               tty_do_resize(tty, &ws);
+       }
+       if (CON_IS_VISIBLE(vc))
+               update_screen(vc);
+       vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num);
+       return err;
+ }
+ /**
+  *    vc_resize               -       resize a VT
+  *    @vc: virtual console
+  *    @cols: columns
+  *    @rows: rows
+  *
+  *    Resize a virtual console as seen from the console end of things. We
+  *    use the common vc_do_resize methods to update the structures. The
+  *    caller must hold the console sem to protect console internals and
+  *    vc->port.tty
+  */
+ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
+ {
+       return vc_do_resize(vc->port.tty, vc, cols, rows);
+ }
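+ /*
+  * Example (minimal sketch): a video driver that has switched the
+  * hardware into a 132x43 text mode could propagate the new geometry
+  * with:
+  *
+  *     acquire_console_sem();
+  *     err = vc_resize(vc_cons[fg_console].d, 132, 43);
+  *     release_console_sem();
+  */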
+ /**
+  *    vt_resize               -       resize a VT
+  *    @tty: tty to resize
+  *    @ws: winsize attributes
+  *
+  *    Resize a virtual terminal. This is called by the tty layer as we
+  *    register our own handler for resizing. The shared helper
+  *    vc_do_resize() does all the actual work.
+  *
+  *    Takes the console sem and the called methods then take the tty
+  *    termios_mutex and the tty ctrl_lock in that order.
+  */
+ static int vt_resize(struct tty_struct *tty, struct winsize *ws)
+ {
+       struct vc_data *vc = tty->driver_data;
+       int ret;
+       acquire_console_sem();
+       ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row);
+       release_console_sem();
+       return ret;
+ }
+ void vc_deallocate(unsigned int currcons)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (vc_cons_allocated(currcons)) {
+               struct vc_data *vc = vc_cons[currcons].d;
+               struct vt_notifier_param param = { .vc = vc };
+               atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
+               vcs_remove_sysfs(currcons);
+               vc->vc_sw->con_deinit(vc);
+               put_pid(vc->vt_pid);
+               module_put(vc->vc_sw->owner);
+               kfree(vc->vc_screenbuf);
+               if (currcons >= MIN_NR_CONSOLES)
+                       kfree(vc);
+               vc_cons[currcons].d = NULL;
+       }
+ }
+ /*
+  *    VT102 emulator
+  */
+ #define set_kbd(vc, x)        set_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
+ #define clr_kbd(vc, x)        clr_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
+ #define is_kbd(vc, x) vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
+ #define decarm                VC_REPEAT
+ #define decckm                VC_CKMODE
+ #define kbdapplic     VC_APPLIC
+ #define lnm           VC_CRLF
+ /*
+  * this is what the terminal answers to an ESC-Z or csi0c query.
+  */
+ #define VT100ID "\033[?1;2c"
+ #define VT102ID "\033[?6c"
+ unsigned char color_table[] = { 0, 4, 2, 6, 1, 5, 3, 7,
+                                      8,12,10,14, 9,13,11,15 };
+ /* the default colour table, for VGA+ colour systems */
+ int default_red[] = {0x00,0xaa,0x00,0xaa,0x00,0xaa,0x00,0xaa,
+     0x55,0xff,0x55,0xff,0x55,0xff,0x55,0xff};
+ int default_grn[] = {0x00,0x00,0xaa,0x55,0x00,0x00,0xaa,0xaa,
+     0x55,0x55,0xff,0xff,0x55,0x55,0xff,0xff};
+ int default_blu[] = {0x00,0x00,0x00,0x00,0xaa,0xaa,0xaa,0xaa,
+     0x55,0x55,0x55,0x55,0xff,0xff,0xff,0xff};
+ module_param_array(default_red, int, NULL, S_IRUGO | S_IWUSR);
+ module_param_array(default_grn, int, NULL, S_IRUGO | S_IWUSR);
+ module_param_array(default_blu, int, NULL, S_IRUGO | S_IWUSR);
+ /*
+  * gotoxy() must verify all boundaries, because the arguments
+  * might also be negative. If the given position is out of
+  * bounds, the cursor is placed at the nearest margin.
+  */
+ static void gotoxy(struct vc_data *vc, int new_x, int new_y)
+ {
+       int min_y, max_y;
+       if (new_x < 0)
+               vc->vc_x = 0;
+       else {
+               if (new_x >= vc->vc_cols)
+                       vc->vc_x = vc->vc_cols - 1;
+               else
+                       vc->vc_x = new_x;
+       }
+       if (vc->vc_decom) {
+               min_y = vc->vc_top;
+               max_y = vc->vc_bottom;
+       } else {
+               min_y = 0;
+               max_y = vc->vc_rows;
+       }
+       if (new_y < min_y)
+               vc->vc_y = min_y;
+       else if (new_y >= max_y)
+               vc->vc_y = max_y - 1;
+       else
+               vc->vc_y = new_y;
+       vc->vc_pos = vc->vc_origin + vc->vc_y * vc->vc_size_row + (vc->vc_x<<1);
+       vc->vc_need_wrap = 0;
+ }
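+ /*
+  * Worked example: on an 80-column console (vc_size_row = 160 bytes),
+  * gotoxy(vc, 4, 2) leaves vc_pos = vc_origin + 2*160 + (4 << 1) =
+  * vc_origin + 328, the 16-bit cell at row 2, column 4.
+  */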
+ /* for absolute user moves, when decom is set */
+ static void gotoxay(struct vc_data *vc, int new_x, int new_y)
+ {
+       gotoxy(vc, new_x, vc->vc_decom ? (vc->vc_top + new_y) : new_y);
+ }
+ void scrollback(struct vc_data *vc, int lines)
+ {
+       if (!lines)
+               lines = vc->vc_rows / 2;
+       scrolldelta(-lines);
+ }
+ void scrollfront(struct vc_data *vc, int lines)
+ {
+       if (!lines)
+               lines = vc->vc_rows / 2;
+       scrolldelta(lines);
+ }
+ static void lf(struct vc_data *vc)
+ {
+       /* don't scroll if above bottom of scrolling region, or
+        * if below scrolling region
+        */
+       if (vc->vc_y + 1 == vc->vc_bottom)
+               scrup(vc, vc->vc_top, vc->vc_bottom, 1);
+       else if (vc->vc_y < vc->vc_rows - 1) {
+               vc->vc_y++;
+               vc->vc_pos += vc->vc_size_row;
+       }
+       vc->vc_need_wrap = 0;
+       notify_write(vc, '\n');
+ }
+ static void ri(struct vc_data *vc)
+ {
+       /* don't scroll if below top of scrolling region, or
+        * if above scrolling region
+        */
+       if (vc->vc_y == vc->vc_top)
+               scrdown(vc, vc->vc_top, vc->vc_bottom, 1);
+       else if (vc->vc_y > 0) {
+               vc->vc_y--;
+               vc->vc_pos -= vc->vc_size_row;
+       }
+       vc->vc_need_wrap = 0;
+ }
+ static inline void cr(struct vc_data *vc)
+ {
+       vc->vc_pos -= vc->vc_x << 1;
+       vc->vc_need_wrap = vc->vc_x = 0;
+       notify_write(vc, '\r');
+ }
+ static inline void bs(struct vc_data *vc)
+ {
+       if (vc->vc_x) {
+               vc->vc_pos -= 2;
+               vc->vc_x--;
+               vc->vc_need_wrap = 0;
+               notify_write(vc, '\b');
+       }
+ }
+ static inline void del(struct vc_data *vc)
+ {
+       /* ignored */
+ }
+ static void csi_J(struct vc_data *vc, int vpar)
+ {
+       unsigned int count;
+       unsigned short * start;
+       switch (vpar) {
+               case 0: /* erase from cursor to end of display */
+                       count = (vc->vc_scr_end - vc->vc_pos) >> 1;
+                       start = (unsigned short *)vc->vc_pos;
+                       if (DO_UPDATE(vc)) {
+                               /* do in two stages */
+                               vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
+                                             vc->vc_cols - vc->vc_x);
+                               vc->vc_sw->con_clear(vc, vc->vc_y + 1, 0,
+                                             vc->vc_rows - vc->vc_y - 1,
+                                             vc->vc_cols);
+                       }
+                       break;
+               case 1: /* erase from start to cursor */
+                       count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
+                       start = (unsigned short *)vc->vc_origin;
+                       if (DO_UPDATE(vc)) {
+                               /* do in two stages */
+                               vc->vc_sw->con_clear(vc, 0, 0, vc->vc_y,
+                                             vc->vc_cols);
+                               vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
+                                             vc->vc_x + 1);
+                       }
+                       break;
+               case 2: /* erase whole display */
+                       count = vc->vc_cols * vc->vc_rows;
+                       start = (unsigned short *)vc->vc_origin;
+                       if (DO_UPDATE(vc))
+                               vc->vc_sw->con_clear(vc, 0, 0,
+                                             vc->vc_rows,
+                                             vc->vc_cols);
+                       break;
+               default:
+                       return;
+       }
+       scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
+       vc->vc_need_wrap = 0;
+ }
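+ /*
+  * Example: "\033[2J" reaches csi_J() with vpar == 2 and erases the
+  * whole display; "\033[J" (vpar defaults to 0) erases from the
+  * cursor to the end of the display.
+  */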
+ static void csi_K(struct vc_data *vc, int vpar)
+ {
+       unsigned int count;
+       unsigned short * start;
+       switch (vpar) {
+               case 0: /* erase from cursor to end of line */
+                       count = vc->vc_cols - vc->vc_x;
+                       start = (unsigned short *)vc->vc_pos;
+                       if (DO_UPDATE(vc))
+                               vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
+                                                    vc->vc_cols - vc->vc_x);
+                       break;
+               case 1: /* erase from start of line to cursor */
+                       start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+                       count = vc->vc_x + 1;
+                       if (DO_UPDATE(vc))
+                               vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
+                                                    vc->vc_x + 1);
+                       break;
+               case 2: /* erase whole line */
+                       start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+                       count = vc->vc_cols;
+                       if (DO_UPDATE(vc))
+                               vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
+                                             vc->vc_cols);
+                       break;
+               default:
+                       return;
+       }
+       scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
+       vc->vc_need_wrap = 0;
+ }
+ static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */
+ {                                       /* not vt100? */
+       int count;
+       if (!vpar)
+               vpar++;
+       count = (vpar > vc->vc_cols - vc->vc_x) ? (vc->vc_cols - vc->vc_x) : vpar;
+       scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count);
+       if (DO_UPDATE(vc))
+               vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1, count);
+       vc->vc_need_wrap = 0;
+ }
+ static void default_attr(struct vc_data *vc)
+ {
+       vc->vc_intensity = 1;
+       vc->vc_italic = 0;
+       vc->vc_underline = 0;
+       vc->vc_reverse = 0;
+       vc->vc_blink = 0;
+       vc->vc_color = vc->vc_def_color;
+ }
+ /* console_sem is held */
+ static void csi_m(struct vc_data *vc)
+ {
+       int i;
+       for (i = 0; i <= vc->vc_npar; i++)
+               switch (vc->vc_par[i]) {
+                       case 0: /* all attributes off */
+                               default_attr(vc);
+                               break;
+                       case 1:
+                               vc->vc_intensity = 2;
+                               break;
+                       case 2:
+                               vc->vc_intensity = 0;
+                               break;
+                       case 3:
+                               vc->vc_italic = 1;
+                               break;
+                       case 4:
+                               vc->vc_underline = 1;
+                               break;
+                       case 5:
+                               vc->vc_blink = 1;
+                               break;
+                       case 7:
+                               vc->vc_reverse = 1;
+                               break;
+                       case 10: /* ANSI X3.64-1979 (SCO-ish?)
+                                 * Select primary font, don't display
+                                 * control chars if defined, don't set
+                                 * bit 8 on output.
+                                 */
+                               vc->vc_translate = set_translate(vc->vc_charset == 0
+                                               ? vc->vc_G0_charset
+                                               : vc->vc_G1_charset, vc);
+                               vc->vc_disp_ctrl = 0;
+                               vc->vc_toggle_meta = 0;
+                               break;
+                       case 11: /* ANSI X3.64-1979 (SCO-ish?)
+                                 * Select first alternate font, lets
+                                 * chars < 32 be displayed as ROM chars.
+                                 */
+                               vc->vc_translate = set_translate(IBMPC_MAP, vc);
+                               vc->vc_disp_ctrl = 1;
+                               vc->vc_toggle_meta = 0;
+                               break;
+                       case 12: /* ANSI X3.64-1979 (SCO-ish?)
+                                 * Select second alternate font, toggle
+                                 * high bit before displaying as ROM char.
+                                 */
+                               vc->vc_translate = set_translate(IBMPC_MAP, vc);
+                               vc->vc_disp_ctrl = 1;
+                               vc->vc_toggle_meta = 1;
+                               break;
+                       case 21:
+                       case 22:
+                               vc->vc_intensity = 1;
+                               break;
+                       case 23:
+                               vc->vc_italic = 0;
+                               break;
+                       case 24:
+                               vc->vc_underline = 0;
+                               break;
+                       case 25:
+                               vc->vc_blink = 0;
+                               break;
+                       case 27:
+                               vc->vc_reverse = 0;
+                               break;
+                       case 38: /* ANSI X3.64-1979 (SCO-ish?)
+                                 * Enables underscore, white foreground
+                                 * with white underscore (Linux - use
+                                 * default foreground).
+                                 */
+                               vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
+                               vc->vc_underline = 1;
+                               break;
+                       case 39: /* ANSI X3.64-1979 (SCO-ish?)
+                                 * Disable underline option.
+                                 * Reset colour to default? It did this
+                                 * before...
+                                 */
+                               vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
+                               vc->vc_underline = 0;
+                               break;
+                       case 49:
+                               vc->vc_color = (vc->vc_def_color & 0xf0) | (vc->vc_color & 0x0f);
+                               break;
+                       default:
+                               if (vc->vc_par[i] >= 30 && vc->vc_par[i] <= 37)
+                                       vc->vc_color = color_table[vc->vc_par[i] - 30]
+                                               | (vc->vc_color & 0xf0);
+                               else if (vc->vc_par[i] >= 40 && vc->vc_par[i] <= 47)
+                                       vc->vc_color = (color_table[vc->vc_par[i] - 40] << 4)
+                                               | (vc->vc_color & 0x0f);
+                               break;
+               }
+       update_attr(vc);
+ }
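+ /*
+  * Example: "\033[1;31m" arrives with vc_npar = 1, vc_par[0] = 1 and
+  * vc_par[1] = 31; case 1 sets vc_intensity = 2 (bold) and the
+  * default branch maps 31 through color_table[1] = 4 to a red
+  * foreground.
+  */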
+ static void respond_string(const char *p, struct tty_struct *tty)
+ {
+       while (*p) {
+               tty_insert_flip_char(tty, *p, 0);
+               p++;
+       }
+       con_schedule_flip(tty);
+ }
+ static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
+ {
+       char buf[40];
+       sprintf(buf, "\033[%d;%dR", vc->vc_y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->vc_x + 1);
+       respond_string(buf, tty);
+ }
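+ /*
+  * Example: answering ESC [ 6 n with the cursor at vc_x = 4, vc_y = 2
+  * (origin mode off) emits "\033[3;5R"; row and column are reported
+  * 1-based.
+  */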
+ static inline void status_report(struct tty_struct *tty)
+ {
+       respond_string("\033[0n", tty); /* Terminal ok */
+ }
+ static inline void respond_ID(struct tty_struct * tty)
+ {
+       respond_string(VT102ID, tty);
+ }
+ void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry)
+ {
+       char buf[8];
+       sprintf(buf, "\033[M%c%c%c", (char)(' ' + butt), (char)('!' + mrx),
+               (char)('!' + mry));
+       respond_string(buf, tty);
+ }
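+ /*
+  * Example: button 0 pressed at mrx = 0, mry = 0 produces "\033[M !!":
+  * the button byte is ' ' + butt and each coordinate is sent as
+  * '!' + value, the classic X10 mouse-reporting encoding.
+  */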
+ /* invoked via ioctl(TIOCLINUX) and through set_selection */
+ int mouse_reporting(void)
+ {
+       return vc_cons[fg_console].d->vc_report_mouse;
+ }
+ /* console_sem is held */
+ static void set_mode(struct vc_data *vc, int on_off)
+ {
+       int i;
+       for (i = 0; i <= vc->vc_npar; i++)
+               if (vc->vc_ques) {
+                       switch(vc->vc_par[i]) { /* DEC private modes set/reset */
+                       case 1:                 /* Cursor keys send ^[Ox/^[[x */
+                               if (on_off)
+                                       set_kbd(vc, decckm);
+                               else
+                                       clr_kbd(vc, decckm);
+                               break;
+                       case 3: /* 80/132 mode switch unimplemented */
+                               vc->vc_deccolm = on_off;
+ #if 0
+                               vc_resize(deccolm ? 132 : 80, vc->vc_rows);
+                               /* this alone does not suffice; some user mode
+                                  utility has to change the hardware regs */
+ #endif
+                               break;
+                       case 5:                 /* Inverted screen on/off */
+                               if (vc->vc_decscnm != on_off) {
+                                       vc->vc_decscnm = on_off;
+                                       invert_screen(vc, 0, vc->vc_screenbuf_size, 0);
+                                       update_attr(vc);
+                               }
+                               break;
+                       case 6:                 /* Origin relative/absolute */
+                               vc->vc_decom = on_off;
+                               gotoxay(vc, 0, 0);
+                               break;
+                       case 7:                 /* Autowrap on/off */
+                               vc->vc_decawm = on_off;
+                               break;
+                       case 8:                 /* Autorepeat on/off */
+                               if (on_off)
+                                       set_kbd(vc, decarm);
+                               else
+                                       clr_kbd(vc, decarm);
+                               break;
+                       case 9:
+                               vc->vc_report_mouse = on_off ? 1 : 0;
+                               break;
+                       case 25:                /* Cursor on/off */
+                               vc->vc_deccm = on_off;
+                               break;
+                       case 1000:
+                               vc->vc_report_mouse = on_off ? 2 : 0;
+                               break;
+                       }
+               } else {
+                       switch(vc->vc_par[i]) { /* ANSI modes set/reset */
+                       case 3:                 /* Monitor (display ctrls) */
+                               vc->vc_disp_ctrl = on_off;
+                               break;
+                       case 4:                 /* Insert Mode on/off */
+                               vc->vc_decim = on_off;
+                               break;
+                       case 20:                /* Lf, Enter == CrLf/Lf */
+                               if (on_off)
+                                       set_kbd(vc, lnm);
+                               else
+                                       clr_kbd(vc, lnm);
+                               break;
+                       }
+               }
+ }
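+ /*
+  * Example: "\033[?25l" lands here with vc_ques set and vc_par[0] == 25,
+  * turning the cursor off (vc_deccm = 0); "\033[?25h" turns it back on,
+  * and "\033[?1000h" enables press/release mouse reporting
+  * (vc_report_mouse = 2).
+  */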
+ /* console_sem is held */
+ static void setterm_command(struct vc_data *vc)
+ {
+       switch(vc->vc_par[0]) {
+               case 1: /* set color for underline mode */
+                       if (vc->vc_can_do_color &&
+                                       vc->vc_par[1] < 16) {
+                               vc->vc_ulcolor = color_table[vc->vc_par[1]];
+                               if (vc->vc_underline)
+                                       update_attr(vc);
+                       }
+                       break;
+               case 2: /* set color for half intensity mode */
+                       if (vc->vc_can_do_color &&
+                                       vc->vc_par[1] < 16) {
+                               vc->vc_halfcolor = color_table[vc->vc_par[1]];
+                               if (vc->vc_intensity == 0)
+                                       update_attr(vc);
+                       }
+                       break;
+               case 8: /* store colors as defaults */
+                       vc->vc_def_color = vc->vc_attr;
+                       if (vc->vc_hi_font_mask == 0x100)
+                               vc->vc_def_color >>= 1;
+                       default_attr(vc);
+                       update_attr(vc);
+                       break;
+               case 9: /* set blanking interval */
+                       blankinterval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60;
+                       poke_blanked_console();
+                       break;
+               case 10: /* set bell frequency in Hz */
+                       if (vc->vc_npar >= 1)
+                               vc->vc_bell_pitch = vc->vc_par[1];
+                       else
+                               vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
+                       break;
+               case 11: /* set bell duration in msec */
+                       if (vc->vc_npar >= 1)
+                               vc->vc_bell_duration = (vc->vc_par[1] < 2000) ?
+                                       vc->vc_par[1] * HZ / 1000 : 0;
+                       else
+                               vc->vc_bell_duration = DEFAULT_BELL_DURATION;
+                       break;
+               case 12: /* bring specified console to the front */
+                       if (vc->vc_par[1] >= 1 && vc_cons_allocated(vc->vc_par[1] - 1))
+                               set_console(vc->vc_par[1] - 1);
+                       break;
+               case 13: /* unblank the screen */
+                       poke_blanked_console();
+                       break;
+               case 14: /* set vesa powerdown interval */
+                       vesa_off_interval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60 * HZ;
+                       break;
+               case 15: /* activate the previous console */
+                       set_console(last_console);
+                       break;
+       }
+ }
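+ /*
+  * Example: these commands arrive as "ESC [ n ; m ]" sequences, e.g.
+  * printf("\033[10;1000]\033[11;250]") selects a 1000 Hz bell lasting
+  * 250 ms, and "\033[13]" unblanks the screen.
+  */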
+ /* console_sem is held */
+ static void csi_at(struct vc_data *vc, unsigned int nr)
+ {
+       if (nr > vc->vc_cols - vc->vc_x)
+               nr = vc->vc_cols - vc->vc_x;
+       else if (!nr)
+               nr = 1;
+       insert_char(vc, nr);
+ }
+ /* console_sem is held */
+ static void csi_L(struct vc_data *vc, unsigned int nr)
+ {
+       if (nr > vc->vc_rows - vc->vc_y)
+               nr = vc->vc_rows - vc->vc_y;
+       else if (!nr)
+               nr = 1;
+       scrdown(vc, vc->vc_y, vc->vc_bottom, nr);
+       vc->vc_need_wrap = 0;
+ }
+ /* console_sem is held */
+ static void csi_P(struct vc_data *vc, unsigned int nr)
+ {
+       if (nr > vc->vc_cols - vc->vc_x)
+               nr = vc->vc_cols - vc->vc_x;
+       else if (!nr)
+               nr = 1;
+       delete_char(vc, nr);
+ }
+ /* console_sem is held */
+ static void csi_M(struct vc_data *vc, unsigned int nr)
+ {
+       if (nr > vc->vc_rows - vc->vc_y)
+               nr = vc->vc_rows - vc->vc_y;
+       else if (!nr)
+                       nr = 1;
+       scrup(vc, vc->vc_y, vc->vc_bottom, nr);
+       vc->vc_need_wrap = 0;
+ }
+ /* console_sem is held (except via vc_init->reset_terminal) */
+ static void save_cur(struct vc_data *vc)
+ {
+       vc->vc_saved_x          = vc->vc_x;
+       vc->vc_saved_y          = vc->vc_y;
+       vc->vc_s_intensity      = vc->vc_intensity;
+       vc->vc_s_italic         = vc->vc_italic;
+       vc->vc_s_underline      = vc->vc_underline;
+       vc->vc_s_blink          = vc->vc_blink;
+       vc->vc_s_reverse        = vc->vc_reverse;
+       vc->vc_s_charset        = vc->vc_charset;
+       vc->vc_s_color          = vc->vc_color;
+       vc->vc_saved_G0         = vc->vc_G0_charset;
+       vc->vc_saved_G1         = vc->vc_G1_charset;
+ }
+ /* console_sem is held */
+ static void restore_cur(struct vc_data *vc)
+ {
+       gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y);
+       vc->vc_intensity        = vc->vc_s_intensity;
+       vc->vc_italic           = vc->vc_s_italic;
+       vc->vc_underline        = vc->vc_s_underline;
+       vc->vc_blink            = vc->vc_s_blink;
+       vc->vc_reverse          = vc->vc_s_reverse;
+       vc->vc_charset          = vc->vc_s_charset;
+       vc->vc_color            = vc->vc_s_color;
+       vc->vc_G0_charset       = vc->vc_saved_G0;
+       vc->vc_G1_charset       = vc->vc_saved_G1;
+       vc->vc_translate        = set_translate(vc->vc_charset ? vc->vc_G1_charset : vc->vc_G0_charset, vc);
+       update_attr(vc);
+       vc->vc_need_wrap = 0;
+ }
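+ /*
+  * Example: "\0337" (ESC 7) ends up in save_cur() and "\0338" (ESC 8)
+  * in restore_cur(), so a program can park the cursor, draw elsewhere,
+  * and jump back with charset and attributes intact.
+  */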
+ enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey,
+       EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd,
+       ESpalette };
+ /* console_sem is held (except via vc_init()) */
+ static void reset_terminal(struct vc_data *vc, int do_clear)
+ {
+       vc->vc_top              = 0;
+       vc->vc_bottom           = vc->vc_rows;
+       vc->vc_state            = ESnormal;
+       vc->vc_ques             = 0;
+       vc->vc_translate        = set_translate(LAT1_MAP, vc);
+       vc->vc_G0_charset       = LAT1_MAP;
+       vc->vc_G1_charset       = GRAF_MAP;
+       vc->vc_charset          = 0;
+       vc->vc_need_wrap        = 0;
+       vc->vc_report_mouse     = 0;
+       vc->vc_utf              = default_utf8;
+       vc->vc_utf_count        = 0;
+       vc->vc_disp_ctrl        = 0;
+       vc->vc_toggle_meta      = 0;
+       vc->vc_decscnm          = 0;
+       vc->vc_decom            = 0;
+       vc->vc_decawm           = 1;
+       vc->vc_deccm            = global_cursor_default;
+       vc->vc_decim            = 0;
+       set_kbd(vc, decarm);
+       clr_kbd(vc, decckm);
+       clr_kbd(vc, kbdapplic);
+       clr_kbd(vc, lnm);
+       kbd_table[vc->vc_num].lockstate = 0;
+       kbd_table[vc->vc_num].slockstate = 0;
+       kbd_table[vc->vc_num].ledmode = LED_SHOW_FLAGS;
+       kbd_table[vc->vc_num].ledflagstate = kbd_table[vc->vc_num].default_ledflagstate;
+       /* do not do set_leds here because this causes an endless tasklet loop
+          when the keyboard hasn't been initialized yet */
+       vc->vc_cursor_type = cur_default;
+       vc->vc_complement_mask = vc->vc_s_complement_mask;
+       default_attr(vc);
+       update_attr(vc);
+       vc->vc_tab_stop[0]      = 0x01010100;
+       vc->vc_tab_stop[1]      =
+       vc->vc_tab_stop[2]      =
+       vc->vc_tab_stop[3]      =
+       vc->vc_tab_stop[4]      =
+       vc->vc_tab_stop[5]      =
+       vc->vc_tab_stop[6]      =
+       vc->vc_tab_stop[7]      = 0x01010101;
+       vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
+       vc->vc_bell_duration = DEFAULT_BELL_DURATION;
+       gotoxy(vc, 0, 0);
+       save_cur(vc);
+       if (do_clear)
+           csi_J(vc, 2);
+ }
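+ /*
+  * Worked example: each vc_tab_stop word above covers 32 columns, one
+  * bit per column; 0x01010101 has bits 0, 8, 16 and 24 set, so the
+  * table marks a stop every 8 columns, with bit 0 of word 0 cleared
+  * because column 0 needs no tab stop.
+  */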
+ /* console_sem is held */
+ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
+ {
+       /*
+        *  Control characters can be used in the _middle_
+        *  of an escape sequence.
+        */
+       switch (c) {
+       case 0:
+               return;
+       case 7:
+               if (vc->vc_bell_duration)
+                       kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration);
+               return;
+       case 8:
+               bs(vc);
+               return;
+       case 9:
+               vc->vc_pos -= (vc->vc_x << 1);
+               while (vc->vc_x < vc->vc_cols - 1) {
+                       vc->vc_x++;
+                       if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31)))
+                               break;
+               }
+               vc->vc_pos += (vc->vc_x << 1);
+               notify_write(vc, '\t');
+               return;
+       case 10: case 11: case 12:
+               lf(vc);
+               if (!is_kbd(vc, lnm))
+                       return;
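+               /* fall through to cr() when the keyboard is in lnm (CRLF) mode */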
+       case 13:
+               cr(vc);
+               return;
+       case 14:
+               vc->vc_charset = 1;
+               vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
+               vc->vc_disp_ctrl = 1;
+               return;
+       case 15:
+               vc->vc_charset = 0;
+               vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
+               vc->vc_disp_ctrl = 0;
+               return;
+       case 24: case 26:
+               vc->vc_state = ESnormal;
+               return;
+       case 27:
+               vc->vc_state = ESesc;
+               return;
+       case 127:
+               del(vc);
+               return;
+       case 128+27:    /* CSI: single-byte equivalent of "ESC [" */
+               vc->vc_state = ESsquare;
+               return;
+       }
+       switch (vc->vc_state) {
+       case ESesc:
+               vc->vc_state = ESnormal;
+               switch (c) {
+               case '[':
+                       vc->vc_state = ESsquare;
+                       return;
+               case ']':
+                       vc->vc_state = ESnonstd;
+                       return;
+               case '%':
+                       vc->vc_state = ESpercent;
+                       return;
+               case 'E':
+                       cr(vc);
+                       lf(vc);
+                       return;
+               case 'M':
+                       ri(vc);
+                       return;
+               case 'D':
+                       lf(vc);
+                       return;
+               case 'H':
+                       vc->vc_tab_stop[vc->vc_x >> 5] |= (1 << (vc->vc_x & 31));
+                       return;
+               case 'Z':
+                       respond_ID(tty);
+                       return;
+               case '7':
+                       save_cur(vc);
+                       return;
+               case '8':
+                       restore_cur(vc);
+                       return;
+               case '(':
+                       vc->vc_state = ESsetG0;
+                       return;
+               case ')':
+                       vc->vc_state = ESsetG1;
+                       return;
+               case '#':
+                       vc->vc_state = EShash;
+                       return;
+               case 'c':
+                       reset_terminal(vc, 1);
+                       return;
+               case '>':  /* Numeric keypad */
+                       clr_kbd(vc, kbdapplic);
+                       return;
+               case '=':  /* Appl. keypad */
+                       set_kbd(vc, kbdapplic);
+                       return;
+               }
+               return;
+       case ESnonstd:
+               if (c == 'P') {   /* palette escape sequence */
+                       for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
+                               vc->vc_par[vc->vc_npar] = 0;
+                       vc->vc_npar = 0;
+                       vc->vc_state = ESpalette;
+                       return;
+               } else if (c == 'R') {   /* reset palette */
+                       reset_palette(vc);
+                       vc->vc_state = ESnormal;
+               } else
+                       vc->vc_state = ESnormal;
+               return;
+       case ESpalette:
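+               /*
+                * Palette sequence: "ESC ] P nrrggbb" - one hex digit for
+                * the palette index followed by two hex digits per colour
+                * channel, seven digits in all.  E.g. "\e]P1ff0000" sets
+                * entry 1 to bright red.
+                */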
+               if (isxdigit(c)) {
+                       vc->vc_par[vc->vc_npar++] = hex_to_bin(c);
+                       if (vc->vc_npar == 7) {
+                               int i = vc->vc_par[0] * 3, j = 1;
+                               vc->vc_palette[i] = 16 * vc->vc_par[j++];
+                               vc->vc_palette[i++] += vc->vc_par[j++];
+                               vc->vc_palette[i] = 16 * vc->vc_par[j++];
+                               vc->vc_palette[i++] += vc->vc_par[j++];
+                               vc->vc_palette[i] = 16 * vc->vc_par[j++];
+                               vc->vc_palette[i] += vc->vc_par[j];
+                               set_palette(vc);
+                               vc->vc_state = ESnormal;
+                       }
+               } else
+                       vc->vc_state = ESnormal;
+               return;
+       case ESsquare:
+               for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
+                       vc->vc_par[vc->vc_npar] = 0;
+               vc->vc_npar = 0;
+               vc->vc_state = ESgetpars;
+               if (c == '[') { /* Function key */
+                       vc->vc_state = ESfunckey;
+                       return;
+               }
+               vc->vc_ques = (c == '?');
+               if (vc->vc_ques)
+                       return;
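+               /* fall through: parse c as the first parameter character */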
+       case ESgetpars:
+               if (c == ';' && vc->vc_npar < NPAR - 1) {
+                       vc->vc_npar++;
+                       return;
+               } else if (c >= '0' && c <= '9') {
+                       vc->vc_par[vc->vc_npar] *= 10;
+                       vc->vc_par[vc->vc_npar] += c - '0';
+                       return;
+               } else
+                       vc->vc_state = ESgotpars;
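+               /* fall through: dispatch the final character at once */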
+       case ESgotpars:
+               vc->vc_state = ESnormal;
+               switch (c) {
+               case 'h':
+                       set_mode(vc, 1);
+                       return;
+               case 'l':
+                       set_mode(vc, 0);
+                       return;
+               case 'c':
+                       if (vc->vc_ques) {
+                               if (vc->vc_par[0])
+                                       vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16);
+                               else
+                                       vc->vc_cursor_type = cur_default;
+                               return;
+                       }
+                       break;
+               case 'm':
+                       if (vc->vc_ques) {
+                               clear_selection();
+                               if (vc->vc_par[0])
+                                       vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1];
+                               else
+                                       vc->vc_complement_mask = vc->vc_s_complement_mask;
+                               return;
+                       }
+                       break;
+               case 'n':
+                       if (!vc->vc_ques) {
+                               if (vc->vc_par[0] == 5)
+                                       status_report(tty);
+                               else if (vc->vc_par[0] == 6)
+                                       cursor_report(vc, tty);
+                       }
+                       return;
+               }
+               if (vc->vc_ques) {
+                       vc->vc_ques = 0;
+                       return;
+               }
+               switch (c) {
+               case 'G': case '`':
+                       if (vc->vc_par[0])
+                               vc->vc_par[0]--;
+                       gotoxy(vc, vc->vc_par[0], vc->vc_y);
+                       return;
+               case 'A':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, vc->vc_x, vc->vc_y - vc->vc_par[0]);
+                       return;
+               case 'B': case 'e':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, vc->vc_x, vc->vc_y + vc->vc_par[0]);
+                       return;
+               case 'C': case 'a':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, vc->vc_x + vc->vc_par[0], vc->vc_y);
+                       return;
+               case 'D':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, vc->vc_x - vc->vc_par[0], vc->vc_y);
+                       return;
+               case 'E':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, 0, vc->vc_y + vc->vc_par[0]);
+                       return;
+               case 'F':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       gotoxy(vc, 0, vc->vc_y - vc->vc_par[0]);
+                       return;
+               case 'd':
+                       if (vc->vc_par[0])
+                               vc->vc_par[0]--;
+                       gotoxay(vc, vc->vc_x, vc->vc_par[0]);
+                       return;
+               case 'H': case 'f':
+                       if (vc->vc_par[0])
+                               vc->vc_par[0]--;
+                       if (vc->vc_par[1])
+                               vc->vc_par[1]--;
+                       gotoxay(vc, vc->vc_par[1], vc->vc_par[0]);
+                       return;
+               case 'J':
+                       csi_J(vc, vc->vc_par[0]);
+                       return;
+               case 'K':
+                       csi_K(vc, vc->vc_par[0]);
+                       return;
+               case 'L':
+                       csi_L(vc, vc->vc_par[0]);
+                       return;
+               case 'M':
+                       csi_M(vc, vc->vc_par[0]);
+                       return;
+               case 'P':
+                       csi_P(vc, vc->vc_par[0]);
+                       return;
+               case 'c':
+                       if (!vc->vc_par[0])
+                               respond_ID(tty);
+                       return;
+               case 'g':
+                       if (!vc->vc_par[0])
+                               vc->vc_tab_stop[vc->vc_x >> 5] &= ~(1 << (vc->vc_x & 31));
+                       else if (vc->vc_par[0] == 3) {
+                               vc->vc_tab_stop[0] =
+                                       vc->vc_tab_stop[1] =
+                                       vc->vc_tab_stop[2] =
+                                       vc->vc_tab_stop[3] =
+                                       vc->vc_tab_stop[4] =
+                                       vc->vc_tab_stop[5] =
+                                       vc->vc_tab_stop[6] =
+                                       vc->vc_tab_stop[7] = 0;
+                       }
+                       return;
+               case 'm':
+                       csi_m(vc);
+                       return;
+               case 'q': /* DECLL - but only 3 leds */
+                       /* map 0,1,2,3 to 0,1,2,4 */
+                       if (vc->vc_par[0] < 4)
+                               setledstate(kbd_table + vc->vc_num,
+                                           (vc->vc_par[0] < 3) ? vc->vc_par[0] : 4);
+                       return;
+               case 'r':
+                       if (!vc->vc_par[0])
+                               vc->vc_par[0]++;
+                       if (!vc->vc_par[1])
+                               vc->vc_par[1] = vc->vc_rows;
+                       /* Minimum allowed region is 2 lines */
+                       if (vc->vc_par[0] < vc->vc_par[1] &&
+                           vc->vc_par[1] <= vc->vc_rows) {
+                               vc->vc_top = vc->vc_par[0] - 1;
+                               vc->vc_bottom = vc->vc_par[1];
+                               gotoxay(vc, 0, 0);
+                       }
+                       return;
+               case 's':
+                       save_cur(vc);
+                       return;
+               case 'u':
+                       restore_cur(vc);
+                       return;
+               case 'X':
+                       csi_X(vc, vc->vc_par[0]);
+                       return;
+               case '@':
+                       csi_at(vc, vc->vc_par[0]);
+                       return;
+               case ']': /* setterm functions */
+                       setterm_command(vc);
+                       return;
+               }
+               return;
+       case ESpercent:
+               vc->vc_state = ESnormal;
+               switch (c) {
+               case '@':  /* defined in ISO 2022 */
+                       vc->vc_utf = 0;
+                       return;
+               case 'G':  /* prelim official escape code */
+               case '8':  /* retained for compatibility */
+                       vc->vc_utf = 1;
+                       return;
+               }
+               return;
+       case ESfunckey:
+               vc->vc_state = ESnormal;
+               return;
+       case EShash:
+               vc->vc_state = ESnormal;
+               if (c == '8') {
+                       /* DEC screen alignment test. kludge :-) */
+                       vc->vc_video_erase_char =
+                               (vc->vc_video_erase_char & 0xff00) | 'E';
+                       csi_J(vc, 2);
+                       vc->vc_video_erase_char =
+                               (vc->vc_video_erase_char & 0xff00) | ' ';
+                       do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
+               }
+               return;
+       case ESsetG0:
+               if (c == '0')
+                       vc->vc_G0_charset = GRAF_MAP;
+               else if (c == 'B')
+                       vc->vc_G0_charset = LAT1_MAP;
+               else if (c == 'U')
+                       vc->vc_G0_charset = IBMPC_MAP;
+               else if (c == 'K')
+                       vc->vc_G0_charset = USER_MAP;
+               if (vc->vc_charset == 0)
+                       vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
+               vc->vc_state = ESnormal;
+               return;
+       case ESsetG1:
+               if (c == '0')
+                       vc->vc_G1_charset = GRAF_MAP;
+               else if (c == 'B')
+                       vc->vc_G1_charset = LAT1_MAP;
+               else if (c == 'U')
+                       vc->vc_G1_charset = IBMPC_MAP;
+               else if (c == 'K')
+                       vc->vc_G1_charset = USER_MAP;
+               if (vc->vc_charset == 1)
+                       vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
+               vc->vc_state = ESnormal;
+               return;
+       default:
+               vc->vc_state = ESnormal;
+       }
+ }
+ /* This is a temporary buffer used to prepare a tty console write
+  * so that we can easily avoid touching user space while holding the
+  * console spinlock.  It is set up in con_init() and is shared by
+  * this code and the vc_screen read/write tty calls.
+  *
+  * We have to allocate this statically in the kernel data section
+  * since console_init() (and thus con_init()) is called before any
+  * kernel memory allocation is available.
+  */
+ char con_buf[CON_BUF_SIZE];
+ DEFINE_MUTEX(con_buf_mtx);
+ /* is_double_width() is based on the wcwidth() implementation by
+  * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+  * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+  */
+ struct interval {
+       uint32_t first;
+       uint32_t last;
+ };
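+ /*
+  * Standard binary search over a sorted table of non-overlapping
+  * [first, last] intervals.  @max is the index of the last entry
+  * (callers pass ARRAY_SIZE() - 1); returns 1 if @ucs lies inside any
+  * interval, 0 otherwise.
+  */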
+ static int bisearch(uint32_t ucs, const struct interval *table, int max)
+ {
+       int min = 0;
+       int mid;
+       if (ucs < table[0].first || ucs > table[max].last)
+               return 0;
+       while (max >= min) {
+               mid = (min + max) / 2;
+               if (ucs > table[mid].last)
+                       min = mid + 1;
+               else if (ucs < table[mid].first)
+                       max = mid - 1;
+               else
+                       return 1;
+       }
+       return 0;
+ }
+ static int is_double_width(uint32_t ucs)
+ {
+       static const struct interval double_width[] = {
+               { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E },
+               { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF },
+               { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 },
+               { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }
+       };
+       return bisearch(ucs, double_width, ARRAY_SIZE(double_width) - 1);
+ }
+ /* acquires console_sem */
+ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
+ {
+ #ifdef VT_BUF_VRAM_ONLY
+ #define FLUSH do { } while(0);
+ #else
+ #define FLUSH if (draw_x >= 0) { \
+       vc->vc_sw->con_putcs(vc, (u16 *)draw_from, (u16 *)draw_to - (u16 *)draw_from, vc->vc_y, draw_x); \
+       draw_x = -1; \
+       }
+ #endif
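+       /*
+        * FLUSH pushes any pending run of screen words [draw_from,
+        * draw_to) out to the driver at row vc_y, starting at column
+        * draw_x; draw_x == -1 means nothing is pending.  Both variants
+        * expand to a complete statement, so call sites write FLUSH with
+        * no trailing semicolon.
+        */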
+       int c, tc, ok, n = 0, draw_x = -1;
+       unsigned int currcons;
+       unsigned long draw_from = 0, draw_to = 0;
+       struct vc_data *vc;
+       unsigned char vc_attr;
+       struct vt_notifier_param param;
+       uint8_t rescan;
+       uint8_t inverse;
+       uint8_t width;
+       u16 himask, charmask;
+       if (in_interrupt())
+               return count;
+       might_sleep();
+       acquire_console_sem();
+       vc = tty->driver_data;
+       if (vc == NULL) {
+               printk(KERN_ERR "vt: argh, driver_data is NULL!\n");
+               release_console_sem();
+               return 0;
+       }
+       currcons = vc->vc_num;
+       if (!vc_cons_allocated(currcons)) {
+               /* could this happen? */
+               printk_once(KERN_WARNING "con_write: tty %d not allocated\n",
+                           currcons + 1);
+               release_console_sem();
+               return 0;
+       }
+       himask = vc->vc_hi_font_mask;
+       charmask = himask ? 0x1ff : 0xff;
+       /* undraw cursor first */
+       if (IS_FG(vc))
+               hide_cursor(vc);
+       param.vc = vc;
+       while (!tty->stopped && count) {
+               int orig = *buf;
+               c = orig;
+               buf++;
+               n++;
+               count--;
+               rescan = 0;
+               inverse = 0;
+               width = 1;
+               /* Do no translation at all in control states */
+               if (vc->vc_state != ESnormal) {
+                       tc = c;
+               } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
+                   /* Combine UTF-8 into Unicode in vc_utf_char.
+                    * vc_utf_count is the number of continuation bytes still
+                    * expected to arrive.
+                    * vc_npar is the number of continuation bytes arrived so
+                    * far
+                    */
+ rescan_last_byte:
+                   if ((c & 0xc0) == 0x80) {
+                       /* Continuation byte received */
+                       static const uint32_t utf8_length_changes[] = {
+                               0x0000007f, 0x000007ff, 0x0000ffff,
+                               0x001fffff, 0x03ffffff, 0x7fffffff
+                       };
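+                       /*
+                        * utf8_length_changes[k] is the largest code point
+                        * encodable in k + 1 UTF-8 bytes; a completed value
+                        * must exceed the maximum for a sequence one byte
+                        * shorter, otherwise it is an overlong encoding.
+                        */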
+                       if (vc->vc_utf_count) {
+                           vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
+                           vc->vc_npar++;
+                           if (--vc->vc_utf_count) {
+                               /* Still need some bytes */
+                               continue;
+                           }
+                           /* Got a whole character */
+                           c = vc->vc_utf_char;
+                           /* Reject overlong sequences */
+                           if (c <= utf8_length_changes[vc->vc_npar - 1] ||
+                                       c > utf8_length_changes[vc->vc_npar])
+                               c = 0xfffd;
+                       } else {
+                           /* Unexpected continuation byte */
+                           vc->vc_utf_count = 0;
+                           c = 0xfffd;
+                       }
+                   } else {
+                       /* Single ASCII byte or first byte of a sequence received */
+                       if (vc->vc_utf_count) {
+                           /* Continuation byte expected */
+                           rescan = 1;
+                           vc->vc_utf_count = 0;
+                           c = 0xfffd;
+                       } else if (c > 0x7f) {
+                           /* First byte of a multibyte sequence received */
+                           vc->vc_npar = 0;
+                           if ((c & 0xe0) == 0xc0) {
+                               vc->vc_utf_count = 1;
+                               vc->vc_utf_char = (c & 0x1f);
+                           } else if ((c & 0xf0) == 0xe0) {
+                               vc->vc_utf_count = 2;
+                               vc->vc_utf_char = (c & 0x0f);
+                           } else if ((c & 0xf8) == 0xf0) {
+                               vc->vc_utf_count = 3;
+                               vc->vc_utf_char = (c & 0x07);
+                           } else if ((c & 0xfc) == 0xf8) {
+                               vc->vc_utf_count = 4;
+                               vc->vc_utf_char = (c & 0x03);
+                           } else if ((c & 0xfe) == 0xfc) {
+                               vc->vc_utf_count = 5;
+                               vc->vc_utf_char = (c & 0x01);
+                           } else {
+                               /* 254 and 255 are invalid */
+                               c = 0xfffd;
+                           }
+                           if (vc->vc_utf_count) {
+                               /* Still need some bytes */
+                               continue;
+                           }
+                       }
+                       /* Nothing to do if an ASCII byte was received */
+                   }
+                   /* End of UTF-8 decoding. */
+                   /* c is the received character, or U+FFFD for invalid sequences. */
+                   /* Replace invalid Unicode code points with U+FFFD too */
+                   if ((c >= 0xd800 && c <= 0xdfff) || c == 0xfffe || c == 0xffff)
+                       c = 0xfffd;
+                   tc = c;
+               } else {        /* no utf or alternate charset mode */
+                   tc = vc_translate(vc, c);
+               }
+               param.c = tc;
+               if (atomic_notifier_call_chain(&vt_notifier_list, VT_PREWRITE,
+                                       &param) == NOTIFY_STOP)
+                       continue;
+                 /* If the original code was a control character we
+                  * only allow a glyph to be displayed if the code is
+                  * not normally used (such as for cursor movement) or
+                  * if the disp_ctrl mode has been explicitly enabled.
+                  * Certain characters (as given by the CTRL_ALWAYS
+                  * bitmap) are always displayed as control characters,
+                  * as the console would be pretty useless without
+                  * them; to display an arbitrary font position use the
+                  * direct-to-font zone in UTF-8 mode.
+                  */
+                 ok = tc && (c >= 32 ||
+                           !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
+                                 vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
+                       && (c != 127 || vc->vc_disp_ctrl)
+                       && (c != 128+27);
+               if (vc->vc_state == ESnormal && ok) {
+                       if (vc->vc_utf && !vc->vc_disp_ctrl) {
+                               if (is_double_width(c))
+                                       width = 2;
+                       }
+                       /* Now try to find out how to display it */
+                       tc = conv_uni_to_pc(vc, tc);
+                       if (tc & ~charmask) {
+                               if (tc == -1 || tc == -2) {
+                                   continue; /* nothing to display */
+                               }
+                               /* Glyph not found */
+                               if ((!(vc->vc_utf && !vc->vc_disp_ctrl) || c < 128) && !(c & ~charmask)) {
+                                   /* In legacy mode use the glyph we get by a 1:1 mapping.
+                                      This would make absolutely no sense with Unicode in mind,
+                                      but do this for ASCII characters since a font may lack
+                                      Unicode mapping info and we don't want to end up with
+                                      having question marks only. */
+                                   tc = c;
+                               } else {
+                                   /* Display U+FFFD. If it's not found, display an inverse question mark. */
+                                   tc = conv_uni_to_pc(vc, 0xfffd);
+                                   if (tc < 0) {
+                                       inverse = 1;
+                                       tc = conv_uni_to_pc(vc, '?');
+                                       if (tc < 0)
+                                               tc = '?';
+                                   }
+                               }
+                       }
+                       if (!inverse) {
+                               vc_attr = vc->vc_attr;
+                       } else {
+                               /*
+                                * Invert vc_attr: swap the foreground and
+                                * background colour fields.  With a 512-glyph
+                                * font (hi_font_mask == 0x100) only three
+                                * bits per colour are available, hence the
+                                * different masks.
+                                */
+                               if (!vc->vc_can_do_color) {
+                                       vc_attr = (vc->vc_attr) ^ 0x08;
+                               } else if (vc->vc_hi_font_mask == 0x100) {
+                                       vc_attr = ((vc->vc_attr) & 0x11) |
+                                                 (((vc->vc_attr) & 0xe0) >> 4) |
+                                                 (((vc->vc_attr) & 0x0e) << 4);
+                               } else {
+                                       vc_attr = ((vc->vc_attr) & 0x88) |
+                                                 (((vc->vc_attr) & 0x70) >> 4) |
+                                                 (((vc->vc_attr) & 0x07) << 4);
+                               }
+                               FLUSH
+                       }
+                       while (1) {
+                               if (vc->vc_need_wrap || vc->vc_decim)
+                                       FLUSH
+                               if (vc->vc_need_wrap) {
+                                       cr(vc);
+                                       lf(vc);
+                               }
+                               if (vc->vc_decim)
+                                       insert_char(vc, 1);
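+                               /*
+                                * Pack attribute and glyph into the 16-bit
+                                * screen word.  With a 512-glyph font,
+                                * vc_hi_font_mask names the attribute bit
+                                * that carries bit 8 of the glyph index.
+                                */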
+                               scr_writew(himask ?
+                                            ((vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
+                                            (vc_attr << 8) + tc,
+                                          (u16 *) vc->vc_pos);
+                               if (DO_UPDATE(vc) && draw_x < 0) {
+                                       draw_x = vc->vc_x;
+                                       draw_from = vc->vc_pos;
+                               }
+                               if (vc->vc_x == vc->vc_cols - 1) {
+                                       vc->vc_need_wrap = vc->vc_decawm;
+                                       draw_to = vc->vc_pos + 2;
+                               } else {
+                                       vc->vc_x++;
+                                       draw_to = (vc->vc_pos += 2);
+                               }
+                               if (!--width)
+                                       break;
+                               /* a space fills the second half of a
+                                  double-width character cell */
+                               tc = conv_uni_to_pc(vc, ' ');
+                               if (tc < 0)
+                                       tc = ' ';
+                       }
+                       notify_write(vc, c);
+                       if (inverse) {
+                               FLUSH
+                       }
+                       if (rescan) {
+                               rescan = 0;
+                               inverse = 0;
+                               width = 1;
+                               c = orig;
+                               goto rescan_last_byte;
+                       }
+                       continue;
+               }
+               FLUSH
+               do_con_trol(tty, vc, orig);
+       }
+       FLUSH
+       console_conditional_schedule();
+       release_console_sem();
+       notify_update(vc);
+       return n;
+ #undef FLUSH
+ }
+ /*
+  * This is the console switching callback.
+  *
+  * Doing console switching in a process context allows
+  * us to do the switches asynchronously (needed when we want
+  * to switch due to a keyboard interrupt).  Synchronization
+  * with other console code and prevention of re-entrancy is
+  * ensured with console_sem.
+  */
+ static void console_callback(struct work_struct *ignored)
+ {
+       acquire_console_sem();
+       if (want_console >= 0) {
+               if (want_console != fg_console &&
+                   vc_cons_allocated(want_console)) {
+                       hide_cursor(vc_cons[fg_console].d);
+                       change_console(vc_cons[want_console].d);
+                       /* we only changed when the console had already
+                          been allocated - a new console is not created
+                          in an interrupt routine */
+               }
+               want_console = -1;
+       }
+       if (do_poke_blanked_console) { /* do not unblank for a LED change */
+               do_poke_blanked_console = 0;
+               poke_blanked_console();
+       }
+       if (scrollback_delta) {
+               struct vc_data *vc = vc_cons[fg_console].d;
+               clear_selection();
+               if (vc->vc_mode == KD_TEXT)
+                       vc->vc_sw->con_scrolldelta(vc, scrollback_delta);
+               scrollback_delta = 0;
+       }
+       if (blank_timer_expired) {
+               do_blank_screen(0);
+               blank_timer_expired = 0;
+       }
+       notify_update(vc_cons[fg_console].d);
+       release_console_sem();
+ }
+ int set_console(int nr)
+ {
+       struct vc_data *vc = vc_cons[fg_console].d;
+       if (!vc_cons_allocated(nr) || vt_dont_switch ||
+               (vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) {
+               /*
+                * Console switch will fail in console_callback() or
+                * change_console() so there is no point scheduling
+                * the callback
+                *
+                * Existing set_console() users don't check the return
+                * value so this shouldn't break anything
+                */
+               return -EINVAL;
+       }
+       want_console = nr;
+       schedule_console_callback();
+       return 0;
+ }
+ struct tty_driver *console_driver;
+ #ifdef CONFIG_VT_CONSOLE
+ /**
+  * vt_kmsg_redirect() - Sets/gets the kernel message console
+  * @new:      The new virtual terminal number or -1 if the console should stay
+  *            unchanged
+  *
+  * By default, the kernel messages are always printed on the current virtual
+  * console. However, the user may modify that default with the
+  * TIOCL_SETKMSGREDIRECT ioctl call.
+  *
+  * This function sets the kernel message console to be @new. It returns the old
+  * virtual console number. The virtual terminal number 0 (both as parameter and
+  * return value) means no redirection (i.e. always printed on the currently
+  * active console).
+  *
+  * The parameter -1 means that only the current console is returned, but the
+  * value is not modified. You may use the macro vt_get_kmsg_redirect() in that
+  * case to make the code more understandable.
+  *
+  * When the kernel is compiled without CONFIG_VT_CONSOLE, this function ignores
+  * the parameter and always returns 0.
+  */
+ int vt_kmsg_redirect(int new)
+ {
+       static int kmsg_con;
+       if (new != -1)
+               return xchg(&kmsg_con, new);
+       else
+               return kmsg_con;
+ }
+ /*
+  *    Console on virtual terminal
+  *
+  * The console must be locked when we get here.
+  */
+ static void vt_console_print(struct console *co, const char *b, unsigned count)
+ {
+       struct vc_data *vc = vc_cons[fg_console].d;
+       unsigned char c;
+       static DEFINE_SPINLOCK(printing_lock);
+       const ushort *start;
+       ushort cnt = 0;
+       ushort myx;
+       int kmsg_console;
+       /* console busy or not yet initialized */
+       if (!printable)
+               return;
+       if (!spin_trylock(&printing_lock))
+               return;  /* another CPU is printing or we recursed - drop it */
+       kmsg_console = vt_get_kmsg_redirect();
+       if (kmsg_console && vc_cons_allocated(kmsg_console - 1))
+               vc = vc_cons[kmsg_console - 1].d;
+       /* read `x' only after setting currcons properly (otherwise
+          the `x' macro will read the x of the foreground console). */
+       myx = vc->vc_x;
+       if (!vc_cons_allocated(fg_console)) {
+               /* impossible */
+               /* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */
+               goto quit;
+       }
+       if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+               goto quit;
+       /* undraw cursor first */
+       if (IS_FG(vc))
+               hide_cursor(vc);
+       start = (ushort *)vc->vc_pos;
+       /* Contrived structure to try to emulate the original need_wrap
+        * behaviour; problems arise when need_wrap is set on a '\n'
+        * character. */
+       while (count--) {
+               c = *b++;
+               if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) {
+                       if (cnt > 0) {
+                               if (CON_IS_VISIBLE(vc))
+                                       vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
+                               vc->vc_x += cnt;
+                               if (vc->vc_need_wrap)
+                                       vc->vc_x--;
+                               cnt = 0;
+                       }
+                       if (c == 8) {           /* backspace */
+                               bs(vc);
+                               start = (ushort *)vc->vc_pos;
+                               myx = vc->vc_x;
+                               continue;
+                       }
+                       if (c != 13)
+                               lf(vc);
+                       cr(vc);
+                       start = (ushort *)vc->vc_pos;
+                       myx = vc->vc_x;
+                       if (c == 10 || c == 13)
+                               continue;
+               }
+               scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
+               notify_write(vc, c);
+               cnt++;
+               if (myx == vc->vc_cols - 1) {
+                       vc->vc_need_wrap = 1;
+                       continue;
+               }
+               vc->vc_pos += 2;
+               myx++;
+       }
+       if (cnt > 0) {
+               if (CON_IS_VISIBLE(vc))
+                       vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
+               vc->vc_x += cnt;
+               if (vc->vc_x == vc->vc_cols) {
+                       vc->vc_x--;
+                       vc->vc_need_wrap = 1;
+               }
+       }
+       set_cursor(vc);
+       notify_update(vc);
+ quit:
+       spin_unlock(&printing_lock);
+ }
+ static struct tty_driver *vt_console_device(struct console *c, int *index)
+ {
+       *index = c->index ? c->index-1 : fg_console;
+       return console_driver;
+ }
+ static struct console vt_console_driver = {
+       .name           = "tty",
+       .write          = vt_console_print,
+       .device         = vt_console_device,
+       .unblank        = unblank_screen,
+       .flags          = CON_PRINTBUFFER,
+       .index          = -1,
+ };
+ #endif
+ /*
+  *    Handling of Linux-specific VC ioctls
+  */
+ /*
+  * Generally a bit racy with respect to console_sem.
+  *
+  * There are some functions which don't need it.
+  *
+  * There are some functions which can sleep for arbitrary periods
+  * (paste_selection) but we don't need the lock there anyway.
+  *
+  * set_selection has locking, and definitely needs it
+  */
+ int tioclinux(struct tty_struct *tty, unsigned long arg)
+ {
+       char type, data;
+       char __user *p = (char __user *)arg;
+       int lines;
+       int ret;
+       if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (get_user(type, p))
+               return -EFAULT;
+       ret = 0;
+       switch (type) {
+               case TIOCL_SETSEL:
+                       acquire_console_sem();
+                       ret = set_selection((struct tiocl_selection __user *)(p+1), tty);
+                       release_console_sem();
+                       break;
+               case TIOCL_PASTESEL:
+                       ret = paste_selection(tty);
+                       break;
+               case TIOCL_UNBLANKSCREEN:
+                       acquire_console_sem();
+                       unblank_screen();
+                       release_console_sem();
+                       break;
+               case TIOCL_SELLOADLUT:
+                       ret = sel_loadlut(p);
+                       break;
+               case TIOCL_GETSHIFTSTATE:
+       /*
+        * Make it possible to react to Shift+Mousebutton.
+        * Note that 'shift_state' is an undocumented
+        * kernel-internal variable; programs not closely
+        * related to the kernel should not use this.
+        */
+                       data = shift_state;
+                       ret = __put_user(data, p);
+                       break;
+               case TIOCL_GETMOUSEREPORTING:
+                       data = mouse_reporting();
+                       ret = __put_user(data, p);
+                       break;
+               case TIOCL_SETVESABLANK:
+                       ret = set_vesa_blanking(p);
+                       break;
+               case TIOCL_GETKMSGREDIRECT:
+                       data = vt_get_kmsg_redirect();
+                       ret = __put_user(data, p);
+                       break;
+               case TIOCL_SETKMSGREDIRECT:
+                       if (!capable(CAP_SYS_ADMIN)) {
+                               ret = -EPERM;
+                       } else {
+                               if (get_user(data, p+1))
+                                       ret = -EFAULT;
+                               else
+                                       vt_kmsg_redirect(data);
+                       }
+                       break;
+               case TIOCL_GETFGCONSOLE:
+                       ret = fg_console;
+                       break;
+               case TIOCL_SCROLLCONSOLE:
+                       if (get_user(lines, (s32 __user *)(p+4))) {
+                               ret = -EFAULT;
+                       } else {
+                               scrollfront(vc_cons[fg_console].d, lines);
+                               ret = 0;
+                       }
+                       break;
+               case TIOCL_BLANKSCREEN: /* until explicitly unblanked, not only poked */
+                       acquire_console_sem();
+                       ignore_poke = 1;
+                       do_blank_screen(0);
+                       release_console_sem();
+                       break;
+               case TIOCL_BLANKEDSCREEN:
+                       ret = console_blanked;
+                       break;
+               default:
+                       ret = -EINVAL;
+                       break;
+       }
+       return ret;
+ }
+ /*
+  * /dev/ttyN handling
+  */
+ static int con_write(struct tty_struct *tty, const unsigned char *buf, int count)
+ {
+       int     retval;
+       retval = do_con_write(tty, buf, count);
+       con_flush_chars(tty);
+       return retval;
+ }
+ static int con_put_char(struct tty_struct *tty, unsigned char ch)
+ {
+       if (in_interrupt())
+               return 0;       /* n_r3964 calls put_char() from interrupt context */
+       return do_con_write(tty, &ch, 1);
+ }
+ static int con_write_room(struct tty_struct *tty)
+ {
+       if (tty->stopped)
+               return 0;
+       return 32768;           /* No limit, really; we're not buffering */
+ }
+ static int con_chars_in_buffer(struct tty_struct *tty)
+ {
+       return 0;               /* we're not buffering */
+ }
+ /*
+  * con_throttle and con_unthrottle are only used for
+  * paste_selection(), which has to stuff in a large number of
+  * characters...
+  */
+ static void con_throttle(struct tty_struct *tty)
+ {
+ }
+ static void con_unthrottle(struct tty_struct *tty)
+ {
+       struct vc_data *vc = tty->driver_data;
+       wake_up_interruptible(&vc->paste_wait);
+ }
+ /*
+  * Turn the Scroll-Lock LED on when the tty is stopped
+  */
+ static void con_stop(struct tty_struct *tty)
+ {
+       int console_num;
+       if (!tty)
+               return;
+       console_num = tty->index;
+       if (!vc_cons_allocated(console_num))
+               return;
+       set_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
+       set_leds();
+ }
+ /*
+  * Turn the Scroll-Lock LED off when the console is started
+  */
+ static void con_start(struct tty_struct *tty)
+ {
+       int console_num;
+       if (!tty)
+               return;
+       console_num = tty->index;
+       if (!vc_cons_allocated(console_num))
+               return;
+       clr_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
+       set_leds();
+ }
+ static void con_flush_chars(struct tty_struct *tty)
+ {
+       struct vc_data *vc;
+       if (in_interrupt())     /* from flush_to_ldisc */
+               return;
+       /* if we race with con_close(), vt may be null */
+       acquire_console_sem();
+       vc = tty->driver_data;
+       if (vc)
+               set_cursor(vc);
+       release_console_sem();
+ }
+ /*
+  * Allocate the console screen memory.
+  */
+ static int con_open(struct tty_struct *tty, struct file *filp)
+ {
+       unsigned int currcons = tty->index;
+       int ret = 0;
+       acquire_console_sem();
+       if (tty->driver_data == NULL) {
+               ret = vc_allocate(currcons);
+               if (ret == 0) {
+                       struct vc_data *vc = vc_cons[currcons].d;
+                       /* Still being freed */
+                       if (vc->port.tty) {
+                               release_console_sem();
+                               return -ERESTARTSYS;
+                       }
+                       tty->driver_data = vc;
+                       vc->port.tty = tty;
+                       if (!tty->winsize.ws_row && !tty->winsize.ws_col) {
+                               tty->winsize.ws_row = vc_cons[currcons].d->vc_rows;
+                               tty->winsize.ws_col = vc_cons[currcons].d->vc_cols;
+                       }
+                       if (vc->vc_utf)
+                               tty->termios->c_iflag |= IUTF8;
+                       else
+                               tty->termios->c_iflag &= ~IUTF8;
+                       release_console_sem();
+                       return ret;
+               }
+       }
+       release_console_sem();
+       return ret;
+ }
+ static void con_close(struct tty_struct *tty, struct file *filp)
+ {
+       /* Nothing to do - we defer to shutdown */
+ }
+ static void con_shutdown(struct tty_struct *tty)
+ {
+       struct vc_data *vc = tty->driver_data;
+       BUG_ON(vc == NULL);
+       acquire_console_sem();
+       vc->port.tty = NULL;
+       release_console_sem();
+       tty_shutdown(tty);
+ }
+ static int default_italic_color    = 2; /* green (ANSI) */
+ static int default_underline_color = 3; /* cyan (ANSI) */
+ module_param_named(italic, default_italic_color, int, S_IRUGO | S_IWUSR);
+ module_param_named(underline, default_underline_color, int, S_IRUGO | S_IWUSR);
+ static void vc_init(struct vc_data *vc, unsigned int rows,
+                   unsigned int cols, int do_clear)
+ {
+       int j, k;
+       vc->vc_cols = cols;
+       vc->vc_rows = rows;
+       vc->vc_size_row = cols << 1;
+       vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;
+       set_origin(vc);
+       vc->vc_pos = vc->vc_origin;
+       reset_vc(vc);
+       for (j = k = 0; j < 16; j++) {
+               vc->vc_palette[k++] = default_red[j];
+               vc->vc_palette[k++] = default_grn[j];
+               vc->vc_palette[k++] = default_blu[j];
+       }
+       vc->vc_def_color       = 0x07;   /* white */
+       vc->vc_ulcolor         = default_underline_color;
+       vc->vc_itcolor         = default_italic_color;
+       vc->vc_halfcolor       = 0x08;   /* grey */
+       init_waitqueue_head(&vc->paste_wait);
+       reset_terminal(vc, do_clear);
+ }
+ /*
+  * This routine initializes console interrupts, and does nothing
+  * else. If you want the screen to clear, call tty_write with
+  * the appropriate escape-sequence.
+  */
+ static int __init con_init(void)
+ {
+       const char *display_desc = NULL;
+       struct vc_data *vc;
+       unsigned int currcons = 0, i;
+       acquire_console_sem();
+       if (conswitchp)
+               display_desc = conswitchp->con_startup();
+       if (!display_desc) {
+               fg_console = 0;
+               release_console_sem();
+               return 0;
+       }
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               struct con_driver *con_driver = &registered_con_driver[i];
+               if (con_driver->con == NULL) {
+                       con_driver->con = conswitchp;
+                       con_driver->desc = display_desc;
+                       con_driver->flag = CON_DRIVER_FLAG_INIT;
+                       con_driver->first = 0;
+                       con_driver->last = MAX_NR_CONSOLES - 1;
+                       break;
+               }
+       }
+       for (i = 0; i < MAX_NR_CONSOLES; i++)
+               con_driver_map[i] = conswitchp;
+       if (blankinterval) {
+               blank_state = blank_normal_wait;
+               mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+       }
+       for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
+               vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT);
+               INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
+               tty_port_init(&vc->port);
+               visual_init(vc, currcons, 1);
+               vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT);
+               vc_init(vc, vc->vc_rows, vc->vc_cols,
+                       currcons || !vc->vc_sw->con_save_screen);
+       }
+       currcons = fg_console = 0;
+       master_display_fg = vc = vc_cons[currcons].d;
+       set_origin(vc);
+       save_screen(vc);
+       gotoxy(vc, vc->vc_x, vc->vc_y);
+       csi_J(vc, 0);
+       update_screen(vc);
+       printk("Console: %s %s %dx%d",
+               vc->vc_can_do_color ? "colour" : "mono",
+               display_desc, vc->vc_cols, vc->vc_rows);
+       printable = 1;
+       printk("\n");
+       release_console_sem();
+ #ifdef CONFIG_VT_CONSOLE
+       register_console(&vt_console_driver);
+ #endif
+       return 0;
+ }
+ console_initcall(con_init);
+ static const struct tty_operations con_ops = {
+       .open = con_open,
+       .close = con_close,
+       .write = con_write,
+       .write_room = con_write_room,
+       .put_char = con_put_char,
+       .flush_chars = con_flush_chars,
+       .chars_in_buffer = con_chars_in_buffer,
+       .ioctl = vt_ioctl,
+ #ifdef CONFIG_COMPAT
+       .compat_ioctl = vt_compat_ioctl,
+ #endif
+       .stop = con_stop,
+       .start = con_start,
+       .throttle = con_throttle,
+       .unthrottle = con_unthrottle,
+       .resize = vt_resize,
+       .shutdown = con_shutdown
+ };
+ static struct cdev vc0_cdev;
+ int __init vty_init(const struct file_operations *console_fops)
+ {
+       cdev_init(&vc0_cdev, console_fops);
+       if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) ||
+           register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
+               panic("Couldn't register /dev/tty0 driver\n");
+       device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+       vcs_init();
+       console_driver = alloc_tty_driver(MAX_NR_CONSOLES);
+       if (!console_driver)
+               panic("Couldn't allocate console driver\n");
+       console_driver->owner = THIS_MODULE;
+       console_driver->name = "tty";
+       console_driver->name_base = 1;
+       console_driver->major = TTY_MAJOR;
+       console_driver->minor_start = 1;
+       console_driver->type = TTY_DRIVER_TYPE_CONSOLE;
+       console_driver->init_termios = tty_std_termios;
+       if (default_utf8)
+               console_driver->init_termios.c_iflag |= IUTF8;
+       console_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
+       tty_set_operations(console_driver, &con_ops);
+       if (tty_register_driver(console_driver))
+               panic("Couldn't register console driver\n");
+       kbd_init();
+       console_map_init();
+ #ifdef CONFIG_MDA_CONSOLE
+       mda_console_init();
+ #endif
+       return 0;
+ }
+ #ifndef VT_SINGLE_DRIVER
+ static struct class *vtconsole_class;
+ static int bind_con_driver(const struct consw *csw, int first, int last,
+                          int deflt)
+ {
+       struct module *owner = csw->owner;
+       const char *desc = NULL;
+       struct con_driver *con_driver;
+       int i, j = -1, k = -1, retval = -ENODEV;
+       if (!try_module_get(owner))
+               return -ENODEV;
+       acquire_console_sem();
+       /* check if driver is registered */
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               con_driver = &registered_con_driver[i];
+               if (con_driver->con == csw) {
+                       desc = con_driver->desc;
+                       retval = 0;
+                       break;
+               }
+       }
+       if (retval)
+               goto err;
+       if (!(con_driver->flag & CON_DRIVER_FLAG_INIT)) {
+               csw->con_startup();
+               con_driver->flag |= CON_DRIVER_FLAG_INIT;
+       }
+       if (deflt) {
+               if (conswitchp)
+                       module_put(conswitchp->owner);
+               __module_get(owner);
+               conswitchp = csw;
+       }
+       first = max(first, con_driver->first);
+       last = min(last, con_driver->last);
+       for (i = first; i <= last; i++) {
+               int old_was_color;
+               struct vc_data *vc = vc_cons[i].d;
+               if (con_driver_map[i])
+                       module_put(con_driver_map[i]->owner);
+               __module_get(owner);
+               con_driver_map[i] = csw;
+               if (!vc || !vc->vc_sw)
+                       continue;
+               j = i;
+               if (CON_IS_VISIBLE(vc)) {
+                       k = i;
+                       save_screen(vc);
+               }
+               old_was_color = vc->vc_can_do_color;
+               vc->vc_sw->con_deinit(vc);
+               vc->vc_origin = (unsigned long)vc->vc_screenbuf;
+               visual_init(vc, i, 0);
+               set_origin(vc);
+               update_attr(vc);
+               /* If the console changed between mono <-> color, then
+                * the attributes in the screenbuf will be wrong.  The
+                * following resets all attributes to something sane.
+                */
+               if (old_was_color != vc->vc_can_do_color)
+                       clear_buffer_attributes(vc);
+       }
+       printk("Console: switching ");
+       if (!deflt)
+               printk("consoles %d-%d ", first+1, last+1);
+       if (j >= 0) {
+               struct vc_data *vc = vc_cons[j].d;
+               printk("to %s %s %dx%d\n",
+                      vc->vc_can_do_color ? "colour" : "mono",
+                      desc, vc->vc_cols, vc->vc_rows);
+               if (k >= 0) {
+                       vc = vc_cons[k].d;
+                       update_screen(vc);
+               }
+       } else
+               printk("to %s\n", desc);
+       retval = 0;
+ err:
+       release_console_sem();
+       module_put(owner);
+       return retval;
+ }
+ #ifdef CONFIG_VT_HW_CONSOLE_BINDING
+ static int con_is_graphics(const struct consw *csw, int first, int last)
+ {
+       int i, retval = 0;
+       for (i = first; i <= last; i++) {
+               struct vc_data *vc = vc_cons[i].d;
+               if (vc && vc->vc_mode == KD_GRAPHICS) {
+                       retval = 1;
+                       break;
+               }
+       }
+       return retval;
+ }
+ /**
+  * unbind_con_driver - unbind a console driver
+  * @csw: pointer to console driver to unregister
+  * @first: first in range of consoles that @csw should be unbound from
+  * @last: last in range of consoles that @csw should be unbound from
+  * @deflt: should next bound console driver be default after @csw is unbound?
+  *
+  * To unbind a driver from all possible consoles, pass 0 as @first and
+  * %MAX_NR_CONSOLES as @last.
+  *
+  * @deflt controls whether the console that ends up replacing @csw should be
+  * the default console.
+  *
+  * RETURNS:
+  * -ENODEV if @csw isn't a registered console driver or can't be unregistered
+  * or 0 on success.
+  */
+ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
+ {
+       struct module *owner = csw->owner;
+       const struct consw *defcsw = NULL;
+       struct con_driver *con_driver = NULL, *con_back = NULL;
+       int i, retval = -ENODEV;
+       if (!try_module_get(owner))
+               return -ENODEV;
+       acquire_console_sem();
+       /* check if driver is registered and if it is unbindable */
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               con_driver = &registered_con_driver[i];
+               if (con_driver->con == csw &&
+                   con_driver->flag & CON_DRIVER_FLAG_MODULE) {
+                       retval = 0;
+                       break;
+               }
+       }
+       if (retval) {
+               release_console_sem();
+               goto err;
+       }
+       retval = -ENODEV;
+       /* check if backup driver exists */
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               con_back = &registered_con_driver[i];
+               if (con_back->con &&
+                   !(con_back->flag & CON_DRIVER_FLAG_MODULE)) {
+                       defcsw = con_back->con;
+                       retval = 0;
+                       break;
+               }
+       }
+       if (retval) {
+               release_console_sem();
+               goto err;
+       }
+       if (!con_is_bound(csw)) {
+               release_console_sem();
+               goto err;
+       }
+       first = max(first, con_driver->first);
+       last = min(last, con_driver->last);
+       for (i = first; i <= last; i++) {
+               if (con_driver_map[i] == csw) {
+                       module_put(csw->owner);
+                       con_driver_map[i] = NULL;
+               }
+       }
+       if (!con_is_bound(defcsw)) {
+               const struct consw *defconsw = conswitchp;
+               defcsw->con_startup();
+               con_back->flag |= CON_DRIVER_FLAG_INIT;
+               /*
+                * vgacon may change the default driver to point
+                * to dummycon, we restore it here...
+                */
+               conswitchp = defconsw;
+       }
+       if (!con_is_bound(csw))
+               con_driver->flag &= ~CON_DRIVER_FLAG_INIT;
+       release_console_sem();
+       /* ignore return value, binding should not fail */
+       bind_con_driver(defcsw, first, last, deflt);
+ err:
+       module_put(owner);
+       return retval;
+ }
+ EXPORT_SYMBOL(unbind_con_driver);
+ static int vt_bind(struct con_driver *con)
+ {
+       const struct consw *defcsw = NULL, *csw = NULL;
+       int i, more = 1, first = -1, last = -1, deflt = 0;
+       if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
+           con_is_graphics(con->con, con->first, con->last))
+               goto err;
+       csw = con->con;
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               struct con_driver *con = &registered_con_driver[i];
+               if (con->con && !(con->flag & CON_DRIVER_FLAG_MODULE)) {
+                       defcsw = con->con;
+                       break;
+               }
+       }
+       if (!defcsw)
+               goto err;
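+       /*
+        * Bind csw to each maximal contiguous run of consoles in
+        * [con->first, con->last] that is currently handled by the
+        * default driver; deflt is set when a run covers every console.
+        */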
+       while (more) {
+               more = 0;
+               for (i = con->first; i <= con->last; i++) {
+                       if (con_driver_map[i] == defcsw) {
+                               if (first == -1)
+                                       first = i;
+                               last = i;
+                               more = 1;
+                       } else if (first != -1)
+                               break;
+               }
+               if (first == 0 && last == MAX_NR_CONSOLES - 1)
+                       deflt = 1;
+               if (first != -1)
+                       bind_con_driver(csw, first, last, deflt);
+               first = -1;
+               last = -1;
+               deflt = 0;
+       }
+ err:
+       return 0;
+ }
+ static int vt_unbind(struct con_driver *con)
+ {
+       const struct consw *csw = NULL;
+       int i, more = 1, first = -1, last = -1, deflt = 0;
+       if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
+           con_is_graphics(con->con, con->first, con->last))
+               goto err;
+       csw = con->con;
+       while (more) {
+               more = 0;
+               for (i = con->first; i <= con->last; i++) {
+                       if (con_driver_map[i] == csw) {
+                               if (first == -1)
+                                       first = i;
+                               last = i;
+                               more = 1;
+                       } else if (first != -1)
+                               break;
+               }
+               if (first == 0 && last == MAX_NR_CONSOLES - 1)
+                       deflt = 1;
+               if (first != -1)
+                       unbind_con_driver(csw, first, last, deflt);
+               first = -1;
+               last = -1;
+               deflt = 0;
+       }
+ err:
+       return 0;
+ }
+ #else
+ static inline int vt_bind(struct con_driver *con)
+ {
+       return 0;
+ }
+ static inline int vt_unbind(struct con_driver *con)
+ {
+       return 0;
+ }
+ #endif /* CONFIG_VT_HW_CONSOLE_BINDING */
+ static ssize_t store_bind(struct device *dev, struct device_attribute *attr,
+                         const char *buf, size_t count)
+ {
+       struct con_driver *con = dev_get_drvdata(dev);
+       int bind = simple_strtoul(buf, NULL, 0);
+       if (bind)
+               vt_bind(con);
+       else
+               vt_unbind(con);
+       return count;
+ }
+ static ssize_t show_bind(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+ {
+       struct con_driver *con = dev_get_drvdata(dev);
+       int bind = con_is_bound(con->con);
+       return snprintf(buf, PAGE_SIZE, "%i\n", bind);
+ }
+ static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+ {
+       struct con_driver *con = dev_get_drvdata(dev);
+       return snprintf(buf, PAGE_SIZE, "%s %s\n",
+                       (con->flag & CON_DRIVER_FLAG_MODULE) ? "(M)" : "(S)",
+                        con->desc);
+ }
+ static struct device_attribute device_attrs[] = {
+       __ATTR(bind, S_IRUGO|S_IWUSR, show_bind, store_bind),
+       __ATTR(name, S_IRUGO, show_name, NULL),
+ };
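+ /*
+  * These attributes appear under /sys/class/vtconsole/vtconN/. An
+  * illustrative shell session (the device number and the output are
+  * assumptions, shown for a modular driver bound on vtcon1):
+  *
+  *    # cat /sys/class/vtconsole/vtcon1/name
+  *    (M) frame buffer device
+  *    # echo 0 > /sys/class/vtconsole/vtcon1/bind
+  */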
+ static int vtconsole_init_device(struct con_driver *con)
+ {
+       int i;
+       int error = 0;
+       con->flag |= CON_DRIVER_FLAG_ATTR;
+       dev_set_drvdata(con->dev, con);
+       for (i = 0; i < ARRAY_SIZE(device_attrs); i++) {
+               error = device_create_file(con->dev, &device_attrs[i]);
+               if (error)
+                       break;
+       }
+       if (error) {
+               while (--i >= 0)
+                       device_remove_file(con->dev, &device_attrs[i]);
+               con->flag &= ~CON_DRIVER_FLAG_ATTR;
+       }
+       return error;
+ }
+ static void vtconsole_deinit_device(struct con_driver *con)
+ {
+       int i;
+       if (con->flag & CON_DRIVER_FLAG_ATTR) {
+               for (i = 0; i < ARRAY_SIZE(device_attrs); i++)
+                       device_remove_file(con->dev, &device_attrs[i]);
+               con->flag &= ~CON_DRIVER_FLAG_ATTR;
+       }
+ }
+ /**
+  * con_is_bound - checks if driver is bound to the console
+  * @csw: console driver
+  *
+  * RETURNS: zero if unbound, nonzero if bound
+  *
+  * Drivers can call this; if it returns zero, they should release
+  * all resources allocated in con_startup()
+  */
+ int con_is_bound(const struct consw *csw)
+ {
+       int i, bound = 0;
+       for (i = 0; i < MAX_NR_CONSOLES; i++) {
+               if (con_driver_map[i] == csw) {
+                       bound = 1;
+                       break;
+               }
+       }
+       return bound;
+ }
+ EXPORT_SYMBOL(con_is_bound);
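+ /*
+  * Sketch of the rule above (illustrative, not part of this patch;
+  * "mycon_sw" and mycon_free_resources() are made-up names):
+  *
+  *    static void mycon_cleanup(void)
+  *    {
+  *            if (!con_is_bound(&mycon_sw))
+  *                    mycon_free_resources();
+  *    }
+  */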
+ /**
+  * con_debug_enter - prepare the console for the kernel debugger
+  * @vc: virtual console
+  *
+  * Called when the console is taken over by the kernel debugger, this
+  * function needs to save the current console state, then put the console
+  * into a state suitable for the kernel debugger.
+  *
+  * RETURNS:
+  * Zero on success, nonzero if a failure occurred when trying to prepare
+  * the console for the debugger.
+  */
+ int con_debug_enter(struct vc_data *vc)
+ {
+       int ret = 0;
+       saved_fg_console = fg_console;
+       saved_last_console = last_console;
+       saved_want_console = want_console;
+       saved_vc_mode = vc->vc_mode;
+       saved_console_blanked = console_blanked;
+       vc->vc_mode = KD_TEXT;
+       console_blanked = 0;
+       if (vc->vc_sw->con_debug_enter)
+               ret = vc->vc_sw->con_debug_enter(vc);
+ #ifdef CONFIG_KGDB_KDB
+       /* Set the initial LINES variable if it is not already set */
+       if (vc->vc_rows < 999) {
+               int linecount;
+               char lns[4];
+               const char *setargs[3] = {
+                       "set",
+                       "LINES",
+                       lns,
+               };
+               if (kdbgetintenv(setargs[0], &linecount)) {
+                       snprintf(lns, 4, "%i", vc->vc_rows);
+                       kdb_set(2, setargs);
+               }
+       }
+ #endif /* CONFIG_KGDB_KDB */
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(con_debug_enter);
+ /**
+  * con_debug_leave - restore console state
+  *
+  * Restore the console state to what it was before the kernel debugger
+  * was invoked.
+  *
+  * RETURNS:
+  * Zero on success, nonzero if a failure occurred when trying to restore
+  * the console.
+  */
+ int con_debug_leave(void)
+ {
+       struct vc_data *vc;
+       int ret = 0;
+       fg_console = saved_fg_console;
+       last_console = saved_last_console;
+       want_console = saved_want_console;
+       console_blanked = saved_console_blanked;
+       vc_cons[fg_console].d->vc_mode = saved_vc_mode;
+       vc = vc_cons[fg_console].d;
+       if (vc->vc_sw->con_debug_leave)
+               ret = vc->vc_sw->con_debug_leave(vc);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(con_debug_leave);
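+ /*
+  * Pairing sketch (illustrative): a debugger core brackets its screen
+  * I/O with these calls on the foreground console, roughly as kdb does:
+  *
+  *    con_debug_enter(vc_cons[fg_console].d);
+  *    ...debugger owns the screen here...
+  *    con_debug_leave();
+  */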
+ /**
+  * register_con_driver - register console driver to console layer
+  * @csw: console driver
+  * @first: the first console to take over, minimum value is 0
+  * @last: the last console to take over, maximum value is MAX_NR_CONSOLES - 1
+  *
+  * DESCRIPTION: This function registers a console driver which can later
+  * bind to a range of consoles specified by @first and @last. It will
+  * also initialize the console driver by calling con_startup().
+  */
+ int register_con_driver(const struct consw *csw, int first, int last)
+ {
+       struct module *owner = csw->owner;
+       struct con_driver *con_driver;
+       const char *desc;
+       int i, retval = 0;
+       if (!try_module_get(owner))
+               return -ENODEV;
+       acquire_console_sem();
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               con_driver = &registered_con_driver[i];
+               /* already registered */
+               if (con_driver->con == csw)
+                       retval = -EINVAL;
+       }
+       if (retval)
+               goto err;
+       desc = csw->con_startup();
+       if (!desc) {
+               retval = -ENODEV;
+               goto err;
+       }
+       retval = -EINVAL;
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               con_driver = &registered_con_driver[i];
+               if (con_driver->con == NULL) {
+                       con_driver->con = csw;
+                       con_driver->desc = desc;
+                       con_driver->node = i;
+                       con_driver->flag = CON_DRIVER_FLAG_MODULE |
+                                          CON_DRIVER_FLAG_INIT;
+                       con_driver->first = first;
+                       con_driver->last = last;
+                       retval = 0;
+                       break;
+               }
+       }
+       if (retval)
+               goto err;
+       con_driver->dev = device_create(vtconsole_class, NULL,
+                                               MKDEV(0, con_driver->node),
+                                               NULL, "vtcon%i",
+                                               con_driver->node);
+       if (IS_ERR(con_driver->dev)) {
+               printk(KERN_WARNING "Unable to create device for %s; "
+                      "errno = %ld\n", con_driver->desc,
+                      PTR_ERR(con_driver->dev));
+               con_driver->dev = NULL;
+       } else {
+               vtconsole_init_device(con_driver);
+       }
+ err:
+       release_console_sem();
+       module_put(owner);
+       return retval;
+ }
+ EXPORT_SYMBOL(register_con_driver);
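+ /*
+  * Registration sketch (illustrative, not part of this patch; "mycon_sw"
+  * is a made-up consw). A modular driver registers a console range here
+  * and binds separately, or uses take_over_console() further below:
+  *
+  *    static int __init mycon_init(void)
+  *    {
+  *            return register_con_driver(&mycon_sw, 0, MAX_NR_CONSOLES - 1);
+  *    }
+  */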
+ /**
+  * unregister_con_driver - unregister console driver from console layer
+  * @csw: console driver
+  *
+  * DESCRIPTION: All drivers that register with the console layer must
+  * call this function upon exit, or if the console driver is in a state
+  * where it won't be able to handle console services, such as the
+  * framebuffer console when no framebuffer drivers are loaded.
+  *
+  * The driver must be unbound first, prior to unregistration.
+  */
+ int unregister_con_driver(const struct consw *csw)
+ {
+       int i, retval = -ENODEV;
+       acquire_console_sem();
+       /* cannot unregister a bound driver */
+       if (con_is_bound(csw))
+               goto err;
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               struct con_driver *con_driver = &registered_con_driver[i];
+               if (con_driver->con == csw &&
+                   con_driver->flag & CON_DRIVER_FLAG_MODULE) {
+                       vtconsole_deinit_device(con_driver);
+                       device_destroy(vtconsole_class,
+                                      MKDEV(0, con_driver->node));
+                       con_driver->con = NULL;
+                       con_driver->desc = NULL;
+                       con_driver->dev = NULL;
+                       con_driver->node = 0;
+                       con_driver->flag = 0;
+                       con_driver->first = 0;
+                       con_driver->last = 0;
+                       retval = 0;
+                       break;
+               }
+       }
+ err:
+       release_console_sem();
+       return retval;
+ }
+ EXPORT_SYMBOL(unregister_con_driver);
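+ /*
+  * Matching teardown sketch (illustrative; "mycon_sw" is made up):
+  * unbind first, which hands the range back to the system driver,
+  * then unregister:
+  *
+  *    static void __exit mycon_exit(void)
+  *    {
+  *            unbind_con_driver(&mycon_sw, 0, MAX_NR_CONSOLES - 1, 0);
+  *            unregister_con_driver(&mycon_sw);
+  *    }
+  */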
+ /*
+  *    If we support more console drivers, this function is used
+  *    when a driver wants to take over some existing consoles
+  *    and become the default driver for newly opened ones.
+  *
+  *    take_over_console is basically a register followed by a bind
+  */
+ int take_over_console(const struct consw *csw, int first, int last, int deflt)
+ {
+       int err;
+       err = register_con_driver(csw, first, last);
+       if (!err)
+               bind_con_driver(csw, first, last, deflt);
+       return err;
+ }
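+ /*
+  * For example, fbcon takes over its console range this way (argument
+  * names as in drivers/video/console/fbcon.c):
+  *
+  *    take_over_console(&fb_con, first_fb_vc, last_fb_vc, fbcon_is_default);
+  */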
+ /*
+  * give_up_console is a wrapper around unregister_con_driver. It will only
+  * work if the driver is fully unbound.
+  */
+ void give_up_console(const struct consw *csw)
+ {
+       unregister_con_driver(csw);
+ }
+ static int __init vtconsole_class_init(void)
+ {
+       int i;
+       vtconsole_class = class_create(THIS_MODULE, "vtconsole");
+       if (IS_ERR(vtconsole_class)) {
+               printk(KERN_WARNING "Unable to create vt console class; "
+                      "errno = %ld\n", PTR_ERR(vtconsole_class));
+               vtconsole_class = NULL;
+       }
+       /* Add system drivers to sysfs */
+       for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+               struct con_driver *con = &registered_con_driver[i];
+               if (con->con && !con->dev) {
+                       con->dev = device_create(vtconsole_class, NULL,
+                                                        MKDEV(0, con->node),
+                                                        NULL, "vtcon%i",
+                                                        con->node);
+                       if (IS_ERR(con->dev)) {
+                               printk(KERN_WARNING "Unable to create "
+                                      "device for %s; errno = %ld\n",
+                                      con->desc, PTR_ERR(con->dev));
+                               con->dev = NULL;
+                       } else {
+                               vtconsole_init_device(con);
+                       }
+               }
+       }
+       return 0;
+ }
+ postcore_initcall(vtconsole_class_init);
+ #endif
+ /*
+  *    Screen blanking
+  */
+ static int set_vesa_blanking(char __user *p)
+ {
+       unsigned int mode;
+       if (get_user(mode, p + 1))
+               return -EFAULT;
+       vesa_blank_mode = (mode < 4) ? mode : 0;
+       return 0;
+ }
+ void do_blank_screen(int entering_gfx)
+ {
+       struct vc_data *vc = vc_cons[fg_console].d;
+       int i;
+       WARN_CONSOLE_UNLOCKED();
+       if (console_blanked) {
+               if (blank_state == blank_vesa_wait) {
+                       blank_state = blank_off;
+                       vc->vc_sw->con_blank(vc, vesa_blank_mode + 1, 0);
+               }
+               return;
+       }
+       /* entering graphics mode? */
+       if (entering_gfx) {
+               hide_cursor(vc);
+               save_screen(vc);
+               vc->vc_sw->con_blank(vc, -1, 1);
+               console_blanked = fg_console + 1;
+               blank_state = blank_off;
+               set_origin(vc);
+               return;
+       }
+       if (blank_state != blank_normal_wait)
+               return;
+       blank_state = blank_off;
+       /* don't blank graphics */
+       if (vc->vc_mode != KD_TEXT) {
+               console_blanked = fg_console + 1;
+               return;
+       }
+       hide_cursor(vc);
+       del_timer_sync(&console_timer);
+       blank_timer_expired = 0;
+       save_screen(vc);
+       /* In case we need to reset origin, blanking hook returns 1 */
+       i = vc->vc_sw->con_blank(vc, vesa_off_interval ? 1 : (vesa_blank_mode + 1), 0);
+       console_blanked = fg_console + 1;
+       if (i)
+               set_origin(vc);
+       if (console_blank_hook && console_blank_hook(1))
+               return;
+       if (vesa_off_interval && vesa_blank_mode) {
+               blank_state = blank_vesa_wait;
+               mod_timer(&console_timer, jiffies + vesa_off_interval);
+       }
+       vt_event_post(VT_EVENT_BLANK, vc->vc_num, vc->vc_num);
+ }
+ EXPORT_SYMBOL(do_blank_screen);
+ /*
+  * Called by timer as well as from vt_console_driver
+  */
+ void do_unblank_screen(int leaving_gfx)
+ {
+       struct vc_data *vc;
+       /* This should now always be called from a "sane" (read: can schedule)
+        * context for the sake of the low level drivers, except in the special
+        * case of oops_in_progress
+        */
+       if (!oops_in_progress)
+               might_sleep();
+       WARN_CONSOLE_UNLOCKED();
+       ignore_poke = 0;
+       if (!console_blanked)
+               return;
+       if (!vc_cons_allocated(fg_console)) {
+               /* impossible */
+               printk("unblank_screen: tty %d not allocated ??\n", fg_console+1);
+               return;
+       }
+       vc = vc_cons[fg_console].d;
+       /* Try to unblank in oops case too */
+       if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+               return; /* but leave console_blanked != 0 */
+       if (blankinterval) {
+               mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+               blank_state = blank_normal_wait;
+       }
+       console_blanked = 0;
+       if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || vt_force_oops_output(vc))
+               /* Low-level driver cannot restore -> do it ourselves */
+               update_screen(vc);
+       if (console_blank_hook)
+               console_blank_hook(0);
+       set_palette(vc);
+       set_cursor(vc);
+       vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num);
+ }
+ EXPORT_SYMBOL(do_unblank_screen);
+ /*
+  * This is called by the outside world to cause a forced unblank, mostly for
+  * oopses. Currently, I just call do_unblank_screen(0), but we could eventually
+  * call it with 1 as an argument and so force a mode restore... that may kill
+  * X or at least garbage the screen but would also make the Oops visible...
+  */
+ void unblank_screen(void)
+ {
+       do_unblank_screen(0);
+ }
+ /*
+  * We defer the timer blanking to a work queue so it can take the console mutex
+  * (console operations can still happen at irq time, but only from printk,
+  * which holds the console mutex). Not perfect yet, but better than no locking.
+  */
+ static void blank_screen_t(unsigned long dummy)
+ {
+       if (unlikely(!keventd_up())) {
+               mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+               return;
+       }
+       blank_timer_expired = 1;
+       schedule_work(&console_work);
+ }
+ void poke_blanked_console(void)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       /* Add this so we quickly catch whoever might call us in an
+        * unsafe context. Nowadays, unblank_screen() isn't to be called in
+        * atomic contexts and is allowed to schedule (with the special case
+        * of oops_in_progress, but that isn't of any concern for this
+        * function). --BenH.
+        */
+       might_sleep();
+       /* This isn't perfectly race-free, but a race here would be mostly
+        * harmless: at worst, we'll do a spurious blank, and that's unlikely.
+        */
+       del_timer(&console_timer);
+       blank_timer_expired = 0;
+       if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS)
+               return;
+       if (console_blanked)
+               unblank_screen();
+       else if (blankinterval) {
+               mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+               blank_state = blank_normal_wait;
+       }
+ }
+ /*
+  *    Palettes
+  */
+ static void set_palette(struct vc_data *vc)
+ {
+       WARN_CONSOLE_UNLOCKED();
+       if (vc->vc_mode != KD_GRAPHICS)
+               vc->vc_sw->con_set_palette(vc, color_table);
+ }
+ static int set_get_cmap(unsigned char __user *arg, int set)
+ {
+       int i, j, k;
+       WARN_CONSOLE_UNLOCKED();
+       for (i = 0; i < 16; i++) {
+               if (set) {
+                       get_user(default_red[i], arg++);
+                       get_user(default_grn[i], arg++);
+                       get_user(default_blu[i], arg++);
+               } else {
+                       put_user(default_red[i], arg++);
+                       put_user(default_grn[i], arg++);
+                       put_user(default_blu[i], arg++);
+               }
+       }
+       if (set) {
+               for (i = 0; i < MAX_NR_CONSOLES; i++) {
+                       if (!vc_cons_allocated(i))
+                               continue;
+                       for (j = k = 0; j < 16; j++) {
+                               vc_cons[i].d->vc_palette[k++] = default_red[j];
+                               vc_cons[i].d->vc_palette[k++] = default_grn[j];
+                               vc_cons[i].d->vc_palette[k++] = default_blu[j];
+                       }
+                       set_palette(vc_cons[i].d);
+               }
+       }
+       return 0;
+ }
+ /*
+  * Load palette into the DAC registers. arg points to a colour
+  * map, 3 bytes per colour, 16 colours, range from 0 to 255.
+  */
+ int con_set_cmap(unsigned char __user *arg)
+ {
+       int rc;
+       acquire_console_sem();
+       rc = set_get_cmap(arg, 1);
+       release_console_sem();
+       return rc;
+ }
+ int con_get_cmap(unsigned char __user *arg)
+ {
+       int rc;
+       acquire_console_sem();
+       rc = set_get_cmap(arg, 0);
+       release_console_sem();
+       return rc;
+ }
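+ /*
+  * Layout of the user buffer consumed above, as implied by
+  * set_get_cmap(): 16 colours, three bytes each, in RGB order:
+  *
+  *    unsigned char cmap[16 * 3];
+  *    cmap[3 * i + 0]: red component of colour i
+  *    cmap[3 * i + 1]: green component of colour i
+  *    cmap[3 * i + 2]: blue component of colour i
+  */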
+ void reset_palette(struct vc_data *vc)
+ {
+       int j, k;
+       for (j = k = 0; j < 16; j++) {
+               vc->vc_palette[k++] = default_red[j];
+               vc->vc_palette[k++] = default_grn[j];
+               vc->vc_palette[k++] = default_blu[j];
+       }
+       set_palette(vc);
+ }
+ /*
+  *  Font switching
+  *
+  *  Currently we only support fonts up to 32 pixels wide, at a maximum height
+  *  of 32 pixels. Userspace fontdata is stored with 32 bytes (shorts/ints,
+  *  depending on width) reserved for each character, which is rather wasteful,
+  *  but this is done in order to maintain compatibility with the EGA/VGA fonts.
+  *  It is up to the actual low-level console driver to convert the data into
+  *  its preferred format (maybe we should add a `fontoffset' field to the
+  *  `display' structure so we won't have to convert the fontdata all the time).
+  *  /Jes
+  */
+ #define max_font_size 65536
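+ /*
+  * Worked example of the sizing rule used below: an 8x16 font with 256
+  * glyphs occupies (8 + 7) / 8 * 32 * 256 = 8192 bytes in the
+  * 32-bytes-per-glyph format, comfortably below max_font_size.
+  */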
+ static int con_font_get(struct vc_data *vc, struct console_font_op *op)
+ {
+       struct console_font font;
+       int rc = -EINVAL;
+       int c;
+       if (vc->vc_mode != KD_TEXT)
+               return -EINVAL;
+       if (op->data) {
+               font.data = kmalloc(max_font_size, GFP_KERNEL);
+               if (!font.data)
+                       return -ENOMEM;
+       } else
+               font.data = NULL;
+       acquire_console_sem();
+       if (vc->vc_sw->con_font_get)
+               rc = vc->vc_sw->con_font_get(vc, &font);
+       else
+               rc = -ENOSYS;
+       release_console_sem();
+       if (rc)
+               goto out;
+       c = (font.width+7)/8 * 32 * font.charcount;
+       if (op->data && font.charcount > op->charcount)
+               rc = -ENOSPC;
+       if (!(op->flags & KD_FONT_FLAG_OLD)) {
+               if (font.width > op->width || font.height > op->height) 
+                       rc = -ENOSPC;
+       } else {
+               if (font.width != 8)
+                       rc = -EIO;
+               else if ((op->height && font.height > op->height) ||
+                        font.height > 32)
+                       rc = -ENOSPC;
+       }
+       if (rc)
+               goto out;
+       op->height = font.height;
+       op->width = font.width;
+       op->charcount = font.charcount;
+       if (op->data && copy_to_user(op->data, font.data, c))
+               rc = -EFAULT;
+ out:
+       kfree(font.data);
+       return rc;
+ }
+ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
+ {
+       struct console_font font;
+       int rc = -EINVAL;
+       int size;
+       if (vc->vc_mode != KD_TEXT)
+               return -EINVAL;
+       if (!op->data)
+               return -EINVAL;
+       if (op->charcount > 512)
+               return -EINVAL;
+       if (!op->height) {              /* Need to guess font height [compat] */
+               int h, i;
+               u8 __user *charmap = op->data;
+               u8 tmp;
+               
+               /* If this came in via the KDFONTOP ioctl, don't allow things
+                  that can be done in userland, so that we can get rid of
+                  this soon */
+               if (!(op->flags & KD_FONT_FLAG_OLD))
+                       return -EINVAL;
+               for (h = 32; h > 0; h--)
+                       for (i = 0; i < op->charcount; i++) {
+                               if (get_user(tmp, &charmap[32*i+h-1]))
+                                       return -EFAULT;
+                               if (tmp)
+                                       goto nonzero;
+                       }
+               return -EINVAL;
+       nonzero:
+               op->height = h;
+       }
+       if (op->width <= 0 || op->width > 32 || op->height > 32)
+               return -EINVAL;
+       size = (op->width+7)/8 * 32 * op->charcount;
+       if (size > max_font_size)
+               return -ENOSPC;
+       font.charcount = op->charcount;
+       font.height = op->height;
+       font.width = op->width;
+       font.data = memdup_user(op->data, size);
+       if (IS_ERR(font.data))
+               return PTR_ERR(font.data);
+       acquire_console_sem();
+       if (vc->vc_sw->con_font_set)
+               rc = vc->vc_sw->con_font_set(vc, &font, op->flags);
+       else
+               rc = -ENOSYS;
+       release_console_sem();
+       kfree(font.data);
+       return rc;
+ }
+ static int con_font_default(struct vc_data *vc, struct console_font_op *op)
+ {
+       struct console_font font = {.width = op->width, .height = op->height};
+       char name[MAX_FONT_NAME];
+       char *s = name;
+       int rc;
+       if (vc->vc_mode != KD_TEXT)
+               return -EINVAL;
+       if (!op->data)
+               s = NULL;
+       else if (strncpy_from_user(name, op->data, MAX_FONT_NAME - 1) < 0)
+               return -EFAULT;
+       else
+               name[MAX_FONT_NAME - 1] = 0;
+       acquire_console_sem();
+       if (vc->vc_sw->con_font_default)
+               rc = vc->vc_sw->con_font_default(vc, &font, s);
+       else
+               rc = -ENOSYS;
+       release_console_sem();
+       if (!rc) {
+               op->width = font.width;
+               op->height = font.height;
+       }
+       return rc;
+ }
+ static int con_font_copy(struct vc_data *vc, struct console_font_op *op)
+ {
+       int con = op->height;
+       int rc;
+       if (vc->vc_mode != KD_TEXT)
+               return -EINVAL;
+       acquire_console_sem();
+       if (!vc->vc_sw->con_font_copy)
+               rc = -ENOSYS;
+       else if (con < 0 || !vc_cons_allocated(con))
+               rc = -ENOTTY;
+       else if (con == vc->vc_num)     /* nothing to do */
+               rc = 0;
+       else
+               rc = vc->vc_sw->con_font_copy(vc, con);
+       release_console_sem();
+       return rc;
+ }
+ int con_font_op(struct vc_data *vc, struct console_font_op *op)
+ {
+       switch (op->op) {
+       case KD_FONT_OP_SET:
+               return con_font_set(vc, op);
+       case KD_FONT_OP_GET:
+               return con_font_get(vc, op);
+       case KD_FONT_OP_SET_DEFAULT:
+               return con_font_default(vc, op);
+       case KD_FONT_OP_COPY:
+               return con_font_copy(vc, op);
+       }
+       return -ENOSYS;
+ }
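+ /*
+  * Userspace reaches con_font_op() through the KDFONTOP ioctl on a vt;
+  * an illustrative sketch (not part of this patch):
+  *
+  *    struct console_font_op op = {
+  *            .op = KD_FONT_OP_GET, .width = 32, .height = 32,
+  *            .charcount = 512, .data = buf,
+  *    };
+  *    ioctl(tty_fd, KDFONTOP, &op);
+  */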
+ /*
+  *    Interface exported to selection and vcs.
+  */
+ /* used by selection */
+ u16 screen_glyph(struct vc_data *vc, int offset)
+ {
+       u16 w = scr_readw(screenpos(vc, offset, 1));
+       u16 c = w & 0xff;
+       if (w & vc->vc_hi_font_mask)
+               c |= 0x100;
+       return c;
+ }
+ EXPORT_SYMBOL_GPL(screen_glyph);
+ /* used by vcs - note the word offset */
+ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
+ {
+       return screenpos(vc, 2 * w_offset, viewed);
+ }
+ void getconsxy(struct vc_data *vc, unsigned char *p)
+ {
+       p[0] = vc->vc_x;
+       p[1] = vc->vc_y;
+ }
+ void putconsxy(struct vc_data *vc, unsigned char *p)
+ {
+       hide_cursor(vc);
+       gotoxy(vc, p[0], p[1]);
+       set_cursor(vc);
+ }
+ u16 vcs_scr_readw(struct vc_data *vc, const u16 *org)
+ {
+       if ((unsigned long)org == vc->vc_pos && softcursor_original != -1)
+               return softcursor_original;
+       return scr_readw(org);
+ }
+ void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org)
+ {
+       scr_writew(val, org);
+       if ((unsigned long)org == vc->vc_pos) {
+               softcursor_original = -1;
+               add_softcursor(vc);
+       }
+ }
+ void vcs_scr_updated(struct vc_data *vc)
+ {
+       notify_update(vc);
+ }
++#ifdef CONFIG_BOOTSPLASH
++void con_remap_def_color(struct vc_data *vc, int new_color)
++{
++       unsigned short *sbuf = screenpos(vc, 0, 1);
++       unsigned c, len = vc->vc_screenbuf_size >> 1;
++       int old_color;
++
++       if (sbuf) {
++             old_color = vc->vc_def_color << 8;
++             new_color <<= 8;
++             while (len--) {
++                     c = scr_readw(sbuf);
++                     /* swap background nibble if it matches the old default */
++                     if (((c ^ old_color) & 0xf000) == 0)
++                           scr_writew(c ^= (old_color ^ new_color) & 0xf000, sbuf);
++                     /* swap foreground nibble if it matches the old default */
++                     if (((c ^ old_color) & 0x0f00) == 0)
++                           scr_writew(c ^= (old_color ^ new_color) & 0x0f00, sbuf);
++                     sbuf++;
++             }
++             new_color >>= 8;
++       }
++       vc->vc_def_color = vc->vc_color = new_color;
++       update_attr(vc);
++}
++#endif
++
+ /*
+  *    Visible symbols for modules
+  */
+ EXPORT_SYMBOL(color_table);
+ EXPORT_SYMBOL(default_red);
+ EXPORT_SYMBOL(default_grn);
+ EXPORT_SYMBOL(default_blu);
+ EXPORT_SYMBOL(update_region);
+ EXPORT_SYMBOL(redraw_screen);
+ EXPORT_SYMBOL(vc_resize);
+ EXPORT_SYMBOL(fg_console);
+ EXPORT_SYMBOL(console_blank_hook);
+ EXPORT_SYMBOL(console_blanked);
+ EXPORT_SYMBOL(vc_cons);
+ EXPORT_SYMBOL(global_cursor_default);
+ #ifndef VT_SINGLE_DRIVER
+ EXPORT_SYMBOL(take_over_console);
+ EXPORT_SYMBOL(give_up_console);
+ #endif
diff --cc fs/ext4/ext4.h
Simple merge
diff --cc fs/ext4/inode.c
Simple merge
diff --cc fs/ext4/super.c
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/printk.c
Simple merge
diff --cc kernel/sysctl.c
Simple merge
diff --cc mm/filemap.c
Simple merge
diff --cc mm/vmscan.c
Simple merge
diff --cc mm/vmstat.c
Simple merge
Simple merge
diff --cc net/core/dev.c
Simple merge
Simple merge
diff --cc net/core/sock.c
Simple merge
diff --cc net/ipv4/tcp.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge