VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 37
--EXTRAVERSION = -rc1
++EXTRAVERSION = -rc1-git11
NAME = Flesh-Eating Bats with Fangs
# *DOCUMENTATION*
# char/ comes before serial/ etc so that the VT console is the boot-time
# default.
+ obj-y += tty/
obj-y += char/
-# gpu/ comes after char for AGP vs DRM startup
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/i810/
-obj-$(CONFIG_FB_INTEL) += video/intelfb/
-
obj-y += serial/
obj-$(CONFIG_PARPORT) += parport/
obj-y += base/ block/ misc/ mfd/
--- /dev/null
+/*
+ * Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
+ *
+ * Module Author: Heinz Mauelshagen <heinzm@redhat.com>
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * Linux 2.6 Device Mapper RAID4 and RAID5 target.
+ *
+ * Tested-by: Intel; Marcin.Labun@intel.com, krzysztof.wojcik@intel.com
+ *
+ *
+ * Supports the following ATARAID vendor solutions (and SNIA DDF):
+ *
+ * Adaptec HostRAID ASR
+ * SNIA DDF1
+ * Highpoint 37x
+ * Highpoint 45x
+ * Intel IMSM
+ * Jmicron ATARAID
+ * LSI Logic MegaRAID
+ * NVidia RAID
+ * Promise FastTrack
+ * Silicon Image Medley
+ * VIA Software RAID
+ *
+ * via the dmraid application.
+ *
+ *
+ * Features:
+ *
+ * o RAID4 with dedicated and selectable parity device
+ * o RAID5 with rotating parity (left+right, symmetric+asymmetric)
+ * o recovery of out of sync device for initial
+ * RAID set creation or after dead drive replacement
+ * o run time optimization of xor algorithm used to calculate parity
+ *
+ *
+ * Thanks to MD for:
+ * o the raid address calculation algorithm
+ * o the base of the biovec <-> page list copier.
+ *
+ *
+ * Uses region hash to keep track of how many writes are in flight to
+ * regions in order to use dirty log to keep state of regions to recover:
+ *
+ * o clean regions (those which are synchronized
+ * and don't have write io in flight)
+ * o dirty regions (those with write io in flight)
+ *
+ *
+ * On startup, any dirty regions are migrated to the
+ * 'nosync' state and are subject to recovery by the daemon.
+ *
+ * See raid_ctr() for table definition.
+ *
+ * ANALYZEME: recovery bandwidth
+ */
+
+static const char *version = "v0.2597k";
+
+#include "dm.h"
+#include "dm-memcache.h"
+#include "dm-raid45.h"
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/raid/xor.h>
+#include <linux/slab.h>
+
+#include <linux/bio.h>
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/dm-region-hash.h>
+
+
+/*
+ * Configurable parameters
+ */
+
+/* Minimum/maximum and default # of selectable stripes. */
+#define STRIPES_MIN 8
+#define STRIPES_MAX 16384
+#define STRIPES_DEFAULT 80
+
+/* Maximum and default chunk size in sectors if not set in constructor. */
+#define CHUNK_SIZE_MIN 8
+#define CHUNK_SIZE_MAX 16384
+#define CHUNK_SIZE_DEFAULT 64
+
+/* Default io size in sectors if not set in constructor. */
+#define IO_SIZE_MIN CHUNK_SIZE_MIN
+#define IO_SIZE_DEFAULT IO_SIZE_MIN
+
+/* Recover io size default in sectors. */
+#define RECOVER_IO_SIZE_MIN 64
+#define RECOVER_IO_SIZE_DEFAULT 256
+
+/* Default, minimum and maximum percentage of recover io bandwidth. */
+#define BANDWIDTH_DEFAULT 10
+#define BANDWIDTH_MIN 1
+#define BANDWIDTH_MAX 100
+
+/* # of parallel recovered regions */
+#define RECOVERY_STRIPES_MIN 1
+#define RECOVERY_STRIPES_MAX 64
+#define RECOVERY_STRIPES_DEFAULT RECOVERY_STRIPES_MIN
+/*
+ * END Configurable parameters
+ */
+
+#define TARGET "dm-raid45"
+#define DAEMON "kraid45d"
+#define DM_MSG_PREFIX TARGET
+
+#define SECTORS_PER_PAGE (PAGE_SIZE >> SECTOR_SHIFT)
+
+/* Amount/size for __xor(). */
+#define XOR_SIZE PAGE_SIZE
+
+/* Ticks to run xor_speed() test for. */
+#define XOR_SPEED_TICKS 5
+
+/* Check value in range [min, max] (inclusive); arguments are parenthesized
+   so expressions like range_ok(a + b, lo, hi) expand correctly. */
+#define range_ok(i, min, max)	((i) >= (min) && (i) <= (max))
+
+/* Structure access macros. */
+/* Derive raid_set from stripe_cache pointer. */
+#define RS(x) container_of(x, struct raid_set, sc)
+
+/* Page reference. */
+#define PAGE(stripe, p) ((stripe)->obj[p].pl->page)
+
+/* Stripe chunk reference. */
+#define CHUNK(stripe, p) ((stripe)->chunk + p)
+
+/* Bio list reference. */
+#define BL(stripe, p, rw) (stripe->chunk[p].bl + rw)
+#define BL_CHUNK(chunk, rw) (chunk->bl + rw)
+
+/* Page list reference. */
+#define PL(stripe, p) (stripe->obj[p].pl)
+/* END: structure access macros. */
+
+/* Factor out to dm-bio-list.h */
+/*
+ * Push a bio onto the head of a bio list (LIFO).
+ * If the list was empty, the pushed bio also becomes the tail.
+ */
+static inline void bio_list_push(struct bio_list *bl, struct bio *bio)
+{
+	bio->bi_next = bl->head;
+	bl->head = bio;
+
+	if (!bl->tail)
+		bl->tail = bio;
+}
+
+/* Factor out to dm.h */
+#define TI_ERR_RET(str, ret) \
+ do { ti->error = str; return ret; } while (0);
+#define TI_ERR(str) TI_ERR_RET(str, -EINVAL)
+
+/* Macro to define access IO flags access inline functions. */
+/*
+ * Expands to five inline helpers operating on bit 'flag' of v->io.flags:
+ * TestClear<name><what>(), TestSet<name><what>(), Clear<name><what>(),
+ * Set<name><what>() and <name><what>() (plain test).
+ */
+#define BITOPS(name, what, var, flag) \
+static inline int TestClear ## name ## what(struct var *v) \
+{ return test_and_clear_bit(flag, &v->io.flags); } \
+static inline int TestSet ## name ## what(struct var *v) \
+{ return test_and_set_bit(flag, &v->io.flags); } \
+static inline void Clear ## name ## what(struct var *v) \
+{ clear_bit(flag, &v->io.flags); } \
+static inline void Set ## name ## what(struct var *v) \
+{ set_bit(flag, &v->io.flags); } \
+static inline int name ## what(struct var *v) \
+{ return test_bit(flag, &v->io.flags); }
+
+/*-----------------------------------------------------------------
+ * Stripe cache
+ *
+ * Cache for all reads and writes to raid sets (operational or degraded)
+ *
+ * We need to run all data to and from a RAID set through this cache,
+ * because parity chunks need to get calculated from data chunks
+ * or, in the degraded/resynchronization case, missing chunks need
+ * to be reconstructed using the other chunks of the stripe.
+ *---------------------------------------------------------------*/
+/* Unique kmem cache name suffix # counter. */
+static atomic_t _stripe_sc_nr = ATOMIC_INIT(-1); /* kmem cache # counter. */
+
+/* A chunk within a stripe (holds bios hanging off). */
+/* IO status flags for chunks of a stripe. */
+enum chunk_flags {
+ CHUNK_DIRTY, /* Pages of chunk dirty; need writing. */
+ CHUNK_ERROR, /* IO error on any chunk page. */
+ CHUNK_IO, /* Allow/prohibit IO on chunk pages. */
+ CHUNK_LOCKED, /* Chunk pages locked during IO. */
+ CHUNK_MUST_IO, /* Chunk must io. */
+ CHUNK_UNLOCK, /* Enforce chunk unlock. */
+ CHUNK_UPTODATE, /* Chunk pages are uptodate. */
+};
+
- #if READ != 0 || WRITE != 1
- #error dm-raid45: READ/WRITE != 0/1 used as index!!!
- #endif
-
+enum bl_type {
+ WRITE_QUEUED = WRITE + 1,
+ WRITE_MERGED,
+ NR_BL_TYPES, /* Must be last one! */
+};
+struct stripe_chunk {
+ atomic_t cnt; /* Reference count. */
+ struct stripe *stripe; /* Backpointer to stripe for endio(). */
+ /* Bio lists for reads, writes, and writes merged. */
+ struct bio_list bl[NR_BL_TYPES];
+ struct {
+ unsigned long flags; /* IO status flags. */
+ } io;
+};
+
+/* Define chunk bit operations. */
+BITOPS(Chunk, Dirty, stripe_chunk, CHUNK_DIRTY)
+BITOPS(Chunk, Error, stripe_chunk, CHUNK_ERROR)
+BITOPS(Chunk, Io, stripe_chunk, CHUNK_IO)
+BITOPS(Chunk, Locked, stripe_chunk, CHUNK_LOCKED)
+BITOPS(Chunk, MustIo, stripe_chunk, CHUNK_MUST_IO)
+BITOPS(Chunk, Unlock, stripe_chunk, CHUNK_UNLOCK)
+BITOPS(Chunk, Uptodate, stripe_chunk, CHUNK_UPTODATE)
+
+/*
+ * Stripe linked list indexes. Keep order, because the stripe
+ * and the stripe cache rely on the first 3!
+ */
+enum list_types {
+ LIST_FLUSH, /* Stripes to flush for io. */
+ LIST_ENDIO, /* Stripes to endio. */
+ LIST_LRU, /* Least recently used stripes. */
+ SC_NR_LISTS, /* # of lists in stripe cache. */
+ LIST_HASH = SC_NR_LISTS, /* Hashed stripes. */
+ LIST_RECOVER = LIST_HASH, /* For recovery type stripes only. */
+ STRIPE_NR_LISTS,/* To size array in struct stripe. */
+};
+
+/* Addressing region recovery. */
+struct recover_addr {
+ struct dm_region *reg; /* Actual region to recover. */
+ sector_t pos; /* Position within region to recover. */
+ sector_t end; /* End of region to recover. */
+};
+
+/* A stripe: the io object to handle all reads and writes to a RAID set. */
+struct stripe {
+ atomic_t cnt; /* Reference count. */
+ struct stripe_cache *sc; /* Backpointer to stripe cache. */
+
+ /*
+ * 4 linked lists:
+ * o io list to flush io
+ * o endio list
+ * o LRU list to put stripes w/o reference count on
+ * o stripe cache hash
+ */
+ struct list_head lists[STRIPE_NR_LISTS];
+
+ sector_t key; /* Hash key. */
+ region_t region; /* Region stripe is mapped to. */
+
+ struct {
+ unsigned long flags; /* Stripe state flags (see below). */
+
+ /*
+ * Pending ios in flight:
+ *
+ * used to control move of stripe to endio list
+ */
+ atomic_t pending;
+
+ /* Sectors to read and write for multi page stripe sets. */
+ unsigned size;
+ } io;
+
+ /* Address region recovery. */
+ struct recover_addr *recover;
+
+ /* Lock on stripe (Future: for clustering). */
+ void *lock;
+
+ struct {
+ unsigned short parity; /* Parity chunk index. */
+ short recover; /* Recovery chunk index. */
+ } idx;
+
+ /*
+ * This stripe's memory cache object (dm-mem-cache);
+ * i.e. the io chunk pages.
+ */
+ struct dm_mem_cache_object *obj;
+
+ /* Array of stripe sets (dynamically allocated). */
+ struct stripe_chunk chunk[0];
+};
+
+/* States stripes can be in (flags field). */
+enum stripe_states {
+	STRIPE_ERROR,		/* io error on stripe. */
+	STRIPE_MERGED,		/* Writes got merged to be written. */
+	STRIPE_RBW,		/* Read-before-write stripe. */
+	STRIPE_RECONSTRUCT,	/* Reconstruct of a missing chunk required. */
+	STRIPE_RECONSTRUCTED,	/* Reconstruction of a missing chunk done. */
+	STRIPE_RECOVER,		/* Stripe used for RAID set recovery. */
+};
+
+/* Define stripe bit operations. */
+BITOPS(Stripe, Error, stripe, STRIPE_ERROR)
+BITOPS(Stripe, Merged, stripe, STRIPE_MERGED)
+BITOPS(Stripe, RBW, stripe, STRIPE_RBW)
+BITOPS(Stripe, Reconstruct, stripe, STRIPE_RECONSTRUCT)
+BITOPS(Stripe, Reconstructed, stripe, STRIPE_RECONSTRUCTED)
+BITOPS(Stripe, Recover, stripe, STRIPE_RECOVER)
+
+/* A stripe hash. */
+struct stripe_hash {
+ struct list_head *hash;
+ unsigned buckets;
+ unsigned mask;
+ unsigned prime;
+ unsigned shift;
+};
+
+enum sc_lock_types {
+ LOCK_ENDIO, /* Protect endio list. */
+ NR_LOCKS, /* To size array in struct stripe_cache. */
+};
+
+/* A stripe cache. */
+struct stripe_cache {
+ /* Stripe hash. */
+ struct stripe_hash hash;
+
+ spinlock_t locks[NR_LOCKS]; /* Locks to protect lists. */
+
+ /* Stripes with io to flush, stripes to endio and LRU lists. */
+ struct list_head lists[SC_NR_LISTS];
+
+ /* Slab cache to allocate stripes from. */
+ struct {
+ struct kmem_cache *cache; /* Cache itself. */
+ char name[32]; /* Unique name. */
+ } kc;
+
+ struct dm_io_client *dm_io_client; /* dm-io client resource context. */
+
+ /* dm-mem-cache client resource context. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ int stripes_parm; /* # stripes parameter from constructor. */
+ atomic_t stripes; /* actual # of stripes in cache. */
+ atomic_t stripes_to_set; /* # of stripes to resize cache to. */
+ atomic_t stripes_last; /* last # of stripes in cache. */
+ atomic_t active_stripes; /* actual # of active stripes in cache. */
+
+ /* REMOVEME: */
+ atomic_t active_stripes_max; /* actual # of active stripes in cache. */
+};
+
+/* Flag specs for raid_dev. (Stray ';' after the comment removed: a lone
+   semicolon at file scope is an invalid empty declaration in ISO C.) */
+enum raid_dev_flags {
+	DEV_FAILED,	/* Device failed. */
+	DEV_IO_QUEUED,	/* Io got queued to device. */
+};
+
+/* The raid device in a set. */
+struct raid_dev {
+ struct dm_dev *dev;
+ sector_t start; /* Offset to map to. */
+ struct { /* Using struct to be able to BITOPS(). */
+ unsigned long flags; /* raid_dev_flags. */
+ } io;
+};
+
+BITOPS(Dev, Failed, raid_dev, DEV_FAILED)
+BITOPS(Dev, IoQueued, raid_dev, DEV_IO_QUEUED)
+
+/* Flags spec for raid_set. */
+enum raid_set_flags {
+ RS_CHECK_OVERWRITE, /* Check for chunk overwrites. */
+ RS_DEAD, /* RAID set inoperational. */
+ RS_DEAD_ENDIO_MESSAGE, /* RAID set dead endio one-off message. */
+ RS_DEGRADED, /* Io errors on RAID device. */
+ RS_DEVEL_STATS, /* REMOVEME: display status information. */
+ RS_ENFORCE_PARITY_CREATION,/* Enforce parity creation. */
+ RS_PROHIBIT_WRITES, /* Prohibit writes on device failure. */
+ RS_RECOVER, /* Do recovery. */
+ RS_RECOVERY_BANDWIDTH, /* Allow recovery bandwidth (delayed bios). */
+ RS_SC_BUSY, /* Stripe cache busy -> send an event. */
+ RS_SUSPEND, /* Suspend RAID set. */
+};
+
+/* REMOVEME: devel stats counters. */
+enum stats_types {
+ S_BIOS_READ,
+ S_BIOS_ADDED_READ,
+ S_BIOS_ENDIO_READ,
+ S_BIOS_WRITE,
+ S_BIOS_ADDED_WRITE,
+ S_BIOS_ENDIO_WRITE,
+ S_CAN_MERGE,
+ S_CANT_MERGE,
+ S_CONGESTED,
+ S_DM_IO_READ,
+ S_DM_IO_WRITE,
+ S_BANDWIDTH,
+ S_BARRIER,
+ S_BIO_COPY_PL_NEXT,
+ S_DEGRADED,
+ S_DELAYED_BIOS,
+ S_FLUSHS,
+ S_HITS_1ST,
+ S_IOS_POST,
+ S_INSCACHE,
+ S_MAX_LOOKUP,
+ S_CHUNK_LOCKED,
+ S_NO_BANDWIDTH,
+ S_NOT_CONGESTED,
+ S_NO_RW,
+ S_NOSYNC,
+ S_OVERWRITE,
+ S_PROHIBITCHUNKIO,
+ S_RECONSTRUCT_EI,
+ S_RECONSTRUCT_DEV,
+ S_RECONSTRUCT_SET,
+ S_RECONSTRUCTED,
+ S_REQUEUE,
+ S_STRIPE_ERROR,
+ S_SUM_DELAYED_BIOS,
+ S_XORS,
+ S_NR_STATS, /* # of stats counters. Must be last! */
+};
+
+/* Status type -> string mappings. */
+struct stats_map {
+ const enum stats_types type;
+ const char *str;
+};
+
+static struct stats_map stats_map[] = {
+ { S_BIOS_READ, "r=" },
+ { S_BIOS_ADDED_READ, "/" },
+ { S_BIOS_ENDIO_READ, "/" },
+ { S_BIOS_WRITE, " w=" },
+ { S_BIOS_ADDED_WRITE, "/" },
+ { S_BIOS_ENDIO_WRITE, "/" },
+ { S_DM_IO_READ, " rc=" },
+ { S_DM_IO_WRITE, " wc=" },
+ { S_BANDWIDTH, "\nbw=" },
+ { S_NO_BANDWIDTH, " no_bw=" },
+ { S_BARRIER, "\nbarrier=" },
+ { S_BIO_COPY_PL_NEXT, "\nbio_cp_next=" },
+ { S_CAN_MERGE, "\nmerge=" },
+ { S_CANT_MERGE, "/no_merge=" },
+ { S_CHUNK_LOCKED, "\nchunk_locked=" },
+ { S_CONGESTED, "\ncgst=" },
+ { S_NOT_CONGESTED, "/not_cgst=" },
+ { S_DEGRADED, "\ndegraded=" },
+ { S_DELAYED_BIOS, "\ndel_bios=" },
+ { S_SUM_DELAYED_BIOS, "/sum_del_bios=" },
+ { S_FLUSHS, "\nflushs=" },
+ { S_HITS_1ST, "\nhits_1st=" },
+ { S_IOS_POST, " ios_post=" },
+ { S_INSCACHE, " inscache=" },
+ { S_MAX_LOOKUP, " maxlookup=" },
+ { S_NO_RW, "\nno_rw=" },
+ { S_NOSYNC, " nosync=" },
+ { S_OVERWRITE, " ovr=" },
+ { S_PROHIBITCHUNKIO, " prhbt_io=" },
+ { S_RECONSTRUCT_EI, "\nrec_ei=" },
+ { S_RECONSTRUCT_DEV, " rec_dev=" },
+ { S_RECONSTRUCT_SET, " rec_set=" },
+ { S_RECONSTRUCTED, " rec=" },
+ { S_REQUEUE, " requeue=" },
+ { S_STRIPE_ERROR, " stripe_err=" },
+ { S_XORS, " xors=" },
+};
+
+/*
+ * A RAID set.
+ */
+#define dm_rh_client dm_region_hash
+enum count_type { IO_WORK = 0, IO_RECOVER, IO_NR_COUNT };
+typedef void (*xor_function_t)(unsigned count, unsigned long **data);
+struct raid_set {
+ struct dm_target *ti; /* Target pointer. */
+
+ struct {
+ unsigned long flags; /* State flags. */
+ struct mutex in_lock; /* Protects central input list below. */
+ struct mutex xor_lock; /* Protects xor algorithm set. */
+ struct bio_list in; /* Pending ios (central input list). */
+ struct bio_list work; /* ios work set. */
+ wait_queue_head_t suspendq; /* suspend synchronization. */
+ atomic_t in_process; /* counter of queued bios (suspendq). */
+ atomic_t in_process_max;/* counter of queued bios max. */
+
+ /* io work. */
+ struct workqueue_struct *wq;
+ struct delayed_work dws_do_raid; /* For main worker. */
+ struct work_struct ws_do_table_event; /* For event worker. */
+ } io;
+
+ /* Stripe locking abstraction. */
+ struct dm_raid45_locking_type *locking;
+
+ struct stripe_cache sc; /* Stripe cache for this set. */
+
+ /* Xor optimization. */
+ struct {
+ struct xor_func *f;
+ unsigned chunks;
+ unsigned speed;
+ } xor;
+
+ /* Recovery parameters. */
+ struct recover {
+ struct dm_dirty_log *dl; /* Dirty log. */
+ struct dm_rh_client *rh; /* Region hash. */
+
+ struct dm_io_client *dm_io_client; /* recovery dm-io client. */
+ /* dm-mem-cache client resource context for recovery stripes. */
+ struct dm_mem_cache_client *mem_cache_client;
+
+ struct list_head stripes; /* List of recovery stripes. */
+
+ region_t nr_regions;
+ region_t nr_regions_to_recover;
+ region_t nr_regions_recovered;
+ unsigned long start_jiffies;
+ unsigned long end_jiffies;
+
+ unsigned bandwidth; /* Recovery bandwidth [%]. */
+ unsigned bandwidth_work; /* Recovery bandwidth [factor]. */
+ unsigned bandwidth_parm; /* " constructor parm. */
+ unsigned io_size; /* recovery io size <= region size. */
+ unsigned io_size_parm; /* recovery io size ctr parameter. */
+ unsigned recovery; /* Recovery allowed/prohibited. */
+ unsigned recovery_stripes; /* # of parallel recovery stripes. */
+
+ /* recovery io throttling. */
+ atomic_t io_count[IO_NR_COUNT]; /* counter recover/regular io.*/
+ unsigned long last_jiffies;
+ } recover;
+
+ /* RAID set parameters. */
+ struct {
+ struct raid_type *raid_type; /* RAID type (eg, RAID4). */
+ unsigned raid_parms; /* # variable raid parameters. */
+
+ unsigned chunk_size; /* Sectors per chunk. */
+ unsigned chunk_size_parm;
+ unsigned chunk_shift; /* rsector chunk size shift. */
+
+ unsigned io_size; /* Sectors per io. */
+ unsigned io_size_parm;
+ unsigned io_mask; /* Mask for bio_copy_page_list(). */
+ unsigned io_inv_mask; /* Mask for raid_address(). */
+
+ sector_t sectors_per_dev; /* Sectors per device. */
+
+ atomic_t failed_devs; /* Amount of devices failed. */
+
+ /* Index of device to initialize. */
+ int dev_to_init;
+ int dev_to_init_parm;
+
+ /* Raid devices dynamically allocated. */
+ unsigned raid_devs; /* # of RAID devices below. */
+ unsigned data_devs; /* # of RAID data devices. */
+
+ int ei; /* index of failed RAID device. */
+
+ /* Index of dedicated parity device (i.e. RAID4). */
+ int pi;
+ int pi_parm; /* constructor parm for status output. */
+ } set;
+
+ /* REMOVEME: devel stats counters. */
+ atomic_t stats[S_NR_STATS];
+
+ /* Dynamically allocated temporary pointers for xor(). */
+ unsigned long **data;
+
+ /* Dynamically allocated RAID devices. Alignment? */
+ struct raid_dev dev[0];
+};
+
+/* Define RAID set bit operations. */
+BITOPS(RS, Bandwidth, raid_set, RS_RECOVERY_BANDWIDTH)
+BITOPS(RS, CheckOverwrite, raid_set, RS_CHECK_OVERWRITE)
+BITOPS(RS, Dead, raid_set, RS_DEAD)
+BITOPS(RS, DeadEndioMessage, raid_set, RS_DEAD_ENDIO_MESSAGE)
+BITOPS(RS, Degraded, raid_set, RS_DEGRADED)
+BITOPS(RS, DevelStats, raid_set, RS_DEVEL_STATS)
+BITOPS(RS, EnforceParityCreation, raid_set, RS_ENFORCE_PARITY_CREATION)
+BITOPS(RS, ProhibitWrites, raid_set, RS_PROHIBIT_WRITES)
+BITOPS(RS, Recover, raid_set, RS_RECOVER)
+BITOPS(RS, ScBusy, raid_set, RS_SC_BUSY)
+BITOPS(RS, Suspend, raid_set, RS_SUSPEND)
+#undef BITOPS
+
+/*-----------------------------------------------------------------
+ * Raid-4/5 set structures.
+ *---------------------------------------------------------------*/
+/* RAID level definitions. */
+enum raid_level {
+ raid4,
+ raid5,
+};
+
+/* Symmetric/Asymmetric, Left/Right parity rotating algorithms. */
+enum raid_algorithm {
+ none,
+ left_asym,
+ right_asym,
+ left_sym,
+ right_sym,
+};
+
+struct raid_type {
+ const char *name; /* RAID algorithm. */
+ const char *descr; /* Descriptor text for logging. */
+ const unsigned parity_devs; /* # of parity devices. */
+ const unsigned minimal_devs; /* minimal # of devices in set. */
+ const enum raid_level level; /* RAID level. */
+ const enum raid_algorithm algorithm; /* RAID algorithm. */
+};
+
+/* Supported raid types and properties. */
+static struct raid_type raid_types[] = {
+ {"raid4", "RAID4 (dedicated parity disk)", 1, 3, raid4, none},
+ {"raid5_la", "RAID5 (left asymmetric)", 1, 3, raid5, left_asym},
+ {"raid5_ra", "RAID5 (right asymmetric)", 1, 3, raid5, right_asym},
+ {"raid5_ls", "RAID5 (left symmetric)", 1, 3, raid5, left_sym},
+ {"raid5_rs", "RAID5 (right symmetric)", 1, 3, raid5, right_sym},
+};
+
+/* Address as calculated by raid_address(). */
+struct raid_address {
+ sector_t key; /* Hash key (address of stripe % chunk_size). */
+ unsigned di, pi; /* Data and parity disks index. */
+};
+
+/* REMOVEME: reset all devel statistics counters to zero. */
+static void stats_reset(struct raid_set *rs)
+{
+	unsigned s = S_NR_STATS;
+
+	/* Walk the stats array backwards, zeroing each counter. */
+	while (s--)
+		atomic_set(rs->stats + s, 0);
+}
+
+/*----------------------------------------------------------------
+ * RAID set management routines.
+ *--------------------------------------------------------------*/
+/*
+ * Begin small helper functions.
+ */
+/* No need to be called from region hash indirectly at dm_rh_dec(). */
+static void wake_dummy(void *context) {}
+
+/* Return # of io references (queued/in-flight bios on this set). */
+static int io_ref(struct raid_set *rs)
+{
+	return atomic_read(&rs->io.in_process);
+}
+
+/* Get an io reference; also tracks the high-water mark for devel stats. */
+static void io_get(struct raid_set *rs)
+{
+	int p = atomic_inc_return(&rs->io.in_process);
+
+	if (p > atomic_read(&rs->io.in_process_max))
+		atomic_set(&rs->io.in_process_max, p); /* REMOVEME: max. */
+}
+
+/* Put the io reference and conditionally wake io waiters (suspend path). */
+static void io_put(struct raid_set *rs)
+{
+	/* Intel: rebuild data corrupter? */
+	if (atomic_dec_and_test(&rs->io.in_process))
+		wake_up(&rs->io.suspendq);
+	else
+		BUG_ON(io_ref(rs) < 0);
+}
+
+/* Wait until all io has been processed (blocks on suspendq). */
+static void wait_ios(struct raid_set *rs)
+{
+	wait_event(rs->io.suspendq, !io_ref(rs));
+}
+
+/* Queue (optionally delayed) io work. */
+static void wake_do_raid_delayed(struct raid_set *rs, unsigned long delay)
+{
+	queue_delayed_work(rs->io.wq, &rs->io.dws_do_raid, delay);
+}
+
+/* Queue io work immediately (called from region hash too). */
+static void wake_do_raid(void *context)
+{
+	struct raid_set *rs = context;
+
+	queue_work(rs->io.wq, &rs->io.dws_do_raid.work);
+}
+
+/* Calculate device sector offset: set address divided by # of data devices. */
+static sector_t _sector(struct raid_set *rs, struct bio *bio)
+{
+	sector_t sector = bio->bi_sector;
+
+	sector_div(sector, rs->set.data_devs);
+	return sector;
+}
+
+/* Return # of active stripes in stripe cache. */
+static int sc_active(struct stripe_cache *sc)
+{
+	return atomic_read(&sc->active_stripes);
+}
+
+/* Stripe cache busy indicator: nearly all stripes are active. */
+static int sc_busy(struct raid_set *rs)
+{
+	return sc_active(&rs->sc) >
+	       atomic_read(&rs->sc.stripes) - (STRIPES_MIN / 2);
+}
+
+/* Set chunk states. */
+enum chunk_dirty_type { CLEAN, DIRTY, ERROR };
+/*
+ * Transition a chunk's io state.
+ *
+ * CLEAN/DIRTY additionally mark the chunk uptodate, io-allowed and
+ * error-free; ERROR flags the chunk AND its stripe and returns early,
+ * leaving the remaining state untouched.
+ */
+static void chunk_set(struct stripe_chunk *chunk, enum chunk_dirty_type type)
+{
+	switch (type) {
+	case CLEAN:
+		ClearChunkDirty(chunk);
+		break;
+	case DIRTY:
+		SetChunkDirty(chunk);
+		break;
+	case ERROR:
+		SetChunkError(chunk);
+		SetStripeError(chunk->stripe);
+		return;
+	default:
+		BUG();
+	}
+
+	SetChunkUptodate(chunk);
+	SetChunkIo(chunk);
+	ClearChunkError(chunk);
+}
+
+/* Return true if the region containing 'sector' is in any of 'state'. */
+static int region_state(struct raid_set *rs, sector_t sector,
+			enum dm_rh_region_states state)
+{
+	struct dm_rh_client *rh = rs->recover.rh;
+	region_t region = dm_rh_sector_to_region(rh, sector);
+
+	/* dm_rh_get_state() may return several bits; mask against 'state'. */
+	return !!(dm_rh_get_state(rh, region, 1) & state);
+}
+
+/*
+ * Return true in case a chunk should be read/written
+ *
+ * Conditions to read/write:
+ *	o chunk not uptodate
+ *	o chunk dirty
+ *
+ * Conditions to avoid io:
+ *	o io already ongoing on chunk
+ *	o io explicitly prohibited
+ */
+static int chunk_io(struct stripe_chunk *chunk)
+{
+	/* 2nd run optimization (flag set below on first run). */
+	if (TestClearChunkMustIo(chunk))
+		return 1;
+
+	/* Avoid io if prohibited or a locked chunk. */
+	if (!ChunkIo(chunk) || ChunkLocked(chunk))
+		return 0;
+
+	if (!ChunkUptodate(chunk) || ChunkDirty(chunk)) {
+		SetChunkMustIo(chunk); /* 2nd run optimization. */
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Call a function on each chunk needing io unless device failed. */
+/* Returns the number of chunks f_io() was invoked on. */
+static unsigned for_each_io_dev(struct stripe *stripe,
+				void (*f_io)(struct stripe *stripe, unsigned p))
+{
+	struct raid_set *rs = RS(stripe->sc);
+	unsigned p, r = 0;
+
+	for (p = 0; p < rs->set.raid_devs; p++) {
+		/* Skip chunks that need no io and failed devices. */
+		if (chunk_io(CHUNK(stripe, p)) && !DevFailed(rs->dev + p)) {
+			f_io(stripe, p);
+			r++;
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Index of device to calculate parity on.
+ *
+ * Either the parity device index *or* the selected
+ * device to init after a spare replacement.
+ *
+ * Sets *sync to true when the stripe's region is in sync.
+ * Returns -1 for recovery stripes on RAID5 (parity rotates per stripe).
+ */
+static int dev_for_parity(struct stripe *stripe, int *sync)
+{
+	struct raid_set *rs = RS(stripe->sc);
+	int r = region_state(rs, stripe->key, DM_RH_NOSYNC | DM_RH_RECOVERING);
+
+	/* In sync <=> region neither nosync nor recovering. */
+	*sync = !r;
+
+	/* Reconstruct a particular device ?. */
+	if (r && rs->set.dev_to_init > -1)
+		return rs->set.dev_to_init;
+	else if (rs->set.raid_type->level == raid4)
+		return rs->set.pi;
+	else if (!StripeRecover(stripe))
+		return stripe->idx.parity;
+	else
+		return -1;
+}
+
+/* RAID set congested function (backing_dev_info callback). */
+static int rs_congested(void *congested_data, int bdi_bits)
+{
+	int r;
+	unsigned p;
+	struct raid_set *rs = congested_data;
+
+	/* Busy cache, suspend or prohibited writes -> report congested. */
+	if (sc_busy(rs) || RSSuspend(rs) || RSProhibitWrites(rs))
+		r = 1;
+	else for (r = 0, p = rs->set.raid_devs; !r && p--; ) {
+		/* If any of our component devices are overloaded. */
+		struct request_queue *q = bdev_get_queue(rs->dev[p].dev->bdev);
+
+		r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+	}
+
+	/* REMOVEME: statistics. */
+	atomic_inc(rs->stats + (r ? S_CONGESTED : S_NOT_CONGESTED));
+	return r;
+}
+
+/* RAID device degrade check: mark device p failed and degrade the set. */
+static void rs_check_degrade_dev(struct raid_set *rs,
+				 struct stripe *stripe, unsigned p)
+{
+	/* Already marked failed -> nothing further to do. */
+	if (TestSetDevFailed(rs->dev + p))
+		return;
+
+	/* More devices failed than parity can compensate -> set is dead. */
+	if ((atomic_inc_return(&rs->set.failed_devs) >
+	     rs->set.raid_type->parity_devs) &&
+	     !TestSetRSDead(rs)) {
+		/* Display RAID set dead message once. */
+		unsigned p;	/* NOTE(review): shadows parameter p; inner scope only. */
+		char buf[BDEVNAME_SIZE];
+
+		DMERR("FATAL: too many devices failed -> RAID set broken");
+		for (p = 0; p < rs->set.raid_devs; p++) {
+			if (DevFailed(rs->dev + p))
+				DMERR("device /dev/%s failed",
+				      bdevname(rs->dev[p].dev->bdev, buf));
+		}
+	}
+
+	/* Only log the first member error. */
+	if (!TestSetRSDegraded(rs)) {
+		char buf[BDEVNAME_SIZE];
+
+		/* Store index for recovery. */
+		rs->set.ei = p;
+		DMERR("CRITICAL: %sio error on device /dev/%s "
+		      "in region=%llu; DEGRADING RAID set\n",
+		      stripe ? "" : "FAKED ",
+		      bdevname(rs->dev[p].dev->bdev, buf),
+		      (unsigned long long) (stripe ? stripe->key : 0));
+		DMERR("further device error messages suppressed");
+	}
+
+	/* Prohibit further writes to allow for userspace to update metadata. */
+	SetRSProhibitWrites(rs);
+	schedule_work(&rs->io.ws_do_table_event);
+}
+
+/* RAID set degrade check: degrade any device whose chunk has an error. */
+static void rs_check_degrade(struct stripe *stripe)
+{
+	struct raid_set *rs = RS(stripe->sc);
+	unsigned p = rs->set.raid_devs;
+
+	while (p--) {
+		if (ChunkError(CHUNK(stripe, p)))
+			rs_check_degrade_dev(rs, stripe, p);
+	}
+}
+
+/* Lookup a RAID device by its underlying block device. */
+/* Returns the device index on success, -ENODEV when not found. */
+static int raid_dev_lookup(struct raid_set *rs, struct raid_dev *dev_lookup)
+{
+	unsigned p;
+	struct raid_dev *dev;
+
+	/*
+	 * Must be an incremental loop, because the device array
+	 * can have empty slots still on calls from raid_ctr()
+	 */
+	for (dev = rs->dev, p = 0;
+	     dev->dev && p < rs->set.raid_devs;
+	     dev++, p++) {
+		if (dev_lookup->dev->bdev->bd_dev == dev->dev->bdev->bd_dev)
+			return p;
+	}
+
+	return -ENODEV;
+}
+/*
+ * End small helper functions.
+ */
+
+/*
+ * Stripe hash functions
+ */
+/* Initialize/destroy stripe hash. */
+static int hash_init(struct stripe_hash *hash, unsigned stripes)
+{
+ unsigned buckets = roundup_pow_of_two(stripes >> 1);
+ static unsigned hash_primes[] = {
+ /* Table of primes for hash_fn/table size optimization. */
+ 1, 2, 3, 7, 13, 27, 53, 97, 193, 389, 769,
+ 1543, 3079, 6151, 12289, 24593, 49157, 98317,
+ };
+
+ /* Allocate stripe hash buckets. */
+ hash->hash = vmalloc(buckets * sizeof(*hash->hash));
+ if (!hash->hash)
+ return -ENOMEM;
+
+ hash->buckets = buckets;
+ hash->mask = buckets - 1;
+ hash->shift = ffs(buckets);
+ if (hash->shift > ARRAY_SIZE(hash_primes))
+ hash->shift = ARRAY_SIZE(hash_primes) - 1;
+
+ BUG_ON(hash->shift < 2);
+ hash->prime = hash_primes[hash->shift];
+
+ /* Initialize buckets. */
+ while (buckets--)
+ INIT_LIST_HEAD(hash->hash + buckets);
+ return 0;
+}
+
+/* Free the stripe hash bucket array; safe to call when never allocated. */
+static void hash_exit(struct stripe_hash *hash)
+{
+	/* vfree(NULL) is a no-op, so no guard is needed. */
+	vfree(hash->hash);
+	hash->hash = NULL;
+}
+
+/* Multiplicative hash of a stripe key into a bucket index. */
+static unsigned hash_fn(struct stripe_hash *hash, sector_t key)
+{
+	return (unsigned) (((key * hash->prime) >> hash->shift) & hash->mask);
+}
+
+/* Return the bucket list head for a stripe key. */
+static struct list_head *hash_bucket(struct stripe_hash *hash, sector_t key)
+{
+	return hash->hash + hash_fn(hash, key);
+}
+
+/* Insert an entry into a hash. */
+static void stripe_insert(struct stripe_hash *hash, struct stripe *stripe)
+{
+	list_add(stripe->lists + LIST_HASH, hash_bucket(hash, stripe->key));
+}
+
+/* Lookup an entry in the stripe hash; NULL when key is not cached. */
+static struct stripe *stripe_lookup(struct stripe_cache *sc, sector_t key)
+{
+	unsigned look = 0;
+	struct stripe *stripe;
+	struct list_head *bucket = hash_bucket(&sc->hash, key);
+
+	list_for_each_entry(stripe, bucket, lists[LIST_HASH]) {
+		look++;
+
+		if (stripe->key == key) {
+			/* REMOVEME: statistics (track longest probe chain). */
+			if (look > atomic_read(RS(sc)->stats + S_MAX_LOOKUP))
+				atomic_set(RS(sc)->stats + S_MAX_LOOKUP, look);
+			return stripe;
+		}
+	}
+
+	return NULL;
+}
+
+/* Resize the stripe cache hash on size changes. */
+/*
+ * If the cached stripe count changed since the last resize, build a
+ * new hash sized for the current count, rehash all entries from the
+ * old buckets into it, then swap the new hash in and free the old.
+ * Returns 0 or -ENOMEM from hash_init().
+ */
+static int sc_hash_resize(struct stripe_cache *sc)
+{
+	/* Resize indicated ? */
+	if (atomic_read(&sc->stripes) != atomic_read(&sc->stripes_last)) {
+		int r;
+		struct stripe_hash hash;
+
+		r = hash_init(&hash, atomic_read(&sc->stripes));
+		if (r)
+			return r;
+
+		if (sc->hash.hash) {
+			unsigned b = sc->hash.buckets;
+			struct list_head *pos, *tmp;
+
+			/* Walk old buckets and insert into new. */
+			while (b--) {
+				list_for_each_safe(pos, tmp, sc->hash.hash + b)
+				    stripe_insert(&hash,
+						  list_entry(pos, struct stripe,
+							     lists[LIST_HASH]));
+			}
+
+		}
+
+		/* Free old buckets, then adopt the new hash by value. */
+		hash_exit(&sc->hash);
+		memcpy(&sc->hash, &hash, sizeof(sc->hash));
+		atomic_set(&sc->stripes_last, atomic_read(&sc->stripes));
+	}
+
+	return 0;
+}
+/* End hash stripe hash function. */
+
+/* List add, delete, push and pop functions. */
+/* Add stripe to flush list. */
+#define DEL_LIST(lh) \
+ if (!list_empty(lh)) \
+ list_del_init(lh);
+
+/* Delete stripe from hash. */
+static void stripe_hash_del(struct stripe *stripe)
+{
+	DEL_LIST(stripe->lists + LIST_HASH);
+}
+
+/* Return stripe reference count. */
+static inline int stripe_ref(struct stripe *stripe)
+{
+	return atomic_read(&stripe->cnt);
+}
+
+/* Add stripe to flush list unless it is being reconstructed. */
+static void stripe_flush_add(struct stripe *stripe)
+{
+	struct stripe_cache *sc = stripe->sc;
+	struct list_head *lh = stripe->lists + LIST_FLUSH;
+
+	if (!StripeReconstruct(stripe) && list_empty(lh))
+		list_add_tail(lh, sc->lists + LIST_FLUSH);
+}
+
+/*
+ * Add stripe to LRU (inactive) list.
+ *
+ * Need lock, because of concurrent access from message interface.
+ */
+static void stripe_lru_add(struct stripe *stripe)
+{
+	/* Recovery stripes never go on the LRU. */
+	if (!StripeRecover(stripe)) {
+		struct list_head *lh = stripe->lists + LIST_LRU;
+
+		if (list_empty(lh))
+			list_add_tail(lh, stripe->sc->lists + LIST_LRU);
+	}
+}
+
+/*
+ * Pop the first stripe off stripe cache list 'list' into the local
+ * variable 'stripe' (NULL if the list is empty).
+ *
+ * NOTE(review): the expansion ends with ';' (while (0);), and
+ * stripe_endio_pop() relies on that by invoking it without its own
+ * semicolon — do not "fix" the macro without touching that caller.
+ */
+#define POP_LIST(list) \
+	do { \
+		if (list_empty(sc->lists + (list))) \
+			stripe = NULL; \
+		else { \
+			stripe = list_first_entry(sc->lists + (list), \
+						  struct stripe, \
+						  lists[(list)]); \
+			list_del_init(stripe->lists + (list)); \
+		} \
+	} while (0);
+
+/* Pop an available stripe off the LRU list. */
+static struct stripe *stripe_lru_pop(struct stripe_cache *sc)
+{
+	struct stripe *stripe;
+
+	POP_LIST(LIST_LRU);
+	return stripe;
+}
+
+/* Pop an available stripe off the io list. */
+static struct stripe *stripe_io_pop(struct stripe_cache *sc)
+{
+	struct stripe *stripe;
+
+	POP_LIST(LIST_FLUSH);
+	return stripe;
+}
+
+/* Push a stripe safely onto the endio list to be handled by do_endios(). */
+static void stripe_endio_push(struct stripe *stripe)
+{
+	unsigned long flags;
+	struct stripe_cache *sc = stripe->sc;
+	struct list_head *stripe_list = stripe->lists + LIST_ENDIO,
+			 *sc_list = sc->lists + LIST_ENDIO;
+	spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+	/* This runs in parallel with do_endios(); irqsave: endio context. */
+	spin_lock_irqsave(lock, flags);
+	if (list_empty(stripe_list))
+		list_add_tail(stripe_list, sc_list);
+	spin_unlock_irqrestore(lock, flags);
+
+	wake_do_raid(RS(sc)); /* Wake myself. */
+}
+
+/* Pop a stripe safely off the endio list (NULL if empty). */
+static struct stripe *stripe_endio_pop(struct stripe_cache *sc)
+{
+	struct stripe *stripe;
+	spinlock_t *lock = sc->locks + LOCK_ENDIO;
+
+	/* This runs in parallel with endio(). */
+	spin_lock_irq(lock);
+	POP_LIST(LIST_ENDIO)	/* No ';' — POP_LIST's expansion supplies it. */
+	spin_unlock_irq(lock);
+	return stripe;
+}
+#undef POP_LIST
+
+/*
+ * Stripe cache locking functions
+ */
+/* Dummy lock function for single host RAID4+5; returns a non-NULL handle. */
+static void *no_lock(sector_t key, enum dm_lock_type type)
+{
+ return &no_lock;
+}
+
+/* Dummy unlock function for single host RAID4+5. */
+static void no_unlock(void *lock_handle)
+{
+}
+
+/* No locking (for single host RAID 4+5). */
+static struct dm_raid45_locking_type locking_none = {
+ .lock = no_lock,
+ .unlock = no_unlock,
+};
+
+/* Lock a stripe (for clustering); -EPERM when the lock can't be granted. */
+static int
+stripe_lock(struct stripe *stripe, int rw, sector_t key)
+{
+ stripe->lock = RS(stripe->sc)->locking->lock(key, rw == READ ? DM_RAID45_SHARED : DM_RAID45_EX);
+ return stripe->lock ? 0 : -EPERM;
+}
+
+/* Unlock a stripe (for clustering). */
+static void stripe_unlock(struct stripe *stripe)
+{
+ RS(stripe->sc)->locking->unlock(stripe->lock);
+ stripe->lock = NULL;
+}
+
+/* Test io pending on stripe. */
+static int stripe_io_ref(struct stripe *stripe)
+{
+ return atomic_read(&stripe->io.pending);
+}
+
+/* Take an io reference; the first one marks the stripe active. */
+static void stripe_io_get(struct stripe *stripe)
+{
+ if (atomic_inc_return(&stripe->io.pending) == 1)
+ /* REMOVEME: statistics */
+ atomic_inc(&stripe->sc->active_stripes);
+ else
+ BUG_ON(stripe_io_ref(stripe) < 0);
+}
+
+/* Drop an io reference; the last one queues the stripe for endio handling. */
+static void stripe_io_put(struct stripe *stripe)
+{
+ if (atomic_dec_and_test(&stripe->io.pending)) {
+ if (unlikely(StripeRecover(stripe)))
+ /* Don't put recovery stripe on endio list. */
+ wake_do_raid(RS(stripe->sc));
+ else
+ /* Add regular stripe to endio list and wake daemon. */
+ stripe_endio_push(stripe);
+
+ /* REMOVEME: statistics */
+ atomic_dec(&stripe->sc->active_stripes);
+ } else
+ BUG_ON(stripe_io_ref(stripe) < 0);
+}
+
+/*
+ * Take stripe reference out.
+ *
+ * Returns 0 on success; the first reference also takes the cluster
+ * lock, so -EPERM may be returned when that lock can't be granted.
+ */
+static int stripe_get(struct stripe *stripe)
+{
+ int r;
+ struct list_head *lh = stripe->lists + LIST_LRU;
+
+ /* Delete stripe from LRU (inactive) list if on. */
+ DEL_LIST(lh);
+ BUG_ON(stripe_ref(stripe) < 0);
+
+ /* Lock stripe on first reference */
+ r = (atomic_inc_return(&stripe->cnt) == 1) ?
+ stripe_lock(stripe, WRITE, stripe->key) : 0;
+
+ return r;
+}
+#undef DEL_LIST
+
+/* Return references on a chunk. */
+static int chunk_ref(struct stripe_chunk *chunk)
+{
+ return atomic_read(&chunk->cnt);
+}
+
+/* Take out reference on a chunk; returns the new reference count. */
+static int chunk_get(struct stripe_chunk *chunk)
+{
+ return atomic_inc_return(&chunk->cnt);
+}
+
+/* Drop reference on a chunk. */
+static void chunk_put(struct stripe_chunk *chunk)
+{
+ BUG_ON(atomic_dec_return(&chunk->cnt) < 0);
+}
+
+/*
+ * Drop reference on a stripe.
+ *
+ * The last reference releases the cluster lock; the stripe is moved
+ * to the LRU list elsewhere once it is fully idle.
+ */
+static void stripe_put(struct stripe *stripe)
+{
+ if (atomic_dec_and_test(&stripe->cnt)) {
+ BUG_ON(stripe_io_ref(stripe));
+ stripe_unlock(stripe);
+ } else
+ BUG_ON(stripe_ref(stripe) < 0);
+}
+
+/* Helper needed by for_each_io_dev(). */
+static void stripe_get_references(struct stripe *stripe, unsigned p)
+{
+
+ /*
+ * Another one to reference the stripe in
+ * order to protect vs. LRU list moves.
+ */
+ io_get(RS(stripe->sc)); /* Global io references. */
+ stripe_get(stripe);
+ stripe_io_get(stripe); /* One for each chunk io. */
+}
+
+/* Helper for endio() to put all taken references. */
+static void stripe_put_references(struct stripe *stripe)
+{
+ stripe_io_put(stripe); /* One for each chunk io. */
+ stripe_put(stripe);
+ io_put(RS(stripe->sc));
+}
+
+/*
+ * Stripe cache functions.
+ */
+/*
+ * Invalidate all chunks (i.e. their pages) of a stripe.
+ *
+ * I only keep state for the whole chunk.
+ */
+static inline void stripe_chunk_invalidate(struct stripe_chunk *chunk)
+{
+ chunk->io.flags = 0;
+}
+
+/* Invalidate every chunk of a stripe. */
+static void
+stripe_chunks_invalidate(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ stripe_chunk_invalidate(CHUNK(stripe, p));
+}
+
+/* Prepare stripe for (re)use: clear flags and parity/recover indexes. */
+static void stripe_invalidate(struct stripe *stripe)
+{
+ stripe->io.flags = 0;
+ stripe->idx.parity = stripe->idx.recover = -1;
+ stripe_chunks_invalidate(stripe);
+}
+
+/*
+ * Allow io on all chunks of a stripe.
+ * If not set, IO will not occur; i.e. it's prohibited.
+ *
+ * Actual IO submission for allowed chunks depends
+ * on their !uptodate or dirty state.
+ */
+static void stripe_allow_io(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--)
+ SetChunkIo(CHUNK(stripe, p));
+}
+
+/* Initialize a stripe: chunk state, list heads, counters, io size. */
+static void stripe_init(struct stripe_cache *sc, struct stripe *stripe)
+{
+ unsigned i, p = RS(sc)->set.raid_devs;
+
+ /* Work all io chunks. */
+ while (p--) {
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+
+ atomic_set(&chunk->cnt, 0);
+ chunk->stripe = stripe;
+ i = ARRAY_SIZE(chunk->bl);
+ while (i--)
+ bio_list_init(chunk->bl + i);
+ }
+
+ stripe->sc = sc;
+
+ i = ARRAY_SIZE(stripe->lists);
+ while (i--)
+ INIT_LIST_HEAD(stripe->lists + i);
+
+ stripe->io.size = RS(sc)->set.io_size;
+ atomic_set(&stripe->cnt, 0);
+ atomic_set(&stripe->io.pending, 0);
+ stripe_invalidate(stripe);
+}
+
+/* Number of pages per chunk (rounded up). */
+static inline unsigned chunk_pages(unsigned sectors)
+{
+ return dm_div_up(sectors, SECTORS_PER_PAGE);
+}
+
+/* Number of pages per stripe. */
+static inline unsigned stripe_pages(struct raid_set *rs, unsigned io_size)
+{
+ return chunk_pages(io_size) * rs->set.raid_devs;
+}
+
+/*
+ * Zero part of a chunk's page_list (recovery).
+ *
+ * @start: sector offset into the chunk
+ * @count: number of sectors to zero
+ */
+static void stripe_zero_pl_part(struct stripe *stripe, int p,
+ unsigned start, unsigned count)
+{
+ unsigned o = start / SECTORS_PER_PAGE, pages = chunk_pages(count);
+ /* Get offset into the page_list. */
+ struct page_list *pl = pl_elem(PL(stripe, p), o);
+
+ BUG_ON(!pl);
+ while (pl && pages--) {
+ BUG_ON(!pl->page);
+ memset(page_address(pl->page), 0, PAGE_SIZE);
+ pl = pl->next;
+ }
+}
+
+/* Zero the parity chunk of a stripe; no-op if no parity index set. */
+static void stripe_zero_chunk(struct stripe *stripe, int p)
+{
+ if (p > -1)
+ stripe_zero_pl_part(stripe, p, 0, stripe->io.size);
+}
+
+/* Return dynamic stripe structure size (header + per-device chunks). */
+static size_t stripe_size(struct raid_set *rs)
+{
+ return sizeof(struct stripe) +
+ rs->set.raid_devs * sizeof(struct stripe_chunk);
+}
+
+/*
+ * Allocate a stripe and its memory object.
+ *
+ * With SC_GROW, also grows the dm-mem-cache by one object first.
+ * Returns NULL on any allocation failure (never an ERR_PTR).
+ */
+/* XXX adjust to cope with stripe cache and recovery stripe caches. */
+enum grow { SC_GROW, SC_KEEP };
+static struct stripe *stripe_alloc(struct stripe_cache *sc,
+ struct dm_mem_cache_client *mc,
+ enum grow grow)
+{
+ int r;
+ struct stripe *stripe;
+
+ stripe = kmem_cache_zalloc(sc->kc.cache, GFP_KERNEL);
+ if (stripe) {
+ /* Grow the dm-mem-cache by one object. */
+ if (grow == SC_GROW) {
+ r = dm_mem_cache_grow(mc, 1);
+ if (r)
+ goto err_free;
+ }
+
+ stripe->obj = dm_mem_cache_alloc(mc);
+ if (IS_ERR(stripe->obj))
+ goto err_shrink;
+
+ stripe_init(sc, stripe);
+ }
+
+ return stripe;
+
+err_shrink:
+ /* Undo the grow above before freeing the stripe itself. */
+ if (grow == SC_GROW)
+ dm_mem_cache_shrink(mc, 1);
+err_free:
+ kmem_cache_free(sc->kc.cache, stripe);
+ return NULL;
+}
+
+/*
+ * Free a stripes memory object, shrink the
+ * memory cache and free the stripe itself.
+ */
+static void stripe_free(struct stripe *stripe, struct dm_mem_cache_client *mc)
+{
+ dm_mem_cache_free(mc, stripe->obj);
+ dm_mem_cache_shrink(mc, 1);
+ kmem_cache_free(stripe->sc->kc.cache, stripe);
+}
+
+/* Free all recovery stripes and destroy their mem-cache/dm-io clients. */
+static void stripe_recover_free(struct raid_set *rs)
+{
+ struct recover *rec = &rs->recover;
+ struct dm_mem_cache_client *mc;
+
+ mc = rec->mem_cache_client;
+ rec->mem_cache_client = NULL;
+ if (mc) {
+ struct stripe *stripe;
+
+ while (!list_empty(&rec->stripes)) {
+ stripe = list_first_entry(&rec->stripes, struct stripe,
+ lists[LIST_RECOVER]);
+ list_del(stripe->lists + LIST_RECOVER);
+ kfree(stripe->recover);
+ stripe_free(stripe, mc);
+ }
+
+ dm_mem_cache_client_destroy(mc);
+ dm_io_client_destroy(rec->dm_io_client);
+ rec->dm_io_client = NULL;
+ }
+}
+
+/*
+ * Grow stripe cache by @stripes entries.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure; resizes the
+ * stripe hash on success.
+ */
+static int sc_grow(struct stripe_cache *sc, unsigned stripes, enum grow grow)
+{
+ int r = 0;
+
+ /* Try to allocate this many (additional) stripes. */
+ while (stripes--) {
+ struct stripe *stripe =
+ stripe_alloc(sc, sc->mem_cache_client, grow);
+
+ if (likely(stripe)) {
+ stripe_lru_add(stripe);
+ atomic_inc(&sc->stripes);
+ } else {
+ r = -ENOMEM;
+ break;
+ }
+ }
+
+ return r ? r : sc_hash_resize(sc);
+}
+
+/*
+ * Shrink stripe cache by @stripes entries.
+ *
+ * Only idle LRU stripes can be freed; returns -ENOENT when the LRU
+ * list runs dry before @stripes were released.
+ */
+static int sc_shrink(struct stripe_cache *sc, unsigned stripes)
+{
+ int r = 0;
+
+ /* Try to get unused stripe from LRU list. */
+ while (stripes--) {
+ struct stripe *stripe;
+
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ /* An LRU stripe may never have ios pending! */
+ BUG_ON(stripe_io_ref(stripe));
+ BUG_ON(stripe_ref(stripe));
+ atomic_dec(&sc->stripes);
+ /* Remove from hash if on before deletion. */
+ stripe_hash_del(stripe);
+ stripe_free(stripe, sc->mem_cache_client);
+ } else {
+ r = -ENOENT;
+ break;
+ }
+ }
+
+ /* Check if stats are still sane. */
+ if (atomic_read(&sc->active_stripes_max) >
+ atomic_read(&sc->stripes))
+ atomic_set(&sc->active_stripes_max, 0);
+
+ if (r)
+ return r;
+
+ return atomic_read(&sc->stripes) ? sc_hash_resize(sc) : 0;
+}
+
+/*
+ * Create stripe cache and recovery stripes.
+ *
+ * Returns 0 on success or a negative errno; on failure, partially
+ * created resources are left for the caller to tear down via sc_exit()
+ * (NOTE(review): error paths return early without local cleanup —
+ * confirm all callers invoke sc_exit() on failure).
+ */
+static int sc_init(struct raid_set *rs, unsigned stripes)
+{
+ int r;
+ unsigned i, rstripes;
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+ struct recover *rec = &rs->recover;
+ struct mapped_device *md;
+ struct gendisk *disk;
+
+
+ /* Initialize lists and locks. */
+ i = ARRAY_SIZE(sc->lists);
+ while (i--)
+ INIT_LIST_HEAD(sc->lists + i);
+
+ INIT_LIST_HEAD(&rec->stripes);
+
+ /* Initialize endio and LRU list locks. */
+ i = NR_LOCKS;
+ while (i--)
+ spin_lock_init(sc->locks + i);
+
+ /* Initialize atomic variables. */
+ atomic_set(&sc->stripes, 0);
+ atomic_set(&sc->stripes_to_set, 0);
+ atomic_set(&sc->active_stripes, 0);
+ atomic_set(&sc->active_stripes_max, 0); /* REMOVEME: statistics. */
+
+ /*
+ * We need a runtime unique # to suffix the kmem cache name
+ * because we'll have one for each active RAID set.
+ */
+ md = dm_table_get_md(rs->ti->table);
+ disk = dm_disk(md);
+ snprintf(sc->kc.name, sizeof(sc->kc.name), "%s-%d.%d", TARGET,
+ disk->first_minor, atomic_inc_return(&_stripe_sc_nr));
+ sc->kc.cache = kmem_cache_create(sc->kc.name, stripe_size(rs),
+ 0, 0, NULL);
+ if (!sc->kc.cache)
+ return -ENOMEM;
+
+ /* Create memory cache client context for RAID stripe cache. */
+ sc->mem_cache_client =
+ dm_mem_cache_client_create(stripes, rs->set.raid_devs,
+ chunk_pages(rs->set.io_size));
+ if (IS_ERR(sc->mem_cache_client))
+ return PTR_ERR(sc->mem_cache_client);
+
+ /* Create memory cache client context for RAID recovery stripe(s). */
+ rstripes = rec->recovery_stripes;
+ rec->mem_cache_client =
+ dm_mem_cache_client_create(rstripes, rs->set.raid_devs,
+ chunk_pages(rec->io_size));
+ if (IS_ERR(rec->mem_cache_client))
+ return PTR_ERR(rec->mem_cache_client);
+
+ /* Create dm-io client context for IO stripes. */
+ sc->dm_io_client =
+ dm_io_client_create((stripes > 32 ? 32 : stripes) *
+ rs->set.raid_devs *
+ chunk_pages(rs->set.io_size));
+ if (IS_ERR(sc->dm_io_client))
+ return PTR_ERR(sc->dm_io_client);
+
+ /* FIXME: intermingled with stripe cache initialization. */
+ /* Create dm-io client context for recovery stripes. */
+ rec->dm_io_client =
+ dm_io_client_create(rstripes * rs->set.raid_devs *
+ chunk_pages(rec->io_size));
+ if (IS_ERR(rec->dm_io_client))
+ return PTR_ERR(rec->dm_io_client);
+
+ /* Allocate stripes for set recovery. */
+ while (rstripes--) {
+ stripe = stripe_alloc(sc, rec->mem_cache_client, SC_KEEP);
+ if (!stripe)
+ return -ENOMEM;
+
+ stripe->recover = kzalloc(sizeof(*stripe->recover), GFP_KERNEL);
+ if (!stripe->recover) {
+ stripe_free(stripe, rec->mem_cache_client);
+ return -ENOMEM;
+ }
+
+ SetStripeRecover(stripe);
+ stripe->io.size = rec->io_size;
+ list_add_tail(stripe->lists + LIST_RECOVER, &rec->stripes);
+ /* Don't add recovery stripes to LRU list! */
+ }
+
+ /*
+ * Allocate the stripe objects from the
+ * cache and add them to the LRU list.
+ */
+ r = sc_grow(sc, stripes, SC_KEEP);
+ if (!r)
+ atomic_set(&sc->stripes_last, stripes);
+
+ return r;
+}
+
+/* Destroy the stripe cache (and recovery stripes); safe on partial init. */
+static void sc_exit(struct stripe_cache *sc)
+{
+ struct raid_set *rs = RS(sc);
+
+ if (sc->kc.cache) {
+ stripe_recover_free(rs);
+ BUG_ON(sc_shrink(sc, atomic_read(&sc->stripes)));
+ kmem_cache_destroy(sc->kc.cache);
+ sc->kc.cache = NULL;
+
+ /* Clients may hold ERR_PTRs after a failed sc_init(). */
+ if (sc->mem_cache_client && !IS_ERR(sc->mem_cache_client))
+ dm_mem_cache_client_destroy(sc->mem_cache_client);
+
+ if (sc->dm_io_client && !IS_ERR(sc->dm_io_client))
+ dm_io_client_destroy(sc->dm_io_client);
+
+ hash_exit(&sc->hash);
+ }
+}
+
+/*
+ * Calculate RAID address
+ *
+ * Delivers tuple with the index of the data disk holding the chunk
+ * in the set, the parity disks index and the start of the stripe
+ * within the address space of the set (used as the stripe cache hash key).
+ */
+/* thx MD. */
+static struct raid_address *raid_address(struct raid_set *rs, sector_t sector,
+ struct raid_address *addr)
+{
+ sector_t stripe, tmp;
+
+ /*
+ * chunk_number = sector / chunk_size
+ * stripe_number = chunk_number / data_devs
+ * di = stripe % data_devs;
+ */
+ stripe = sector >> rs->set.chunk_shift;
+ addr->di = sector_div(stripe, rs->set.data_devs);
+
+ switch (rs->set.raid_type->level) {
+ case raid4:
+ addr->pi = rs->set.pi;
+ goto check_shift_di;
+ case raid5:
+ tmp = stripe;
+ addr->pi = sector_div(tmp, rs->set.raid_devs);
+
+ switch (rs->set.raid_type->algorithm) {
+ case left_asym: /* Left asymmetric. */
+ addr->pi = rs->set.data_devs - addr->pi;
+ /* fall through */
+ case right_asym: /* Right asymmetric. */
+check_shift_di:
+ if (addr->di >= addr->pi)
+ addr->di++;
+ break;
+ case left_sym: /* Left symmetric. */
+ addr->pi = rs->set.data_devs - addr->pi;
+ /* fall through */
+ case right_sym: /* Right symmetric. */
+ addr->di = (addr->pi + addr->di + 1) %
+ rs->set.raid_devs;
+ break;
+ case none: /* Ain't happen: RAID4 algorithm placeholder. */
+ BUG();
+ }
+ }
+
+ /*
+ * Start offset of the stripes chunk on any single device of the RAID
+ * set, adjusted in case io size differs from chunk size.
+ */
+ addr->key = (stripe << rs->set.chunk_shift) +
+ (sector & rs->set.io_inv_mask);
+ return addr;
+}
+
+/*
+ * Copy data across between stripe pages and bio vectors.
+ *
+ * @rw: READ copies stripe pages -> bio; otherwise bio -> stripe pages.
+ *
+ * Pay attention to data alignment in stripe and bio pages.
+ * Uses the legacy KM_USER0 kmap_atomic slot API.
+ */
+static void bio_copy_page_list(int rw, struct stripe *stripe,
+ struct page_list *pl, struct bio *bio)
+{
+ unsigned i, page_offset;
+ void *page_addr;
+ struct raid_set *rs = RS(stripe->sc);
+ struct bio_vec *bv;
+
+ /* Get start page in page list for this sector. */
+ i = (bio->bi_sector & rs->set.io_mask) / SECTORS_PER_PAGE;
+ pl = pl_elem(pl, i);
+ BUG_ON(!pl);
+ BUG_ON(!pl->page);
+
+ page_addr = page_address(pl->page);
+ page_offset = to_bytes(bio->bi_sector & (SECTORS_PER_PAGE - 1));
+
+ /* Walk all segments and copy data across between bio_vecs and pages. */
+ bio_for_each_segment(bv, bio, i) {
+ int len = bv->bv_len, size;
+ unsigned bio_offset = 0;
+ void *bio_addr = __bio_kmap_atomic(bio, i, KM_USER0);
+redo:
+ /* Clamp the copy to the end of the current stripe page. */
+ size = (page_offset + len > PAGE_SIZE) ?
+ PAGE_SIZE - page_offset : len;
+
+ if (rw == READ)
+ memcpy(bio_addr + bio_offset,
+ page_addr + page_offset, size);
+ else
+ memcpy(page_addr + page_offset,
+ bio_addr + bio_offset, size);
+
+ page_offset += size;
+ if (page_offset == PAGE_SIZE) {
+ /*
+ * We reached the end of the chunk page ->
+ * need to refer to the next one to copy more data.
+ */
+ len -= size;
+ if (len) {
+ /* Get next page. */
+ pl = pl->next;
+ BUG_ON(!pl);
+ BUG_ON(!pl->page);
+ page_addr = page_address(pl->page);
+ page_offset = 0;
+ bio_offset += size;
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIO_COPY_PL_NEXT);
+ goto redo;
+ }
+ }
+
+ __bio_kunmap_atomic(bio_addr, KM_USER0);
+ }
+}
+
+/*
+ * Xor optimization macros.
+ */
+/* Xor data pointer declaration and initialization macros. */
+#define DECLARE_2 unsigned long *d0 = data[0], *d1 = data[1]
+#define DECLARE_3 DECLARE_2, *d2 = data[2]
+#define DECLARE_4 DECLARE_3, *d3 = data[3]
+#define DECLARE_5 DECLARE_4, *d4 = data[4]
+#define DECLARE_6 DECLARE_5, *d5 = data[5]
+#define DECLARE_7 DECLARE_6, *d6 = data[6]
+#define DECLARE_8 DECLARE_7, *d7 = data[7]
+
+/* Xor unroll macros; result accumulates into d0[n]. */
+#define D2(n) d0[n] = d0[n] ^ d1[n]
+#define D3(n) D2(n) ^ d2[n]
+#define D4(n) D3(n) ^ d3[n]
+#define D5(n) D4(n) ^ d4[n]
+#define D6(n) D5(n) ^ d5[n]
+#define D7(n) D6(n) ^ d6[n]
+#define D8(n) D7(n) ^ d7[n]
+
+/* Repeat a D-macro 2/4/.../64 times, unrolling the inner loop. */
+#define X_2(macro, offset) macro(offset); macro(offset + 1);
+#define X_4(macro, offset) X_2(macro, offset); X_2(macro, offset + 2);
+#define X_8(macro, offset) X_4(macro, offset); X_4(macro, offset + 4);
+#define X_16(macro, offset) X_8(macro, offset); X_8(macro, offset + 8);
+#define X_32(macro, offset) X_16(macro, offset); X_16(macro, offset + 16);
+#define X_64(macro, offset) X_32(macro, offset); X_32(macro, offset + 32);
+
+/* Define a _xor_#chunks_#xors_per_run() function. */
+#define _XOR(chunks, xors_per_run) \
+static void _xor ## chunks ## _ ## xors_per_run(unsigned long **data) \
+{ \
+ unsigned end = XOR_SIZE / sizeof(data[0]), i; \
+\
+ DECLARE_ ## chunks; \
+\
+ for (i = 0; i < end; i += xors_per_run) { \
+ X_ ## xors_per_run(D ## chunks, i); \
+ } \
+}
+
+/* Define xor functions for 2 - 8 chunks and xors per run. */
+#define MAKE_XOR_PER_RUN(xors_per_run) \
+ _XOR(2, xors_per_run); _XOR(3, xors_per_run); \
+ _XOR(4, xors_per_run); _XOR(5, xors_per_run); \
+ _XOR(6, xors_per_run); _XOR(7, xors_per_run); \
+ _XOR(8, xors_per_run);
+
+MAKE_XOR_PER_RUN(8) /* Define _xor_*_8() functions. */
+MAKE_XOR_PER_RUN(16) /* Define _xor_*_16() functions. */
+MAKE_XOR_PER_RUN(32) /* Define _xor_*_32() functions. */
+MAKE_XOR_PER_RUN(64) /* Define _xor_*_64() functions. */
+
+/*
+ * Build a per-unroll-factor dispatch table plus a xor_<N>() entry point
+ * that indexes it by chunk count (entries 0/1 are unused NULLs).
+ */
+#define MAKE_XOR(xors_per_run) \
+struct { \
+ void (*f)(unsigned long **); \
+} static xor_funcs ## xors_per_run[] = { \
+ { NULL }, /* NULL pointers to optimize indexing in xor(). */ \
+ { NULL }, \
+ { _xor2_ ## xors_per_run }, \
+ { _xor3_ ## xors_per_run }, \
+ { _xor4_ ## xors_per_run }, \
+ { _xor5_ ## xors_per_run }, \
+ { _xor6_ ## xors_per_run }, \
+ { _xor7_ ## xors_per_run }, \
+ { _xor8_ ## xors_per_run }, \
+}; \
+\
+static void xor_ ## xors_per_run(unsigned n, unsigned long **data) \
+{ \
+ /* Call respective function for amount of chunks. */ \
+ xor_funcs ## xors_per_run[n].f(data); \
+}
+
+/* Define xor_8() - xor_64 functions. */
+MAKE_XOR(8)
+MAKE_XOR(16)
+MAKE_XOR(32)
+MAKE_XOR(64)
+/*
+ * END xor optimization macros.
+ */
+
+/* Maximum number of chunks, which can be xor'ed in one go. */
+#define XOR_CHUNKS_MAX (ARRAY_SIZE(xor_funcs8) - 1)
+
+/* xor_blocks wrapper to allow for using that crypto library function. */
+static void xor_blocks_wrapper(unsigned n, unsigned long **data)
+{
+ BUG_ON(n < 2 || n > MAX_XOR_BLOCKS + 1);
+ xor_blocks(n - 1, XOR_SIZE, (void *) data[0], (void **) data + 1);
+}
+
+/* Candidate xor implementations, benchmarked at set construction time. */
+struct xor_func {
+ xor_function_t f;
+ const char *name;
+} static xor_funcs[] = {
+ { xor_64, "xor_64" },
+ { xor_32, "xor_32" },
+ { xor_16, "xor_16" },
+ { xor_8, "xor_8" },
+ { xor_blocks_wrapper, "xor_blocks" },
+};
+
+/*
+ * Check, if chunk has to be xored in/out:
+ *
+ * o if writes are queued
+ * o if writes are merged
+ * o if stripe is to be reconstructed
+ * o if recovery stripe
+ *
+ * Only uptodate chunks can take part in a xor.
+ */
+static inline int chunk_must_xor(struct stripe_chunk *chunk)
+{
+ if (ChunkUptodate(chunk)) {
+ /* Queued and merged writes may never coexist on a chunk. */
+ BUG_ON(!bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)) &&
+ !bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED)));
+
+ if (!bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)) ||
+ !bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED)))
+ return 1;
+
+ if (StripeReconstruct(chunk->stripe) ||
+ StripeRecover(chunk->stripe))
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Calculate parity by xor.
+ *
+ * This indexes into the chunks of a stripe and their pages.
+ *
+ * All chunks will be xored into the indexed (@pi)
+ * chunk in maximum groups of xor.chunks.
+ *
+ */
+static void xor(struct stripe *stripe, unsigned pi, unsigned sector)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned max_chunks = rs->xor.chunks, n = 1,
+ o = sector / SECTORS_PER_PAGE, /* Offset into the page_list. */
+ p = rs->set.raid_devs;
+ unsigned long **d = rs->data;
+ xor_function_t xor_f = rs->xor.f->f;
+
+ BUG_ON(sector > stripe->io.size);
+
+ /* Address of parity page to xor into. */
+ d[0] = page_address(pl_elem(PL(stripe, pi), o)->page);
+
+ while (p--) {
+ /* Preset pointers to data pages. */
+ if (p != pi && chunk_must_xor(CHUNK(stripe, p)))
+ d[n++] = page_address(pl_elem(PL(stripe, p), o)->page);
+
+ /* If max chunks -> xor. */
+ if (n == max_chunks) {
+ /* Serialize use of the shared rs->data pointer array. */
+ mutex_lock(&rs->io.xor_lock);
+ xor_f(n, d);
+ mutex_unlock(&rs->io.xor_lock);
+ n = 1;
+ }
+ }
+
+ /* If chunks -> xor. */
+ if (n > 1) {
+ mutex_lock(&rs->io.xor_lock);
+ xor_f(n, d);
+ mutex_unlock(&rs->io.xor_lock);
+ }
+}
+
+/* Common xor loop through all stripe page lists. */
+static void common_xor(struct stripe *stripe, sector_t count,
+ unsigned off, unsigned pi)
+{
+ unsigned sector;
+
+ BUG_ON(!count);
+ /*
+ * NOTE(review): loop runs from 'off' to 'count'; for off > 0 with
+ * count == chunk-relative length this iterates less (or not at all)
+ * than 'count' sectors — confirm 'count' is meant as an end sector.
+ */
+ for (sector = off; sector < count; sector += SECTORS_PER_PAGE)
+ xor(stripe, pi, sector);
+
+ /* Set parity page uptodate and clean. */
+ chunk_set(CHUNK(stripe, pi), CLEAN);
+ atomic_inc(RS(stripe->sc)->stats + S_XORS); /* REMOVEME: statistics. */
+}
+
+/*
+ * Calculate parity sectors on intact stripes.
+ *
+ * Need to calculate raid address for recover stripe, because its
+ * chunk sizes differs and is typically larger than io chunk size.
+ */
+static void parity_xor(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int size_differs = stripe->io.size != rs->set.io_size;
+ unsigned chunk_size = rs->set.chunk_size, io_size = stripe->io.size,
+ xor_size = chunk_size > io_size ? io_size : chunk_size;
+ sector_t off;
+
+ /* This can be the recover stripe with a larger io size. */
+ for (off = 0; off < io_size; off += xor_size) {
+ /*
+ * Recover stripe is likely bigger than regular io
+ * ones and has no precalculated parity disk index ->
+ * need to calculate RAID address.
+ */
+ if (unlikely(size_differs)) {
+ struct raid_address addr;
+
+ raid_address(rs, (stripe->key + off) *
+ rs->set.data_devs, &addr);
+ stripe->idx.parity = addr.pi;
+ stripe_zero_pl_part(stripe, addr.pi, off, xor_size);
+ }
+
+ common_xor(stripe, xor_size, off, stripe->idx.parity);
+ chunk_set(CHUNK(stripe, stripe->idx.parity), DIRTY);
+ }
+}
+
+/* Reconstruct missing chunk from all other (uptodate) chunks. */
+static void stripe_reconstruct(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ int p = rs->set.raid_devs, pr = stripe->idx.recover;
+
+ BUG_ON(pr < 0);
+
+ /* Check if all but the chunk to be reconstructed are uptodate. */
+ while (p--)
+ BUG_ON(p != pr && !ChunkUptodate(CHUNK(stripe, p)));
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (RSDegraded(rs) ? S_RECONSTRUCT_EI :
+ S_RECONSTRUCT_DEV));
+ /* Zero chunk to be reconstructed. */
+ stripe_zero_chunk(stripe, pr);
+ common_xor(stripe, stripe->io.size, 0, pr);
+}
+
+/*
+ * Recovery io throttling
+ */
+/* Conditionally reset io counters; returns 1 when reset happened. */
+static int recover_io_reset(struct raid_set *rs)
+{
+ unsigned long j = jiffies;
+
+ /* Pay attention to jiffies overflows. */
+ if (j > rs->recover.last_jiffies + HZ ||
+ j < rs->recover.last_jiffies) {
+ atomic_set(rs->recover.io_count + IO_WORK, 0);
+ atomic_set(rs->recover.io_count + IO_RECOVER, 0);
+ rs->recover.last_jiffies = j;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Count ios, split by recovery vs. regular work. */
+static void recover_io_count(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ atomic_inc(rs->recover.io_count +
+ (StripeRecover(stripe) ? IO_RECOVER : IO_WORK));
+}
+
+/*
+ * Try getting a stripe either from the hash or from the LRU list.
+ *
+ * Returns a referenced stripe or NULL (cache exhausted or cluster
+ * lock denied).
+ */
+static struct stripe *stripe_find(struct raid_set *rs,
+ struct raid_address *addr)
+{
+ int r;
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+
+ /* Try stripe from hash. */
+ stripe = stripe_lookup(sc, addr->key);
+ if (stripe) {
+ r = stripe_get(stripe);
+ if (r)
+ goto get_lock_failed;
+
+ atomic_inc(rs->stats + S_HITS_1ST); /* REMOVEME: statistics. */
+ } else {
+ /* Not in hash -> try to get an LRU stripe. */
+ stripe = stripe_lru_pop(sc);
+ if (stripe) {
+ /*
+ * An LRU stripe may not be referenced
+ * and may never have ios pending!
+ */
+ BUG_ON(stripe_ref(stripe));
+ BUG_ON(stripe_io_ref(stripe));
+
+ /* Remove from hash if on before reuse. */
+ stripe_hash_del(stripe);
+
+ /* Invalidate before reinserting with changed key. */
+ stripe_invalidate(stripe);
+
+ stripe->key = addr->key;
+ stripe->region = dm_rh_sector_to_region(rs->recover.rh,
+ addr->key);
+ stripe->idx.parity = addr->pi;
+ r = stripe_get(stripe);
+ if (r)
+ goto get_lock_failed;
+
+ /* Insert stripe into the stripe hash. */
+ stripe_insert(&sc->hash, stripe);
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_INSCACHE);
+ }
+ }
+
+ return stripe;
+
+get_lock_failed:
+ stripe_put(stripe);
+ return NULL;
+}
+
+/*
+ * Process end io
+ *
+ * I need to do it here because I can't in interrupt
+ */
+/* End io all bios on a bio list; @error is passed through to bio_endio(). */
+static void bio_list_endio(struct stripe *stripe, struct bio_list *bl,
+ int p, int error)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ struct bio *bio;
+ struct page_list *pl = PL(stripe, p);
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+
+ /* Update region counters. */
+ while ((bio = bio_list_pop(bl))) {
+ if (bio_data_dir(bio) == WRITE)
+ /* Drop io pending count for any writes. */
+ dm_rh_dec(rs->recover.rh, stripe->region);
+ else if (!error)
+ /* Copy data across. */
+ bio_copy_page_list(READ, stripe, pl, bio);
+
+ bio_endio(bio, error);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (bio_data_dir(bio) == READ ?
+ S_BIOS_ENDIO_READ : S_BIOS_ENDIO_WRITE));
+
+ chunk_put(chunk);
+ stripe_put(stripe);
+ io_put(rs); /* Wake any suspend waiters on last bio. */
+ }
+}
+
+/*
+ * End io all reads/writes on a stripe copying
+ * read data across from stripe to bios and
+ * decrementing region counters for writes.
+ *
+ * Processing of ios depending on state:
+ * o no chunk error -> endio ok
+ * o degraded:
+ * - chunk error and read -> ignore to be requeued
+ * - chunk error and write -> endio ok
+ * o dead (more than parity_devs failed) and chunk_error-> endio failed
+ */
+static void stripe_endio(int rw, struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned p = rs->set.raid_devs;
+ int write = (rw != READ);
+
+ while (p--) {
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+ struct bio_list *bl;
+
+ BUG_ON(ChunkLocked(chunk));
+
+ bl = BL_CHUNK(chunk, rw);
+ if (bio_list_empty(bl))
+ continue;
+
+ if (unlikely(ChunkError(chunk) || !ChunkUptodate(chunk))) {
+ /* RAID set dead. */
+ if (unlikely(RSDead(rs)))
+ bio_list_endio(stripe, bl, p, -EIO);
+ /* RAID set degraded. */
+ else if (write)
+ bio_list_endio(stripe, bl, p, 0);
+ } else {
+ BUG_ON(!RSDegraded(rs) && ChunkDirty(chunk));
+ bio_list_endio(stripe, bl, p, 0);
+ }
+ }
+}
+
+/* Fail all ios hanging off all bio lists of a stripe. */
+static void stripe_fail_io(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned p = rs->set.raid_devs;
+
+ while (p--) {
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+ int i = ARRAY_SIZE(chunk->bl);
+
+ /* Fail all bios on all bio lists of the stripe. */
+ while (i--) {
+ struct bio_list *bl = chunk->bl + i;
+
+ if (!bio_list_empty(bl))
+ bio_list_endio(stripe, bl, p, -EIO);
+ }
+ }
+
+ /* Put stripe on LRU list. */
+ BUG_ON(stripe_io_ref(stripe));
+ BUG_ON(stripe_ref(stripe));
+}
+
+/* Unlock all chunks flagged for deferred unlock by endio(). */
+static void stripe_chunks_unlock(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+ struct stripe_chunk *chunk;
+
+ while (p--) {
+ chunk = CHUNK(stripe, p);
+
+ if (TestClearChunkUnlock(chunk))
+ ClearChunkLocked(chunk);
+ }
+}
+
+/*
+ * Queue reads and writes to a stripe by hanging
+ * their bios off the stripesets read/write lists.
+ *
+ * Returns 1 for a queued write, 0 otherwise; rejected bios go on
+ * @reject for requeueing.
+ */
+static int stripe_queue_bio(struct raid_set *rs, struct bio *bio,
+ struct bio_list *reject)
+{
+ struct raid_address addr;
+ struct stripe *stripe;
+
+ stripe = stripe_find(rs, raid_address(rs, bio->bi_sector, &addr));
+ if (stripe) {
+ int r = 0, rw = bio_data_dir(bio);
+
+ /* Distinguish reads and writes. */
+ bio_list_add(BL(stripe, addr.di, rw), bio);
+
+ if (rw == READ)
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIOS_ADDED_READ);
+ else {
+ /* Increment pending write count on region. */
+ dm_rh_inc(rs->recover.rh, stripe->region);
+ r = 1;
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BIOS_ADDED_WRITE);
+ }
+
+ /*
+ * Put on io (flush) list in case of
+ * initial bio queued to chunk.
+ */
+ if (chunk_get(CHUNK(stripe, addr.di)) == 1)
+ stripe_flush_add(stripe);
+
+ return r;
+ }
+
+ /* Got no stripe from cache or failed to lock it -> reject bio. */
+ bio_list_add(reject, bio);
+ atomic_inc(rs->stats + S_IOS_POST); /* REMOVEME: statistics. */
+ return 0;
+}
+
+/*
+ * Handle all stripes by handing them to the daemon, because we can't
+ * map their chunk pages to copy the data in interrupt context.
+ *
+ * We don't want to handle them here either, while interrupts are disabled.
+ */
+
+/* Read/write endio function for dm-io (interrupt context). */
+static void endio(unsigned long error, void *context)
+{
+ struct stripe_chunk *chunk = context;
+
+ if (unlikely(error)) {
+ chunk_set(chunk, ERROR);
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(chunk->stripe->sc)->stats + S_STRIPE_ERROR);
+ } else
+ chunk_set(chunk, CLEAN);
+
+ /*
+ * For recovery stripes, I need to reset the locked state
+ * here, because those aren't processed in do_endios().
+ */
+ if (unlikely(StripeRecover(chunk->stripe)))
+ ClearChunkLocked(chunk);
+ else
+ SetChunkUnlock(chunk);
+
+ /* Indirectly puts stripe on cache's endio list via stripe_io_put(). */
+ stripe_put_references(chunk->stripe);
+}
+
+/*
+ * Read/Write a chunk asynchronously via dm-io.
+ *
+ * Direction is derived from chunk state: dirty -> WRITE, else READ.
+ * Completion is signalled through endio() with the chunk as context.
+ */
+static void stripe_chunk_rw(struct stripe *stripe, unsigned p)
+{
+ struct stripe_cache *sc = stripe->sc;
+ struct raid_set *rs = RS(sc);
+ struct dm_mem_cache_object *obj = stripe->obj + p;
+ struct page_list *pl = obj->pl;
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+ struct raid_dev *dev = rs->dev + p;
+ struct dm_io_region io = {
+ .bdev = dev->dev->bdev,
+ .sector = stripe->key,
+ .count = stripe->io.size,
+ };
+ struct dm_io_request control = {
+ .bi_rw = ChunkDirty(chunk) ? WRITE : READ,
+ .mem = {
+ .type = DM_IO_PAGE_LIST,
+ .ptr.pl = pl,
+ .offset = 0,
+ },
+ .notify = {
+ .fn = endio,
+ .context = chunk,
+ },
+ .client = StripeRecover(stripe) ? rs->recover.dm_io_client :
+ sc->dm_io_client,
+ };
+
+ BUG_ON(ChunkLocked(chunk));
+ BUG_ON(!ChunkUptodate(chunk) && ChunkDirty(chunk));
+ BUG_ON(ChunkUptodate(chunk) && !ChunkDirty(chunk));
+
+ /*
+ * Don't rw past end of device, which can happen, because
+ * typically sectors_per_dev isn't divisible by io_size.
+ */
+ if (unlikely(io.sector + io.count > rs->set.sectors_per_dev))
+ io.count = rs->set.sectors_per_dev - io.sector;
+
+ BUG_ON(!io.count);
+ io.sector += dev->start; /* Add <offset>. */
+ if (RSRecover(rs))
+ recover_io_count(stripe); /* Recovery io accounting. */
+
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + (ChunkDirty(chunk) ? S_DM_IO_WRITE :
+ S_DM_IO_READ));
+ SetChunkLocked(chunk);
+ SetDevIoQueued(dev);
+ BUG_ON(dm_io(&control, 1, &io, NULL));
+}
+
+/*
+ * Write dirty or read not uptodate page lists of a stripe.
+ *
+ * Returns the number of chunks needing io (0 if none).
+ */
+static int stripe_chunks_rw(struct stripe *stripe)
+{
+ int r;
+ struct raid_set *rs = RS(stripe->sc);
+
+ /*
+ * Increment the pending count on the stripe
+ * first, so that we don't race in endio().
+ *
+ * An inc (IO) is needed for any chunk unless !ChunkIo(chunk):
+ *
+ * o not uptodate
+ * o dirtied by writes merged
+ * o dirtied by parity calculations
+ */
+ r = for_each_io_dev(stripe, stripe_get_references);
+ if (r) {
+ /* Io needed: chunks are either not uptodate or dirty. */
+ int max; /* REMOVEME: */
+ struct stripe_cache *sc = &rs->sc;
+
+ /* Submit actual io. */
+ for_each_io_dev(stripe, stripe_chunk_rw);
+
+ /* REMOVEME: statistics */
+ max = sc_active(sc);
+ if (atomic_read(&sc->active_stripes_max) < max)
+ atomic_set(&sc->active_stripes_max, max);
+
+ atomic_inc(rs->stats + S_FLUSHS);
+ /* END REMOVEME: statistics */
+ }
+
+ return r;
+}
+
+/* Merge in all writes hence dirtying respective chunks. */
+static void stripe_merge_writes(struct stripe *stripe)
+{
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+ struct bio_list *write = BL_CHUNK(chunk, WRITE_QUEUED);
+
+ if (!bio_list_empty(write)) {
+ struct bio *bio;
+ struct page_list *pl = stripe->obj[p].pl;
+
+ /*
+ * We can play with the lists without holding a lock,
+ * because it is just us accessing them anyway.
+ */
+ bio_list_for_each(bio, write)
+ bio_copy_page_list(WRITE, stripe, pl, bio);
+
+ bio_list_merge(BL_CHUNK(chunk, WRITE_MERGED), write);
+ bio_list_init(write);
+ chunk_set(chunk, DIRTY);
+ }
+ }
+}
+
+/* Queue all writes to get merged. */
+static int stripe_queue_writes(struct stripe *stripe)
+{
+ int r = 0;
+ unsigned p = RS(stripe->sc)->set.raid_devs;
+
+ while (p--) {
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+ struct bio_list *write = BL_CHUNK(chunk, WRITE);
+
+ if (!bio_list_empty(write)) {
+ bio_list_merge(BL_CHUNK(chunk, WRITE_QUEUED), write);
+ bio_list_init(write);
+SetChunkIo(chunk);
+ r = 1;
+ }
+ }
+
+ return r;
+}
+
+
+/* Check, if a chunk gets completely overwritten. */
+static int stripe_check_chunk_overwrite(struct stripe *stripe, unsigned p)
+{
+ unsigned sectors = 0;
+ struct bio *bio;
+ struct bio_list *bl = BL(stripe, p, WRITE_QUEUED);
+
+ bio_list_for_each(bio, bl)
+ sectors += bio_sectors(bio);
+
+ BUG_ON(sectors > RS(stripe->sc)->set.io_size);
+ return sectors == RS(stripe->sc)->set.io_size;
+}
+
+/*
+ * Avoid io on broken/reconstructed drive in order to
+ * reconstruct data on endio.
+ *
+ * (*1*) We set StripeReconstruct() in here, so that _do_endios()
+ * will trigger a reconstruct call before resetting it.
+ */
+static int stripe_chunk_set_io_flags(struct stripe *stripe, int pr)
+{
+ struct stripe_chunk *chunk = CHUNK(stripe, pr);
+
+ /*
+ * Allow io on all chunks but the indexed one,
+ * because we're either degraded or prohibit it
+ * on the one for later reconstruction.
+ */
+ /* Includes ClearChunkIo(), ClearChunkUptodate(). */
+ stripe_chunk_invalidate(chunk);
+ stripe->idx.recover = pr;
+ SetStripeReconstruct(stripe);
+
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ return -EPERM;
+}
+
+/* Chunk locked/uptodate and device failed tests. */
+static struct stripe_chunk *
+stripe_chunk_check(struct stripe *stripe, unsigned p, unsigned *chunks_uptodate)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ struct stripe_chunk *chunk = CHUNK(stripe, p);
+
+ /* Can't access active chunks. */
+ if (ChunkLocked(chunk)) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_CHUNK_LOCKED);
+ return NULL;
+ }
+
+ /* Can't access broken device. */
+ if (ChunkError(chunk) || DevFailed(rs->dev + p))
+ return NULL;
+
+ /* Can access uptodate chunks. */
+ if (ChunkUptodate(chunk)) {
+ (*chunks_uptodate)++;
+ return NULL;
+ }
+
+ return chunk;
+}
+
+/*
+ * Degraded/reconstruction mode.
+ *
+ * Check stripe state to figure which chunks don't need IO.
+ *
+ * Returns 0 for fully operational, -EPERM for degraded/resynchronizing.
+ */
+static int stripe_check_reconstruct(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+
+ if (RSDead(rs)) {
+ ClearStripeReconstruct(stripe);
+ ClearStripeReconstructed(stripe);
+ stripe_allow_io(stripe);
+ return 0;
+ }
+
+ /* Avoid further reconstruction setting, when already set. */
+ if (StripeReconstruct(stripe)) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_RECONSTRUCT_SET);
+ return -EBUSY;
+ }
+
+ /* Initially allow io on all chunks. */
+ stripe_allow_io(stripe);
+
+ /* Return if stripe is already reconstructed. */
+ if (StripeReconstructed(stripe)) {
+ atomic_inc(rs->stats + S_RECONSTRUCTED);
+ return 0;
+ }
+
+ /*
+ * Degraded/reconstruction mode (device failed) ->
+ * avoid io on the failed device.
+ */
+ if (unlikely(RSDegraded(rs))) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_DEGRADED);
+ /* Allow IO on all devices but the dead one. */
+ BUG_ON(rs->set.ei < 0);
+ return stripe_chunk_set_io_flags(stripe, rs->set.ei);
+ } else {
+ int sync, pi = dev_for_parity(stripe, &sync);
+
+ /*
+ * Reconstruction mode (ie. a particular (replaced) device or
+ * some (rotating) parity chunk is being resynchronized) ->
+ * o make sure all needed chunks are read in
+ * o cope with 3/4 disk array special case where it
+ * doesn't make a difference to read in parity
+ * to xor data in/out
+ */
+ if (RSEnforceParityCreation(rs) || !sync) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NOSYNC);
+ /* Allow IO on all devs but the one to reconstruct. */
+ return stripe_chunk_set_io_flags(stripe, pi);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Check, if stripe is ready to merge writes.
+ * I.e. if all chunks present to allow to merge bios.
+ *
+ * We prohibit io on:
+ *
+ * o chunks without bios
+ * o chunks which get completely written over
+ */
+static int stripe_merge_possible(struct stripe *stripe, int nosync)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned chunks_overwrite = 0, chunks_prohibited = 0,
+ chunks_uptodate = 0, p = rs->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct stripe_chunk *chunk;
+
+ /* Prohibit io on broken devices. */
+ if (DevFailed(rs->dev + p)) {
+ chunk = CHUNK(stripe, p);
+ goto prohibit_io;
+ }
+
+ /* We can't optimize any further if no chunk. */
+ chunk = stripe_chunk_check(stripe, p, &chunks_uptodate);
+ if (!chunk || nosync)
+ continue;
+
+ /*
+ * We have a chunk, which is not uptodate.
+ *
+ * If this is not parity and we don't have
+ * reads queued, we can optimize further.
+ */
+ if (p != stripe->idx.parity &&
+ bio_list_empty(BL_CHUNK(chunk, READ)) &&
+ bio_list_empty(BL_CHUNK(chunk, WRITE_MERGED))) {
+ if (bio_list_empty(BL_CHUNK(chunk, WRITE_QUEUED)))
+ goto prohibit_io;
+ else if (RSCheckOverwrite(rs) &&
+ stripe_check_chunk_overwrite(stripe, p))
+ /* Completely overwritten chunk. */
+ chunks_overwrite++;
+ }
+
+ /* Allow io for chunks with bios and overwritten ones. */
+ SetChunkIo(chunk);
+ continue;
+
+prohibit_io:
+ /* No io for broken devices or for chunks w/o bios. */
+ ClearChunkIo(chunk);
+ chunks_prohibited++;
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ }
+
+ /* All data chunks will get written over. */
+ if (chunks_overwrite == rs->set.data_devs)
+ atomic_inc(rs->stats + S_OVERWRITE); /* REMOVEME: statistics.*/
+ else if (chunks_uptodate + chunks_prohibited < rs->set.raid_devs) {
+ /* We don't have enough chunks to merge. */
+ atomic_inc(rs->stats + S_CANT_MERGE); /* REMOVEME: statistics.*/
+ return -EPERM;
+ }
+
+ /*
+ * If we have all chunks up to date or overwrite them, we
+ * just zero the parity chunk and let stripe_rw() recreate it.
+ */
+ if (chunks_uptodate == rs->set.raid_devs ||
+ chunks_overwrite == rs->set.data_devs) {
+ stripe_zero_chunk(stripe, stripe->idx.parity);
+ BUG_ON(StripeReconstruct(stripe));
+ SetStripeReconstruct(stripe); /* Enforce xor in caller. */
+ } else {
+ /*
+ * With less chunks, we xor parity out.
+ *
+ * (*4*) We rely on !StripeReconstruct() in chunk_must_xor(),
+ * so that only chunks with queued or merged writes
+ * are being xored.
+ */
+ parity_xor(stripe);
+ }
+
+ /*
+ * We do have enough chunks to merge.
+ * All chunks are uptodate or get written over.
+ */
+ atomic_inc(rs->stats + S_CAN_MERGE); /* REMOVEME: statistics. */
+ return 0;
+}
+
+/*
+ * Avoid reading chunks in case we're fully operational.
+ *
+ * We prohibit io on any chunks without bios but the parity chunk.
+ */
+static void stripe_avoid_reads(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ unsigned dummy = 0, p = rs->set.raid_devs;
+
+ /* Walk all chunks. */
+ while (p--) {
+ struct stripe_chunk *chunk =
+ stripe_chunk_check(stripe, p, &dummy);
+
+ if (!chunk)
+ continue;
+
+ /* If parity or any bios pending -> allow io. */
+ if (chunk_ref(chunk) || p == stripe->idx.parity)
+ SetChunkIo(chunk);
+ else {
+ ClearChunkIo(chunk);
+ /* REMOVEME: statistics. */
+ atomic_inc(RS(stripe->sc)->stats + S_PROHIBITCHUNKIO);
+ }
+ }
+}
+
+/*
+ * Read/write a stripe.
+ *
+ * All stripe read/write activity goes through this function
+ * unless recovery, which has to call stripe_chunk_rw() directly.
+ *
+ * Make sure we don't try already merged stripes in order
+ * to avoid data corruption.
+ *
+ * Check the state of the RAID set and if degraded (or
+ * resynchronizing for reads), read in all other chunks but
+ * the one on the dead/resynchronizing device in order to be
+ * able to reconstruct the missing one in _do_endios().
+ *
+ * Can be called on active stripes in order
+ * to dispatch new io on inactive chunks.
+ *
+ * States to cover:
+ * o stripe to read and/or write
+ * o stripe with error to reconstruct
+ */
+static int stripe_rw(struct stripe *stripe)
+{
+ int nosync, r;
+ struct raid_set *rs = RS(stripe->sc);
+
+ /*
+ * Check, if a chunk needs to be reconstructed
+ * because of a degraded set or a region out of sync.
+ */
+ nosync = stripe_check_reconstruct(stripe);
+ switch (nosync) {
+ case -EBUSY:
+ return 0; /* Wait for stripe reconstruction to finish. */
+ case -EPERM:
+ goto io;
+ }
+
+ /*
+ * If we don't have merged writes pending, we can schedule
+ * queued writes to be merged next without corrupting data.
+ */
+ if (!StripeMerged(stripe)) {
+ r = stripe_queue_writes(stripe);
+ if (r)
+ /* Writes got queued -> flag RBW. */
+ SetStripeRBW(stripe);
+ }
+
+ /*
+ * Merge all writes hanging off uptodate/overwritten
+ * chunks of the stripe.
+ */
+ if (StripeRBW(stripe)) {
+ r = stripe_merge_possible(stripe, nosync);
+ if (!r) { /* Merge possible. */
+ struct stripe_chunk *chunk;
+
+ /*
+ * I rely on valid parity in order
+ * to xor a fraction of chunks out
+ * of parity and back in.
+ */
+ stripe_merge_writes(stripe); /* Merge writes in. */
+ parity_xor(stripe); /* Update parity. */
+ ClearStripeReconstruct(stripe); /* Reset xor enforce. */
+ SetStripeMerged(stripe); /* Writes merged. */
+ ClearStripeRBW(stripe); /* Disable RBW. */
+
+ /*
+ * REMOVEME: sanity check on parity chunk
+ * states after writes got merged.
+ */
+ chunk = CHUNK(stripe, stripe->idx.parity);
+ BUG_ON(ChunkLocked(chunk));
+ BUG_ON(!ChunkUptodate(chunk));
+ BUG_ON(!ChunkDirty(chunk));
+ BUG_ON(!ChunkIo(chunk));
+ }
+ } else if (!nosync && !StripeMerged(stripe))
+ /* Read avoidance if not degraded/resynchronizing/merged. */
+ stripe_avoid_reads(stripe);
+
+io:
+ /* Now submit any reads/writes for non-uptodate or dirty chunks. */
+ r = stripe_chunks_rw(stripe);
+ if (!r) {
+ /*
+ * No io submitted because of chunk io
+ * prohibited or locked chunks/failed devices
+ * -> push to end io list for processing.
+ */
+ stripe_endio_push(stripe);
+ atomic_inc(rs->stats + S_NO_RW); /* REMOVEME: statistics. */
+ }
+
+ return r;
+}
+
+/*
+ * Recovery functions
+ */
+/* Read a stripe off a raid set for recovery. */
+static int stripe_recover_read(struct stripe *stripe, int pi)
+{
+ BUG_ON(stripe_io_ref(stripe));
+
+ /* Invalidate all chunks so that they get read in. */
+ stripe_chunks_invalidate(stripe);
+ stripe_allow_io(stripe); /* Allow io on all recovery chunks. */
+
+ /*
+ * If we are reconstructing a particular device, we can avoid
+ * reading the respective chunk in, because we're going to
+ * reconstruct it anyway.
+ *
+ * We can't do that for resynchronization of rotating parity,
+ * because the recovery stripe chunk size is typically larger
+ * than the sets chunk size.
+ */
+ if (pi > -1)
+ ClearChunkIo(CHUNK(stripe, pi));
+
+ return stripe_chunks_rw(stripe);
+}
+
+/* Write a stripe to a raid set for recovery. */
+static int stripe_recover_write(struct stripe *stripe, int pi)
+{
+ BUG_ON(stripe_io_ref(stripe));
+
+ /*
+ * If this is a reconstruct of a particular device, then
+ * reconstruct the respective chunk, else create parity chunk.
+ */
+ if (pi > -1) {
+ stripe_zero_chunk(stripe, pi);
+ common_xor(stripe, stripe->io.size, 0, pi);
+ chunk_set(CHUNK(stripe, pi), DIRTY);
+ } else
+ parity_xor(stripe);
+
+ return stripe_chunks_rw(stripe);
+}
+
+/* Read/write a recovery stripe. */
+static int stripe_recover_rw(struct stripe *stripe)
+{
+ int r = 0, sync = 0;
+
+ /* Read/write flip-flop. */
+ if (TestClearStripeRBW(stripe)) {
+ SetStripeMerged(stripe);
+ stripe->key = stripe->recover->pos;
+ r = stripe_recover_read(stripe, dev_for_parity(stripe, &sync));
+ BUG_ON(!r);
+ } else if (TestClearStripeMerged(stripe)) {
+ r = stripe_recover_write(stripe, dev_for_parity(stripe, &sync));
+ BUG_ON(!r);
+ }
+
+ BUG_ON(sync);
+ return r;
+}
+
+/* Recovery bandwidth available? */
+static int recover_bandwidth(struct raid_set *rs)
+{
+ int r, work;
+
+ /* On reset or when bios delayed -> allow recovery. */
+ r = recover_io_reset(rs);
+ if (r || RSBandwidth(rs))
+ goto out;
+
+ work = atomic_read(rs->recover.io_count + IO_WORK);
+ if (work) {
+ /* Pay attention to larger recover stripe size. */
+ int recover = atomic_read(rs->recover.io_count + IO_RECOVER) *
+ rs->recover.io_size / rs->set.io_size;
+
+ /*
+ * Don't use more than given bandwidth
+ * of the work io for recovery.
+ */
+ if (recover > work / rs->recover.bandwidth_work) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_NO_BANDWIDTH);
+ return 0;
+ }
+ }
+
+out:
+ atomic_inc(rs->stats + S_BANDWIDTH); /* REMOVEME: statistics. */
+ return 1;
+}
+
+/* Try to get a region to recover. */
+static int stripe_recover_get_region(struct stripe *stripe)
+{
+ struct raid_set *rs = RS(stripe->sc);
+ struct recover *rec = &rs->recover;
+ struct recover_addr *addr = stripe->recover;
+ struct dm_dirty_log *dl = rec->dl;
+ struct dm_rh_client *rh = rec->rh;
+
+ BUG_ON(!dl);
+ BUG_ON(!rh);
+
+ /* Return that we already have a region, so it gets finished first during suspension. */
+ if (addr->reg)
+ return 1;
+
+ if (RSSuspend(rs))
+ return -EPERM;
+
+ if (dl->type->get_sync_count(dl) >= rec->nr_regions)
+ return -ENOENT;
+
+ /* If we don't have enough bandwidth, we don't proceed recovering. */
+ if (!recover_bandwidth(rs))
+ return -EAGAIN;
+
+ /* Start quiescing a region. */
+ dm_rh_recovery_prepare(rh);
+ addr->reg = dm_rh_recovery_start(rh);
+ if (!addr->reg)
+ return -EAGAIN;
+
+ addr->pos = dm_rh_region_to_sector(rh, dm_rh_get_region_key(addr->reg));
+ addr->end = addr->pos + dm_rh_get_region_size(rh);
+
+ /*
+ * Take one global io reference out for the
+ * whole region, which is going to be released
+ * when the region is completely done with.
+ */
+ io_get(rs);
+ return 0;
+}
+
+/* Update region hash state. */
+enum recover_type { REC_FAILURE = 0, REC_SUCCESS = 1 };
+static void recover_rh_update(struct stripe *stripe, enum recover_type success)
+{
+ struct recover_addr *addr = stripe->recover;
+ struct raid_set *rs = RS(stripe->sc);
+ struct recover *rec = &rs->recover;
+
+ if (!addr->reg) {
+ DMERR("%s- Called w/o region", __func__);
+ return;
+ }
+
+ dm_rh_recovery_end(addr->reg, success);
+ if (success)
+ rec->nr_regions_recovered++;
+
+ addr->reg = NULL;
+
+ /*
+ * Completely done with this region ->
+ * release the 1st io reference.
+ */
+ io_put(rs);
+}
+
+/* Set start of recovery state. */
+static void set_start_recovery(struct raid_set *rs)
+{
+ /* Initialize recovery. */
+ rs->recover.start_jiffies = jiffies;
+ rs->recover.end_jiffies = 0;
+}
+
+/* Set end of recovery state. */
+static void set_end_recovery(struct raid_set *rs)
+{
+ ClearRSRecover(rs);
+/* NOTE: do not reset this any more -> 'i' stays in the status output and userspace might rely on it disappearing!!!! */
+ rs->set.dev_to_init = -1;
+
+ /* Check for jiffies overrun. */
+ rs->recover.end_jiffies = jiffies;
+ if (rs->recover.end_jiffies < rs->recover.start_jiffies)
+ rs->recover.end_jiffies = ~0;
+}
+
+/* Handle recovery on one recovery stripe. */
+static int _do_recovery(struct stripe *stripe)
+{
+ int r;
+ struct raid_set *rs = RS(stripe->sc);
+ struct recover_addr *addr = stripe->recover;
+
+ /* If recovery is active -> return. */
+ if (stripe_io_ref(stripe))
+ return 1;
+
+ /* IO error is fatal for recovery -> stop it. */
+ if (unlikely(StripeError(stripe)))
+ goto err;
+
+ /* Recovery end required. */
+ if (unlikely(RSDegraded(rs)))
+ goto err;
+
+ /* Get a region to recover. */
+ r = stripe_recover_get_region(stripe);
+ switch (r) {
+ case 0: /* Got a new region: flag initial read before write. */
+ SetStripeRBW(stripe);
+ case 1: /* Have a region in the works. */
+ break;
+ case -EAGAIN:
+ /* No bandwidth/quiesced region yet, try later. */
+ if (!io_ref(rs))
+ wake_do_raid_delayed(rs, HZ / 4);
+ case -EPERM:
+ /* Suspend. */
+ return 1;
+ case -ENOENT: /* No more regions to recover. */
+ schedule_work(&rs->io.ws_do_table_event);
+ return 0;
+ default:
+ BUG();
+ }
+
+ /* Read/write a recover stripe. */
+ r = stripe_recover_rw(stripe);
+ if (r)
+ /* IO initiated. */
+ return 1;
+
+ /* Read and write finished-> update recovery position within region. */
+ addr->pos += stripe->io.size;
+
+ /* If we're at end of region, update region hash. */
+ if (addr->pos >= addr->end ||
+ addr->pos >= rs->set.sectors_per_dev)
+ recover_rh_update(stripe, REC_SUCCESS);
+ else
+ /* Prepare to read next region segment. */
+ SetStripeRBW(stripe);
+
+ /* Schedule myself for another round... */
+ wake_do_raid(rs);
+ return 1;
+
+err:
+ /* FIXME: rather try recovering other regions on error? */
+ rs_check_degrade(stripe);
+ recover_rh_update(stripe, REC_FAILURE);
+
+ /* Check state of partially recovered array. */
+ if (RSDegraded(rs) && !RSDead(rs) &&
+ rs->set.dev_to_init != -1 &&
+ rs->set.ei != rs->set.dev_to_init) {
+ /* Broken drive != drive to recover -> FATAL. */
+ SetRSDead(rs);
+ DMERR("FATAL: failed device != device to initialize -> "
+ "RAID set broken");
+ }
+
+ if (StripeError(stripe) || RSDegraded(rs)) {
+ char buf[BDEVNAME_SIZE];
+
+ DMERR("stopping recovery due to "
+ "ERROR on /dev/%s, stripe at offset %llu",
+ bdevname(rs->dev[rs->set.ei].dev->bdev, buf),
+ (unsigned long long) stripe->key);
+
+ }
+
+ /* Make sure, that all quiesced regions get released. */
+ while (addr->reg) {
+ dm_rh_recovery_end(addr->reg, -EIO);
+ addr->reg = dm_rh_recovery_start(rs->recover.rh);
+ }
+
+ return 0;
+}
+
+/* Called by main io daemon to recover regions. */
+static int do_recovery(struct raid_set *rs)
+{
+ if (RSRecover(rs)) {
+ int r = 0;
+ struct stripe *stripe;
+
+ list_for_each_entry(stripe, &rs->recover.stripes,
+ lists[LIST_RECOVER])
+ r += _do_recovery(stripe);
+
+ if (r)
+ return r;
+
+ set_end_recovery(rs);
+ stripe_recover_free(rs);
+ }
+
+ return 0;
+}
+
+/*
+ * END recovery functions
+ */
+
+/* End io process all stripes handed in by endio() callback. */
+static void _do_endios(struct raid_set *rs, struct stripe *stripe,
+ struct list_head *flush_list)
+{
+ /* First unlock all required chunks. */
+ stripe_chunks_unlock(stripe);
+
+ /*
+ * If an io error on a stripe occurred, degrade the RAID set
+ * and try to endio as many bios as possible. If any bios can't
+ * be endio processed, requeue the stripe (stripe_ref() != 0).
+ */
+ if (TestClearStripeError(stripe)) {
+ /*
+ * FIXME: if read, rewrite the failed chunk after reconstruction
+ * in order to trigger disk bad sector relocation.
+ */
+ rs_check_degrade(stripe); /* Resets ChunkError(). */
+ ClearStripeReconstruct(stripe);
+ ClearStripeReconstructed(stripe);
+
+ /*
+ * FIXME: if write, don't endio writes in flight and don't
+ * allow for new writes until userspace has updated
+ * its metadata.
+ */
+ }
+
+ /* Got to reconstruct a missing chunk. */
+ if (StripeReconstruct(stripe)) {
+ /*
+ * (*2*) We use StripeReconstruct() to allow for
+ * all chunks to be xored into the reconstructed
+ * one (see chunk_must_xor()).
+ */
+ stripe_reconstruct(stripe);
+
+ /*
+ * (*3*) Now we reset StripeReconstruct() and flag
+ * StripeReconstructed() to show to stripe_rw(),
+ * that we have reconstructed a missing chunk.
+ */
+ ClearStripeReconstruct(stripe);
+ SetStripeReconstructed(stripe);
+
+ /* FIXME: reschedule to be written in case of read. */
+ /* if (!RSDead && RSDegraded(rs) !StripeRBW(stripe)) {
+ chunk_set(CHUNK(stripe, stripe->idx.recover), DIRTY);
+ stripe_chunks_rw(stripe);
+ } */
+
+ stripe->idx.recover = -1;
+ }
+
+ /*
+ * Now that we eventually got a complete stripe, we
+ * can process the rest of the end ios on reads.
+ */
+ stripe_endio(READ, stripe);
+
+ /* End io all merged writes if not prohibited. */
+ if (!RSProhibitWrites(rs) && StripeMerged(stripe)) {
+ ClearStripeMerged(stripe);
+ stripe_endio(WRITE_MERGED, stripe);
+ }
+
+ /* If RAID set is dead -> fail any ios to dead drives. */
+ if (RSDead(rs)) {
+ if (!TestSetRSDeadEndioMessage(rs))
+ DMERR("RAID set dead: failing ios to dead devices");
+
+ stripe_fail_io(stripe);
+ }
+
+ /*
+ * We have stripe references still,
+ * because of read before writes or IO errors ->
+ * got to put on flush list for processing.
+ */
+ if (stripe_ref(stripe)) {
+ BUG_ON(!list_empty(stripe->lists + LIST_LRU));
+ list_add_tail(stripe->lists + LIST_FLUSH, flush_list);
+ atomic_inc(rs->stats + S_REQUEUE); /* REMOVEME: statistics. */
+ } else
+ stripe_lru_add(stripe);
+}
+
+/* Pop any endio stripes off of the endio list and belabour them. */
+static void do_endios(struct raid_set *rs)
+{
+ struct stripe_cache *sc = &rs->sc;
+ struct stripe *stripe;
+ /* IO flush list for sorted requeued stripes. */
+ struct list_head flush_list;
+
+ INIT_LIST_HEAD(&flush_list);
+
+ while ((stripe = stripe_endio_pop(sc))) {
+ /* Avoid endio on stripes with newly io'ed chunks. */
+ if (!stripe_io_ref(stripe))
+ _do_endios(rs, stripe, &flush_list);
+ }
+
+ /*
+ * Insert any requeued stripes in the proper
+ * order at the beginning of the io (flush) list.
+ */
+ list_splice(&flush_list, sc->lists + LIST_FLUSH);
+}
+
+/* Flush any stripes on the io list. */
+static int do_flush(struct raid_set *rs)
+{
+ int r = 0;
+ struct stripe *stripe;
+
+ while ((stripe = stripe_io_pop(&rs->sc)))
+ r += stripe_rw(stripe); /* Read/write stripe. */
+
+ return r;
+}
+
+/* Stripe cache resizing. */
+static void do_sc_resize(struct raid_set *rs)
+{
+ unsigned set = atomic_read(&rs->sc.stripes_to_set);
+
+ if (set) {
+ unsigned cur = atomic_read(&rs->sc.stripes);
+ int r = (set > cur) ? sc_grow(&rs->sc, set - cur, SC_GROW) :
+ sc_shrink(&rs->sc, cur - set);
+
+ /* Flag end of resizing if ok. */
+ if (!r)
+ atomic_set(&rs->sc.stripes_to_set, 0);
+ }
+}
+
+/*
+ * Process all ios
+ *
+ * We do different things with the io depending
+ * on the state of the region that it is in:
+ *
+ * o reads: hang off stripe cache or postpone if full
+ *
+ * o writes:
+ *
+ * CLEAN/DIRTY/NOSYNC: increment pending and hang io off stripe's stripe set.
+ * In case stripe cache is full or busy, postpone the io.
+ *
+ * RECOVERING: delay the io until recovery of the region completes.
+ *
+ */
+static void do_ios(struct raid_set *rs, struct bio_list *ios)
+{
+ int r;
+ unsigned flush = 0, delay = 0;
+ sector_t sector;
+ struct dm_rh_client *rh = rs->recover.rh;
+ struct bio *bio;
+ struct bio_list reject;
+
+ bio_list_init(&reject);
+
+ /*
+ * Classify each io:
+ * o delay writes to recovering regions (let reads go through)
+ * o queue io to all other regions
+ */
+ while ((bio = bio_list_pop(ios))) {
+ /*
+ * In case we get a barrier bio, push it back onto
+ * the input queue unless all work queues are empty
+ * and the stripe cache is inactive.
+ */
- if (unlikely(bio_empty_barrier(bio))) {
++ if (bio->bi_rw & REQ_FLUSH) {
+ /* REMOVEME: statistics. */
+ atomic_inc(rs->stats + S_BARRIER);
+ if (delay ||
+ !list_empty(rs->sc.lists + LIST_FLUSH) ||
+ !bio_list_empty(&reject) ||
+ sc_active(&rs->sc)) {
+ bio_list_push(ios, bio);
+ break;
+ }
+ }
+
+ /* If writes prohibited because of failures -> postpone. */
+ if (RSProhibitWrites(rs) && bio_data_dir(bio) == WRITE) {
+ bio_list_add(&reject, bio);
+ continue;
+ }
+
+ /* Check for recovering regions. */
+ sector = _sector(rs, bio);
+ r = region_state(rs, sector, DM_RH_RECOVERING);
+ if (unlikely(r)) {
+ delay++;
+ /* Wait writing to recovering regions. */
+ dm_rh_delay_by_region(rh, bio,
+ dm_rh_sector_to_region(rh,
+ sector));
+ /* REMOVEME: statistics.*/
+ atomic_inc(rs->stats + S_DELAYED_BIOS);
+ atomic_inc(rs->stats + S_SUM_DELAYED_BIOS);
+
+ /* Force bandwidth tests in recovery. */
+ SetRSBandwidth(rs);
+ } else {
+ /*
+ * Process ios to non-recovering regions by queueing
+ * them to stripes (does dm_rh_inc()) for writes).
+ */
+ flush += stripe_queue_bio(rs, bio, &reject);
+ }
+ }
+
+ if (flush) {
+ /* FIXME: better error handling. */
+ r = dm_rh_flush(rh); /* Writes got queued -> flush dirty log. */
+ if (r)
+ DMERR_LIMIT("dirty log flush");
+ }
+
+ /* Merge any rejected bios back to the head of the input list. */
+ bio_list_merge_head(ios, &reject);
+}
+
+/* Unplug: let any queued io roll on the set's devices. */
+static void do_unplug(struct raid_set *rs)
+{
+ struct raid_dev *dev = rs->dev + rs->set.raid_devs;
+
+ while (dev-- > rs->dev) {
+ /* Only call any device unplug function, if io got queued. */
+ if (TestClearDevIoQueued(dev))
+ blk_unplug(bdev_get_queue(dev->dev->bdev));
+ }
+}
+
+/* Send an event in case we're getting too busy. */
+static void do_busy_event(struct raid_set *rs)
+{
+ if (sc_busy(rs)) {
+ if (!TestSetRSScBusy(rs))
+ schedule_work(&rs->io.ws_do_table_event);
+ } else
+ ClearRSScBusy(rs);
+}
+
+/* Throw an event. */
+static void do_table_event(struct work_struct *ws)
+{
+ struct raid_set *rs = container_of(ws, struct raid_set,
+ io.ws_do_table_event);
+ dm_table_event(rs->ti->table);
+}
+
+
+/*-----------------------------------------------------------------
+ * RAID daemon
+ *---------------------------------------------------------------*/
+/*
+ * o belabour all end ios
+ * o update the region hash states
+ * o optionally shrink the stripe cache
+ * o optionally do recovery
+ * o unplug any component raid devices with queued bios
+ * o grab the input queue
+ * o work an all requeued or new ios and perform stripe cache flushs
+ * o unplug any component raid devices with queued bios
+ * o check, if the stripe cache gets too busy and throw an event if so
+ */
+static void do_raid(struct work_struct *ws)
+{
+ int r;
+ struct raid_set *rs = container_of(ws, struct raid_set,
+ io.dws_do_raid.work);
+ struct bio_list *ios = &rs->io.work, *ios_in = &rs->io.in;
+
+ /*
+ * We always need to end io, so that ios can get errored in
+ * case the set failed and the region counters get decremented
+ * before we update region hash states and go any further.
+ */
+ do_endios(rs);
+ dm_rh_update_states(rs->recover.rh, 1);
+
+ /*
+ * Now that we've end io'd, which may have put stripes on the LRU list
+ * to allow for shrinking, we resize the stripe cache if requested.
+ */
+ do_sc_resize(rs);
+
+ /* Try to recover regions. */
+ r = do_recovery(rs);
+ if (r)
+ do_unplug(rs); /* Unplug the sets device queues. */
+
+ /* Quickly grab all new ios queued and add them to the work list. */
+ mutex_lock(&rs->io.in_lock);
+ bio_list_merge(ios, ios_in);
+ bio_list_init(ios_in);
+ mutex_unlock(&rs->io.in_lock);
+
+ if (!bio_list_empty(ios))
+ do_ios(rs, ios); /* Got ios to work into the cache. */
+
+ r = do_flush(rs); /* Flush any stripes on io list. */
+ if (r)
+ do_unplug(rs); /* Unplug the sets device queues. */
+
+ do_busy_event(rs); /* Check if we got too busy. */
+}
+
+/*
+ * Callback for region hash to dispatch
+ * delayed bios queued to recovered regions
+ * (gets called via dm_rh_update_states()).
+ */
+static void dispatch_delayed_bios(void *context, struct bio_list *bl)
+{
+ struct raid_set *rs = context;
+ struct bio *bio;
+
+ /* REMOVEME: statistics; decrement pending delayed bios counter. */
+ bio_list_for_each(bio, bl)
+ atomic_dec(rs->stats + S_DELAYED_BIOS);
+
+ /* Merge region hash private list to work list. */
+ bio_list_merge_head(&rs->io.work, bl);
+ bio_list_init(bl);
+ ClearRSBandwidth(rs);
+}
+
+/*************************************************************
+ * Constructor helpers
+ *************************************************************/
+/* Calculate MB/sec. */
+static unsigned mbpers(struct raid_set *rs, unsigned io_size)
+{
+ return to_bytes((rs->xor.speed * rs->set.data_devs *
+ io_size * HZ / XOR_SPEED_TICKS) >> 10) >> 10;
+}
+
+/*
+ * Discover fastest xor algorithm and # of chunks combination.
+ */
+/* Calculate speed of particular algorithm and # of chunks. */
+static unsigned xor_speed(struct stripe *stripe)
+{
+ int ticks = XOR_SPEED_TICKS;
+ unsigned p = RS(stripe->sc)->set.raid_devs, r = 0;
+ unsigned long j;
+
+ /* Set uptodate so that common_xor()->xor() will belabour chunks. */
+ while (p--)
+ SetChunkUptodate(CHUNK(stripe, p));
+
+ /* Wait for next tick. */
+ for (j = jiffies; j == jiffies; );
+
+ /* Do xors for a few ticks. */
+ while (ticks--) {
+ unsigned xors = 0;
+
+ for (j = jiffies; j == jiffies; ) {
+ mb();
+ common_xor(stripe, stripe->io.size, 0, 0);
+ mb();
+ xors++;
+ mb();
+ }
+
+ if (xors > r)
+ r = xors;
+ }
+
+ return r;
+}
+
+/* Define for xor multi recovery stripe optimization runs. */
+#define DMRAID45_XOR_TEST
+
+/* Optimize xor algorithm for this RAID set. */
+static unsigned xor_optimize(struct raid_set *rs)
+{
+ unsigned chunks_max = 2, speed_max = 0;
+ struct xor_func *f = ARRAY_END(xor_funcs), *f_max = NULL;
+ struct stripe *stripe;
+ unsigned io_size = 0, speed_hm = 0, speed_min = ~0, speed_xor_blocks = 0;
+
+ BUG_ON(list_empty(&rs->recover.stripes));
+#ifndef DMRAID45_XOR_TEST
+ stripe = list_first_entry(&rs->recover.stripes, struct stripe,
+ lists[LIST_RECOVER]);
+#endif
+
+ /* Try all xor functions. */
+ while (f-- > xor_funcs) {
+ unsigned speed;
+
+#ifdef DMRAID45_XOR_TEST
+ list_for_each_entry(stripe, &rs->recover.stripes,
+ lists[LIST_RECOVER]) {
+ io_size = stripe->io.size;
+#endif
+
+ /* Set actual xor function for common_xor(). */
+ rs->xor.f = f;
+ rs->xor.chunks = (f->f == xor_blocks_wrapper ?
+ (MAX_XOR_BLOCKS + 1) :
+ XOR_CHUNKS_MAX);
+ if (rs->xor.chunks > rs->set.raid_devs)
+ rs->xor.chunks = rs->set.raid_devs;
+
+ for ( ; rs->xor.chunks > 1; rs->xor.chunks--) {
+ speed = xor_speed(stripe);
+
+#ifdef DMRAID45_XOR_TEST
+ if (f->f == xor_blocks_wrapper) {
+ if (speed > speed_xor_blocks)
+ speed_xor_blocks = speed;
+ } else if (speed > speed_hm)
+ speed_hm = speed;
+
+ if (speed < speed_min)
+ speed_min = speed;
+#endif
+
+ if (speed > speed_max) {
+ speed_max = speed;
+ chunks_max = rs->xor.chunks;
+ f_max = f;
+ }
+ }
+#ifdef DMRAID45_XOR_TEST
+ }
+#endif
+ }
+
+ /* Memorize optimal parameters. */
+ rs->xor.f = f_max;
+ rs->xor.chunks = chunks_max;
+#ifdef DMRAID45_XOR_TEST
+ DMINFO("%s stripes=%u/size=%u min=%u xor_blocks=%u hm=%u max=%u",
+ speed_max == speed_hm ? "HM" : "NB",
+ rs->recover.recovery_stripes, io_size, speed_min,
+ speed_xor_blocks, speed_hm, speed_max);
+#endif
+ return speed_max;
+}
+
+/*
+ * Allocate a RAID context (a RAID set)
+ */
+/* Structure for variable RAID parameters. */
+struct variable_parms {
+ int bandwidth;
+ int bandwidth_parm;
+ int chunk_size;
+ int chunk_size_parm;
+ int io_size;
+ int io_size_parm;
+ int stripes;
+ int stripes_parm;
+ int recover_io_size;
+ int recover_io_size_parm;
+ int raid_parms;
+ int recovery;
+ int recovery_stripes;
+ int recovery_stripes_parm;
+};
+
+/*
+ * Allocate and initialize a RAID set context.
+ *
+ * Creates the dirty log and region hash, allocates the raid_set with its
+ * trailing per-device arrays, seeds all derived parameters from @p and
+ * initializes the stripe cache.  Returns the new set or an ERR_PTR();
+ * all error paths release whatever was acquired up to that point.
+ */
+static struct raid_set *
+context_alloc(struct raid_type *raid_type, struct variable_parms *p,
+	      unsigned raid_devs, sector_t sectors_per_dev,
+	      struct dm_target *ti, unsigned dl_parms, char **argv)
+{
+	int r;
+	size_t len;
+	sector_t region_size, ti_len;
+	struct raid_set *rs = NULL;
+	struct dm_dirty_log *dl;
+	struct recover *rec;
+
+	/*
+	 * Create the dirty log
+	 *
+	 * We need to change length for the dirty log constructor,
+	 * because we want an amount of regions for all stripes derived
+	 * from the single device size, so that we can keep region
+	 * size = 2^^n independent of the number of devices
+	 */
+	ti_len = ti->len;
+	ti->len = sectors_per_dev;
+	dl = dm_dirty_log_create(argv[0], ti, NULL, dl_parms, argv + 2);
+	ti->len = ti_len;
+	if (!dl)
+		goto bad_dirty_log;
+
+	/* Chunk size *must* be smaller than region size. */
+	region_size = dl->type->get_region_size(dl);
+	if (p->chunk_size > region_size)
+		goto bad_chunk_size;
+
+	/* Recover io size *must* be smaller than region size as well. */
+	if (p->recover_io_size > region_size)
+		goto bad_recover_io_size;
+
+	/* Size and allocate the RAID set structure. */
+	len = sizeof(*rs->data) + sizeof(*rs->dev);
+	if (dm_array_too_big(sizeof(*rs), len, raid_devs))
+		goto bad_array;
+
+	len = sizeof(*rs) + raid_devs * len;
+	rs = kzalloc(len, GFP_KERNEL);
+	if (!rs)
+		goto bad_alloc;
+
+	rec = &rs->recover;
+	atomic_set(&rs->io.in_process, 0);
+	atomic_set(&rs->io.in_process_max, 0);
+	rec->io_size = p->recover_io_size;
+
+	/* Pointer to data array. */
+	rs->data = (unsigned long **)
+		   ((void *) rs->dev + raid_devs * sizeof(*rs->dev));
+	rec->dl = dl;
+	rs->set.raid_devs = raid_devs;
+	rs->set.data_devs = raid_devs - raid_type->parity_devs;
+	rs->set.raid_type = raid_type;
+
+	rs->set.raid_parms = p->raid_parms;
+	rs->set.chunk_size_parm = p->chunk_size_parm;
+	rs->set.io_size_parm = p->io_size_parm;
+	rs->sc.stripes_parm = p->stripes_parm;
+	rec->io_size_parm = p->recover_io_size_parm;
+	rec->bandwidth_parm = p->bandwidth_parm;
+	rec->recovery = p->recovery;
+	rec->recovery_stripes = p->recovery_stripes;
+
+	/*
+	 * Set chunk and io size and respective shifts
+	 * (used to avoid divisions)
+	 */
+	rs->set.chunk_size = p->chunk_size;
+	rs->set.chunk_shift = ffs(p->chunk_size) - 1;
+
+	rs->set.io_size = p->io_size;
+	rs->set.io_mask = p->io_size - 1;
+	/* Mask to adjust address key in case io_size != chunk_size. */
+	rs->set.io_inv_mask = (p->chunk_size - 1) & ~rs->set.io_mask;
+
+	rs->set.sectors_per_dev = sectors_per_dev;
+
+	rs->set.ei = -1;	/* Indicate no failed device. */
+	atomic_set(&rs->set.failed_devs, 0);
+
+	rs->ti = ti;
+
+	atomic_set(rec->io_count + IO_WORK, 0);
+	atomic_set(rec->io_count + IO_RECOVER, 0);
+
+	/* Initialize io lock and queues. */
+	mutex_init(&rs->io.in_lock);
+	mutex_init(&rs->io.xor_lock);
+	bio_list_init(&rs->io.in);
+	bio_list_init(&rs->io.work);
+
+	init_waitqueue_head(&rs->io.suspendq);	/* Suspend waiters (dm-io). */
+
+	rec->nr_regions = dm_sector_div_up(sectors_per_dev, region_size);
+	rec->rh = dm_region_hash_create(rs, dispatch_delayed_bios,
+			wake_dummy, wake_do_raid, 0, p->recovery_stripes,
+			dl, region_size, rec->nr_regions);
+	if (IS_ERR(rec->rh))
+		goto bad_rh;
+
+	/* Initialize stripe cache. */
+	r = sc_init(rs, p->stripes);
+	if (r)
+		goto bad_sc;
+
+	/* REMOVEME: statistics. */
+	stats_reset(rs);
+	ClearRSDevelStats(rs);	/* Disable development status. */
+	return rs;
+
+bad_dirty_log:
+	TI_ERR_RET("Error creating dirty log", ERR_PTR(-ENOMEM));
+
+bad_chunk_size:
+	dm_dirty_log_destroy(dl);
+	TI_ERR_RET("Chunk size larger than region size", ERR_PTR(-EINVAL));
+
+bad_recover_io_size:
+	dm_dirty_log_destroy(dl);
+	TI_ERR_RET("Recover stripe io size larger than region size",
+		   ERR_PTR(-EINVAL));
+
+bad_array:
+	dm_dirty_log_destroy(dl);
+	TI_ERR_RET("Array too big", ERR_PTR(-EINVAL));
+
+bad_alloc:
+	dm_dirty_log_destroy(dl);
+	TI_ERR_RET("Cannot allocate raid context", ERR_PTR(-ENOMEM));
+
+bad_rh:
+	dm_dirty_log_destroy(dl);
+	ti->error = DM_MSG_PREFIX "Error creating dirty region hash";
+	goto free_rs;
+
+bad_sc:
+	dm_region_hash_destroy(rec->rh);	/* Destroys dirty log too. */
+	sc_exit(&rs->sc);
+	ti->error = DM_MSG_PREFIX "Error creating stripe cache";
+free_rs:
+	kfree(rs);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Free a RAID context (a RAID set).
+ *
+ * @p is the number of component devices acquired so far; exactly those
+ * are released before the stripe cache, region hash (which also destroys
+ * the dirty log) and the set itself are torn down.
+ */
+static void context_free(struct raid_set *rs, unsigned p)
+{
+	unsigned d;
+
+	for (d = p; d-- > 0; )
+		dm_put_device(rs->ti, rs->dev[d].dev);
+
+	sc_exit(&rs->sc);
+	dm_region_hash_destroy(rs->recover.rh);	/* Destroys dirty log too. */
+	kfree(rs);
+}
+
+/*
+ * Create the per-set singlethreaded work queue and hook up the
+ * delayed raid worker and the table-event work item.
+ */
+static int rs_workqueue_init(struct raid_set *rs)
+{
+	struct dm_target *ti = rs->ti;
+	struct workqueue_struct *wq = create_singlethread_workqueue(DAEMON);
+
+	if (!wq)
+		TI_ERR_RET("failed to create " DAEMON, -ENOMEM);
+
+	rs->io.wq = wq;
+	INIT_DELAYED_WORK(&rs->io.dws_do_raid, do_raid);
+	INIT_WORK(&rs->io.ws_do_table_event, do_table_event);
+	return 0;
+}
+
+/* Look up the raid_type descriptor matching @name; NULL if unknown. */
+static struct raid_type *get_raid_type(char *name)
+{
+	struct raid_type *rt;
+
+	for (rt = raid_types; rt < ARRAY_END(raid_types); rt++) {
+		if (!strcmp(rt->name, name))
+			return rt;
+	}
+
+	return NULL;
+}
+
+/* FIXME: factor out to dm core. */
+/*
+ * Divide @a by @b, storing the quotient in *@n.
+ * Returns non-zero iff @a is an exact multiple of @b.
+ */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+	sector_t q = a;
+
+	sector_div(q, b);	/* 64-bit safe division; q becomes a / b. */
+	*n = q;
+	return q * b == a;
+}
+
+/* Log RAID set information to kernel log. */
+static void rs_log(struct raid_set *rs, unsigned io_size)
+{
+	char buf[BDEVNAME_SIZE];
+	unsigned d;
+
+	/* One line per component device, marking the parity device. */
+	for (d = 0; d < rs->set.raid_devs; d++)
+		DMINFO("/dev/%s is raid disk %u%s",
+		       bdevname(rs->dev[d].dev->bdev, buf), d,
+		       (d == rs->set.pi) ? " (parity)" : "");
+
+	/* Summary: geometry, xor algorithm/speed and set layout. */
+	DMINFO("%d/%d/%d sectors chunk/io/recovery size, %u stripes\n"
+	       "algorithm \"%s\", %u chunks with %uMB/s\n"
+	       "%s set with net %u/%u devices",
+	       rs->set.chunk_size, rs->set.io_size, rs->recover.io_size,
+	       atomic_read(&rs->sc.stripes),
+	       rs->xor.f->name, rs->xor.chunks, mbpers(rs, io_size),
+	       rs->set.raid_type->descr, rs->set.data_devs, rs->set.raid_devs);
+}
+
+/* Get all devices and offsets. */
+/*
+ * Parse the <dev_path> <offset> argument pairs of the constructor line.
+ *
+ * On return (success or failure), *p holds the number of devices already
+ * acquired via dm_get_device(), so context_free() can release exactly
+ * those on an error path.
+ */
+static int dev_parms(struct raid_set *rs, char **argv, int *p)
+{
+	struct dm_target *ti = rs->ti;
+
+/* NOTE(review): development debug leftover; consider removing. */
+DMINFO("rs->set.sectors_per_dev=%llu", (unsigned long long) rs->set.sectors_per_dev);
+	for (*p = 0; *p < rs->set.raid_devs; (*p)++, argv += 2) {
+		int r;
+		unsigned long long tmp;
+		struct raid_dev *dev = rs->dev + *p;
+
+		/* Get offset and device. */
+		if (sscanf(argv[1], "%llu", &tmp) != 1 ||
+		    tmp > rs->set.sectors_per_dev)
+			TI_ERR("Invalid RAID device offset parameter");
+
+		dev->start = tmp;
+		r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+				  &dev->dev);
+		if (r)
+			TI_ERR_RET("RAID device lookup failure", r);
+
+		/* Reject a device that already occurs earlier in the set. */
+		r = raid_dev_lookup(rs, dev);
+		if (r != -ENODEV && r < *p) {
+			(*p)++; /* Ensure dm_put_device() on actual device. */
+			TI_ERR_RET("Duplicate RAID device", -ENXIO);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set recovery bandwidth.
+ *
+ * @bandwidth is a percentage (validated to 1-100 by the callers), so the
+ * division deriving the work factor is always well-defined.
+ */
+static void
+recover_set_bandwidth(struct raid_set *rs, unsigned bandwidth)
+{
+	struct recover *rec = &rs->recover;
+
+	rec->bandwidth = bandwidth;
+	rec->bandwidth_work = 100 / bandwidth;
+}
+
+/*
+ * Handle variable number of RAID parameters.
+ *
+ * Presets all members of @vp with defaults, then walks the constraint
+ * table below for as many positional arguments as the table line gave.
+ * A value of -1 keeps the default; "[no]sync" is handled as a special
+ * string-valued argument.  Returns 0 or sets ti->error and fails.
+ */
+static int get_raid_variable_parms(struct dm_target *ti, char **argv,
+				   struct variable_parms *vp)
+{
+	int p, value;
+	/* One entry per positional parameter, in constructor-line order. */
+	struct {
+		int action; /* -1: skip, 0: no power2 check, 1: power2 check */
+		char *errmsg;
+		int min, max;
+		int *var, *var2, *var3;
+	} argctr[] = {
+		{ 1,
+		  "Invalid chunk size; must be -1 or 2^^n and <= 16384",
+		  IO_SIZE_MIN, CHUNK_SIZE_MAX,
+		  &vp->chunk_size_parm, &vp->chunk_size, &vp->io_size },
+		{ 0,
+		  "Invalid number of stripes: must be -1 or >= 8 and <= 16384",
+		  STRIPES_MIN, STRIPES_MAX,
+		  &vp->stripes_parm, &vp->stripes, NULL },
+		{ 1,
+		  "Invalid io size; must be -1 or >= 8, 2^^n and less equal "
+		  "min(BIO_MAX_SECTORS/2, chunk size)",
+		  IO_SIZE_MIN, 0, /* Needs to be updated in loop below. */
+		  &vp->io_size_parm, &vp->io_size, NULL },
+		{ 1,
+		  "Invalid recovery io size; must be -1 or "
+		  "2^^n and less equal BIO_MAX_SECTORS/2",
+		  RECOVER_IO_SIZE_MIN, BIO_MAX_SECTORS / 2,
+		  &vp->recover_io_size_parm, &vp->recover_io_size, NULL },
+		{ 0,
+		  "Invalid recovery bandwidth percentage; "
+		  "must be -1 or > 0 and <= 100",
+		  BANDWIDTH_MIN, BANDWIDTH_MAX,
+		  &vp->bandwidth_parm, &vp->bandwidth, NULL },
+		/* Handle sync argument separately in loop. */
+		{ -1,
+		  "Invalid recovery switch; must be \"sync\" or \"nosync\"" },
+		{ 0,
+		  "Invalid number of recovery stripes; "
+		  "must be -1, > 0 and <= 64",
+		  RECOVERY_STRIPES_MIN, RECOVERY_STRIPES_MAX,
+		  &vp->recovery_stripes_parm, &vp->recovery_stripes, NULL },
+	}, *varp;
+
+	/* Fetch # of variable raid parameters. */
+	if (sscanf(*(argv++), "%d", &vp->raid_parms) != 1 ||
+	    !range_ok(vp->raid_parms, 0, 7))
+		TI_ERR("Bad variable raid parameters number");
+
+	/* Preset variable RAID parameters. */
+	vp->chunk_size = CHUNK_SIZE_DEFAULT;
+	vp->io_size = IO_SIZE_DEFAULT;
+	vp->stripes = STRIPES_DEFAULT;
+	vp->recover_io_size = RECOVER_IO_SIZE_DEFAULT;
+	vp->bandwidth = BANDWIDTH_DEFAULT;
+	vp->recovery = 1;
+	vp->recovery_stripes = RECOVERY_STRIPES_DEFAULT;
+
+	/* Walk the array of argument constraints for all given ones. */
+	for (p = 0, varp = argctr; p < vp->raid_parms; p++, varp++) {
+		BUG_ON(varp >= ARRAY_END(argctr));
+
+		/* Special case for "[no]sync" string argument. */
+		if (varp->action < 0) {
+			if (!strcmp(*argv, "sync"))
+				;
+			else if (!strcmp(*argv, "nosync"))
+				vp->recovery = 0;
+			else
+				TI_ERR(varp->errmsg);
+
+			argv++;
+			continue;
+		}
+
+		/*
+		 * Special case for io_size depending
+		 * on previously set chunk size.
+		 */
+		if (p == 2)
+			varp->max = min(BIO_MAX_SECTORS / 2, vp->chunk_size);
+
+		if (sscanf(*(argv++), "%d", &value) != 1 ||
+		    (value != -1 &&
+		     ((varp->action && !is_power_of_2(value)) ||
+		      !range_ok(value, varp->min, varp->max))))
+			TI_ERR(varp->errmsg);
+
+		/* -1 keeps the preset default; otherwise propagate. */
+		*varp->var = value;
+		if (value != -1) {
+			if (varp->var2)
+				*varp->var2 = value;
+			if (varp->var3)
+				*varp->var3 = value;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Parse optional locking parameters ("locking none"/"locking cluster").
+ *
+ * Sets *locking_parms to the number of arguments consumed (2 when a
+ * "locking" clause is present, 0 otherwise) and *locking_type to the
+ * selected locking implementation.
+ *
+ * BUGFIX: the "none" branch previously fell through to the defaults
+ * below, clobbering *locking_parms back to 0 and thus misaligning all
+ * subsequent constructor argument indices; return immediately instead.
+ */
+static int get_raid_locking_parms(struct dm_target *ti, char **argv,
+				  int *locking_parms,
+				  struct dm_raid45_locking_type **locking_type)
+{
+	if (!strnicmp(argv[0], "locking", strlen(argv[0]))) {
+		char *lckstr = argv[1];
+		size_t lcksz = strlen(lckstr);
+
+		if (!strnicmp(lckstr, "none", lcksz)) {
+			*locking_type = &locking_none;
+			*locking_parms = 2;
+			return 0;
+		} else if (!strnicmp(lckstr, "cluster", lcksz)) {
+			DMERR("locking type \"%s\" not yet implemented",
+			      lckstr);
+			return -EINVAL;
+		} else {
+			DMERR("unknown locking type \"%s\"", lckstr);
+			return -EINVAL;
+		}
+	}
+
+	/* No "locking" clause present -> defaults, nothing consumed. */
+	*locking_parms = 0;
+	*locking_type = &locking_none;
+	return 0;
+}
+
+/*
+ * Set backing device read ahead properties of RAID set.
+ *
+ * @sectors sizes the per-device read-ahead; the mapped device itself
+ * gets @stripes full data-stripe widths worth of pages.
+ */
+static void rs_set_read_ahead(struct raid_set *rs,
+			      unsigned sectors, unsigned stripes)
+{
+	unsigned p, ra_pages = dm_div_up(sectors, SECTORS_PER_PAGE);
+	struct mapped_device *md = dm_table_get_md(rs->ti->table);
+	struct backing_dev_info *bdi = &dm_disk(md)->queue->backing_dev_info;
+
+	if (!ra_pages)
+		return;
+
+	/* Set read-ahead for the RAID set and the component devices. */
+	bdi->ra_pages = stripes * ra_pages * rs->set.data_devs;
+
+	for (p = 0; p < rs->set.raid_devs; p++) {
+		struct request_queue *q =
+			bdev_get_queue(rs->dev[p].dev->bdev);
+
+		q->backing_dev_info.ra_pages = ra_pages;
+	}
+}
+
+/* Install the RAID set's congestion callback on the mapped device. */
+static void rs_set_congested_fn(struct raid_set *rs)
+{
+	struct mapped_device *md = dm_table_get_md(rs->ti->table);
+	struct request_queue *q = dm_disk(md)->queue;
+
+	/* Set congested function and data. */
+	q->backing_dev_info.congested_fn = rs_congested;
+	q->backing_dev_info.congested_data = rs;
+}
+
+/*
+ * Construct a RAID4/5 mapping:
+ *
+ * log_type #log_params <log_params> \
+ * raid_type [#parity_dev] #raid_variable_params <raid_params> \
+ * [locking "none"/"cluster"]
+ * #raid_devs #dev_to_initialize [<dev_path> <offset>]{3,}
+ *
+ * log_type = "core"/"disk",
+ * #log_params = 1-3 (1-2 for core dirty log type, 3 for disk dirty log only)
+ * log_params = [dirty_log_path] region_size [[no]sync])
+ *
+ * raid_type = "raid4", "raid5_la", "raid5_ra", "raid5_ls", "raid5_rs"
+ *
+ * #parity_dev = N if raid_type = "raid4"
+ * o N = -1: pick default = last device
+ * o N >= 0 and < #raid_devs: parity device index
+ *
+ * #raid_variable_params = 0-7; raid_params (-1 = default):
+ * [chunk_size [#stripes [io_size [recover_io_size \
+ * [%recovery_bandwidth [recovery_switch [#recovery_stripes]]]]]]]
+ * o chunk_size (unit to calculate drive addresses; must be 2^^n, > 8
+ * and <= CHUNK_SIZE_MAX)
+ * o #stripes is number of stripes allocated to stripe cache
+ * (must be > 1 and < STRIPES_MAX)
+ * o io_size (io unit size per device in sectors; must be 2^^n and > 8)
+ * o recover_io_size (io unit size per device for recovery in sectors;
+ must be 2^^n, > SECTORS_PER_PAGE and <= region_size)
+ * o %recovery_bandwidth is the maximum amount spent for recovery during
+ * application io (1-100%)
+ * o recovery switch = [sync|nosync]
+ * o #recovery_stripes is the number of recovery stripes used for
+ * parallel recovery of the RAID set
+ * If raid_variable_params = 0, defaults will be used.
+ * Any raid_variable_param can be set to -1 to apply a default
+ *
+ * #raid_devs = N (N >= 3)
+ *
+ * #dev_to_initialize = N
+ * -1: initialize parity on all devices
+ * >= 0 and < #raid_devs: initialize raid_path; used to force reconstruction
+ * of a failed devices content after replacement
+ *
+ * <dev_path> = device_path (eg, /dev/sdd1)
+ * <offset> = begin at offset on <dev_path>
+ *
+ */
+#define MIN_PARMS 13
+/*
+ * Construct a RAID4/5 mapping (see usage comment above for the full
+ * constructor line syntax).  Parses dirty log, raid type, variable and
+ * locking parameters, validates geometry, allocates the set context,
+ * acquires the component devices and starts the worker.
+ *
+ * BUGFIX: the sectors_per_dev argument to multiple() had been garbled
+ * into "§ors_per_dev" (a mis-decoded "&sect" HTML entity); restored to
+ * "&sectors_per_dev".
+ */
+static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int dev_to_init, dl_parms, i, locking_parms,
+	    parity_parm, pi = -1, r, raid_devs;
+	sector_t tmp, sectors_per_dev;
+	struct dm_raid45_locking_type *locking;
+	struct raid_set *rs;
+	struct raid_type *raid_type;
+	struct variable_parms parms;
+
+	/* Ensure minimum number of parameters. */
+	if (argc < MIN_PARMS)
+		TI_ERR("Not enough parameters");
+
+	/* Fetch # of dirty log parameters. */
+	if (sscanf(argv[1], "%d", &dl_parms) != 1 ||
+	    !range_ok(dl_parms, 1, 4711)) /* ;-) */
+		TI_ERR("Bad dirty log parameters number");
+
+	/* Check raid_type. */
+	raid_type = get_raid_type(argv[dl_parms + 2]);
+	if (!raid_type)
+		TI_ERR("Bad raid type");
+
+	/* In case of RAID4, parity drive is selectable. */
+	parity_parm = !!(raid_type->level == raid4);
+
+	/* Handle variable number of RAID parameters. */
+	r = get_raid_variable_parms(ti, argv + dl_parms + parity_parm + 3,
+				    &parms);
+	if (r)
+		return r;
+
+	/* Handle any locking parameters. */
+	r = get_raid_locking_parms(ti,
+				   argv + dl_parms + parity_parm +
+				   parms.raid_parms + 4,
+				   &locking_parms, &locking);
+	if (r)
+		return r;
+
+	/* # of raid devices. */
+	i = dl_parms + parity_parm + parms.raid_parms + locking_parms + 4;
+	if (sscanf(argv[i], "%d", &raid_devs) != 1 ||
+	    raid_devs < raid_type->minimal_devs)
+		TI_ERR("Invalid number of raid devices");
+
+	/* In case of RAID4, check parity drive index is in limits. */
+	if (raid_type->level == raid4) {
+		/* Fetch index of parity device. */
+		if (sscanf(argv[dl_parms + 3], "%d", &pi) != 1 ||
+		    (pi != -1 && !range_ok(pi, 0, raid_devs - 1)))
+			TI_ERR("Invalid RAID4 parity device index");
+	}
+
+	/*
+	 * Index of device to initialize starts at 0
+	 *
+	 * o -1 -> don't initialize a selected device;
+	 *         initialize parity conforming to algorithm
+	 * o 0..raid_devs-1 -> initialize respective device
+	 *   (used for reconstruction of a replaced device)
+	 */
+	if (sscanf(argv[dl_parms + parity_parm + parms.raid_parms +
+		   locking_parms + 5], "%d", &dev_to_init) != 1 ||
+	    !range_ok(dev_to_init, -1, raid_devs - 1))
+		TI_ERR("Invalid number for raid device to initialize");
+
+	/* Check # of raid device arguments. */
+	if (argc - dl_parms - parity_parm - parms.raid_parms - 6 !=
+	    2 * raid_devs)
+		TI_ERR("Wrong number of raid device/offset arguments");
+
+	/*
+	 * Check that the table length is divisible
+	 * w/o rest by (raid_devs - parity_devs)
+	 */
+	if (!multiple(ti->len, raid_devs - raid_type->parity_devs,
+		      &sectors_per_dev))
+		TI_ERR("Target length not divisible by number of data devices");
+
+	/*
+	 * Check that the device size is
+	 * divisible w/o rest by chunk size
+	 */
+	if (!multiple(sectors_per_dev, parms.chunk_size, &tmp))
+		TI_ERR("Device length not divisible by chunk_size");
+
+	/****************************************************************
+	 * Now that we checked the constructor arguments ->
+	 * let's allocate the RAID set
+	 ****************************************************************/
+	rs = context_alloc(raid_type, &parms, raid_devs, sectors_per_dev,
+			   ti, dl_parms, argv);
+	if (IS_ERR(rs))
+		return PTR_ERR(rs);
+
+
+	rs->set.dev_to_init = rs->set.dev_to_init_parm = dev_to_init;
+	rs->set.pi = rs->set.pi_parm = pi;
+
+	/* Set RAID4 parity drive index. */
+	if (raid_type->level == raid4)
+		rs->set.pi = (pi == -1) ? rs->set.data_devs : pi;
+
+	recover_set_bandwidth(rs, parms.bandwidth);
+
+	/* Use locking type to lock stripe access. */
+	rs->locking = locking;
+
+	/* Get the device/offset tuples. */
+	argv += dl_parms + 6 + parity_parm + parms.raid_parms;
+	r = dev_parms(rs, argv, &i);
+	if (r)
+		goto err;
+
+	/* Set backing device information (eg. read ahead). */
+	rs_set_read_ahead(rs, 2 * rs->set.chunk_size /* sectors per device */,
+			  2 /* # of stripes */);
+	rs_set_congested_fn(rs);	/* Set congested function. */
+	SetRSCheckOverwrite(rs);	/* Allow chunk overwrite checks. */
+	rs->xor.speed = xor_optimize(rs); /* Select best xor algorithm. */
+
+	/* Set for recovery of any nosync regions. */
+	if (parms.recovery)
+		SetRSRecover(rs);
+	else {
+		/*
+		 * Need to free recovery stripe(s) here in case
+		 * of nosync, because xor_optimize uses one.
+		 */
+		set_start_recovery(rs);
+		set_end_recovery(rs);
+		stripe_recover_free(rs);
+	}
+
+	/*
+	 * Enable parity chunk creation enforcement for
+	 * little numbers of array members where it doesn't
+	 * gain us performance to xor parity out and back in as
+	 * with larger array member numbers.
+	 */
+	if (rs->set.raid_devs <= rs->set.raid_type->minimal_devs + 1)
+		SetRSEnforceParityCreation(rs);
+
+	/*
+	 * Make sure that dm core only hands maximum io size
+	 * length down and pays attention to io boundaries.
+	 */
+	ti->split_io = rs->set.io_size;
+	ti->private = rs;
+
+	/* Initialize work queue to handle this RAID set's io. */
+	r = rs_workqueue_init(rs);
+	if (r)
+		goto err;
+
+	rs_log(rs, rs->recover.io_size); /* Log information about RAID set. */
+	return 0;
+
+err:
+	context_free(rs, i);
+	return r;
+}
+
+/*
+ * Destruct a raid mapping: stop the worker thread, then release the
+ * whole context including all component devices.
+ */
+static void raid_dtr(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+
+	destroy_workqueue(rs->io.wq);
+	context_free(rs, rs->set.raid_devs);
+}
+
+/* Raid mapping function. */
+static int raid_map(struct dm_target *ti, struct bio *bio,
+		    union map_info *map_context)
+{
+	struct raid_set *rs = ti->private;
+
+	/* I don't want to waste stripe cache capacity. */
+	if (bio_rw(bio) == READA)
+		return -EIO;
+
+	/*
+	 * Get io reference to be waiting for to drop
+	 * to zero on device suspension/destruction.
+	 */
+	io_get(rs);
+	bio->bi_sector -= ti->begin;	/* Remap sector. */
+
+	/* Queue io to RAID set. */
+	mutex_lock(&rs->io.in_lock);
+	bio_list_add(&rs->io.in, bio);
+	mutex_unlock(&rs->io.in_lock);
+
+	/* Wake daemon to process input list. */
+	wake_do_raid(rs);
+
+	/* REMOVEME: statistics. */
+	atomic_inc(rs->stats + (bio_data_dir(bio) == READ ?
+		   S_BIOS_READ : S_BIOS_WRITE));
+	return DM_MAPIO_SUBMITTED;	/* Handle later. */
+}
+
+/* Device suspend. */
+/*
+ * Quiesce the set before suspension.  Ordering matters here: recovery
+ * is stopped first, pending delayed work is cancelled and flushed, and
+ * only then do we wait for in-flight ios before presuspending the log.
+ */
+static void raid_presuspend(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+	struct dm_dirty_log *dl = rs->recover.dl;
+
+	SetRSSuspend(rs);
+
+	if (RSRecover(rs))
+		dm_rh_stop_recovery(rs->recover.rh);
+
+	cancel_delayed_work(&rs->io.dws_do_raid);
+	flush_workqueue(rs->io.wq);
+	wait_ios(rs);	/* Wait for completion of all ios being processed. */
+
+	if (dl->type->presuspend && dl->type->presuspend(dl))
+		/* FIXME: need better error handling. */
+		DMWARN("log presuspend failed");
+}
+
+/* Device post-suspend: give the dirty log its postsuspend callback. */
+static void raid_postsuspend(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+	struct dm_dirty_log *dl = rs->recover.dl;
+
+	if (!dl->type->postsuspend)
+		return;
+
+	if (dl->type->postsuspend(dl))
+		/* FIXME: need better error handling. */
+		DMWARN("log postsuspend failed");
+
+}
+
+/* Device resume. */
+/*
+ * Resume the dirty log, recompute how many regions still need recovery
+ * from the log's sync count, restart recovery if enabled, and finally
+ * clear the suspend flag so io processing continues.
+ */
+static void raid_resume(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+	struct recover *rec = &rs->recover;
+	struct dm_dirty_log *dl = rec->dl;
+
+/* NOTE(review): development debug leftover; consider removing. */
+DMINFO("%s...", __func__);
+	if (dl->type->resume && dl->type->resume(dl))
+		/* Resume dirty log. */
+		/* FIXME: need better error handling. */
+		DMWARN("log resume failed");
+
+	rec->nr_regions_to_recover =
+		rec->nr_regions - dl->type->get_sync_count(dl);
+
+	/* Restart any unfinished recovery. */
+	if (RSRecover(rs)) {
+		set_start_recovery(rs);
+		dm_rh_start_recovery(rec->rh);
+	}
+
+	ClearRSSuspend(rs);
+}
+
+/* Return stripe cache size. */
+/*
+ * Rough memory footprint of the stripe cache in sectors, including the
+ * recovery stripes while recovery is still running (end_jiffies == 0).
+ *
+ * NOTE(review): this looks like an estimate only -- the inner
+ * parenthesization adds the chunk/page-list sizes rather than scaling
+ * each of them per device; confirm against sc_init() if an exact
+ * figure is ever required.
+ */
+static unsigned sc_size(struct raid_set *rs)
+{
+	return to_sector(atomic_read(&rs->sc.stripes) *
+			 (sizeof(struct stripe) +
+			  (sizeof(struct stripe_chunk) +
+			   (sizeof(struct page_list) +
+			    to_bytes(rs->set.io_size) *
+			    rs->set.raid_devs)) +
+			  (rs->recover.end_jiffies ?
+			   0 : rs->recover.recovery_stripes *
+			   to_bytes(rs->set.raid_devs * rs->recover.io_size))));
+}
+
+/* REMOVEME: status output for development. */
+/*
+ * Emit development statistics into @result.  DMEMIT implicitly updates
+ * the local "sz"/"maxlen" variables by name, hence the local copy of
+ * *size at entry and the write-back at exit.
+ */
+static void raid_devel_stats(struct dm_target *ti, char *result,
+			     unsigned *size, unsigned maxlen)
+{
+	unsigned sz = *size;
+	unsigned long j;
+	char buf[BDEVNAME_SIZE], *p;
+	struct stats_map *sm;
+	struct raid_set *rs = ti->private;
+	struct recover *rec = &rs->recover;
+	struct timespec ts;
+
+	DMEMIT("%s %s=%u bw=%u\n",
+	       version, rs->xor.f->name, rs->xor.chunks, rs->recover.bandwidth);
+	DMEMIT("act_ios=%d ", io_ref(rs));
+	DMEMIT("act_ios_max=%d\n", atomic_read(&rs->io.in_process_max));
+	DMEMIT("act_stripes=%d ", sc_active(&rs->sc));
+	DMEMIT("act_stripes_max=%d\n",
+	       atomic_read(&rs->sc.active_stripes_max));
+
+	/* One counter per statistics map entry. */
+	for (sm = stats_map; sm < ARRAY_END(stats_map); sm++)
+		DMEMIT("%s%d", sm->str, atomic_read(rs->stats + sm->type));
+
+	DMEMIT(" checkovr=%s\n", RSCheckOverwrite(rs) ? "on" : "off");
+	DMEMIT("sc=%u/%u/%u/%u/%u/%u/%u\n", rs->set.chunk_size,
+	       atomic_read(&rs->sc.stripes), rs->set.io_size,
+	       rec->recovery_stripes, rec->io_size, rs->sc.hash.buckets,
+	       sc_size(rs));
+
+	/* Elapsed recovery time; truncate to two decimals after the dot. */
+	j = (rec->end_jiffies ? rec->end_jiffies : jiffies) -
+	    rec->start_jiffies;
+	jiffies_to_timespec(j, &ts);
+	sprintf(buf, "%ld.%ld", ts.tv_sec, ts.tv_nsec);
+	p = strchr(buf, '.');
+	p[3] = 0;
+
+	DMEMIT("rg=%llu/%llu/%llu/%u %s\n",
+	       (unsigned long long) rec->nr_regions_recovered,
+	       (unsigned long long) rec->nr_regions_to_recover,
+	       (unsigned long long) rec->nr_regions, rec->bandwidth, buf);
+
+	*size = sz;
+}
+
+/*
+ * Status callback: emit either runtime info (device health, sync count)
+ * or the table line needed to reconstruct this target.
+ *
+ * NOTE(review): raid_parms[6] uses recovery_stripes rather than
+ * recovery_stripes_parm, unlike the other entries which report the
+ * user-given "*_parm" values -- confirm whether that is intentional.
+ */
+static int raid_status(struct dm_target *ti, status_type_t type,
+		       char *result, unsigned maxlen)
+{
+	unsigned p, sz = 0;
+	char buf[BDEVNAME_SIZE];
+	struct raid_set *rs = ti->private;
+	struct dm_dirty_log *dl = rs->recover.dl;
+	/* -2 marks the positional "[no]sync" string argument. */
+	int raid_parms[] = {
+		rs->set.chunk_size_parm,
+		rs->sc.stripes_parm,
+		rs->set.io_size_parm,
+		rs->recover.io_size_parm,
+		rs->recover.bandwidth_parm,
+		-2,
+		rs->recover.recovery_stripes,
+	};
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		/* REMOVEME: statistics. */
+		if (RSDevelStats(rs))
+			raid_devel_stats(ti, result, &sz, maxlen);
+
+		DMEMIT("%u ", rs->set.raid_devs);
+
+		for (p = 0; p < rs->set.raid_devs; p++)
+			DMEMIT("%s ",
+			       format_dev_t(buf, rs->dev[p].dev->bdev->bd_dev));
+
+		/* Per-device state: A = alive, D = dead/failed. */
+		DMEMIT("2 ");
+		for (p = 0; p < rs->set.raid_devs; p++) {
+			DMEMIT("%c", !DevFailed(rs->dev + p) ? 'A' : 'D');
+
+			if (p == rs->set.pi)
+				DMEMIT("p");
+
+			if (p == rs->set.dev_to_init)
+				DMEMIT("i");
+		}
+
+		DMEMIT(" %llu/%llu ",
+		       (unsigned long long) dl->type->get_sync_count(dl),
+		       (unsigned long long) rs->recover.nr_regions);
+
+		sz += dl->type->status(dl, type, result+sz, maxlen-sz);
+		break;
+	case STATUSTYPE_TABLE:
+		sz = rs->recover.dl->type->status(rs->recover.dl, type,
+						  result, maxlen);
+		DMEMIT("%s %u ", rs->set.raid_type->name, rs->set.raid_parms);
+
+		for (p = 0; p < rs->set.raid_parms; p++) {
+			if (raid_parms[p] > -2)
+				DMEMIT("%d ", raid_parms[p]);
+			else
+				DMEMIT("%s ", rs->recover.recovery ?
+					      "sync" : "nosync");
+		}
+
+		DMEMIT("%u %d ", rs->set.raid_devs, rs->set.dev_to_init);
+
+		for (p = 0; p < rs->set.raid_devs; p++)
+			DMEMIT("%s %llu ",
+			       format_dev_t(buf, rs->dev[p].dev->bdev->bd_dev),
+			       (unsigned long long) rs->dev[p].start);
+	}
+
+	return 0;
+}
+
+/*
+ * Message interface
+ */
+/* Turn a delta into an absolute value. */
+static int _absolute(char *action, int act, int r)
+{
+	size_t len = strlen(action);
+
+	/* Require at least two characters to disambiguate prefixes. */
+	if (len < 2)
+		len = 2;
+
+	if (!strncmp("set", action, len))
+		return r;
+
+	if (!strncmp("grow", action, len))
+		return act + r;
+
+	if (!strncmp("shrink", action, len))
+		return act - r;
+
+	return -EINVAL;
+}
+
+ /* Change recovery io bandwidth. */
+static int bandwidth_change(struct raid_set *rs, int argc, char **argv,
+			    enum raid_set_flags flag)
+{
+	int cur = rs->recover.bandwidth, bw;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	if (sscanf(argv[1], "%d", &bw) != 1 ||
+	    !range_ok(bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+		return -EINVAL;
+
+	/* Make delta bandwidth absolute. */
+	bw = _absolute(argv[0], cur, bw);
+
+	/* Check range. */
+	if (!range_ok(bw, BANDWIDTH_MIN, BANDWIDTH_MAX))
+		return -EINVAL;
+
+	recover_set_bandwidth(rs, bw);
+	return 0;
+}
+
+/*
+ * Set/reset development feature flags.
+ *
+ * "on"/"off" toggle @flag; "reset" clears statistics (RS_DEVEL_STATS)
+ * or simply sets the flag otherwise.  Two-character prefixes suffice.
+ */
+static int devel_flags(struct raid_set *rs, int argc, char **argv,
+		       enum raid_set_flags flag)
+{
+	size_t len;
+
+	if (argc != 1)
+		return -EINVAL;
+
+	len = strlen(argv[0]);
+	if (len < 2)
+		len = 2;
+
+	if (!strncmp(argv[0], "on", len))
+		return test_and_set_bit(flag, &rs->io.flags) ? -EPERM : 0;
+
+	if (!strncmp(argv[0], "off", len))
+		return test_and_clear_bit(flag, &rs->io.flags) ? 0 : -EPERM;
+
+	if (!strncmp(argv[0], "reset", len)) {
+		if (flag != RS_DEVEL_STATS) {
+			set_bit(flag, &rs->io.flags);
+			return 0;
+		}
+
+		if (!test_bit(flag, &rs->io.flags))
+			return -EPERM;
+
+		stats_reset(rs);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Resize the stripe cache. */
+/*
+ * Request a stripe cache resize via message interface.  The actual
+ * resizing is deferred to the worker: we only validate the request and
+ * publish the target count in sc.stripes_to_set, then wake the daemon.
+ */
+static int sc_resize(struct raid_set *rs, int argc, char **argv,
+		     enum raid_set_flags flag)
+{
+	int act, stripes;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	/* Deny permission in case the daemon is still resizing!. */
+	if (atomic_read(&rs->sc.stripes_to_set))
+		return -EPERM;
+
+	if (sscanf(argv[1], "%d", &stripes) == 1 &&
+	    stripes > 0) {
+		act = atomic_read(&rs->sc.stripes);
+
+		/* Make delta stripes absolute. */
+		stripes = _absolute(argv[0], act, stripes);
+
+		/*
+		 * Check range and that the # of stripes changes.
+		 * We leave the resizing to the worker.
+		 */
+		if (range_ok(stripes, STRIPES_MIN, STRIPES_MAX) &&
+		    stripes != atomic_read(&rs->sc.stripes)) {
+			atomic_set(&rs->sc.stripes_to_set, stripes);
+			wake_do_raid(rs);
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/* Change xor algorithm and number of chunks. */
+/*
+ * Message handler: select a named xor function and chunk count at run
+ * time.  Validates the pair, installs it under xor_lock, then measures
+ * its speed on a freshly grown stripe (freed again afterwards) and
+ * logs the result.
+ */
+static int xor_set(struct raid_set *rs, int argc, char **argv,
+		   enum raid_set_flags flag)
+{
+	if (argc == 2) {
+		int chunks;
+		char *algorithm = argv[0];
+		struct xor_func *f = ARRAY_END(xor_funcs);
+
+		if (sscanf(argv[1], "%d", &chunks) == 1 &&
+		    range_ok(chunks, 2, XOR_CHUNKS_MAX) &&
+		    chunks <= rs->set.raid_devs) {
+			/* Scan the table backwards for the named function. */
+			while (f-- > xor_funcs) {
+				if (!strcmp(algorithm, f->name)) {
+					unsigned io_size = 0;
+					struct stripe *stripe = stripe_alloc(&rs->sc, rs->sc.mem_cache_client, SC_GROW);
+
+					DMINFO("xor: %s", f->name);
+					/* xor_blocks has a lower chunk bound. */
+					if (f->f == xor_blocks_wrapper &&
+					    chunks > MAX_XOR_BLOCKS + 1) {
+						DMERR("chunks > MAX_XOR_BLOCKS"
+						      " + 1");
+						break;
+					}
+
+					/* Swap in the new function atomically
+					 * w.r.t. concurrent xor users. */
+					mutex_lock(&rs->io.xor_lock);
+					rs->xor.f = f;
+					rs->xor.chunks = chunks;
+					rs->xor.speed = 0;
+					mutex_unlock(&rs->io.xor_lock);
+
+					if (stripe) {
+						rs->xor.speed = xor_speed(stripe);
+						io_size = stripe->io.size;
+						stripe_free(stripe, rs->sc.mem_cache_client);
+					}
+
+					rs_log(rs, io_size);
+					return 0;
+				}
+			}
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Allow writes after they got prohibited because of a device failure.
+ *
+ * This needs to be called after userspace updated metadata state
+ * based on an event being thrown during device failure processing.
+ */
+static int allow_writes(struct raid_set *rs, int argc, char **argv,
+			enum raid_set_flags flag)
+{
+	if (!TestClearRSProhibitWrites(rs))
+		return -EPERM;
+
+DMINFO("%s waking", __func__);
+	wake_do_raid(rs);
+	return 0;
+}
+
+/* Parse the RAID message. */
+/*
+ * 'all[ow_writes]'
+ * 'ba[ndwidth] {se[t],g[row],sh[rink]} #'    # e.g 'ba se 50'
+ * "o[verwrite]  {on,of[f],r[eset]}'          # e.g. 'o of'
+ * 'sta[tistics] {on,of[f],r[eset]}'          # e.g. 'stat of'
+ * 'str[ipecache] {se[t],g[row],sh[rink]} #'  # e.g. 'stripe set 1024'
+ * 'xor algorithm #chunks'                    # e.g. 'xor xor_8 5'
+ *
+ */
+static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	if (argc) {
+		size_t len = strlen(argv[0]);
+		struct raid_set *rs = ti->private;
+		/* Dispatch table: message keyword -> handler + flag. */
+		struct {
+			const char *name;
+			int (*f) (struct raid_set *rs, int argc, char **argv,
+				  enum raid_set_flags flag);
+			enum raid_set_flags flag;
+		} msg_descr[] = {
+			{ "allow_writes", allow_writes, 0 },
+			{ "bandwidth", bandwidth_change, 0 },
+			{ "overwrite", devel_flags, RS_CHECK_OVERWRITE },
+			{ "statistics", devel_flags, RS_DEVEL_STATS },
+			{ "stripe_cache", sc_resize, 0 },
+			{ "xor", xor_set, 0 },
+		}, *m = ARRAY_END(msg_descr);
+
+		/* Three characters minimum to disambiguate keywords. */
+		if (len < 3)
+			len = 3;
+
+		while (m-- > msg_descr) {
+			if (!strncmp(argv[0], m->name, len))
+				return m->f(rs, argc - 1, argv + 1, m->flag);
+		}
+
+	}
+
+	return -EINVAL;
+}
+/*
+ * END message interface
+ */
+
+/* Provide io hints: chunk size as io_min, full data stripe as io_opt. */
+static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct raid_set *rs = ti->private;
+	unsigned chunk = rs->set.chunk_size;
+
+	blk_limits_io_min(limits, chunk);
+	blk_limits_io_opt(limits, chunk * rs->set.data_devs);
+}
+
+/* Device-mapper target descriptor for the "raid45" target. */
+static struct target_type raid_target = {
+	.name = "raid45",
+	.version = {1, 0, 0},
+	.module = THIS_MODULE,
+	.ctr = raid_ctr,
+	.dtr = raid_dtr,
+	.map = raid_map,
+	.presuspend = raid_presuspend,
+	.postsuspend = raid_postsuspend,
+	.resume = raid_resume,
+	.status = raid_status,
+	.message = raid_message,
+	.io_hints = raid_io_hints,
+};
+
+/* Shared module init/exit logging helper. */
+static void init_exit(const char *bad_msg, const char *good_msg, int r)
+{
+	if (!r) {
+		DMINFO("%s %s", good_msg, version);
+		return;
+	}
+
+	DMERR("Failed to %sregister target [%d]", bad_msg, r);
+}
+
+/* Module init: register the raid45 target with dm core. */
+static int __init dm_raid_init(void)
+{
+	int r;
+
+	r = dm_register_target(&raid_target);
+	init_exit("", "initialized", r);
+	return r;
+}
+
+/* Module exit: unregister the raid45 target. */
+static void __exit dm_raid_exit(void)
+{
+	dm_unregister_target(&raid_target);
+	init_exit("un", "exit", 0);
+}
+
+/* Module hooks. */
+module_init(dm_raid_init);
+module_exit(dm_raid_exit);
+
+MODULE_DESCRIPTION(DM_NAME " raid4/5 target");
+MODULE_AUTHOR("Heinz Mauelshagen <heinzm@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("dm-raid4");
+MODULE_ALIAS("dm-raid5");
--- /dev/null
+ /*
+ * n_tty.c --- implements the N_TTY line discipline.
+ *
+ * This code used to be in tty_io.c, but things are getting hairy
+ * enough that it made sense to split things off. (The N_TTY
+ * processing has changed so much that it's hardly recognizable,
+ * anyway...)
+ *
+ * Note that the open routine for N_TTY is guaranteed never to return
+ * an error. This is because Linux will fall back to setting a line
+ * to N_TTY if it can not switch to any other line discipline.
+ *
+ * Written by Theodore Ts'o, Copyright 1994.
+ *
+ * This file also contains code originally written by Linus Torvalds,
+ * Copyright 1991, 1992, 1993, and by Julian Cowley, Copyright 1994.
+ *
+ * This file may be redistributed under the terms of the GNU General Public
+ * License.
+ *
+ * Reduced memory usage for older ARM systems - Russell King.
+ *
+ * 2000/01/20 Fixed SMP locking on put_tty_queue using bits of
+ * the patch by Andrew J. Kroll <ag784@freenet.buffalo.edu>
+ * who actually finally proved there really was a race.
+ *
+ * 2002/03/18 Implemented n_tty_wakeup to send SIGIO POLL_OUTs to
+ * waiting writing processes-Sapan Bhatia <sapan@corewars.org>.
+ * Also fixed a bug in BLOCKING mode where n_tty_write returns
+ * EAGAIN
+ */
+
+ #include <linux/types.h>
+ #include <linux/major.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/fcntl.h>
+ #include <linux/sched.h>
+ #include <linux/interrupt.h>
+ #include <linux/tty.h>
+ #include <linux/timer.h>
+ #include <linux/ctype.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/bitops.h>
+ #include <linux/audit.h>
+ #include <linux/file.h>
+ #include <linux/uaccess.h>
+ #include <linux/module.h>
+
+ #include <asm/system.h>
+
+ /* number of characters left in the xmit buffer below which we report room to select/poll */
+ #define WAKEUP_CHARS 256
+
+ /*
+ * This defines the low- and high-watermarks for throttling and
+ * unthrottling the TTY driver. These watermarks are used for
+ * controlling the space in the read buffer.
+ */
+ #define TTY_THRESHOLD_THROTTLE 128 /* now based on remaining room */
+ #define TTY_THRESHOLD_UNTHROTTLE 128
+
+ /*
+ * Special byte codes used in the echo buffer to represent operations
+ * or special handling of characters. Bytes in the echo buffer that
+ * are not part of such special blocks are treated as normal character
+ * codes.
+ */
+ #define ECHO_OP_START 0xff
+ #define ECHO_OP_MOVE_BACK_COL 0x80
+ #define ECHO_OP_SET_CANON_COL 0x81
+ #define ECHO_OP_ERASE_TAB 0x82
+
+ /*
+ * Copy one byte to userspace, first feeding it to the tty audit
+ * subsystem so every character delivered to the reader is recorded.
+ * Returns the put_user() result (0 or -EFAULT).
+ */
+ static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
+ unsigned char __user *ptr)
+ {
+ tty_audit_add_data(tty, &x, 1);
+ return put_user(x, ptr);
+ }
+
+ /**
+ * n_tty_set_room - receive space
+ * @tty: terminal
+ *
+ * Called by the driver to find out how much data it is
+ * permitted to feed to the line discipline without any being lost
+ * and thus to manage flow control. Not serialized. Answers for the
+ * "instant".
+ */
+
+ static void n_tty_set_room(struct tty_struct *tty)
+ {
+ /* tty->read_cnt is not read locked ? */
+ /* One slot is kept free so head == tail means "empty". */
+ int left = N_TTY_BUF_SIZE - tty->read_cnt - 1;
+
+ /*
+ * If we are doing input canonicalization, and there are no
+ * pending newlines, let characters through without limit, so
+ * that erase characters will be handled. Other excess
+ * characters will be beeped.
+ */
+ if (left <= 0)
+ left = tty->icanon && !tty->canon_data;
+ tty->receive_room = left;
+ }
+
+ /*
+ * Append one character to the circular read buffer; silently drops
+ * the character when the buffer is full. Caller must hold read_lock
+ * (see put_tty_queue() for the locked wrapper).
+ */
+ static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty)
+ {
+ if (tty->read_cnt < N_TTY_BUF_SIZE) {
+ tty->read_buf[tty->read_head] = c;
+ tty->read_head = (tty->read_head + 1) & (N_TTY_BUF_SIZE-1);
+ tty->read_cnt++;
+ }
+ }
+
+ /**
+ * put_tty_queue - add character to tty
+ * @c: character
+ * @tty: tty device
+ *
+ * Add a character to the tty read_buf queue. This is done under the
+ * read_lock to serialize character addition and also to protect us
+ * against parallel reads or flushes
+ */
+
+ static void put_tty_queue(unsigned char c, struct tty_struct *tty)
+ {
+ unsigned long flags;
+ /*
+ * The problem of stomping on the buffers ends here.
+ * Why didn't anyone see this one coming? --AJK
+ */
+ spin_lock_irqsave(&tty->read_lock, flags);
+ put_tty_queue_nolock(c, tty);
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ }
+
+ /**
+ * check_unthrottle - allow new receive data
+ * @tty: tty device
+ *
+ * Check whether to call the driver unthrottle functions
+ *
+ * Can sleep, may be called under the atomic_read_lock mutex but
+ * this is not guaranteed.
+ */
+ static void check_unthrottle(struct tty_struct *tty)
+ {
+ /* Only unthrottle while the tty still has users. */
+ if (tty->count)
+ tty_unthrottle(tty);
+ }
+
+ /**
+ * reset_buffer_flags - reset buffer state
+ * @tty: terminal to reset
+ *
+ * Reset the read buffer counters, clear the flags,
+ * and make sure the driver is unthrottled. Called
+ * from n_tty_open() and n_tty_flush_buffer().
+ *
+ * Locking: tty_read_lock for read fields, echo_lock for the
+ * echo buffer counters.
+ */
+
+ static void reset_buffer_flags(struct tty_struct *tty)
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_head = tty->read_tail = tty->read_cnt = 0;
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+
+ mutex_lock(&tty->echo_lock);
+ tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+ mutex_unlock(&tty->echo_lock);
+
+ tty->canon_head = tty->canon_data = tty->erasing = 0;
+ memset(&tty->read_flags, 0, sizeof tty->read_flags);
+ n_tty_set_room(tty);
+ check_unthrottle(tty);
+ }
+
+ /**
+ * n_tty_flush_buffer - clean input queue
+ * @tty: terminal device
+ *
+ * Flush the input buffer. Called when the line discipline is
+ * being closed, when the tty layer wants the buffer flushed (eg
+ * at hangup) or when the N_TTY line discipline internally has to
+ * clean the pending queue (for example some signals).
+ *
+ * Locking: ctrl_lock, read_lock.
+ */
+
+ static void n_tty_flush_buffer(struct tty_struct *tty)
+ {
+ unsigned long flags;
+ /* clear everything and unthrottle the driver */
+ reset_buffer_flags(tty);
+
+ if (!tty->link)
+ return;
+
+ /* pty pair in packet mode: tell the master side we flushed */
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ if (tty->link->packet) {
+ tty->ctrl_status |= TIOCPKT_FLUSHREAD;
+ wake_up_interruptible(&tty->link->read_wait);
+ }
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ }
+
+ /**
+ * n_tty_chars_in_buffer - report available bytes
+ * @tty: tty device
+ *
+ * Report the number of characters buffered to be delivered to user
+ * at this instant in time.
+ *
+ * Locking: read_lock
+ */
+
+ static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty)
+ {
+ unsigned long flags;
+ ssize_t n = 0;
+
+ spin_lock_irqsave(&tty->read_lock, flags);
+ if (!tty->icanon) {
+ n = tty->read_cnt;
+ } else if (tty->canon_data) {
+ /* canonical mode: only complete lines up to canon_head count */
+ n = (tty->canon_head > tty->read_tail) ?
+ tty->canon_head - tty->read_tail :
+ tty->canon_head + (N_TTY_BUF_SIZE - tty->read_tail);
+ }
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ return n;
+ }
+
+ /**
+ * is_utf8_continuation - utf8 multibyte check
+ * @c: byte to check
+ *
+ * Returns true if the utf8 character 'c' is a multibyte continuation
+ * character. We use this to correctly compute the on screen size
+ * of the character when printing
+ */
+
+ static inline int is_utf8_continuation(unsigned char c)
+ {
+ /* UTF-8 continuation bytes have the bit pattern 10xxxxxx */
+ return (c & 0xc0) == 0x80;
+ }
+
+ /**
+ * is_continuation - multibyte check
+ * @c: byte to check
+ * @tty: terminal device
+ *
+ * Returns true if the utf8 character 'c' is a multibyte continuation
+ * character and the terminal is in unicode (IUTF8) mode.
+ */
+
+ static inline int is_continuation(unsigned char c, struct tty_struct *tty)
+ {
+ return I_IUTF8(tty) && is_utf8_continuation(c);
+ }
+
+ /**
+ * do_output_char - output one character
+ * @c: character (or partial unicode symbol)
+ * @tty: terminal device
+ * @space: space available in tty driver write buffer
+ *
+ * This is a helper function that handles one output character
+ * (including special characters like TAB, CR, LF, etc.),
+ * doing OPOST processing and putting the results in the
+ * tty driver's write buffer.
+ *
+ * Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY
+ * and NLDLY. They simply aren't relevant in the world today.
+ * If you ever need them, add them here.
+ *
+ * Returns the number of bytes of buffer space used or -1 if
+ * no space left.
+ *
+ * Locking: should be called under the output_lock to protect
+ * the column state and space left in the buffer
+ */
+
+ static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
+ {
+ int spaces;
+
+ if (!space)
+ return -1;
+
+ switch (c) {
+ case '\n':
+ if (O_ONLRET(tty))
+ tty->column = 0;
+ if (O_ONLCR(tty)) {
+ /* NL -> CR-NL expansion needs two bytes of room */
+ if (space < 2)
+ return -1;
+ tty->canon_column = tty->column = 0;
+ tty->ops->write(tty, "\r\n", 2);
+ return 2;
+ }
+ tty->canon_column = tty->column;
+ break;
+ case '\r':
+ if (O_ONOCR(tty) && tty->column == 0)
+ return 0;
+ if (O_OCRNL(tty)) {
+ c = '\n';
+ if (O_ONLRET(tty))
+ tty->canon_column = tty->column = 0;
+ break;
+ }
+ tty->canon_column = tty->column = 0;
+ break;
+ case '\t':
+ /* distance to the next 8-column tab stop */
+ spaces = 8 - (tty->column & 7);
+ if (O_TABDLY(tty) == XTABS) {
+ if (space < spaces)
+ return -1;
+ tty->column += spaces;
+ tty->ops->write(tty, "        ", spaces);
+ return spaces;
+ }
+ tty->column += spaces;
+ break;
+ case '\b':
+ if (tty->column > 0)
+ tty->column--;
+ break;
+ default:
+ if (!iscntrl(c)) {
+ if (O_OLCUC(tty))
+ c = toupper(c);
+ /* continuation bytes take no extra screen column */
+ if (!is_continuation(c, tty))
+ tty->column++;
+ }
+ break;
+ }
+
+ tty_put_char(tty, c);
+ return 1;
+ }
+
+ /**
+ * process_output - output post processor
+ * @c: character (or partial unicode symbol)
+ * @tty: terminal device
+ *
+ * Output one character with OPOST processing.
+ * Returns -1 when the output device is full and the character
+ * must be retried, 0 otherwise.
+ *
+ * Locking: output_lock to protect column state and space left
+ * (also, this is called from n_tty_write under the
+ * tty layer write lock)
+ */
+
+ static int process_output(unsigned char c, struct tty_struct *tty)
+ {
+ int space, retval;
+
+ mutex_lock(&tty->output_lock);
+
+ space = tty_write_room(tty);
+ retval = do_output_char(c, tty, space);
+
+ mutex_unlock(&tty->output_lock);
+ if (retval < 0)
+ return -1;
+ else
+ return 0;
+ }
+
+ /**
+ * process_output_block - block post processor
+ * @tty: terminal device
+ * @buf: character buffer
+ * @nr: number of bytes to output
+ *
+ * Output a block of characters with OPOST processing.
+ * Returns the number of characters output.
+ *
+ * This path is used to speed up block console writes, among other
+ * things when processing blocks of output data. It handles only
+ * the simple cases normally found and helps to generate blocks of
+ * symbols for the console driver and thus improve performance.
+ *
+ * Locking: output_lock to protect column state and space left
+ * (also, this is called from n_tty_write under the
+ * tty layer write lock)
+ */
+
+ static ssize_t process_output_block(struct tty_struct *tty,
+ const unsigned char *buf, unsigned int nr)
+ {
+ int space;
+ int i;
+ const unsigned char *cp;
+
+ mutex_lock(&tty->output_lock);
+
+ space = tty_write_room(tty);
+ if (!space) {
+ mutex_unlock(&tty->output_lock);
+ return 0;
+ }
+ if (nr > space)
+ nr = space;
+
+ /*
+ * Scan forward until we hit a character that needs the slow
+ * per-character expansion path; only update column bookkeeping
+ * for the characters that can be passed through verbatim.
+ */
+ for (i = 0, cp = buf; i < nr; i++, cp++) {
+ unsigned char c = *cp;
+
+ switch (c) {
+ case '\n':
+ if (O_ONLRET(tty))
+ tty->column = 0;
+ if (O_ONLCR(tty))
+ goto break_out;
+ tty->canon_column = tty->column;
+ break;
+ case '\r':
+ if (O_ONOCR(tty) && tty->column == 0)
+ goto break_out;
+ if (O_OCRNL(tty))
+ goto break_out;
+ tty->canon_column = tty->column = 0;
+ break;
+ case '\t':
+ goto break_out;
+ case '\b':
+ if (tty->column > 0)
+ tty->column--;
+ break;
+ default:
+ if (!iscntrl(c)) {
+ if (O_OLCUC(tty))
+ goto break_out;
+ if (!is_continuation(c, tty))
+ tty->column++;
+ }
+ break;
+ }
+ }
+ break_out:
+ /* write the simple prefix [buf, buf+i) in one driver call */
+ i = tty->ops->write(tty, buf, i);
+
+ mutex_unlock(&tty->output_lock);
+ return i;
+ }
+
+ /**
+ * process_echoes - write pending echo characters
+ * @tty: terminal device
+ *
+ * Write previously buffered echo (and other ldisc-generated)
+ * characters to the tty.
+ *
+ * Characters generated by the ldisc (including echoes) need to
+ * be buffered because the driver's write buffer can fill during
+ * heavy program output. Echoing straight to the driver will
+ * often fail under these conditions, causing lost characters and
+ * resulting mismatches of ldisc state information.
+ *
+ * Since the ldisc state must represent the characters actually sent
+ * to the driver at the time of the write, operations like certain
+ * changes in column state are also saved in the buffer and executed
+ * here.
+ *
+ * A circular fifo buffer is used so that the most recent characters
+ * are prioritized. Also, when control characters are echoed with a
+ * prefixed "^", the pair is treated atomically and thus not separated.
+ *
+ * Locking: output_lock to protect column state and space left,
+ * echo_lock to protect the echo buffer
+ */
+
+ static void process_echoes(struct tty_struct *tty)
+ {
+ int space, nr;
+ unsigned char c;
+ unsigned char *cp, *buf_end;
+
+ if (!tty->echo_cnt)
+ return;
+
+ mutex_lock(&tty->output_lock);
+ mutex_lock(&tty->echo_lock);
+
+ space = tty_write_room(tty);
+
+ buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
+ cp = tty->echo_buf + tty->echo_pos;
+ nr = tty->echo_cnt;
+ while (nr > 0) {
+ c = *cp;
+ if (c == ECHO_OP_START) {
+ unsigned char op;
+ unsigned char *opp;
+ int no_space_left = 0;
+
+ /*
+ * If the buffer byte is the start of a multi-byte
+ * operation, get the next byte, which is either the
+ * op code or a control character value.
+ */
+ opp = cp + 1;
+ if (opp == buf_end)
+ opp -= N_TTY_BUF_SIZE;
+ op = *opp;
+
+ switch (op) {
+ unsigned int num_chars, num_bs;
+
+ case ECHO_OP_ERASE_TAB:
+ /* third byte of the op carries the column count */
+ if (++opp == buf_end)
+ opp -= N_TTY_BUF_SIZE;
+ num_chars = *opp;
+
+ /*
+ * Determine how many columns to go back
+ * in order to erase the tab.
+ * This depends on the number of columns
+ * used by other characters within the tab
+ * area. If this (modulo 8) count is from
+ * the start of input rather than from a
+ * previous tab, we offset by canon column.
+ * Otherwise, tab spacing is normal.
+ */
+ if (!(num_chars & 0x80))
+ num_chars += tty->canon_column;
+ num_bs = 8 - (num_chars & 7);
+
+ if (num_bs > space) {
+ no_space_left = 1;
+ break;
+ }
+ space -= num_bs;
+ while (num_bs--) {
+ tty_put_char(tty, '\b');
+ if (tty->column > 0)
+ tty->column--;
+ }
+ cp += 3;
+ nr -= 3;
+ break;
+
+ case ECHO_OP_SET_CANON_COL:
+ tty->canon_column = tty->column;
+ cp += 2;
+ nr -= 2;
+ break;
+
+ case ECHO_OP_MOVE_BACK_COL:
+ if (tty->column > 0)
+ tty->column--;
+ cp += 2;
+ nr -= 2;
+ break;
+
+ case ECHO_OP_START:
+ /* This is an escaped echo op start code */
+ if (!space) {
+ no_space_left = 1;
+ break;
+ }
+ tty_put_char(tty, ECHO_OP_START);
+ tty->column++;
+ space--;
+ cp += 2;
+ nr -= 2;
+ break;
+
+ default:
+ /*
+ * If the op is not a special byte code,
+ * it is a ctrl char tagged to be echoed
+ * as "^X" (where X is the letter
+ * representing the control char).
+ * Note that we must ensure there is
+ * enough space for the whole ctrl pair.
+ *
+ */
+ if (space < 2) {
+ no_space_left = 1;
+ break;
+ }
+ tty_put_char(tty, '^');
+ tty_put_char(tty, op ^ 0100);
+ tty->column += 2;
+ space -= 2;
+ cp += 2;
+ nr -= 2;
+ }
+
+ if (no_space_left)
+ break;
+ } else {
+ /* ordinary buffered character, not an op sequence */
+ if (O_OPOST(tty) &&
+ !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
+ int retval = do_output_char(c, tty, space);
+ if (retval < 0)
+ break;
+ space -= retval;
+ } else {
+ if (!space)
+ break;
+ tty_put_char(tty, c);
+ space -= 1;
+ }
+ cp += 1;
+ nr -= 1;
+ }
+
+ /* When end of circular buffer reached, wrap around */
+ if (cp >= buf_end)
+ cp -= N_TTY_BUF_SIZE;
+ }
+
+ if (nr == 0) {
+ /* drained completely: reset fifo to the buffer start */
+ tty->echo_pos = 0;
+ tty->echo_cnt = 0;
+ tty->echo_overrun = 0;
+ } else {
+ int num_processed = tty->echo_cnt - nr;
+ tty->echo_pos += num_processed;
+ tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+ tty->echo_cnt = nr;
+ if (num_processed > 0)
+ tty->echo_overrun = 0;
+ }
+
+ mutex_unlock(&tty->echo_lock);
+ mutex_unlock(&tty->output_lock);
+
+ if (tty->ops->flush_chars)
+ tty->ops->flush_chars(tty);
+ }
+
+ /**
+ * add_echo_byte - add a byte to the echo buffer
+ * @c: unicode byte to echo
+ * @tty: terminal device
+ *
+ * Add a character or operation byte to the echo buffer.
+ *
+ * Should be called under the echo lock to protect the echo buffer.
+ */
+
+ static void add_echo_byte(unsigned char c, struct tty_struct *tty)
+ {
+ int new_byte_pos;
+
+ if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+ /* Circular buffer is already at capacity */
+ new_byte_pos = tty->echo_pos;
+
+ /*
+ * Since the buffer start position needs to be advanced,
+ * be sure to step by a whole operation byte group.
+ */
+ if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) {
+ if (tty->echo_buf[(tty->echo_pos + 1) &
+ (N_TTY_BUF_SIZE - 1)] ==
+ ECHO_OP_ERASE_TAB) {
+ /* 3-byte op: START, ERASE_TAB, count */
+ tty->echo_pos += 3;
+ tty->echo_cnt -= 2;
+ } else {
+ /* 2-byte op: START plus op/ctrl byte */
+ tty->echo_pos += 2;
+ tty->echo_cnt -= 1;
+ }
+ } else {
+ tty->echo_pos++;
+ }
+ tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+
+ tty->echo_overrun = 1;
+ } else {
+ new_byte_pos = tty->echo_pos + tty->echo_cnt;
+ new_byte_pos &= N_TTY_BUF_SIZE - 1;
+ tty->echo_cnt++;
+ }
+
+ tty->echo_buf[new_byte_pos] = c;
+ }
+
+ /**
+ * echo_move_back_col - add operation to move back a column
+ * @tty: terminal device
+ *
+ * Add an operation to the echo buffer to move back one column.
+ *
+ * Locking: echo_lock to protect the echo buffer
+ */
+
+ static void echo_move_back_col(struct tty_struct *tty)
+ {
+ mutex_lock(&tty->echo_lock);
+
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
+
+ mutex_unlock(&tty->echo_lock);
+ }
+
+ /**
+ * echo_set_canon_col - add operation to set the canon column
+ * @tty: terminal device
+ *
+ * Add an operation to the echo buffer to set the canon column
+ * to the current column.
+ *
+ * Locking: echo_lock to protect the echo buffer
+ */
+
+ static void echo_set_canon_col(struct tty_struct *tty)
+ {
+ mutex_lock(&tty->echo_lock);
+
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
+
+ mutex_unlock(&tty->echo_lock);
+ }
+
+ /**
+ * echo_erase_tab - add operation to erase a tab
+ * @num_chars: number of character columns already used
+ * @after_tab: true if num_chars starts after a previous tab
+ * @tty: terminal device
+ *
+ * Add an operation to the echo buffer to erase a tab.
+ *
+ * Called by the eraser function, which knows how many character
+ * columns have been used since either a previous tab or the start
+ * of input. This information will be used later, along with
+ * canon column (if applicable), to go back the correct number
+ * of columns.
+ *
+ * Locking: echo_lock to protect the echo buffer
+ */
+
+ static void echo_erase_tab(unsigned int num_chars, int after_tab,
+ struct tty_struct *tty)
+ {
+ mutex_lock(&tty->echo_lock);
+
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(ECHO_OP_ERASE_TAB, tty);
+
+ /* We only need to know this modulo 8 (tab spacing) */
+ num_chars &= 7;
+
+ /* Set the high bit as a flag if num_chars is after a previous tab */
+ if (after_tab)
+ num_chars |= 0x80;
+
+ add_echo_byte(num_chars, tty);
+
+ mutex_unlock(&tty->echo_lock);
+ }
+
+ /**
+ * echo_char_raw - echo a character raw
+ * @c: unicode byte to echo
+ * @tty: terminal device
+ *
+ * Echo user input back onto the screen. This must be called only when
+ * L_ECHO(tty) is true. Called from the driver receive_buf path.
+ *
+ * This variant does not treat control characters specially.
+ *
+ * Locking: echo_lock to protect the echo buffer
+ */
+
+ static void echo_char_raw(unsigned char c, struct tty_struct *tty)
+ {
+ mutex_lock(&tty->echo_lock);
+
+ if (c == ECHO_OP_START) {
+ /* escape a literal 0xff so it isn't parsed as an op code */
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(ECHO_OP_START, tty);
+ } else {
+ add_echo_byte(c, tty);
+ }
+
+ mutex_unlock(&tty->echo_lock);
+ }
+
+ /**
+ * echo_char - echo a character
+ * @c: unicode byte to echo
+ * @tty: terminal device
+ *
+ * Echo user input back onto the screen. This must be called only when
+ * L_ECHO(tty) is true. Called from the driver receive_buf path.
+ *
+ * This variant tags control characters to be echoed as "^X"
+ * (where X is the letter representing the control char).
+ *
+ * Locking: echo_lock to protect the echo buffer
+ */
+
+ static void echo_char(unsigned char c, struct tty_struct *tty)
+ {
+ mutex_lock(&tty->echo_lock);
+
+ if (c == ECHO_OP_START) {
+ /* escape a literal 0xff so it isn't parsed as an op code */
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(ECHO_OP_START, tty);
+ } else {
+ /* tag ctrl chars (except TAB) for later "^X" expansion */
+ if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t')
+ add_echo_byte(ECHO_OP_START, tty);
+ add_echo_byte(c, tty);
+ }
+
+ mutex_unlock(&tty->echo_lock);
+ }
+
+ /**
+ * finish_erasing - complete erase
+ * @tty: tty doing the erase
+ *
+ * Terminate an ECHOPRT erase sequence by echoing the closing '/'
+ * and clearing the erasing state.
+ */
+
+ static inline void finish_erasing(struct tty_struct *tty)
+ {
+ if (tty->erasing) {
+ echo_char_raw('/', tty);
+ tty->erasing = 0;
+ }
+ }
+
+ /**
+ * eraser - handle erase function
+ * @c: character input
+ * @tty: terminal device
+ *
+ * Perform erase and necessary output when an erase character is
+ * present in the stream from the driver layer. Handles the complexities
+ * of UTF-8 multibyte symbols.
+ *
+ * Locking: read_lock for tty buffers
+ */
+
+ static void eraser(unsigned char c, struct tty_struct *tty)
+ {
+ enum { ERASE, WERASE, KILL } kill_type;
+ int head, seen_alnums, cnt;
+ unsigned long flags;
+
+ /* FIXME: locking needed ? */
+ if (tty->read_head == tty->canon_head) {
+ /* process_output('\a', tty); */ /* what do you think? */
+ return;
+ }
+ if (c == ERASE_CHAR(tty))
+ kill_type = ERASE;
+ else if (c == WERASE_CHAR(tty))
+ kill_type = WERASE;
+ else {
+ /* line-kill character */
+ if (!L_ECHO(tty)) {
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+ (N_TTY_BUF_SIZE - 1));
+ tty->read_head = tty->canon_head;
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ return;
+ }
+ if (!L_ECHOK(tty) || !L_ECHOKE(tty) || !L_ECHOE(tty)) {
+ /* drop the whole line, echo the kill char itself */
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_cnt -= ((tty->read_head - tty->canon_head) &
+ (N_TTY_BUF_SIZE - 1));
+ tty->read_head = tty->canon_head;
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ finish_erasing(tty);
+ echo_char(KILL_CHAR(tty), tty);
+ /* Add a newline if ECHOK is on and ECHOKE is off. */
+ if (L_ECHOK(tty))
+ echo_char_raw('\n', tty);
+ return;
+ }
+ kill_type = KILL;
+ }
+
+ seen_alnums = 0;
+ /* FIXME: Locking ?? */
+ while (tty->read_head != tty->canon_head) {
+ head = tty->read_head;
+
+ /* erase a single possibly multibyte character */
+ do {
+ head = (head - 1) & (N_TTY_BUF_SIZE-1);
+ c = tty->read_buf[head];
+ } while (is_continuation(c, tty) && head != tty->canon_head);
+
+ /* do not partially erase */
+ if (is_continuation(c, tty))
+ break;
+
+ if (kill_type == WERASE) {
+ /* Equivalent to BSD's ALTWERASE. */
+ if (isalnum(c) || c == '_')
+ seen_alnums++;
+ else if (seen_alnums)
+ break;
+ }
+ cnt = (tty->read_head - head) & (N_TTY_BUF_SIZE-1);
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_head = head;
+ tty->read_cnt -= cnt;
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ if (L_ECHO(tty)) {
+ if (L_ECHOPRT(tty)) {
+ if (!tty->erasing) {
+ echo_char_raw('\\', tty);
+ tty->erasing = 1;
+ }
+ /* if cnt > 1, output a multi-byte character */
+ echo_char(c, tty);
+ while (--cnt > 0) {
+ head = (head+1) & (N_TTY_BUF_SIZE-1);
+ echo_char_raw(tty->read_buf[head], tty);
+ echo_move_back_col(tty);
+ }
+ } else if (kill_type == ERASE && !L_ECHOE(tty)) {
+ echo_char(ERASE_CHAR(tty), tty);
+ } else if (c == '\t') {
+ unsigned int num_chars = 0;
+ int after_tab = 0;
+ unsigned long tail = tty->read_head;
+
+ /*
+ * Count the columns used for characters
+ * since the start of input or after a
+ * previous tab.
+ * This info is used to go back the correct
+ * number of columns.
+ */
+ while (tail != tty->canon_head) {
+ tail = (tail-1) & (N_TTY_BUF_SIZE-1);
+ c = tty->read_buf[tail];
+ if (c == '\t') {
+ after_tab = 1;
+ break;
+ } else if (iscntrl(c)) {
+ if (L_ECHOCTL(tty))
+ num_chars += 2;
+ } else if (!is_continuation(c, tty)) {
+ num_chars++;
+ }
+ }
+ echo_erase_tab(num_chars, after_tab, tty);
+ } else {
+ /* a ctrl char echoed as "^X" occupies two
+ columns, so it needs two erase sequences */
+ if (iscntrl(c) && L_ECHOCTL(tty)) {
+ echo_char_raw('\b', tty);
+ echo_char_raw(' ', tty);
+ echo_char_raw('\b', tty);
+ }
+ if (!iscntrl(c) || L_ECHOCTL(tty)) {
+ echo_char_raw('\b', tty);
+ echo_char_raw(' ', tty);
+ echo_char_raw('\b', tty);
+ }
+ }
+ }
+ if (kill_type == ERASE)
+ break;
+ }
+ if (tty->read_head == tty->canon_head && L_ECHO(tty))
+ finish_erasing(tty);
+ }
+
+ /**
+ * isig - handle the ISIG option
+ * @sig: signal
+ * @tty: terminal
+ * @flush: force flush
+ *
+ * Called when a signal is being sent due to terminal input. This
+ * may cause terminal flushing to take place according to the termios
+ * settings and character used. Called from the driver receive_buf
+ * path so serialized.
+ *
+ * Locking: ctrl_lock, read_lock (both via flush buffer)
+ */
+
+ static inline void isig(int sig, struct tty_struct *tty, int flush)
+ {
+ if (tty->pgrp)
+ kill_pgrp(tty->pgrp, sig, 1);
+ if (flush || !L_NOFLSH(tty)) {
+ n_tty_flush_buffer(tty);
+ tty_driver_flush_buffer(tty);
+ }
+ }
+
+ /**
+ * n_tty_receive_break - handle break
+ * @tty: terminal
+ *
+ * An RS232 break event has been hit in the incoming bitstream. This
+ * can cause a variety of events depending upon the termios settings.
+ *
+ * Called from the receive_buf path so single threaded.
+ */
+
+ static inline void n_tty_receive_break(struct tty_struct *tty)
+ {
+ if (I_IGNBRK(tty))
+ return;
+ if (I_BRKINT(tty)) {
+ isig(SIGINT, tty, 1);
+ return;
+ }
+ if (I_PARMRK(tty)) {
+ /* PARMRK marks the break as the sequence \377 \0 \0 */
+ put_tty_queue('\377', tty);
+ put_tty_queue('\0', tty);
+ }
+ put_tty_queue('\0', tty);
+ wake_up_interruptible(&tty->read_wait);
+ }
+
+ /**
+ * n_tty_receive_overrun - handle overrun reporting
+ * @tty: terminal
+ *
+ * Data arrived faster than we could process it. While the tty
+ * driver has flagged this the bits that were missed are gone
+ * forever.
+ *
+ * Called from the receive_buf path so single threaded. Does not
+ * need locking as num_overrun and overrun_time are function
+ * private.
+ */
+
+ static inline void n_tty_receive_overrun(struct tty_struct *tty)
+ {
+ char buf[64];
+
+ tty->num_overrun++;
+ /* rate-limit the warning to at most once per second */
+ if (time_before(tty->overrun_time, jiffies - HZ) ||
+ time_after(tty->overrun_time, jiffies)) {
+ printk(KERN_WARNING "%s: %d input overrun(s)\n",
+ tty_name(tty, buf),
+ tty->num_overrun);
+ tty->overrun_time = jiffies;
+ tty->num_overrun = 0;
+ }
+ }
+
+ /**
+ * n_tty_receive_parity_error - error notifier
+ * @tty: terminal device
+ * @c: character
+ *
+ * Process a parity error and queue the right data to indicate
+ * the error case if necessary. Locking as per n_tty_receive_buf.
+ */
+ static inline void n_tty_receive_parity_error(struct tty_struct *tty,
+ unsigned char c)
+ {
+ if (I_IGNPAR(tty))
+ return;
+ if (I_PARMRK(tty)) {
+ /* PARMRK marks the bad character as \377 \0 <c> */
+ put_tty_queue('\377', tty);
+ put_tty_queue('\0', tty);
+ put_tty_queue(c, tty);
+ } else if (I_INPCK(tty))
+ put_tty_queue('\0', tty);
+ else
+ put_tty_queue(c, tty);
+ wake_up_interruptible(&tty->read_wait);
+ }
+
+ /**
+ * n_tty_receive_char - perform processing
+ * @tty: terminal device
+ * @c: character
+ *
+ * Process an individual character of input received from the driver.
+ * This is serialized with respect to itself by the rules for the
+ * driver above.
+ */
+
+ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
+ {
+ unsigned long flags;
+ int parmrk;
+
+ if (tty->raw) {
+ put_tty_queue(c, tty);
+ return;
+ }
+
+ if (I_ISTRIP(tty))
+ c &= 0x7f;
+ if (I_IUCLC(tty) && L_IEXTEN(tty))
+ c = tolower(c);
+
+ if (L_EXTPROC(tty)) {
+ put_tty_queue(c, tty);
+ return;
+ }
+
+ /* IXANY: any character (other than flow/signal chars) restarts
+ stopped output */
+ if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
+ I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
+ c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) {
+ start_tty(tty);
+ process_echoes(tty);
+ }
+
+ if (tty->closing) {
+ if (I_IXON(tty)) {
+ if (c == START_CHAR(tty)) {
+ start_tty(tty);
+ process_echoes(tty);
+ } else if (c == STOP_CHAR(tty))
+ stop_tty(tty);
+ }
+ return;
+ }
+
+ /*
+ * If the previous character was LNEXT, or we know that this
+ * character is not one of the characters that we'll have to
+ * handle specially, do shortcut processing to speed things
+ * up.
+ */
+ if (!test_bit(c, tty->process_char_map) || tty->lnext) {
+ tty->lnext = 0;
+ parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+ if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+ /* beep if no space */
+ if (L_ECHO(tty))
+ process_output('\a', tty);
+ return;
+ }
+ if (L_ECHO(tty)) {
+ finish_erasing(tty);
+ /* Record the column of first canon char. */
+ if (tty->canon_head == tty->read_head)
+ echo_set_canon_col(tty);
+ echo_char(c, tty);
+ process_echoes(tty);
+ }
+ /* PARMRK: a literal \377 is doubled to escape it */
+ if (parmrk)
+ put_tty_queue(c, tty);
+ put_tty_queue(c, tty);
+ return;
+ }
+
+ if (I_IXON(tty)) {
+ if (c == START_CHAR(tty)) {
+ start_tty(tty);
+ process_echoes(tty);
+ return;
+ }
+ if (c == STOP_CHAR(tty)) {
+ stop_tty(tty);
+ return;
+ }
+ }
+
+ if (L_ISIG(tty)) {
+ int signal;
+ signal = SIGINT;
+ if (c == INTR_CHAR(tty))
+ goto send_signal;
+ signal = SIGQUIT;
+ if (c == QUIT_CHAR(tty))
+ goto send_signal;
+ signal = SIGTSTP;
+ if (c == SUSP_CHAR(tty)) {
+ send_signal:
+ /*
+ * Note that we do not use isig() here because we want
+ * the order to be:
+ * 1) flush, 2) echo, 3) signal
+ */
+ if (!L_NOFLSH(tty)) {
+ n_tty_flush_buffer(tty);
+ tty_driver_flush_buffer(tty);
+ }
+ if (I_IXON(tty))
+ start_tty(tty);
+ if (L_ECHO(tty)) {
+ echo_char(c, tty);
+ process_echoes(tty);
+ }
+ if (tty->pgrp)
+ kill_pgrp(tty->pgrp, signal, 1);
+ return;
+ }
+ }
+
+ if (c == '\r') {
+ if (I_IGNCR(tty))
+ return;
+ if (I_ICRNL(tty))
+ c = '\n';
+ } else if (c == '\n' && I_INLCR(tty))
+ c = '\r';
+
+ if (tty->icanon) {
+ if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
+ (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
+ eraser(c, tty);
+ process_echoes(tty);
+ return;
+ }
+ if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
+ tty->lnext = 1;
+ if (L_ECHO(tty)) {
+ finish_erasing(tty);
+ if (L_ECHOCTL(tty)) {
+ echo_char_raw('^', tty);
+ echo_char_raw('\b', tty);
+ process_echoes(tty);
+ }
+ }
+ return;
+ }
+ if (c == REPRINT_CHAR(tty) && L_ECHO(tty) &&
+ L_IEXTEN(tty)) {
+ /* re-echo the whole pending line */
+ unsigned long tail = tty->canon_head;
+
+ finish_erasing(tty);
+ echo_char(c, tty);
+ echo_char_raw('\n', tty);
+ while (tail != tty->read_head) {
+ echo_char(tty->read_buf[tail], tty);
+ tail = (tail+1) & (N_TTY_BUF_SIZE-1);
+ }
+ process_echoes(tty);
+ return;
+ }
+ if (c == '\n') {
+ if (tty->read_cnt >= N_TTY_BUF_SIZE) {
+ if (L_ECHO(tty))
+ process_output('\a', tty);
+ return;
+ }
+ if (L_ECHO(tty) || L_ECHONL(tty)) {
+ echo_char_raw('\n', tty);
+ process_echoes(tty);
+ }
+ goto handle_newline;
+ }
+ if (c == EOF_CHAR(tty)) {
+ if (tty->read_cnt >= N_TTY_BUF_SIZE)
+ return;
+ if (tty->canon_head != tty->read_head)
+ set_bit(TTY_PUSH, &tty->flags);
+ c = __DISABLED_CHAR;
+ goto handle_newline;
+ }
+ if ((c == EOL_CHAR(tty)) ||
+ (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) {
+ parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
+ ? 1 : 0;
+ if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
+ if (L_ECHO(tty))
+ process_output('\a', tty);
+ return;
+ }
+ /*
+ * XXX are EOL_CHAR and EOL2_CHAR echoed?!?
+ */
+ if (L_ECHO(tty)) {
+ /* Record the column of first canon char. */
+ if (tty->canon_head == tty->read_head)
+ echo_set_canon_col(tty);
+ echo_char(c, tty);
+ process_echoes(tty);
+ }
+ /*
+ * XXX does PARMRK doubling happen for
+ * EOL_CHAR and EOL2_CHAR?
+ */
+ if (parmrk)
+ put_tty_queue(c, tty);
+
+ handle_newline:
+ /* mark the line terminator in read_flags and
+ advance canon_head past the completed line */
+ spin_lock_irqsave(&tty->read_lock, flags);
+ set_bit(tty->read_head, tty->read_flags);
+ put_tty_queue_nolock(c, tty);
+ tty->canon_head = tty->read_head;
+ tty->canon_data++;
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+ if (waitqueue_active(&tty->read_wait))
+ wake_up_interruptible(&tty->read_wait);
+ return;
+ }
+ }
+
+ parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+ if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+ /* beep if no space */
+ if (L_ECHO(tty))
+ process_output('\a', tty);
+ return;
+ }
+ if (L_ECHO(tty)) {
+ finish_erasing(tty);
+ if (c == '\n')
+ echo_char_raw('\n', tty);
+ else {
+ /* Record the column of first canon char. */
+ if (tty->canon_head == tty->read_head)
+ echo_set_canon_col(tty);
+ echo_char(c, tty);
+ }
+ process_echoes(tty);
+ }
+
+ /* PARMRK: a literal \377 is doubled to escape it */
+ if (parmrk)
+ put_tty_queue(c, tty);
+
+ put_tty_queue(c, tty);
+ }
+
+
+ /**
+ * n_tty_write_wakeup - asynchronous I/O notifier
+ * @tty: tty device
+ *
+ * Required for the ptys, serial driver etc. since processes
+ * that attach themselves to the master and rely on ASYNC
+ * IO must be woken up
+ */
+
+ static void n_tty_write_wakeup(struct tty_struct *tty)
+ {
+ /* only signal once per wakeup request: test-and-clear the bit */
+ if (tty->fasync && test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags))
+ kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
+ }
+
+ /**
+ * n_tty_receive_buf - data receive
+ * @tty: terminal device
+ * @cp: buffer
+ * @fp: flag buffer
+ * @count: characters
+ *
+ * Called by the terminal driver when a block of characters has
+ * been received. This function must be called from soft contexts
+ * not from interrupt context. The driver is responsible for making
+ * calls one at a time and in order (or using flush_to_ldisc)
+ */
+
+ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
+ char *fp, int count)
+ {
+ const unsigned char *p;
+ char *f, flags = TTY_NORMAL;
+ int i;
+ char buf[64];
+ unsigned long cpuflags;
+
+ if (!tty->read_buf)
+ return;
+
+ if (tty->real_raw) {
+ spin_lock_irqsave(&tty->read_lock, cpuflags);
+ i = min(N_TTY_BUF_SIZE - tty->read_cnt,
+ N_TTY_BUF_SIZE - tty->read_head);
+ i = min(count, i);
+ memcpy(tty->read_buf + tty->read_head, cp, i);
+ tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
+ tty->read_cnt += i;
+ cp += i;
+ count -= i;
+
+ i = min(N_TTY_BUF_SIZE - tty->read_cnt,
+ N_TTY_BUF_SIZE - tty->read_head);
+ i = min(count, i);
+ memcpy(tty->read_buf + tty->read_head, cp, i);
+ tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
+ tty->read_cnt += i;
+ spin_unlock_irqrestore(&tty->read_lock, cpuflags);
+ } else {
+ for (i = count, p = cp, f = fp; i; i--, p++) {
+ if (f)
+ flags = *f++;
+ switch (flags) {
+ case TTY_NORMAL:
+ n_tty_receive_char(tty, *p);
+ break;
+ case TTY_BREAK:
+ n_tty_receive_break(tty);
+ break;
+ case TTY_PARITY:
+ case TTY_FRAME:
+ n_tty_receive_parity_error(tty, *p);
+ break;
+ case TTY_OVERRUN:
+ n_tty_receive_overrun(tty);
+ break;
+ default:
+ printk(KERN_ERR "%s: unknown flag %d\n",
+ tty_name(tty, buf), flags);
+ break;
+ }
+ }
+ if (tty->ops->flush_chars)
+ tty->ops->flush_chars(tty);
+ }
+
+ n_tty_set_room(tty);
+
+ if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) ||
+ L_EXTPROC(tty)) {
+ kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+ if (waitqueue_active(&tty->read_wait))
+ wake_up_interruptible(&tty->read_wait);
+ }
+
+ /*
+ * Check the remaining room for the input canonicalization
+ * mode. We don't want to throttle the driver if we're in
+ * canonical mode and don't have a newline yet!
+ */
+ if (tty->receive_room < TTY_THRESHOLD_THROTTLE)
+ tty_throttle(tty);
+ }
+
+ int is_ignored(int sig)
+ {
+ return (sigismember(&current->blocked, sig) ||
+ current->sighand->action[sig-1].sa.sa_handler == SIG_IGN);
+ }
+
+ /**
+ * n_tty_set_termios - termios data changed
+ * @tty: terminal
+ * @old: previous data
+ *
+ * Called by the tty layer when the user changes termios flags so
+ * that the line discipline can plan ahead. This function cannot sleep
+ * and is protected from re-entry by the tty layer. The user is
+ * guaranteed that this function will not be re-entered or in progress
+ * when the ldisc is closed.
+ *
+ * Locking: Caller holds tty->termios_mutex
+ */
+
+ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
+ {
+ int canon_change = 1;
+ BUG_ON(!tty);
+
+ if (old)
+ canon_change = (old->c_lflag ^ tty->termios->c_lflag) & ICANON;
+ if (canon_change) {
+ memset(&tty->read_flags, 0, sizeof tty->read_flags);
+ tty->canon_head = tty->read_tail;
+ tty->canon_data = 0;
+ tty->erasing = 0;
+ }
+
+ if (canon_change && !L_ICANON(tty) && tty->read_cnt)
+ wake_up_interruptible(&tty->read_wait);
+
+ tty->icanon = (L_ICANON(tty) != 0);
+ if (test_bit(TTY_HW_COOK_IN, &tty->flags)) {
+ tty->raw = 1;
+ tty->real_raw = 1;
+ n_tty_set_room(tty);
+ return;
+ }
+ if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) ||
+ I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) ||
+ I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) ||
+ I_PARMRK(tty)) {
+ memset(tty->process_char_map, 0, 256/8);
+
+ if (I_IGNCR(tty) || I_ICRNL(tty))
+ set_bit('\r', tty->process_char_map);
+ if (I_INLCR(tty))
+ set_bit('\n', tty->process_char_map);
+
+ if (L_ICANON(tty)) {
+ set_bit(ERASE_CHAR(tty), tty->process_char_map);
+ set_bit(KILL_CHAR(tty), tty->process_char_map);
+ set_bit(EOF_CHAR(tty), tty->process_char_map);
+ set_bit('\n', tty->process_char_map);
+ set_bit(EOL_CHAR(tty), tty->process_char_map);
+ if (L_IEXTEN(tty)) {
+ set_bit(WERASE_CHAR(tty),
+ tty->process_char_map);
+ set_bit(LNEXT_CHAR(tty),
+ tty->process_char_map);
+ set_bit(EOL2_CHAR(tty),
+ tty->process_char_map);
+ if (L_ECHO(tty))
+ set_bit(REPRINT_CHAR(tty),
+ tty->process_char_map);
+ }
+ }
+ if (I_IXON(tty)) {
+ set_bit(START_CHAR(tty), tty->process_char_map);
+ set_bit(STOP_CHAR(tty), tty->process_char_map);
+ }
+ if (L_ISIG(tty)) {
+ set_bit(INTR_CHAR(tty), tty->process_char_map);
+ set_bit(QUIT_CHAR(tty), tty->process_char_map);
+ set_bit(SUSP_CHAR(tty), tty->process_char_map);
+ }
+ clear_bit(__DISABLED_CHAR, tty->process_char_map);
+ tty->raw = 0;
+ tty->real_raw = 0;
+ } else {
+ tty->raw = 1;
+ if ((I_IGNBRK(tty) || (!I_BRKINT(tty) && !I_PARMRK(tty))) &&
+ (I_IGNPAR(tty) || !I_INPCK(tty)) &&
+ (tty->driver->flags & TTY_DRIVER_REAL_RAW))
+ tty->real_raw = 1;
+ else
+ tty->real_raw = 0;
+ }
+ n_tty_set_room(tty);
+ /* The termios change make the tty ready for I/O */
+ wake_up_interruptible(&tty->write_wait);
+ wake_up_interruptible(&tty->read_wait);
+ }
+
+ /**
+ * n_tty_close - close the ldisc for this tty
+ * @tty: device
+ *
+ * Called from the terminal layer when this line discipline is
+ * being shut down, either because of a close or because of a
+ * discipline change. The function will not be called while other
+ * ldisc methods are in progress.
+ */
+
+ static void n_tty_close(struct tty_struct *tty)
+ {
+ n_tty_flush_buffer(tty);
+ if (tty->read_buf) {
+ kfree(tty->read_buf);
+ tty->read_buf = NULL;
+ }
+ if (tty->echo_buf) {
+ kfree(tty->echo_buf);
+ tty->echo_buf = NULL;
+ }
+ }
+
+ /**
+ * n_tty_open - open an ldisc
+ * @tty: terminal to open
+ *
+ * Called when this line discipline is being attached to the
+ * terminal device. Can sleep. Called serialized so that no
+ * other events will occur in parallel. No further open will occur
+ * until a close.
+ */
+
+ static int n_tty_open(struct tty_struct *tty)
+ {
+ if (!tty)
+ return -EINVAL;
+
+ /* These are ugly. Currently a malloc failure here can panic */
+ if (!tty->read_buf) {
+ tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+ if (!tty->read_buf)
+ return -ENOMEM;
+ }
+ if (!tty->echo_buf) {
+ tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+
+ if (!tty->echo_buf)
+ return -ENOMEM;
+ }
+ reset_buffer_flags(tty);
+ tty->column = 0;
+ n_tty_set_termios(tty, NULL);
+ tty->minimum_to_wake = 1;
+ tty->closing = 0;
+ return 0;
+ }
+
+ static inline int input_available_p(struct tty_struct *tty, int amt)
+ {
+ tty_flush_to_ldisc(tty);
+ if (tty->icanon && !L_EXTPROC(tty)) {
+ if (tty->canon_data)
+ return 1;
+ } else if (tty->read_cnt >= (amt ? amt : 1))
+ return 1;
+
+ return 0;
+ }
+
+ /**
+ * copy_from_read_buf - copy read data directly
+ * @tty: terminal device
+ * @b: user data
+ * @nr: size of data
+ *
+ * Helper function to speed up n_tty_read. It is only called when
+ * ICANON is off; it copies characters straight from the tty queue to
+ * user space directly. It can be profitably called twice; once to
+ * drain the space from the tail pointer to the (physical) end of the
+ * buffer, and once to drain the space from the (physical) beginning of
+ * the buffer to head pointer.
+ *
+ * Called under the tty->atomic_read_lock sem
+ *
+ */
+
+ static int copy_from_read_buf(struct tty_struct *tty,
+ unsigned char __user **b,
+ size_t *nr)
+
+ {
+ int retval;
+ size_t n;
+ unsigned long flags;
+
+ retval = 0;
+ spin_lock_irqsave(&tty->read_lock, flags);
+ n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+ n = min(*nr, n);
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ if (n) {
+ retval = copy_to_user(*b, &tty->read_buf[tty->read_tail], n);
+ n -= retval;
+ tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n);
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
+ tty->read_cnt -= n;
+ /* Turn single EOF into zero-length read */
+ if (L_EXTPROC(tty) && tty->icanon && n == 1) {
+ if (!tty->read_cnt && (*b)[n-1] == EOF_CHAR(tty))
+ n--;
+ }
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+ *b += n;
+ *nr -= n;
+ }
+ return retval;
+ }
+
+ extern ssize_t redirected_tty_write(struct file *, const char __user *,
+ size_t, loff_t *);
+
+ /**
+ * job_control - check job control
+ * @tty: tty
+ * @file: file handle
+ *
+ * Perform job control management checks on this file/tty descriptor
+ * and if appropriate send any needed signals and return a negative
+ * error code if action should be taken.
+ *
+ * FIXME:
+ * Locking: None - redirected write test is safe, testing
+ * current->signal should possibly lock current->sighand
+ * pgrp locking ?
+ */
+
+ static int job_control(struct tty_struct *tty, struct file *file)
+ {
+ /* Job control check -- must be done at start and after
+ every sleep (POSIX.1 7.1.1.4). */
+ /* NOTE: not yet done after every sleep pending a thorough
+ check of the logic of this change. -- jlc */
+ /* don't stop on /dev/console */
+ if (file->f_op->write != redirected_tty_write &&
+ current->signal->tty == tty) {
+ if (!tty->pgrp)
+ printk(KERN_ERR "n_tty_read: no tty->pgrp!\n");
+ else if (task_pgrp(current) != tty->pgrp) {
+ if (is_ignored(SIGTTIN) ||
+ is_current_pgrp_orphaned())
+ return -EIO;
+ kill_pgrp(task_pgrp(current), SIGTTIN, 1);
+ set_thread_flag(TIF_SIGPENDING);
+ return -ERESTARTSYS;
+ }
+ }
+ return 0;
+ }
+
+
+ /**
+ * n_tty_read - read function for tty
+ * @tty: tty device
+ * @file: file object
+ * @buf: userspace buffer pointer
+ * @nr: size of I/O
+ *
+ * Perform reads for the line discipline. We are guaranteed that the
+ * line discipline will not be closed under us but we may get multiple
+ * parallel readers and must handle this ourselves. We may also get
+ * a hangup. Always called in user context, may sleep.
+ *
+ * This code must be sure never to sleep through a hangup.
+ */
+
+ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ unsigned char __user *buf, size_t nr)
+ {
+ unsigned char __user *b = buf;
+ DECLARE_WAITQUEUE(wait, current);
+ int c;
+ int minimum, time;
+ ssize_t retval = 0;
+ ssize_t size;
+ long timeout;
+ unsigned long flags;
+ int packet;
+
+ do_it_again:
+
+ BUG_ON(!tty->read_buf);
+
+ c = job_control(tty, file);
+ if (c < 0)
+ return c;
+
+ minimum = time = 0;
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (!tty->icanon) {
+ time = (HZ / 10) * TIME_CHAR(tty);
+ minimum = MIN_CHAR(tty);
+ if (minimum) {
+ if (time)
+ tty->minimum_to_wake = 1;
+ else if (!waitqueue_active(&tty->read_wait) ||
+ (tty->minimum_to_wake > minimum))
+ tty->minimum_to_wake = minimum;
+ } else {
+ timeout = 0;
+ if (time) {
+ timeout = time;
+ time = 0;
+ }
+ tty->minimum_to_wake = minimum = 1;
+ }
+ }
+
+ /*
+ * Internal serialization of reads.
+ */
+ if (file->f_flags & O_NONBLOCK) {
+ if (!mutex_trylock(&tty->atomic_read_lock))
+ return -EAGAIN;
+ } else {
+ if (mutex_lock_interruptible(&tty->atomic_read_lock))
+ return -ERESTARTSYS;
+ }
+ packet = tty->packet;
+
+ add_wait_queue(&tty->read_wait, &wait);
+ while (nr) {
+ /* First test for status change. */
+ if (packet && tty->link->ctrl_status) {
+ unsigned char cs;
+ if (b != buf)
+ break;
+ spin_lock_irqsave(&tty->link->ctrl_lock, flags);
+ cs = tty->link->ctrl_status;
+ tty->link->ctrl_status = 0;
+ spin_unlock_irqrestore(&tty->link->ctrl_lock, flags);
+ if (tty_put_user(tty, cs, b++)) {
+ retval = -EFAULT;
+ b--;
+ break;
+ }
+ nr--;
+ break;
+ }
+ /* This statement must be first before checking for input
+ so that any interrupt will set the state back to
+ TASK_RUNNING. */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (((minimum - (b - buf)) < tty->minimum_to_wake) &&
+ ((minimum - (b - buf)) >= 1))
+ tty->minimum_to_wake = (minimum - (b - buf));
+
+ if (!input_available_p(tty, 0)) {
++#ifdef CONFIG_BOOTSPLASH
++ if (file->f_dentry->d_inode->i_rdev == MKDEV(TTY_MAJOR,0) ||
++ file->f_dentry->d_inode->i_rdev == MKDEV(TTY_MAJOR,1) ||
++ file->f_dentry->d_inode->i_rdev == MKDEV(TTYAUX_MAJOR,0) ||
++ file->f_dentry->d_inode->i_rdev == MKDEV(TTYAUX_MAJOR,1)) {
++ extern int splash_verbose(void);
++ (void)splash_verbose();
++ }
++#endif
+ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
+ retval = -EIO;
+ break;
+ }
+ if (tty_hung_up_p(file))
+ break;
+ if (!timeout)
+ break;
+ if (file->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ break;
+ }
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ break;
+ }
+ /* FIXME: does n_tty_set_room need locking ? */
+ n_tty_set_room(tty);
+ timeout = schedule_timeout(timeout);
+ continue;
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* Deal with packet mode. */
+ if (packet && b == buf) {
+ if (tty_put_user(tty, TIOCPKT_DATA, b++)) {
+ retval = -EFAULT;
+ b--;
+ break;
+ }
+ nr--;
+ }
+
+ if (tty->icanon && !L_EXTPROC(tty)) {
+ /* N.B. avoid overrun if nr == 0 */
+ while (nr && tty->read_cnt) {
+ int eol;
+
+ eol = test_and_clear_bit(tty->read_tail,
+ tty->read_flags);
+ c = tty->read_buf[tty->read_tail];
+ spin_lock_irqsave(&tty->read_lock, flags);
+ tty->read_tail = ((tty->read_tail+1) &
+ (N_TTY_BUF_SIZE-1));
+ tty->read_cnt--;
+ if (eol) {
+ /* this test should be redundant:
+ * we shouldn't be reading data if
+ * canon_data is 0
+ */
+ if (--tty->canon_data < 0)
+ tty->canon_data = 0;
+ }
+ spin_unlock_irqrestore(&tty->read_lock, flags);
+
+ if (!eol || (c != __DISABLED_CHAR)) {
+ if (tty_put_user(tty, c, b++)) {
+ retval = -EFAULT;
+ b--;
+ break;
+ }
+ nr--;
+ }
+ if (eol) {
+ tty_audit_push(tty);
+ break;
+ }
+ }
+ if (retval)
+ break;
+ } else {
+ int uncopied;
+ /* The copy function takes the read lock and handles
+ locking internally for this case */
+ uncopied = copy_from_read_buf(tty, &b, &nr);
+ uncopied += copy_from_read_buf(tty, &b, &nr);
+ if (uncopied) {
+ retval = -EFAULT;
+ break;
+ }
+ }
+
+ /* If there is enough space in the read buffer now, let the
+ * low-level driver know. We use n_tty_chars_in_buffer() to
+ * check the buffer, as it now knows about canonical mode.
+ * Otherwise, if the driver is throttled and the line is
+ * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode,
+ * we won't get any more characters.
+ */
+ if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) {
+ n_tty_set_room(tty);
+ check_unthrottle(tty);
+ }
+
+ if (b - buf >= minimum)
+ break;
+ if (time)
+ timeout = time;
+ }
+ mutex_unlock(&tty->atomic_read_lock);
+ remove_wait_queue(&tty->read_wait, &wait);
+
+ if (!waitqueue_active(&tty->read_wait))
+ tty->minimum_to_wake = minimum;
+
+ __set_current_state(TASK_RUNNING);
+ size = b - buf;
+ if (size) {
+ retval = size;
+ if (nr)
+ clear_bit(TTY_PUSH, &tty->flags);
+ } else if (test_and_clear_bit(TTY_PUSH, &tty->flags))
+ goto do_it_again;
+
+ n_tty_set_room(tty);
+ return retval;
+ }
+
+ /**
+ * n_tty_write - write function for tty
+ * @tty: tty device
+ * @file: file object
+ * @buf: userspace buffer pointer
+ * @nr: size of I/O
+ *
+ * Write function of the terminal device. This is serialized with
+ * respect to other write callers but not to termios changes, reads
+ * and other such events. Since the receive code will echo characters,
+ * thus calling driver write methods, the output_lock is used in
+ * the output processing functions called here as well as in the
+ * echo processing function to protect the column state and space
+ * left in the buffer.
+ *
+ * This code must be sure never to sleep through a hangup.
+ *
+ * Locking: output_lock to protect column state and space left
+ * (note that the process_output*() functions take this
+ * lock themselves)
+ */
+
+ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
+ const unsigned char *buf, size_t nr)
+ {
+ const unsigned char *b = buf;
+ DECLARE_WAITQUEUE(wait, current);
+ int c;
+ ssize_t retval = 0;
+
+ /* Job control check -- must be done at start (POSIX.1 7.1.1.4). */
+ if (L_TOSTOP(tty) && file->f_op->write != redirected_tty_write) {
+ retval = tty_check_change(tty);
+ if (retval)
+ return retval;
+ }
+
+ /* Write out any echoed characters that are still pending */
+ process_echoes(tty);
+
+ add_wait_queue(&tty->write_wait, &wait);
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ break;
+ }
+ if (tty_hung_up_p(file) || (tty->link && !tty->link->count)) {
+ retval = -EIO;
+ break;
+ }
+ if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
+ while (nr > 0) {
+ ssize_t num = process_output_block(tty, b, nr);
+ if (num < 0) {
+ if (num == -EAGAIN)
+ break;
+ retval = num;
+ goto break_out;
+ }
+ b += num;
+ nr -= num;
+ if (nr == 0)
+ break;
+ c = *b;
+ if (process_output(c, tty) < 0)
+ break;
+ b++; nr--;
+ }
+ if (tty->ops->flush_chars)
+ tty->ops->flush_chars(tty);
+ } else {
+ while (nr > 0) {
+ c = tty->ops->write(tty, b, nr);
+ if (c < 0) {
+ retval = c;
+ goto break_out;
+ }
+ if (!c)
+ break;
+ b += c;
+ nr -= c;
+ }
+ }
+ if (!nr)
+ break;
+ if (file->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ break;
+ }
+ schedule();
+ }
+ break_out:
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&tty->write_wait, &wait);
+ if (b - buf != nr && tty->fasync)
+ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+ return (b - buf) ? b - buf : retval;
+ }
+
+ /**
+ * n_tty_poll - poll method for N_TTY
+ * @tty: terminal device
+ * @file: file accessing it
+ * @wait: poll table
+ *
+ * Called when the line discipline is asked to poll() for data or
+ * for special events. This code is not serialized with respect to
+ * other events save open/close.
+ *
+ * This code must be sure never to sleep through a hangup.
+ * Called without the kernel lock held - fine
+ */
+
+ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
+ poll_table *wait)
+ {
+ unsigned int mask = 0;
+
+ poll_wait(file, &tty->read_wait, wait);
+ poll_wait(file, &tty->write_wait, wait);
+ if (input_available_p(tty, TIME_CHAR(tty) ? 0 : MIN_CHAR(tty)))
+ mask |= POLLIN | POLLRDNORM;
+ if (tty->packet && tty->link->ctrl_status)
+ mask |= POLLPRI | POLLIN | POLLRDNORM;
+ if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
+ mask |= POLLHUP;
+ if (tty_hung_up_p(file))
+ mask |= POLLHUP;
+ if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) {
+ if (MIN_CHAR(tty) && !TIME_CHAR(tty))
+ tty->minimum_to_wake = MIN_CHAR(tty);
+ else
+ tty->minimum_to_wake = 1;
+ }
+ if (tty->ops->write && !tty_is_writelocked(tty) &&
+ tty_chars_in_buffer(tty) < WAKEUP_CHARS &&
+ tty_write_room(tty) > 0)
+ mask |= POLLOUT | POLLWRNORM;
+ return mask;
+ }
+
+ static unsigned long inq_canon(struct tty_struct *tty)
+ {
+ int nr, head, tail;
+
+ if (!tty->canon_data)
+ return 0;
+ head = tty->canon_head;
+ tail = tty->read_tail;
+ nr = (head - tail) & (N_TTY_BUF_SIZE-1);
+ /* Skip EOF-chars.. */
+ while (head != tail) {
+ if (test_bit(tail, tty->read_flags) &&
+ tty->read_buf[tail] == __DISABLED_CHAR)
+ nr--;
+ tail = (tail+1) & (N_TTY_BUF_SIZE-1);
+ }
+ return nr;
+ }
+
+ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
+ int retval;
+
+ switch (cmd) {
+ case TIOCOUTQ:
+ return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
+ case TIOCINQ:
+ /* FIXME: Locking */
+ retval = tty->read_cnt;
+ if (L_ICANON(tty))
+ retval = inq_canon(tty);
+ return put_user(retval, (unsigned int __user *) arg);
+ default:
+ return n_tty_ioctl_helper(tty, file, cmd, arg);
+ }
+ }
+
+ struct tty_ldisc_ops tty_ldisc_N_TTY = {
+ .magic = TTY_LDISC_MAGIC,
+ .name = "n_tty",
+ .open = n_tty_open,
+ .close = n_tty_close,
+ .flush_buffer = n_tty_flush_buffer,
+ .chars_in_buffer = n_tty_chars_in_buffer,
+ .read = n_tty_read,
+ .write = n_tty_write,
+ .ioctl = n_tty_ioctl,
+ .set_termios = n_tty_set_termios,
+ .poll = n_tty_poll,
+ .receive_buf = n_tty_receive_buf,
+ .write_wakeup = n_tty_write_wakeup
+ };
+
+ /**
+ * n_tty_inherit_ops - inherit N_TTY methods
+ * @ops: struct tty_ldisc_ops where to save N_TTY methods
+ *
+ * Used by a generic struct tty_ldisc_ops to easily inherit N_TTY
+ * methods.
+ */
+
+ void n_tty_inherit_ops(struct tty_ldisc_ops *ops)
+ {
+ *ops = tty_ldisc_N_TTY;
+ ops->owner = NULL;
+ ops->refcount = ops->flags = 0;
+ }
+ EXPORT_SYMBOL_GPL(n_tty_inherit_ops);
--- /dev/null
+ /*
+ * linux/drivers/char/tty_io.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+ /*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures. Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time. Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c). This
+ * makes for cleaner and more compact code. -TYT, 9/17/92
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling. No delays, but all
+ * other bits should be there.
+ * -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ * -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ * -- ctm@ardi.com, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ * -- mj@k332.feld.cvut.cz, 19-Nov-95
+ *
+ * Restrict vt switching via ioctl()
+ * -- grif@cs.ucr.edu, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ * -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+ *
+ * Rewrote tty_init_dev and tty_release_dev to eliminate races.
+ * -- Bill Hawes <whawes@star.net>, June 97
+ *
+ * Added devfs support.
+ * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ * -- Russell King <rmk@arm.linux.org.uk>
+ *
+ * Move do_SAK() into process context. Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc()
+ * -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ */
+
+ #include <linux/types.h>
+ #include <linux/major.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/fcntl.h>
+ #include <linux/sched.h>
+ #include <linux/interrupt.h>
+ #include <linux/tty.h>
+ #include <linux/tty_driver.h>
+ #include <linux/tty_flip.h>
+ #include <linux/devpts_fs.h>
+ #include <linux/file.h>
+ #include <linux/fdtable.h>
+ #include <linux/console.h>
+ #include <linux/timer.h>
+ #include <linux/ctype.h>
+ #include <linux/kd.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/proc_fs.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/smp_lock.h>
+ #include <linux/device.h>
+ #include <linux/wait.h>
+ #include <linux/bitops.h>
+ #include <linux/delay.h>
+ #include <linux/seq_file.h>
+ #include <linux/serial.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/system.h>
+
+ #include <linux/kbd_kern.h>
+ #include <linux/vt_kern.h>
+ #include <linux/selection.h>
+
+ #include <linux/kmod.h>
+ #include <linux/nsproxy.h>
+
+ #undef TTY_DEBUG_HANGUP
+
+ #define TTY_PARANOIA_CHECK 1
+ #define CHECK_TTY_COUNT 1
+
+ struct ktermios tty_std_termios = { /* for the benefit of tty drivers */
+ .c_iflag = ICRNL | IXON,
+ .c_oflag = OPOST | ONLCR,
+ .c_cflag = B38400 | CS8 | CREAD | HUPCL,
+ .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+ ECHOCTL | ECHOKE | IEXTEN,
+ .c_cc = INIT_C_CC,
+ .c_ispeed = 38400,
+ .c_ospeed = 38400
+ };
+
+ EXPORT_SYMBOL(tty_std_termios);
+
+ /* This list gets poked at by procfs and various bits of boot up code. This
+ could do with some rationalisation such as pulling the tty proc function
+ into this file */
+
+ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
+
+ /* Mutex to protect creating and releasing a tty. This is shared with
+ vt.c for deeply disgusting hack reasons */
+ DEFINE_MUTEX(tty_mutex);
+ EXPORT_SYMBOL(tty_mutex);
+
+ /* Spinlock to protect the tty->tty_files list */
+ DEFINE_SPINLOCK(tty_files_lock);
+
+ static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
+ static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
+ ssize_t redirected_tty_write(struct file *, const char __user *,
+ size_t, loff_t *);
+ static unsigned int tty_poll(struct file *, poll_table *);
+ static int tty_open(struct inode *, struct file *);
+ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+ #ifdef CONFIG_COMPAT
+ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+ #else
+ #define tty_compat_ioctl NULL
+ #endif
+ static int __tty_fasync(int fd, struct file *filp, int on);
+ static int tty_fasync(int fd, struct file *filp, int on);
+ static void release_tty(struct tty_struct *tty, int idx);
+ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+
+ /**
+ * alloc_tty_struct - allocate a tty object
+ *
+ * Return a new empty tty structure. The data fields have not
+ * been initialized in any way but has been zeroed
+ *
+ * Locking: none
+ */
+
+ struct tty_struct *alloc_tty_struct(void)
+ {
+ return kzalloc(sizeof(struct tty_struct), GFP_KERNEL);
+ }
+
+ /**
+ * free_tty_struct - free a disused tty
+ * @tty: tty struct to free
+ *
+ * Free the write buffers, tty queue and tty memory itself.
+ *
+ * Locking: none. Must be called after tty is definitely unused
+ */
+
+ void free_tty_struct(struct tty_struct *tty)
+ {
+ if (tty->dev)
+ put_device(tty->dev);
+ kfree(tty->write_buf);
+ tty_buffer_free_all(tty);
+ kfree(tty);
+ }
+
+ static inline struct tty_struct *file_tty(struct file *file)
+ {
+ return ((struct tty_file_private *)file->private_data)->tty;
+ }
+
+ /* Associate a new file with the tty structure */
+ int tty_add_file(struct tty_struct *tty, struct file *file)
+ {
+ struct tty_file_private *priv;
+
+ priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->tty = tty;
+ priv->file = file;
+ file->private_data = priv;
+
+ spin_lock(&tty_files_lock);
+ list_add(&priv->list, &tty->tty_files);
+ spin_unlock(&tty_files_lock);
+
+ return 0;
+ }
+
+ /* Delete file from its tty */
+ void tty_del_file(struct file *file)
+ {
+ struct tty_file_private *priv = file->private_data;
+
+ spin_lock(&tty_files_lock);
+ list_del(&priv->list);
+ spin_unlock(&tty_files_lock);
+ file->private_data = NULL;
+ kfree(priv);
+ }
+
+
+ #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
+
+ /**
+ * tty_name - return tty naming
+ * @tty: tty structure
+ * @buf: buffer for output
+ *
+ * Convert a tty structure into a name. The name reflects the kernel
+ * naming policy and if udev is in use may not reflect user space
+ *
+ * Locking: none
+ */
+
+ char *tty_name(struct tty_struct *tty, char *buf)
+ {
+ if (!tty) /* Hmm. NULL pointer. That's fun. */
+ strcpy(buf, "NULL tty");
+ else
+ strcpy(buf, tty->name);
+ return buf;
+ }
+
+ EXPORT_SYMBOL(tty_name);
+
+ int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+ const char *routine)
+ {
+ #ifdef TTY_PARANOIA_CHECK
+ if (!tty) {
+ printk(KERN_WARNING
+ "null TTY for (%d:%d) in %s\n",
+ imajor(inode), iminor(inode), routine);
+ return 1;
+ }
+ if (tty->magic != TTY_MAGIC) {
+ printk(KERN_WARNING
+ "bad magic number for tty struct (%d:%d) in %s\n",
+ imajor(inode), iminor(inode), routine);
+ return 1;
+ }
+ #endif
+ return 0;
+ }
+
+ static int check_tty_count(struct tty_struct *tty, const char *routine)
+ {
+ #ifdef CHECK_TTY_COUNT
+ struct list_head *p;
+ int count = 0;
+
+ spin_lock(&tty_files_lock);
+ list_for_each(p, &tty->tty_files) {
+ count++;
+ }
+ spin_unlock(&tty_files_lock);
+ if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver->subtype == PTY_TYPE_SLAVE &&
+ tty->link && tty->link->count)
+ count++;
+ if (tty->count != count) {
+ printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+ "!= #fd's(%d) in %s\n",
+ tty->name, tty->count, count, routine);
+ return count;
+ }
+ #endif
+ return 0;
+ }
+
+ /**
+ * get_tty_driver - find device of a tty
+ * @dev_t: device identifier
+ * @index: returns the index of the tty
+ *
+ * This routine returns a tty driver structure, given a device number
+ * and also passes back the index number.
+ *
+ * Locking: caller must hold tty_mutex
+ */
+
+ static struct tty_driver *get_tty_driver(dev_t device, int *index)
+ {
+ struct tty_driver *p;
+
+ list_for_each_entry(p, &tty_drivers, tty_drivers) {
+ dev_t base = MKDEV(p->major, p->minor_start);
+ if (device < base || device >= base + p->num)
+ continue;
+ *index = device - base;
+ return tty_driver_kref_get(p);
+ }
+ return NULL;
+ }
+
+ #ifdef CONFIG_CONSOLE_POLL
+
+ /**
+ * tty_find_polling_driver - find device of a polled tty
+ * @name: name string to match
+ * @line: pointer to resulting tty line nr
+ *
+ * This routine returns a tty driver structure, given a name
+ * and the condition that the tty driver is capable of polled
+ * operation.
+ */
+ struct tty_driver *tty_find_polling_driver(char *name, int *line)
+ {
+ struct tty_driver *p, *res = NULL;
+ int tty_line = 0;
+ int len;
+ char *str, *stp;
+
+ for (str = name; *str; str++)
+ if ((*str >= '0' && *str <= '9') || *str == ',')
+ break;
+ if (!*str)
+ return NULL;
+
+ len = str - name;
+ tty_line = simple_strtoul(str, &str, 10);
+
+ mutex_lock(&tty_mutex);
+ /* Search through the tty devices to look for a match */
+ list_for_each_entry(p, &tty_drivers, tty_drivers) {
+ if (strncmp(name, p->name, len) != 0)
+ continue;
+ stp = str;
+ if (*stp == ',')
+ stp++;
+ if (*stp == '\0')
+ stp = NULL;
+
+ if (tty_line >= 0 && tty_line < p->num && p->ops &&
+ p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
+ res = tty_driver_kref_get(p);
+ *line = tty_line;
+ break;
+ }
+ }
+ mutex_unlock(&tty_mutex);
+
+ return res;
+ }
+ EXPORT_SYMBOL_GPL(tty_find_polling_driver);
+ #endif
+
+ /**
+ * tty_check_change - check for POSIX terminal changes
+ * @tty: tty to check
+ *
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU. If the signal is blocked or
+ * ignored, go ahead and perform the operation. (POSIX 7.2)
+ *
+ * Locking: ctrl_lock
+ */
+
+ int tty_check_change(struct tty_struct *tty)
+ {
+ unsigned long flags;
+ int ret = 0;
+
+ if (current->signal->tty != tty)
+ return 0;
+
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+
+ if (!tty->pgrp) {
+ printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n");
+ goto out_unlock;
+ }
+ if (task_pgrp(current) == tty->pgrp)
+ goto out_unlock;
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ if (is_ignored(SIGTTOU))
+ goto out;
+ if (is_current_pgrp_orphaned()) {
+ ret = -EIO;
+ goto out;
+ }
+ kill_pgrp(task_pgrp(current), SIGTTOU, 1);
+ set_thread_flag(TIF_SIGPENDING);
+ ret = -ERESTARTSYS;
+ out:
+ return ret;
+ out_unlock:
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ return ret;
+ }
+
+ EXPORT_SYMBOL(tty_check_change);
+
+ /*
+ * Stub file operations installed on a hung-up tty (see __tty_hangup(),
+ * which swaps in hung_up_tty_fops): reads return EOF, writes fail.
+ */
+ static ssize_t hung_up_tty_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ return 0;
+ }
+
+ static ssize_t hung_up_tty_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ return -EIO;
+ }
+
+ /* No kernel lock held - none needed ;) */
+ static unsigned int hung_up_tty_poll(struct file *filp, poll_table *wait)
+ {
+ /* A hung-up tty is permanently "ready": report every event */
+ return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+ }
+
+ /* ioctl on a hung-up tty: TIOCSPGRP reports -ENOTTY, anything else -EIO */
+ static long hung_up_tty_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+ {
+ if (cmd == TIOCSPGRP)
+ return -ENOTTY;
+ return -EIO;
+ }
+
+ /* Compat (32-bit on 64-bit) variant; identical semantics */
+ static long hung_up_tty_compat_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
+ if (cmd == TIOCSPGRP)
+ return -ENOTTY;
+ return -EIO;
+ }
+
+ /* Normal file operations for an open tty device */
+ static const struct file_operations tty_fops = {
+ .llseek = no_llseek,
+ .read = tty_read,
+ .write = tty_write,
+ .poll = tty_poll,
+ .unlocked_ioctl = tty_ioctl,
+ .compat_ioctl = tty_compat_ioctl,
+ .open = tty_open,
+ .release = tty_release,
+ .fasync = tty_fasync,
+ };
+
+ /* Like tty_fops but writes go through redirected_tty_write(), which
+ * honours any active redirect; presumably installed for the console
+ * device node — confirm against the open path */
+ static const struct file_operations console_fops = {
+ .llseek = no_llseek,
+ .read = tty_read,
+ .write = redirected_tty_write,
+ .poll = tty_poll,
+ .unlocked_ioctl = tty_ioctl,
+ .compat_ioctl = tty_compat_ioctl,
+ .open = tty_open,
+ .release = tty_release,
+ .fasync = tty_fasync,
+ };
+
+ /* Swapped into open file handles by __tty_hangup() */
+ static const struct file_operations hung_up_tty_fops = {
+ .llseek = no_llseek,
+ .read = hung_up_tty_read,
+ .write = hung_up_tty_write,
+ .poll = hung_up_tty_poll,
+ .unlocked_ioctl = hung_up_tty_ioctl,
+ .compat_ioctl = hung_up_tty_compat_ioctl,
+ .release = tty_release,
+ };
+
+ /* Current write redirection target, if any; read under redirect_lock
+ * and consumed by redirected_tty_write() / cleared by __tty_hangup() */
+ static DEFINE_SPINLOCK(redirect_lock);
+ static struct file *redirect;
+
+ /**
+ * tty_wakeup - request more data
+ * @tty: terminal
+ *
+ * Internal and external helper for wakeups of tty. This function
+ * informs the line discipline if present that the driver is ready
+ * to receive more output data.
+ */
+
+ void tty_wakeup(struct tty_struct *tty)
+ {
+ struct tty_ldisc *ld;
+
+ /* Only call into the ldisc if it asked for write wakeups */
+ if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
+ ld = tty_ldisc_ref(tty);
+ if (ld) {
+ if (ld->ops->write_wakeup)
+ ld->ops->write_wakeup(tty);
+ tty_ldisc_deref(ld);
+ }
+ }
+ /* Wake anyone blocked in write/poll regardless of the ldisc */
+ wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ }
+
+ EXPORT_SYMBOL_GPL(tty_wakeup);
+
+ /**
+ * __tty_hangup - actual handler for hangup events
+ * @tty: tty device
+ *
+ * This can be called by the "eventd" kernel thread. That is process
+ * synchronous but doesn't hold any locks, so we need to make sure we
+ * have the appropriate locks for what we're doing.
+ *
+ * The hangup event clears any pending redirections onto the hung up
+ * device. It ensures future writes will error and it does the needed
+ * line discipline hangup and signal delivery. The tty object itself
+ * remains intact.
+ *
+ * Locking:
+ * BTM
+ * redirect lock for undoing redirection
+ * file list lock for manipulating list of ttys
+ * tty_ldisc_lock from called functions
+ * termios_mutex resetting termios data
+ * tasklist_lock to walk task list for hangup event
+ * ->siglock to protect ->signal/->sighand
+ */
+ void __tty_hangup(struct tty_struct *tty)
+ {
+ struct file *cons_filp = NULL;
+ struct file *filp, *f = NULL;
+ struct task_struct *p;
+ struct tty_file_private *priv;
+ int closecount = 0, n;
+ unsigned long flags;
+ int refs = 0;
+
+ if (!tty)
+ return;
+
+
+ /* Undo any write redirection aimed at this tty; the fput of the
+ * redirect file is deferred to the end (can block) */
+ spin_lock(&redirect_lock);
+ if (redirect && file_tty(redirect) == tty) {
+ f = redirect;
+ redirect = NULL;
+ }
+ spin_unlock(&redirect_lock);
+
+ tty_lock();
+
+ /* inuse_filps is protected by the single tty lock,
+ this really needs to change if we want to flush the
+ workqueue with the lock held */
+ check_tty_count(tty, "tty_hangup");
+
+ spin_lock(&tty_files_lock);
+ /* This breaks for file handles being sent over AF_UNIX sockets ? */
+ list_for_each_entry(priv, &tty->tty_files, list) {
+ filp = priv->file;
+ if (filp->f_op->write == redirected_tty_write)
+ cons_filp = filp;
+ if (filp->f_op->write != tty_write)
+ continue;
+ closecount++;
+ __tty_fasync(-1, filp, 0); /* can't block */
+ /* Future I/O on this handle now errors/returns EOF */
+ filp->f_op = &hung_up_tty_fops;
+ }
+ spin_unlock(&tty_files_lock);
+
+ tty_ldisc_hangup(tty);
+
+ /* Signal the session: SIGHUP+SIGCONT to each session leader, and
+ * detach signal->tty for every task in the session */
+ read_lock(&tasklist_lock);
+ if (tty->session) {
+ do_each_pid_task(tty->session, PIDTYPE_SID, p) {
+ spin_lock_irq(&p->sighand->siglock);
+ if (p->signal->tty == tty) {
+ p->signal->tty = NULL;
+ /* We defer the dereferences outside of
+ the tasklist lock */
+ refs++;
+ }
+ if (!p->signal->leader) {
+ spin_unlock_irq(&p->sighand->siglock);
+ continue;
+ }
+ __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+ __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ put_pid(p->signal->tty_old_pgrp); /* A noop */
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ if (tty->pgrp)
+ p->signal->tty_old_pgrp = get_pid(tty->pgrp);
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ spin_unlock_irq(&p->sighand->siglock);
+ } while_each_pid_task(tty->session, PIDTYPE_SID, p);
+ }
+ read_unlock(&tasklist_lock);
+
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ clear_bit(TTY_THROTTLED, &tty->flags);
+ clear_bit(TTY_PUSH, &tty->flags);
+ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+ put_pid(tty->session);
+ put_pid(tty->pgrp);
+ tty->session = NULL;
+ tty->pgrp = NULL;
+ tty->ctrl_status = 0;
+ set_bit(TTY_HUPPED, &tty->flags);
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+ /* Account for the p->signal references we killed */
+ while (refs--)
+ tty_kref_put(tty);
+
+ /*
+ * If one of the devices matches a console pointer, we
+ * cannot just call hangup() because that will cause
+ * tty->count and state->count to go out of sync.
+ * So we just call close() the right number of times.
+ */
+ if (cons_filp) {
+ if (tty->ops->close)
+ for (n = 0; n < closecount; n++)
+ tty->ops->close(tty, cons_filp);
+ } else if (tty->ops->hangup)
+ (tty->ops->hangup)(tty);
+ /*
+ * We don't want to have driver/ldisc interactions beyond
+ * the ones we did here. The driver layer expects no
+ * calls after ->hangup() from the ldisc side. However we
+ * can't yet guarantee all that.
+ */
+ set_bit(TTY_HUPPED, &tty->flags);
+ tty_ldisc_enable(tty);
+
+ tty_unlock();
+
+ if (f)
+ fput(f);
+ }
+
+ /* Work-queue callback: run the hangup for the tty embedding this work */
+ static void do_tty_hangup(struct work_struct *work)
+ {
+ struct tty_struct *tty =
+ container_of(work, struct tty_struct, hangup_work);
+
+ __tty_hangup(tty);
+ }
+
+ /**
+ * tty_hangup - trigger a hangup event
+ * @tty: tty to hangup
+ *
+ * A carrier loss (virtual or otherwise) has occurred on this line;
+ * schedule a hangup sequence to run after this event.
+ */
+
+ void tty_hangup(struct tty_struct *tty)
+ {
+ #ifdef TTY_DEBUG_HANGUP
+ char buf[64];
+ printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+ #endif
+ /* Asynchronous: the actual work happens in do_tty_hangup() */
+ schedule_work(&tty->hangup_work);
+ }
+
+ EXPORT_SYMBOL(tty_hangup);
+
+ /**
+ * tty_vhangup - process vhangup
+ * @tty: tty to hangup
+ *
+ * The user has asked via system call for the terminal to be hung up.
+ * We do this synchronously so that when the syscall returns the process
+ * is complete. That guarantee is necessary for security reasons.
+ */
+
+ void tty_vhangup(struct tty_struct *tty)
+ {
+ #ifdef TTY_DEBUG_HANGUP
+ char buf[64];
+
+ printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+ #endif
+ /* Synchronous: hangup is complete before we return */
+ __tty_hangup(tty);
+ }
+
+ EXPORT_SYMBOL(tty_vhangup);
+
+
+ /**
+ * tty_vhangup_self - process vhangup for own ctty
+ *
+ * Perform a vhangup on the current controlling tty
+ */
+
+ void tty_vhangup_self(void)
+ {
+ struct tty_struct *tty;
+
+ /* get_current_tty() takes a reference; drop it when done */
+ tty = get_current_tty();
+ if (tty) {
+ tty_vhangup(tty);
+ tty_kref_put(tty);
+ }
+ }
+
+ /**
+ * tty_hung_up_p - was tty hung up
+ * @filp: file pointer of tty
+ *
+ * Return true if the tty has been subject to a vhangup or a carrier
+ * loss. Detected by checking whether the handle's file operations
+ * were replaced with the hung-up stubs.
+ */
+
+ int tty_hung_up_p(struct file *filp)
+ {
+ const struct file_operations *fops = filp->f_op;
+
+ return fops == &hung_up_tty_fops;
+ }
+
+ EXPORT_SYMBOL(tty_hung_up_p);
+
+ /* Clear the controlling tty for every task in the given session.
+ * Caller must hold tasklist_lock (see disassociate_ctty()). */
+ static void session_clear_tty(struct pid *session)
+ {
+ struct task_struct *p;
+ do_each_pid_task(session, PIDTYPE_SID, p) {
+ proc_clear_tty(p);
+ } while_each_pid_task(session, PIDTYPE_SID, p);
+ }
+
+ /**
+ * disassociate_ctty - disconnect controlling tty
+ * @on_exit: true if exiting so need to "hang up" the session
+ *
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ * (1) Sends a SIGHUP and SIGCONT to the foreground process group
+ * (2) Clears the tty from being controlling the session
+ * (3) Clears the controlling tty for all processes in the
+ * session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ *
+ * Locking:
+ * BTM is taken for hysterical raisins, and held when
+ * called from no_tty().
+ * tty_mutex is taken to protect tty
+ * ->siglock is taken to protect ->signal/->sighand
+ * tasklist_lock is taken to walk process list for sessions
+ * ->siglock is taken to protect ->signal/->sighand
+ */
+
+ void disassociate_ctty(int on_exit)
+ {
+ struct tty_struct *tty;
+ struct pid *tty_pgrp = NULL;
+
+ if (!current->signal->leader)
+ return;
+
+ tty = get_current_tty();
+ if (tty) {
+ tty_pgrp = get_pid(tty->pgrp);
+ if (on_exit) {
+ /* PTYs are hung up by their close path, not here */
+ if (tty->driver->type != TTY_DRIVER_TYPE_PTY)
+ tty_vhangup(tty);
+ }
+ tty_kref_put(tty);
+ } else if (on_exit) {
+ /* No ctty any more, but a remembered foreground pgrp may
+ * still need the SIGHUP/SIGCONT pair */
+ struct pid *old_pgrp;
+ spin_lock_irq(&current->sighand->siglock);
+ old_pgrp = current->signal->tty_old_pgrp;
+ current->signal->tty_old_pgrp = NULL;
+ spin_unlock_irq(&current->sighand->siglock);
+ if (old_pgrp) {
+ kill_pgrp(old_pgrp, SIGHUP, on_exit);
+ kill_pgrp(old_pgrp, SIGCONT, on_exit);
+ put_pid(old_pgrp);
+ }
+ return;
+ }
+ if (tty_pgrp) {
+ kill_pgrp(tty_pgrp, SIGHUP, on_exit);
+ if (!on_exit)
+ kill_pgrp(tty_pgrp, SIGCONT, on_exit);
+ put_pid(tty_pgrp);
+ }
+
+ spin_lock_irq(&current->sighand->siglock);
+ put_pid(current->signal->tty_old_pgrp);
+ current->signal->tty_old_pgrp = NULL;
+ spin_unlock_irq(&current->sighand->siglock);
+
+ tty = get_current_tty();
+ if (tty) {
+ unsigned long flags;
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ put_pid(tty->session);
+ put_pid(tty->pgrp);
+ tty->session = NULL;
+ tty->pgrp = NULL;
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ tty_kref_put(tty);
+ } else {
+ #ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+ " = NULL", tty);
+ #endif
+ }
+
+ /* Now clear signal->tty under the lock */
+ read_lock(&tasklist_lock);
+ session_clear_tty(task_session(current));
+ read_unlock(&tasklist_lock);
+ }
+
+ /**
+ *
+ * no_tty - Ensure the current process does not have a controlling tty
+ */
+ void no_tty(void)
+ {
+ struct task_struct *tsk = current;
+ /* BTM held across disassociate_ctty() per its locking contract */
+ tty_lock();
+ disassociate_ctty(0);
+ tty_unlock();
+ proc_clear_tty(tsk);
+ }
+
+
+ /**
+ * stop_tty - propagate flow control
+ * @tty: tty to stop
+ *
+ * Perform flow control to the driver. For PTY/TTY pairs we
+ * must also propagate the TIOCPKT status. May be called
+ * on an already stopped device and will not re-call the driver
+ * method.
+ *
+ * This functionality is used by both the line disciplines for
+ * halting incoming flow and by the driver. It may therefore be
+ * called from any context, may be under the tty atomic_write_lock
+ * but not always.
+ *
+ * Locking:
+ * Uses the tty control lock internally
+ */
+
+ void stop_tty(struct tty_struct *tty)
+ {
+ unsigned long flags;
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ /* Already stopped: nothing to do, and don't re-call the driver */
+ if (tty->stopped) {
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ return;
+ }
+ tty->stopped = 1;
+ /* Packet-mode PTY peer gets a TIOCPKT_STOP status event */
+ if (tty->link && tty->link->packet) {
+ tty->ctrl_status &= ~TIOCPKT_START;
+ tty->ctrl_status |= TIOCPKT_STOP;
+ wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+ }
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ /* Driver callback is invoked outside the ctrl_lock */
+ if (tty->ops->stop)
+ (tty->ops->stop)(tty);
+ }
+
+ EXPORT_SYMBOL(stop_tty);
+
+ /**
+ * start_tty - propagate flow control
+ * @tty: tty to start
+ *
+ * Start a tty that has been stopped if at all possible. Perform
+ * any necessary wakeups and propagate the TIOCPKT status. If the
+ * tty was previously stopped and is being started then the
+ * driver start method is invoked and the line discipline woken.
+ *
+ * Locking:
+ * ctrl_lock
+ */
+
+ void start_tty(struct tty_struct *tty)
+ {
+ unsigned long flags;
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ /* Not stopped, or still flow-stopped (^S): leave it alone */
+ if (!tty->stopped || tty->flow_stopped) {
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ return;
+ }
+ tty->stopped = 0;
+ /* Packet-mode PTY peer gets a TIOCPKT_START status event */
+ if (tty->link && tty->link->packet) {
+ tty->ctrl_status &= ~TIOCPKT_STOP;
+ tty->ctrl_status |= TIOCPKT_START;
+ wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
+ }
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ if (tty->ops->start)
+ (tty->ops->start)(tty);
+ /* If we have a running line discipline it may need kicking */
+ tty_wakeup(tty);
+ }
+
+ EXPORT_SYMBOL(start_tty);
+
+ /**
+ * tty_read - read method for tty device files
+ * @file: pointer to tty file
+ * @buf: user buffer
+ * @count: size of user buffer
+ * @ppos: unused
+ *
+ * Perform the read system call function on this terminal device. Checks
+ * for hung up devices before calling the line discipline method.
+ *
+ * Returns the byte count from the ldisc read, or -EIO on a bad or
+ * errored tty.
+ *
+ * Locking:
+ * Locks the line discipline internally while needed. Multiple
+ * read calls may be outstanding in parallel.
+ */
+
+ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+ {
+ int i;
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct tty_struct *tty = file_tty(file);
+ struct tty_ldisc *ld;
+
+ if (tty_paranoia_check(tty, inode, "tty_read"))
+ return -EIO;
+ if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+ return -EIO;
+
+ /* We want to wait for the line discipline to sort out in this
+ situation */
+ ld = tty_ldisc_ref_wait(tty);
+ if (ld->ops->read)
+ i = (ld->ops->read)(tty, file, buf, count);
+ else
+ i = -EIO;
+ tty_ldisc_deref(ld);
+ /* Successful read updates the inode access time */
+ if (i > 0)
+ inode->i_atime = current_fs_time(inode->i_sb);
+ return i;
+ }
+
+ /* Release the per-tty write serialisation and wake queued writers */
+ void tty_write_unlock(struct tty_struct *tty)
+ {
+ mutex_unlock(&tty->atomic_write_lock);
+ wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+ }
+
+ /* Acquire the per-tty write serialisation. With @ndelay set a held
+ * lock means -EAGAIN rather than sleeping; otherwise the wait is
+ * interruptible and may return -ERESTARTSYS. */
+ int tty_write_lock(struct tty_struct *tty, int ndelay)
+ {
+ if (!mutex_trylock(&tty->atomic_write_lock)) {
+ if (ndelay)
+ return -EAGAIN;
+ if (mutex_lock_interruptible(&tty->atomic_write_lock))
+ return -ERESTARTSYS;
+ }
+ return 0;
+ }
+
+ /*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks. Copies userspace data into the
+ * per-tty write_buf in chunks and feeds each chunk to @write (the
+ * ldisc write method). Returns bytes written, or a negative errno
+ * if nothing was written.
+ */
+ static inline ssize_t do_tty_write(
+ ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+ struct tty_struct *tty,
+ struct file *file,
+ const char __user *buf,
+ size_t count)
+ {
+ ssize_t ret, written = 0;
+ unsigned int chunk;
+
+ /* Serialise writers; honour O_NDELAY via trylock semantics */
+ ret = tty_write_lock(tty, file->f_flags & O_NDELAY);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * We chunk up writes into a temporary buffer. This
+ * simplifies low-level drivers immensely, since they
+ * don't have locking issues and user mode accesses.
+ *
+ * But if TTY_NO_WRITE_SPLIT is set, we should use a
+ * big chunk-size..
+ *
+ * The default chunk-size is 2kB, because the N_TTY
+ * layer has problems with bigger chunks. It will
+ * claim to be able to handle more characters than
+ * it actually does.
+ *
+ * FIXME: This can probably go away now except that 64K chunks
+ * are too likely to fail unless switched to vmalloc...
+ */
+ chunk = 2048;
+ if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags))
+ chunk = 65536;
+ if (count < chunk)
+ chunk = count;
+
+ /* write_buf/write_cnt is protected by the atomic_write_lock mutex */
+ if (tty->write_cnt < chunk) {
+ unsigned char *buf_chunk;
+
+ /* Keep at least a 1kB buffer so tiny writes don't thrash */
+ if (chunk < 1024)
+ chunk = 1024;
+
+ buf_chunk = kmalloc(chunk, GFP_KERNEL);
+ if (!buf_chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ kfree(tty->write_buf);
+ tty->write_cnt = chunk;
+ tty->write_buf = buf_chunk;
+ }
+
+ /* Do the write .. */
+ for (;;) {
+ size_t size = count;
+ if (size > chunk)
+ size = chunk;
+ ret = -EFAULT;
+ if (copy_from_user(tty->write_buf, buf, size))
+ break;
+ ret = write(tty, file, tty->write_buf, size);
+ if (ret <= 0)
+ break;
+ written += ret;
+ buf += ret;
+ count -= ret;
+ if (!count)
+ break;
+ /* Partial progress before a signal still reports -ERESTARTSYS
+ * only if nothing at all was written (see below) */
+ ret = -ERESTARTSYS;
+ if (signal_pending(current))
+ break;
+ cond_resched();
+ }
+ if (written) {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ inode->i_mtime = current_fs_time(inode->i_sb);
+ ret = written;
+ }
+ out:
+ tty_write_unlock(tty);
+ return ret;
+ }
+
+ /**
+ * tty_write_message - write a message to a certain tty, not just the console.
+ * @tty: the destination tty_struct
+ * @msg: the message to write
+ *
+ * This is used for messages that need to be redirected to a specific tty.
+ * We don't put it into the syslog queue right now maybe in the future if
+ * really needed.
+ *
+ * We must still hold the BTM and test the CLOSING flag for the moment.
+ */
+
+ void tty_write_message(struct tty_struct *tty, char *msg)
+ {
+ if (tty) {
+ mutex_lock(&tty->atomic_write_lock);
+ tty_lock();
+ /* Drop the BTM before calling the driver write method */
+ if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags)) {
+ tty_unlock();
+ tty->ops->write(tty, msg, strlen(msg));
+ } else
+ tty_unlock();
+ tty_write_unlock(tty);
+ }
+ return;
+ }
+
+
+ /**
+ * tty_write - write method for tty device file
+ * @file: tty file pointer
+ * @buf: user data to write
+ * @count: bytes to write
+ * @ppos: unused
+ *
+ * Write data to a tty device via the line discipline.
+ *
+ * Locking:
+ * Locks the line discipline as required
+ * Writes to the tty driver are serialized by the atomic_write_lock
+ * and are then processed in chunks to the device. The line discipline
+ * write method will not be invoked in parallel for each device.
+ */
+
+ static ssize_t tty_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct tty_struct *tty = file_tty(file);
+ struct tty_ldisc *ld;
+ ssize_t ret;
+
+ if (tty_paranoia_check(tty, inode, "tty_write"))
+ return -EIO;
+ if (!tty || !tty->ops->write ||
+ (test_bit(TTY_IO_ERROR, &tty->flags)))
+ return -EIO;
+ /* Short term debug to catch buggy drivers */
+ if (tty->ops->write_room == NULL)
+ printk(KERN_ERR "tty driver %s lacks a write_room method.\n",
+ tty->driver->name);
+ /* Chunking and serialisation are handled inside do_tty_write() */
+ ld = tty_ldisc_ref_wait(tty);
+ if (!ld->ops->write)
+ ret = -EIO;
+ else
+ ret = do_tty_write(ld->ops->write, tty, file, buf, count);
+ tty_ldisc_deref(ld);
+ return ret;
+ }
+
+ /* Write method used when console output may be redirected: if a
+ * redirect file is set, send the data there (holding our own file
+ * reference across the write); otherwise fall through to tty_write() */
+ ssize_t redirected_tty_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ struct file *p = NULL;
+
+ spin_lock(&redirect_lock);
+ if (redirect) {
+ /* Pin the target so it can't go away once we drop the lock */
+ get_file(redirect);
+ p = redirect;
+ }
+ spin_unlock(&redirect_lock);
+
+ if (p) {
+ ssize_t res;
+ res = vfs_write(p, buf, count, &p->f_pos);
+ fput(p);
+ return res;
+ }
+ return tty_write(file, buf, count, ppos);
+ }
+
+ static char ptychar[] = "pqrstuvwxyzabcde";
+
+ /**
+ * pty_line_name - generate name for a pty
+ * @driver: the tty driver in use
+ * @index: the minor number
+ * @p: output buffer of at least 6 bytes
+ *
+ * Generate a name from a driver reference and write it to the output
+ * buffer.
+ *
+ * Locking: None
+ */
+ static void pty_line_name(struct tty_driver *driver, int index, char *p)
+ {
+ int i = index + driver->name_base;
+ const char *prefix = driver->name;
+
+ /* ->name is initialized to "ttyp", but "tty" is expected */
+ if (driver->subtype == PTY_TYPE_SLAVE)
+ prefix = "tty";
+ sprintf(p, "%s%c%x", prefix, ptychar[(i >> 4) & 0xf], i & 0xf);
+ }
+
+ /**
+ * tty_line_name - generate name for a tty
+ * @driver: the tty driver in use
+ * @index: the minor number
+ * @p: output buffer of at least 7 bytes
+ *
+ * Generate a name from a driver reference and write it to the output
+ * buffer.
+ *
+ * Locking: None
+ */
+ static void tty_line_name(struct tty_driver *driver, int index, char *p)
+ {
+ int line = driver->name_base + index;
+
+ sprintf(p, "%s%d", driver->name, line);
+ }
+
+ /**
+ * tty_driver_lookup_tty() - find an existing tty, if any
+ * @driver: the driver for the tty
+ * @idx: the minor number
+ *
+ * Return the tty, if found or ERR_PTR() otherwise.
+ *
+ * Locking: tty_mutex must be held. If tty is found, the mutex must
+ * be held until the 'fast-open' is also done. Will change once we
+ * have refcounting in the driver and per driver locking
+ */
+ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
+ struct inode *inode, int idx)
+ {
+ struct tty_struct *tty;
+
+ /* Drivers (e.g. with dynamic minors) may override the lookup */
+ if (driver->ops->lookup)
+ return driver->ops->lookup(driver, inode, idx);
+
+ /* Default: direct index into the driver's tty table */
+ tty = driver->ttys[idx];
+ return tty;
+ }
+
+ /**
+ * tty_init_termios - helper for termios setup
+ * @tty: the tty to set up
+ *
+ * Initialise the termios structures for this tty. This runs under
+ * the tty_mutex currently so we can be relaxed about ordering.
+ *
+ * Returns 0 on success or -ENOMEM if the termios pair cannot be
+ * allocated.
+ */
+
+ int tty_init_termios(struct tty_struct *tty)
+ {
+ struct ktermios *tp;
+ int idx = tty->index;
+
+ /* Reuse a previously saved termios for this line if one exists */
+ tp = tty->driver->termios[idx];
+ if (tp == NULL) {
+ /* One allocation holds both termios and termios_locked */
+ tp = kzalloc(sizeof(struct ktermios[2]), GFP_KERNEL);
+ if (tp == NULL)
+ return -ENOMEM;
+ memcpy(tp, &tty->driver->init_termios,
+ sizeof(struct ktermios));
+ tty->driver->termios[idx] = tp;
+ }
+ tty->termios = tp;
+ tty->termios_locked = tp + 1;
+
+ /* Compatibility until drivers always set this */
+ tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+ tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(tty_init_termios);
+
+ /**
+ * tty_driver_install_tty() - install a tty entry in the driver
+ * @driver: the driver for the tty
+ * @tty: the tty
+ *
+ * Install a tty object into the driver tables. The tty->index field
+ * will be set by the time this is called. This method is responsible
+ * for ensuring any needed additional structures are allocated and
+ * configured.
+ *
+ * Returns 0 on success or a negative errno from the driver install
+ * method / tty_init_termios().
+ *
+ * Locking: tty_mutex for now
+ */
+ static int tty_driver_install_tty(struct tty_driver *driver,
+ struct tty_struct *tty)
+ {
+ int idx = tty->index;
+ int ret;
+
+ /* A driver-provided install method takes over entirely */
+ if (driver->ops->install) {
+ ret = driver->ops->install(driver, tty);
+ return ret;
+ }
+
+ /* Default install: set up termios, then hook the tty into the
+ * driver table. Propagate the actual error code rather than
+ * assuming tty_init_termios() can only fail with -ENOMEM. */
+ ret = tty_init_termios(tty);
+ if (ret == 0) {
+ tty_driver_kref_get(driver);
+ tty->count++;
+ driver->ttys[idx] = tty;
+ }
+ return ret;
+ }
+
+ /**
+ * tty_driver_remove_tty() - remove a tty from the driver tables
+ * @driver: the driver for the tty
+ * @tty: the tty to remove
+ *
+ * Remove a tty object from the driver tables. The tty->index field
+ * will be set by the time this is called.
+ *
+ * Locking: tty_mutex for now
+ */
+ static void tty_driver_remove_tty(struct tty_driver *driver,
+ struct tty_struct *tty)
+ {
+ /* Mirror of tty_driver_install_tty(): driver override or table slot */
+ if (driver->ops->remove)
+ driver->ops->remove(driver, tty);
+ else
+ driver->ttys[tty->index] = NULL;
+ }
+
+ /*
+ * tty_reopen() - fast re-open of an open tty
+ * @tty - the tty to open
+ *
+ * Return 0 on success, -errno on error.
+ *
+ * Locking: tty_mutex must be held from the time the tty was found
+ * till this open completes.
+ */
+ static int tty_reopen(struct tty_struct *tty)
+ {
+ struct tty_driver *driver = tty->driver;
+
+ /* A tty in the middle of closing cannot be reopened */
+ if (test_bit(TTY_CLOSING, &tty->flags))
+ return -EIO;
+
+ if (driver->type == TTY_DRIVER_TYPE_PTY &&
+ driver->subtype == PTY_TYPE_MASTER) {
+ /*
+ * special case for PTY masters: only one open permitted,
+ * and the slave side open count is incremented as well.
+ */
+ if (tty->count)
+ return -EIO;
+
+ tty->link->count++;
+ }
+ tty->count++;
+ tty->driver = driver; /* N.B. why do this every time?? */
+
+ /* Sanity check: the line discipline should still be attached */
+ mutex_lock(&tty->ldisc_mutex);
+ WARN_ON(!test_bit(TTY_LDISC, &tty->flags));
+ mutex_unlock(&tty->ldisc_mutex);
+
+ return 0;
+ }
+
+ /**
+ * tty_init_dev - initialise a tty device
+ * @driver: tty driver we are opening a device on
+ * @idx: device index
+ * @first_ok: ok to open a new device (used by ptmx)
+ *
+ * Prepare a tty device. This may not be a "new" clean device but
+ * could also be an active device. The pty drivers require special
+ * handling because of this.
+ *
+ * Returns the tty, or an ERR_PTR() on failure.
+ *
+ * Locking:
+ * The function is called under the tty_mutex, which
+ * protects us from the tty struct or driver itself going away.
+ *
+ * On exit the tty device has the line discipline attached and
+ * a reference count of 1. If a pair was created for pty/tty use
+ * and the other was a pty master then it too has a reference count of 1.
+ *
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open. The new code protects the open with a mutex, so it's
+ * really quite straightforward. The mutex locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+
+ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
+ int first_ok)
+ {
+ struct tty_struct *tty;
+ int retval;
+
+ /* Check if pty master is being opened multiple times */
+ if (driver->subtype == PTY_TYPE_MASTER &&
+ (driver->flags & TTY_DRIVER_DEVPTS_MEM) && !first_ok) {
+ return ERR_PTR(-EIO);
+ }
+
+ /*
+ * First time open is complex, especially for PTY devices.
+ * This code guarantees that either everything succeeds and the
+ * TTY is ready for operation, or else the table slots are vacated
+ * and the allocated memory released. (Except that the termios
+ * and locked termios may be retained.)
+ */
+
+ /* Hold the driver module for the lifetime of this tty */
+ if (!try_module_get(driver->owner))
+ return ERR_PTR(-ENODEV);
+
+ tty = alloc_tty_struct();
+ if (!tty)
+ goto fail_no_mem;
+ initialize_tty_struct(tty, driver, idx);
+
+ retval = tty_driver_install_tty(driver, tty);
+ if (retval < 0) {
+ free_tty_struct(tty);
+ module_put(driver->owner);
+ return ERR_PTR(retval);
+ }
+
+ /*
+ * Structures all installed ... call the ldisc open routines.
+ * If we fail here just call release_tty to clean up. No need
+ * to decrement the use counts, as release_tty doesn't care.
+ */
+ retval = tty_ldisc_setup(tty, tty->link);
+ if (retval)
+ goto release_mem_out;
+ return tty;
+
+ fail_no_mem:
+ module_put(driver->owner);
+ return ERR_PTR(-ENOMEM);
+
+ /* call the tty release_tty routine to clean out this slot */
+ release_mem_out:
+ if (printk_ratelimit())
+ printk(KERN_INFO "tty_init_dev: ldisc open failed, "
+ "clearing slot %d\n", idx);
+ release_tty(tty, idx);
+ return ERR_PTR(retval);
+ }
+
+ /* Free the saved termios pair for this line if the driver asked for
+ * termios to be reset on close (TTY_DRIVER_RESET_TERMIOS); otherwise
+ * the termios is retained for the next open */
+ void tty_free_termios(struct tty_struct *tty)
+ {
+ struct ktermios *tp;
+ int idx = tty->index;
+ /* Kill this flag and push into drivers for locking etc */
+ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+ /* FIXME: Locking on ->termios array */
+ tp = tty->termios;
+ tty->driver->termios[idx] = NULL;
+ /* termios_locked shares this allocation (tp + 1), see
+ * tty_init_termios(), so a single kfree covers both */
+ kfree(tp);
+ }
+ }
+ EXPORT_SYMBOL(tty_free_termios);
+
+ /* Default shutdown: unhook from the driver table and drop termios */
+ void tty_shutdown(struct tty_struct *tty)
+ {
+ tty_driver_remove_tty(tty->driver, tty);
+ tty_free_termios(tty);
+ }
+ EXPORT_SYMBOL(tty_shutdown);
+
+ /**
+ * release_one_tty - release tty structure memory
+ * @work: work item embedded in the tty being obliterated
+ *
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots. This function is called when a device is no longer
+ * in use. It also gets called when setup of a device fails.
+ *
+ * Locking:
+ * tty_mutex - sometimes only
+ * takes the file list lock internally when working on the list
+ * of ttys that the driver keeps.
+ *
+ * This method gets called from a work queue so that the driver private
+ * cleanup ops can sleep (needed for USB at least)
+ */
+ static void release_one_tty(struct work_struct *work)
+ {
+ /* hangup_work is reused as the release work item, see
+ * queue_release_one_tty() */
+ struct tty_struct *tty =
+ container_of(work, struct tty_struct, hangup_work);
+ struct tty_driver *driver = tty->driver;
+
+ if (tty->ops->cleanup)
+ tty->ops->cleanup(tty);
+
+ tty->magic = 0;
+ tty_driver_kref_put(driver);
+ module_put(driver->owner);
+
+ spin_lock(&tty_files_lock);
+ list_del_init(&tty->tty_files);
+ spin_unlock(&tty_files_lock);
+
+ put_pid(tty->pgrp);
+ put_pid(tty->session);
+ free_tty_struct(tty);
+ }
+
+ /* kref release callback: run the driver/default shutdown synchronously,
+ * then defer the final teardown to a work queue so cleanup may sleep */
+ static void queue_release_one_tty(struct kref *kref)
+ {
+ struct tty_struct *tty = container_of(kref, struct tty_struct, kref);
+
+ if (tty->ops->shutdown)
+ tty->ops->shutdown(tty);
+ else
+ tty_shutdown(tty);
+
+ /* The hangup queue is now free so we can reuse it rather than
+ waste a chunk of memory for each port */
+ INIT_WORK(&tty->hangup_work, release_one_tty);
+ schedule_work(&tty->hangup_work);
+ }
+
+ /**
+ * tty_kref_put - release a tty kref
+ * @tty: tty device (may be NULL, which is a no-op)
+ *
+ * Release a reference to a tty device and if need be let the kref
+ * layer destruct the object for us
+ */
+
+ void tty_kref_put(struct tty_struct *tty)
+ {
+ if (tty)
+ kref_put(&tty->kref, queue_release_one_tty);
+ }
+ EXPORT_SYMBOL(tty_kref_put);
+
+ /**
+ * release_tty - release tty structure memory
+ * @tty: tty to release
+ * @idx: index of the tty (expected to match tty->index)
+ *
+ * Release both @tty and a possible linked partner (think pty pair),
+ * and decrement the refcount of the backing module.
+ *
+ * Locking:
+ * tty_mutex - sometimes only
+ * takes the file list lock internally when working on the list
+ * of ttys that the driver keeps.
+ * FIXME: should we require tty_mutex is held here ??
+ *
+ */
+ static void release_tty(struct tty_struct *tty, int idx)
+ {
+ /* This should always be true but check for the moment */
+ WARN_ON(tty->index != idx);
+
+ /* Drop the pty partner first (if any), then this side */
+ if (tty->link)
+ tty_kref_put(tty->link);
+ tty_kref_put(tty);
+ }
+
+ /**
+ * tty_release - vfs callback for close
+ * @inode: inode of tty
+ * @filp: file pointer for handle to tty
+ *
+ * Called the last time each file handle is closed that references
+ * this tty. There may however be several such references.
+ *
+ * Locking:
+ * Takes bkl. See tty_release_dev
+ *
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+
+ int tty_release(struct inode *inode, struct file *filp)
+ {
+ struct tty_struct *tty = file_tty(filp);
+ struct tty_struct *o_tty;
+ int pty_master, tty_closing, o_tty_closing, do_sleep;
+ int devpts;
+ int idx;
+ char buf[64];
+
+ if (tty_paranoia_check(tty, inode, "tty_release_dev"))
+ return 0;
+
+ tty_lock();
+ check_tty_count(tty, "tty_release_dev");
+
+ __tty_fasync(-1, filp, 0);
+
+ idx = tty->index;
+ pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+ tty->driver->subtype == PTY_TYPE_MASTER);
+ devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
+ o_tty = tty->link;
+
+ #ifdef TTY_PARANOIA_CHECK
+ if (idx < 0 || idx >= tty->driver->num) {
+ printk(KERN_DEBUG "tty_release_dev: bad idx when trying to "
+ "free (%s)\n", tty->name);
+ tty_unlock();
+ return 0;
+ }
+ if (!devpts) {
+ if (tty != tty->driver->ttys[idx]) {
+ tty_unlock();
+ printk(KERN_DEBUG "tty_release_dev: driver.table[%d] not tty "
+ "for (%s)\n", idx, tty->name);
+ return 0;
+ }
+ if (tty->termios != tty->driver->termios[idx]) {
+ tty_unlock();
+ printk(KERN_DEBUG "tty_release_dev: driver.termios[%d] not termios "
+ "for (%s)\n",
+ idx, tty->name);
+ return 0;
+ }
+ }
+ #endif
+
+ #ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "tty_release_dev of %s (tty count=%d)...",
+ tty_name(tty, buf), tty->count);
+ #endif
+
+ #ifdef TTY_PARANOIA_CHECK
+ if (tty->driver->other &&
+ !(tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+ if (o_tty != tty->driver->other->ttys[idx]) {
+ tty_unlock();
+ printk(KERN_DEBUG "tty_release_dev: other->table[%d] "
+ "not o_tty for (%s)\n",
+ idx, tty->name);
+ return 0 ;
+ }
+ if (o_tty->termios != tty->driver->other->termios[idx]) {
+ tty_unlock();
+ printk(KERN_DEBUG "tty_release_dev: other->termios[%d] "
+ "not o_termios for (%s)\n",
+ idx, tty->name);
+ return 0;
+ }
+ if (o_tty->link != tty) {
+ tty_unlock();
+ printk(KERN_DEBUG "tty_release_dev: bad pty pointers\n");
+ return 0;
+ }
+ }
+ #endif
+ if (tty->ops->close)
+ tty->ops->close(tty, filp);
+
+ tty_unlock();
+ /*
+ * Sanity check: if tty->count is going to zero, there shouldn't be
+ * any waiters on tty->read_wait or tty->write_wait. We test the
+ * wait queues and kick everyone out _before_ actually starting to
+ * close. This ensures that we won't block while releasing the tty
+ * structure.
+ *
+ * The test for the o_tty closing is necessary, since the master and
+ * slave sides may close in any order. If the slave side closes out
+ * first, its count will be one, since the master side holds an open.
+ * Thus this test wouldn't be triggered at the time the slave closes,
+ * so we do it now.
+ *
+ * Note that it's possible for the tty to be opened again while we're
+ * flushing out waiters. By recalculating the closing flags before
+ * each iteration we avoid any problems.
+ */
+ while (1) {
+ /* Guard against races with tty->count changes elsewhere and
+ opens on /dev/tty */
+
+ mutex_lock(&tty_mutex);
+ tty_lock();
+ tty_closing = tty->count <= 1;
+ o_tty_closing = o_tty &&
+ (o_tty->count <= (pty_master ? 1 : 0));
+ do_sleep = 0;
+
+ if (tty_closing) {
+ if (waitqueue_active(&tty->read_wait)) {
+ wake_up_poll(&tty->read_wait, POLLIN);
+ do_sleep++;
+ }
+ if (waitqueue_active(&tty->write_wait)) {
+ wake_up_poll(&tty->write_wait, POLLOUT);
+ do_sleep++;
+ }
+ }
+ if (o_tty_closing) {
+ if (waitqueue_active(&o_tty->read_wait)) {
+ wake_up_poll(&o_tty->read_wait, POLLIN);
+ do_sleep++;
+ }
+ if (waitqueue_active(&o_tty->write_wait)) {
+ wake_up_poll(&o_tty->write_wait, POLLOUT);
+ do_sleep++;
+ }
+ }
+ if (!do_sleep)
+ break;
+
+ printk(KERN_WARNING "tty_release_dev: %s: read/write wait queue "
+ "active!\n", tty_name(tty, buf));
+ tty_unlock();
+ mutex_unlock(&tty_mutex);
+ schedule();
+ }
+
+ /*
+ * The closing flags are now consistent with the open counts on
+ * both sides, and we've completed the last operation that could
+ * block, so it's safe to proceed with closing.
+ */
+ if (pty_master) {
+ if (--o_tty->count < 0) {
+ printk(KERN_WARNING "tty_release_dev: bad pty slave count "
+ "(%d) for %s\n",
+ o_tty->count, tty_name(o_tty, buf));
+ o_tty->count = 0;
+ }
+ }
+ if (--tty->count < 0) {
+ printk(KERN_WARNING "tty_release_dev: bad tty->count (%d) for %s\n",
+ tty->count, tty_name(tty, buf));
+ tty->count = 0;
+ }
+
+ /*
+ * We've decremented tty->count, so we need to remove this file
+ * descriptor off the tty->tty_files list; this serves two
+ * purposes:
+ * - check_tty_count sees the correct number of file descriptors
+ * associated with this tty.
+ * - do_tty_hangup no longer sees this file descriptor as
+ * something that needs to be handled for hangups.
+ */
+ tty_del_file(filp);
+
+ /*
+ * Perform some housekeeping before deciding whether to return.
+ *
+ * Set the TTY_CLOSING flag if this was the last open. In the
+ * case of a pty we may have to wait around for the other side
+ * to close, and TTY_CLOSING makes sure we can't be reopened.
+ */
+ if (tty_closing)
+ set_bit(TTY_CLOSING, &tty->flags);
+ if (o_tty_closing)
+ set_bit(TTY_CLOSING, &o_tty->flags);
+
+ /*
+ * If _either_ side is closing, make sure there aren't any
+ * processes that still think tty or o_tty is their controlling
+ * tty.
+ */
+ if (tty_closing || o_tty_closing) {
+ read_lock(&tasklist_lock);
+ session_clear_tty(tty->session);
+ if (o_tty)
+ session_clear_tty(o_tty->session);
+ read_unlock(&tasklist_lock);
+ }
+
+ mutex_unlock(&tty_mutex);
+
+ /* check whether both sides are closing ... */
+ if (!tty_closing || (o_tty && !o_tty_closing)) {
+ tty_unlock();
+ return 0;
+ }
+
+ #ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "freeing tty structure...");
+ #endif
+ /*
+ * Ask the line discipline code to release its structures
+ */
+ tty_ldisc_release(tty, o_tty);
+ /*
+ * The release_tty function takes care of the details of clearing
+ * the slots and preserving the termios structure.
+ */
+ release_tty(tty, idx);
+
+ /* Make this pty number available for reallocation */
+ if (devpts)
+ devpts_kill_index(inode, idx);
+ tty_unlock();
+ return 0;
+ }
+
+ /**
+  *	tty_open	-	open a tty device
+  *	@inode: inode of device file
+  *	@filp: file pointer to tty
+  *
+  *	tty_open and tty_release keep up the tty count that contains the
+  *	number of opens done on a tty. We cannot use the inode-count, as
+  *	different inodes might point to the same tty.
+  *
+  *	Open-counting is needed for pty masters, as well as for keeping
+  *	track of serial lines: DTR is dropped when the last close happens.
+  *	(This is not done solely through tty->count, now.  - Ted 1/27/92)
+  *
+  *	The termios state of a pty is reset on first open so that
+  *	settings don't persist across reuse.
+  *
+  *	Locking: tty_mutex protects tty, get_tty_driver and tty_init_dev work.
+  *		 tty->count should protect the rest.
+  *		 ->siglock protects ->signal/->sighand
+  */
+ 
+ static int tty_open(struct inode *inode, struct file *filp)
+ {
+ 	struct tty_struct *tty = NULL;
+ 	int noctty, retval;
+ 	struct tty_driver *driver;
+ 	int index;
+ 	dev_t device = inode->i_rdev;
+ 	unsigned saved_flags = filp->f_flags;
+ 
+ 	nonseekable_open(inode, filp);
+ 
+ retry_open:
+ 	noctty = filp->f_flags & O_NOCTTY;
+ 	index = -1;
+ 	retval = 0;
+ 
+ 	mutex_lock(&tty_mutex);
+ 	tty_lock();
+ 
+ 	/* /dev/tty: reopen the controlling tty of the current process */
+ 	if (device == MKDEV(TTYAUX_MAJOR, 0)) {
+ 		tty = get_current_tty();
+ 		if (!tty) {
+ 			tty_unlock();
+ 			mutex_unlock(&tty_mutex);
+ 			return -ENXIO;
+ 		}
+ 		driver = tty_driver_kref_get(tty->driver);
+ 		index = tty->index;
+ 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+ 		/* noctty = 1; */
+ 		/* FIXME: Should we take a driver reference ? */
+ 		tty_kref_put(tty);
+ 		goto got_driver;
+ 	}
+ #ifdef CONFIG_VT
+ 	/* /dev/tty0: the currently active virtual console */
+ 	if (device == MKDEV(TTY_MAJOR, 0)) {
+ 		extern struct tty_driver *console_driver;
+ 		driver = tty_driver_kref_get(console_driver);
+ 		index = fg_console;
+ 		noctty = 1;
+ 		goto got_driver;
+ 	}
+ #endif
+ 	/* /dev/console: whatever device the console layer picked */
+ 	if (device == MKDEV(TTYAUX_MAJOR, 1)) {
+ 		struct tty_driver *console_driver = console_device(&index);
+ 		if (console_driver) {
+ 			driver = tty_driver_kref_get(console_driver);
+ 			if (driver) {
+ 				/* Don't let /dev/console block */
+ 				filp->f_flags |= O_NONBLOCK;
+ 				noctty = 1;
+ 				goto got_driver;
+ 			}
+ 		}
+ 		tty_unlock();
+ 		mutex_unlock(&tty_mutex);
+ 		return -ENODEV;
+ 	}
+ 
+ 	driver = get_tty_driver(device, &index);
+ 	if (!driver) {
+ 		tty_unlock();
+ 		mutex_unlock(&tty_mutex);
+ 		return -ENODEV;
+ 	}
+ got_driver:
+ 	if (!tty) {
+ 		/* check whether we're reopening an existing tty */
+ 		tty = tty_driver_lookup_tty(driver, inode, index);
+ 
+ 		if (IS_ERR(tty)) {
+ 			tty_unlock();
+ 			mutex_unlock(&tty_mutex);
+ 			return PTR_ERR(tty);
+ 		}
+ 	}
+ 
+ 	if (tty) {
+ 		retval = tty_reopen(tty);
+ 		if (retval)
+ 			tty = ERR_PTR(retval);
+ 	} else
+ 		tty = tty_init_dev(driver, index, 0);
+ 
+ 	mutex_unlock(&tty_mutex);
+ 	tty_driver_kref_put(driver);
+ 	if (IS_ERR(tty)) {
+ 		tty_unlock();
+ 		return PTR_ERR(tty);
+ 	}
+ 
+ 	retval = tty_add_file(tty, filp);
+ 	if (retval) {
+ 		tty_unlock();
+ 		return retval;
+ 	}
+ 
+ 	check_tty_count(tty, "tty_open");
+ 	/* opening a pty master must never acquire it as controlling tty */
+ 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+ 	    tty->driver->subtype == PTY_TYPE_MASTER)
+ 		noctty = 1;
+ #ifdef TTY_DEBUG_HANGUP
+ 	printk(KERN_DEBUG "opening %s...", tty->name);
+ #endif
+ 	if (!retval) {
+ 		if (tty->ops->open)
+ 			retval = tty->ops->open(tty, filp);
+ 		else
+ 			retval = -ENODEV;
+ 	}
+ 	/* undo any O_NONBLOCK forced on for /dev/tty or /dev/console */
+ 	filp->f_flags = saved_flags;
+ 
+ 	if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) &&
+ 	    !capable(CAP_SYS_ADMIN))
+ 		retval = -EBUSY;
+ 
+ 	if (retval) {
+ #ifdef TTY_DEBUG_HANGUP
+ 		printk(KERN_DEBUG "error %d in opening %s...", retval,
+ 		       tty->name);
+ #endif
+ 		tty_unlock(); /* need to call tty_release without BTM */
+ 		tty_release(inode, filp);
+ 		if (retval != -ERESTARTSYS)
+ 			return retval;
+ 
+ 		if (signal_pending(current))
+ 			return retval;
+ 
+ 		schedule();
+ 		/*
+ 		 * Need to reset f_op in case a hangup happened.
+ 		 */
+ 		tty_lock();
+ 		if (filp->f_op == &hung_up_tty_fops)
+ 			filp->f_op = &tty_fops;
+ 		tty_unlock();
+ 		goto retry_open;
+ 	}
+ 	tty_unlock();
+ 
+ 
+ 	mutex_lock(&tty_mutex);
+ 	tty_lock();
+ 	/* make this tty the controlling tty for a session leader without one */
+ 	spin_lock_irq(&current->sighand->siglock);
+ 	if (!noctty &&
+ 	    current->signal->leader &&
+ 	    !current->signal->tty &&
+ 	    tty->session == NULL)
+ 		__proc_set_tty(current, tty);
+ 	spin_unlock_irq(&current->sighand->siglock);
+ 	tty_unlock();
+ 	mutex_unlock(&tty_mutex);
+ 	return 0;
+ }
+
+
+
+ /**
+  *	tty_poll	-	check tty status
+  *	@filp: file being polled
+  *	@wait: poll wait structures to update
+  *
+  *	Delegate to the line discipline's poll method to obtain the
+  *	poll status of the device; devices without an ldisc poll op
+  *	report no events.
+  *
+  *	Locking: locks called line discipline but ldisc poll method
+  *	may be re-entered freely by other callers.
+  */
+ 
+ static unsigned int tty_poll(struct file *filp, poll_table *wait)
+ {
+ 	struct tty_struct *tty = file_tty(filp);
+ 	struct tty_ldisc *ld;
+ 	int mask = 0;
+ 
+ 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
+ 		return 0;
+ 
+ 	ld = tty_ldisc_ref_wait(tty);
+ 	if (ld->ops->poll)
+ 		mask = ld->ops->poll(tty, filp, wait);
+ 	tty_ldisc_deref(ld);
+ 	return mask;
+ }
+
+ /*
+  * __tty_fasync - enable/disable async (SIGIO) notification on a tty.
+  *
+  * Caller must hold the BTM (see tty_fasync below). On enable, the
+  * SIGIO owner becomes the tty's foreground pgrp if there is one,
+  * otherwise the calling task. Returns 0 or a negative errno.
+  */
+ static int __tty_fasync(int fd, struct file *filp, int on)
+ {
+ 	struct tty_struct *tty = file_tty(filp);
+ 	unsigned long flags;
+ 	int retval = 0;
+ 
+ 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
+ 		goto out;
+ 
+ 	retval = fasync_helper(fd, filp, on, &tty->fasync);
+ 	if (retval <= 0)
+ 		goto out;
+ 
+ 	if (on) {
+ 		enum pid_type type;
+ 		struct pid *pid;
+ 		/* wake readers per character while async readers exist */
+ 		if (!waitqueue_active(&tty->read_wait))
+ 			tty->minimum_to_wake = 1;
+ 		/* ctrl_lock guards tty->pgrp; take a pid ref so we can
+ 		   drop the lock before calling __f_setown */
+ 		spin_lock_irqsave(&tty->ctrl_lock, flags);
+ 		if (tty->pgrp) {
+ 			pid = tty->pgrp;
+ 			type = PIDTYPE_PGID;
+ 		} else {
+ 			pid = task_pid(current);
+ 			type = PIDTYPE_PID;
+ 		}
+ 		get_pid(pid);
+ 		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ 		retval = __f_setown(filp, pid, type, 0);
+ 		put_pid(pid);
+ 		if (retval)
+ 			goto out;
+ 	} else {
+ 		/* last async reader gone: restore batched wakeups */
+ 		if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+ 			tty->minimum_to_wake = N_TTY_BUF_SIZE;
+ 	}
+ 	retval = 0;
+ out:
+ 	return retval;
+ }
+
+ /* fasync entry point: wraps __tty_fasync with the big tty mutex held */
+ static int tty_fasync(int fd, struct file *filp, int on)
+ {
+ 	int ret;
+ 
+ 	tty_lock();
+ 	ret = __tty_fasync(fd, filp, on);
+ 	tty_unlock();
+ 	return ret;
+ }
+
+ /**
+  *	tiocsti		-	fake input character
+  *	@tty: tty to fake input into
+  *	@p: pointer to character
+  *
+  *	Fake input to a tty device. Does the necessary locking and
+  *	input management.
+  *
+  *	FIXME: does not honour flow control ??
+  *
+  *	Locking:
+  *		Called functions take tty_ldisc_lock
+  *		current->signal->tty check is safe without locks
+  *
+  *	FIXME: may race normal receive processing
+  */
+ 
+ static int tiocsti(struct tty_struct *tty, char __user *p)
+ {
+ 	struct tty_ldisc *ld;
+ 	char ch;
+ 	char flag = 0;	/* zero flag byte = normal character */
+ 
+ 	/* only the owner of the controlling tty (or root) may inject input */
+ 	if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN))
+ 		return -EPERM;
+ 	if (get_user(ch, p))
+ 		return -EFAULT;
+ 	tty_audit_tiocsti(tty, ch);
+ 	ld = tty_ldisc_ref_wait(tty);
+ 	ld->ops->receive_buf(tty, &ch, &flag, 1);
+ 	tty_ldisc_deref(ld);
+ 	return 0;
+ }
+
+ /**
+  *	tiocgwinsz	-	implement window query ioctl
+  *	@tty: tty
+  *	@arg: user buffer for result
+  *
+  *	Copies the kernel idea of the window size into the user buffer.
+  *
+  *	Locking: tty->termios_mutex is taken to ensure the winsize data
+  *		is consistent.
+  */
+ 
+ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
+ {
+ 	int err;
+ 
+ 	mutex_lock(&tty->termios_mutex);
+ 	err = copy_to_user(arg, &tty->winsize, sizeof(*arg));
+ 	mutex_unlock(&tty->termios_mutex);
+ 	if (err)
+ 		return -EFAULT;
+ 	return 0;
+ }
+
+ /**
+  *	tty_do_resize	-	resize event
+  *	@tty: tty being resized
+  *	@ws: new window size (rows/cols in characters)
+  *
+  *	Update the termios variables and send the necessary signals to
+  *	perform a terminal resize correctly. Sends SIGWINCH to the
+  *	foreground process group when the size actually changes.
+  */
+ 
+ int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
+ {
+ 	struct pid *pgrp;
+ 	unsigned long flags;
+ 
+ 	/* Lock the tty */
+ 	mutex_lock(&tty->termios_mutex);
+ 	/* no-op if the size is unchanged: no signal, no update */
+ 	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
+ 		goto done;
+ 	/* Get the PID values and reference them so we can
+ 	   avoid holding the tty ctrl lock while sending signals */
+ 	spin_lock_irqsave(&tty->ctrl_lock, flags);
+ 	pgrp = get_pid(tty->pgrp);
+ 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ 
+ 	if (pgrp)
+ 		kill_pgrp(pgrp, SIGWINCH, 1);
+ 	/* put_pid(NULL) is a safe no-op */
+ 	put_pid(pgrp);
+ 
+ 	tty->winsize = *ws;
+ done:
+ 	mutex_unlock(&tty->termios_mutex);
+ 	return 0;
+ }
+
+ /**
+  *	tiocswinsz	-	implement window size set ioctl
+  *	@tty: tty side of tty
+  *	@arg: user buffer holding the new size
+  *
+  *	Copies the user idea of the window size to the kernel. Traditionally
+  *	this is just advisory information but for the Linux console it
+  *	actually has driver level meaning and triggers a VC resize.
+  *
+  *	Locking:
+  *		Driver dependent. The default do_resize method takes the
+  *	tty termios mutex and ctrl_lock. The console takes its own lock
+  *	then calls into the default method.
+  */
+ 
+ static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
+ {
+ 	struct winsize ws;
+ 
+ 	if (copy_from_user(&ws, arg, sizeof(ws)))
+ 		return -EFAULT;
+ 	/* let the driver override the default resize behaviour */
+ 	if (tty->ops->resize)
+ 		return tty->ops->resize(tty, &ws);
+ 	return tty_do_resize(tty, &ws);
+ }
+
+ /**
+  *	tioccons	-	allow admin to move logical console
+  *	@file: the file to become console
+  *
+  *	Allow the administrator to move the redirected console device
+  *
+  *	Locking: uses redirect_lock to guard the redirect information
+  */
+ 
+ static int tioccons(struct file *file)
+ {
+ 	if (!capable(CAP_SYS_ADMIN))
+ 		return -EPERM;
+ 	/* issued on the redirected tty itself: cancel the redirection */
+ 	if (file->f_op->write == redirected_tty_write) {
+ 		struct file *f;
+ 		spin_lock(&redirect_lock);
+ 		f = redirect;
+ 		redirect = NULL;
+ 		spin_unlock(&redirect_lock);
+ 		/* drop the file reference outside the spinlock */
+ 		if (f)
+ 			fput(f);
+ 		return 0;
+ 	}
+ 	spin_lock(&redirect_lock);
+ 	/* only one redirection target at a time */
+ 	if (redirect) {
+ 		spin_unlock(&redirect_lock);
+ 		return -EBUSY;
+ 	}
+ 	get_file(file);
+ 	redirect = file;
+ 	spin_unlock(&redirect_lock);
+ 	return 0;
+ }
+
+ /**
+  *	fionbio		-	non blocking ioctl
+  *	@file: file to set blocking value
+  *	@p: user parameter
+  *
+  *	Historical tty interfaces had a blocking control ioctl before
+  *	the generic functionality existed. This piece of history is preserved
+  *	in the expected tty API of posix OS's.
+  *
+  *	Locking: none, the open file handle ensures it won't go away.
+  */
+ 
+ static int fionbio(struct file *file, int __user *p)
+ {
+ 	int on;
+ 
+ 	if (get_user(on, p))
+ 		return -EFAULT;
+ 
+ 	/* f_lock serialises concurrent f_flags updates */
+ 	spin_lock(&file->f_lock);
+ 	if (on)
+ 		file->f_flags |= O_NONBLOCK;
+ 	else
+ 		file->f_flags &= ~O_NONBLOCK;
+ 	spin_unlock(&file->f_lock);
+ 	return 0;
+ }
+
+ /**
+  *	tiocsctty	-	set controlling tty
+  *	@tty: tty structure
+  *	@arg: user argument
+  *
+  *	This ioctl is used to manage job control. It permits a session
+  *	leader to set this tty as the controlling tty for the session.
+  *
+  *	Locking:
+  *		Takes tty_mutex() to protect tty instance
+  *		Takes tasklist_lock internally to walk sessions
+  *		Takes ->siglock() when updating signal->tty
+  */
+ 
+ static int tiocsctty(struct tty_struct *tty, int arg)
+ {
+ 	int ret = 0;
+ 	/* already our controlling tty: nothing to do */
+ 	if (current->signal->leader && (task_session(current) == tty->session))
+ 		return ret;
+ 
+ 	mutex_lock(&tty_mutex);
+ 	/*
+ 	 * The process must be a session leader and
+ 	 * not have a controlling tty already.
+ 	 */
+ 	if (!current->signal->leader || current->signal->tty) {
+ 		ret = -EPERM;
+ 		goto unlock;
+ 	}
+ 
+ 	if (tty->session) {
+ 		/*
+ 		 * This tty is already the controlling
+ 		 * tty for another session group!
+ 		 */
+ 		if (arg == 1 && capable(CAP_SYS_ADMIN)) {
+ 			/*
+ 			 * Steal it away: detach the tty from every
+ 			 * process of the old session first.
+ 			 */
+ 			read_lock(&tasklist_lock);
+ 			session_clear_tty(tty->session);
+ 			read_unlock(&tasklist_lock);
+ 		} else {
+ 			ret = -EPERM;
+ 			goto unlock;
+ 		}
+ 	}
+ 	proc_set_tty(current, tty);
+ unlock:
+ 	mutex_unlock(&tty_mutex);
+ 	return ret;
+ }
+
+ /**
+  *	tty_get_pgrp	-	return a ref counted pgrp pid
+  *	@tty: tty to read
+  *
+  *	Returns a refcounted instance of the pid struct for the process
+  *	group controlling the tty. Caller must put_pid() the result.
+  */
+ 
+ struct pid *tty_get_pgrp(struct tty_struct *tty)
+ {
+ 	struct pid *ref;
+ 	unsigned long flags;
+ 
+ 	/* ctrl_lock guards tty->pgrp against concurrent update */
+ 	spin_lock_irqsave(&tty->ctrl_lock, flags);
+ 	ref = get_pid(tty->pgrp);
+ 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ 	return ref;
+ }
+ EXPORT_SYMBOL_GPL(tty_get_pgrp);
+
+ /**
+  *	tiocgpgrp	-	get process group
+  *	@tty: tty passed by user
+  *	@real_tty: tty side of the tty passed by the user if a pty else the tty
+  *	@p: returned pid
+  *
+  *	Obtain the process group of the tty. If there is no process group
+  *	return an error.
+  *
+  *	Locking: none. Reference to current->signal->tty is safe.
+  */
+ 
+ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+ 	struct pid *pgrp;
+ 	int err;
+ 
+ 	/*
+ 	 * (tty == real_tty) is a cheap way of testing if the tty is
+ 	 * NOT a master pty; in that case it must be our controlling tty.
+ 	 */
+ 	if (tty == real_tty && current->signal->tty != real_tty)
+ 		return -ENOTTY;
+ 	pgrp = tty_get_pgrp(real_tty);
+ 	err = put_user(pid_vnr(pgrp), p);
+ 	put_pid(pgrp);
+ 	return err;
+ }
+
+ /**
+  *	tiocspgrp	-	attempt to set process group
+  *	@tty: tty passed by user
+  *	@real_tty: tty side device matching tty passed by user
+  *	@p: pid pointer
+  *
+  *	Set the process group of the tty to the session passed. Only
+  *	permitted where the tty session is our session.
+  *
+  *	Locking: RCU, real_tty ctrl lock
+  */
+ 
+ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+ 	struct pid *pgrp;
+ 	pid_t pgrp_nr;
+ 	int retval = tty_check_change(real_tty);
+ 	unsigned long flags;
+ 
+ 	if (retval == -EIO)
+ 		return -ENOTTY;
+ 	if (retval)
+ 		return retval;
+ 	/* must be issued on our own controlling tty's session */
+ 	if (!current->signal->tty ||
+ 	    (current->signal->tty != real_tty) ||
+ 	    (real_tty->session != task_session(current)))
+ 		return -ENOTTY;
+ 	if (get_user(pgrp_nr, p))
+ 		return -EFAULT;
+ 	if (pgrp_nr < 0)
+ 		return -EINVAL;
+ 	rcu_read_lock();
+ 	pgrp = find_vpid(pgrp_nr);
+ 	retval = -ESRCH;
+ 	if (!pgrp)
+ 		goto out_unlock;
+ 	/* the new pgrp must belong to our own session */
+ 	retval = -EPERM;
+ 	if (session_of_pgrp(pgrp) != task_session(current))
+ 		goto out_unlock;
+ 	retval = 0;
+ 	/*
+ 	 * pgrp lives in real_tty, so take real_tty's ctrl_lock: for a
+ 	 * pty pair tty may be the other side, and readers such as
+ 	 * tty_get_pgrp() lock real_tty->ctrl_lock, not tty->ctrl_lock.
+ 	 */
+ 	spin_lock_irqsave(&real_tty->ctrl_lock, flags);
+ 	put_pid(real_tty->pgrp);
+ 	real_tty->pgrp = get_pid(pgrp);
+ 	spin_unlock_irqrestore(&real_tty->ctrl_lock, flags);
+ out_unlock:
+ 	rcu_read_unlock();
+ 	return retval;
+ }
+
+ /**
+  *	tiocgsid	-	get session id
+  *	@tty: tty passed by user
+  *	@real_tty: tty side of the tty passed by the user if a pty else the tty
+  *	@p: pointer to returned session id
+  *
+  *	Obtain the session id of the tty. If there is no session
+  *	return an error.
+  *
+  *	Locking: none. Reference to current->signal->tty is safe.
+  */
+ 
+ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
+ {
+ 	/*
+ 	 * (tty == real_tty) is a cheap way of testing if the tty is
+ 	 * NOT a master pty; in that case it must be our controlling tty.
+ 	 */
+ 	if (tty == real_tty && current->signal->tty != real_tty)
+ 		return -ENOTTY;
+ 	if (real_tty->session == NULL)
+ 		return -ENOTTY;
+ 	return put_user(pid_vnr(real_tty->session), p);
+ }
+
+ /**
+  *	tiocsetd	-	set line discipline
+  *	@tty: tty device
+  *	@p: pointer to user data
+  *
+  *	Set the line discipline according to user request.
+  *
+  *	Locking: see tty_set_ldisc, this function is just a helper
+  */
+ 
+ static int tiocsetd(struct tty_struct *tty, int __user *p)
+ {
+ 	int disc;
+ 
+ 	if (get_user(disc, p))
+ 		return -EFAULT;
+ 	return tty_set_ldisc(tty, disc);
+ }
+
+ /**
+  *	send_break	-	performed time break
+  *	@tty: device to break on
+  *	@duration: timeout in milliseconds
+  *
+  *	Perform a timed break on hardware that lacks its own driver level
+  *	timed break functionality.
+  *
+  *	Locking:
+  *		atomic_write_lock serializes
+  *
+  */
+ 
+ static int send_break(struct tty_struct *tty, unsigned int duration)
+ {
+ 	int retval;
+ 
+ 	/* no break capability at all: silently succeed */
+ 	if (tty->ops->break_ctl == NULL)
+ 		return 0;
+ 
+ 	if (tty->driver->flags & TTY_DRIVER_HARDWARE_BREAK)
+ 		/* hardware handles the timing itself */
+ 		retval = tty->ops->break_ctl(tty, duration);
+ 	else {
+ 		/* Do the work ourselves */
+ 		if (tty_write_lock(tty, 0) < 0)
+ 			return -EINTR;
+ 		retval = tty->ops->break_ctl(tty, -1);	/* break on */
+ 		if (retval)
+ 			goto out;
+ 		if (!signal_pending(current))
+ 			msleep_interruptible(duration);
+ 		retval = tty->ops->break_ctl(tty, 0);	/* break off */
+ out:
+ 		tty_write_unlock(tty);
+ 		if (signal_pending(current))
+ 			retval = -EINTR;
+ 	}
+ 	return retval;
+ }
+
+ /**
+  *	tty_tiocmget	-	get modem status
+  *	@tty: tty device
+  *	@file: user file pointer
+  *	@p: pointer to result
+  *
+  *	Obtain the modem status bits from the tty driver if the feature
+  *	is supported. Return -EINVAL if it is not available.
+  *
+  *	Locking: none (up to the driver)
+  */
+ 
+ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
+ {
+ 	int ret;
+ 
+ 	if (!tty->ops->tiocmget)
+ 		return -EINVAL;
+ 
+ 	ret = tty->ops->tiocmget(tty, file);
+ 	if (ret >= 0)
+ 		ret = put_user(ret, p);
+ 	return ret;
+ }
+
+ /**
+  *	tty_tiocmset	-	set modem status
+  *	@tty: tty device
+  *	@file: user file pointer
+  *	@cmd: command - clear bits, set bits or set all
+  *	@p: pointer to desired bits
+  *
+  *	Set the modem status bits from the tty driver if the feature
+  *	is supported. Return -EINVAL if it is not available.
+  *
+  *	Locking: none (up to the driver)
+  */
+ 
+ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
+ 	     unsigned __user *p)
+ {
+ 	int retval;
+ 	unsigned int set = 0, clear = 0, val;
+ 	/* only these modem control lines may be driven from userspace */
+ 	const unsigned int mask = TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 |
+ 				  TIOCM_OUT2 | TIOCM_LOOP;
+ 
+ 	if (tty->ops->tiocmset == NULL)
+ 		return -EINVAL;
+ 
+ 	retval = get_user(val, p);
+ 	if (retval)
+ 		return retval;
+ 
+ 	switch (cmd) {
+ 	case TIOCMBIS:
+ 		set = val;
+ 		break;
+ 	case TIOCMBIC:
+ 		clear = val;
+ 		break;
+ 	case TIOCMSET:
+ 		set = val;
+ 		clear = ~val;
+ 		break;
+ 	}
+ 	return tty->ops->tiocmset(tty, file, set & mask, clear & mask);
+ }
+
+ /* TIOCGICOUNT: copy the driver's interrupt counters to userspace */
+ static int tty_tiocgicount(struct tty_struct *tty, void __user *arg)
+ {
+ 	struct serial_icounter_struct icount;
+ 	int err = -EINVAL;
+ 
+ 	memset(&icount, 0, sizeof(icount));
+ 	if (tty->ops->get_icount)
+ 		err = tty->ops->get_icount(tty, &icount);
+ 	if (err)
+ 		return err;
+ 	return copy_to_user(arg, &icount, sizeof(icount)) ? -EFAULT : 0;
+ }
+
+ /* For a pty master return the slave side, otherwise the tty itself */
+ struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+ {
+ 	bool master = tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+ 		      tty->driver->subtype == PTY_TYPE_MASTER;
+ 
+ 	return master ? tty->link : tty;
+ }
+ EXPORT_SYMBOL(tty_pair_get_tty);
+
+ /* For a pty master return it unchanged, otherwise the linked side */
+ struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+ {
+ 	bool master = tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+ 		      tty->driver->subtype == PTY_TYPE_MASTER;
+ 
+ 	return master ? tty : tty->link;
+ }
+ EXPORT_SYMBOL(tty_pair_get_pty);
+
+ /*
+  * Split this up, as gcc can choke on it otherwise..
+  *
+  * tty_ioctl - the main tty ioctl dispatcher. Handles the generic tty
+  * ioctls itself, then offers anything unhandled first to the driver's
+  * ioctl method and finally to the line discipline's.
+  */
+ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ {
+ 	struct tty_struct *tty = file_tty(file);
+ 	struct tty_struct *real_tty;
+ 	void __user *p = (void __user *)arg;
+ 	int retval;
+ 	struct tty_ldisc *ld;
+ 	struct inode *inode = file->f_dentry->d_inode;
+ 
+ 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
+ 		return -EINVAL;
+ 
+ 	/* for a pty master, real_tty is the slave side */
+ 	real_tty = tty_pair_get_tty(tty);
+ 
+ 	/*
+ 	 * Factor out some common prep work
+ 	 */
+ 	switch (cmd) {
+ 	case TIOCSETD:
+ 	case TIOCSBRK:
+ 	case TIOCCBRK:
+ 	case TCSBRK:
+ 	case TCSBRKP:
+ 		retval = tty_check_change(tty);
+ 		if (retval)
+ 			return retval;
+ 		/* drain output before everything except "break off" */
+ 		if (cmd != TIOCCBRK) {
+ 			tty_wait_until_sent(tty, 0);
+ 			if (signal_pending(current))
+ 				return -EINTR;
+ 		}
+ 		break;
+ 	}
+ 
+ 	/*
+ 	 * Now do the stuff.
+ 	 */
+ 	switch (cmd) {
+ 	case TIOCSTI:
+ 		return tiocsti(tty, p);
+ 	case TIOCGWINSZ:
+ 		return tiocgwinsz(real_tty, p);
+ 	case TIOCSWINSZ:
+ 		return tiocswinsz(real_tty, p);
+ 	case TIOCCONS:
+ 		return real_tty != tty ? -EINVAL : tioccons(file);
+ 	case FIONBIO:
+ 		return fionbio(file, p);
+ 	case TIOCEXCL:
+ 		set_bit(TTY_EXCLUSIVE, &tty->flags);
+ 		return 0;
+ 	case TIOCNXCL:
+ 		clear_bit(TTY_EXCLUSIVE, &tty->flags);
+ 		return 0;
+ 	case TIOCNOTTY:
+ 		if (current->signal->tty != tty)
+ 			return -ENOTTY;
+ 		no_tty();
+ 		return 0;
+ 	case TIOCSCTTY:
+ 		return tiocsctty(tty, arg);
+ 	case TIOCGPGRP:
+ 		return tiocgpgrp(tty, real_tty, p);
+ 	case TIOCSPGRP:
+ 		return tiocspgrp(tty, real_tty, p);
+ 	case TIOCGSID:
+ 		return tiocgsid(tty, real_tty, p);
+ 	case TIOCGETD:
+ 		return put_user(tty->ldisc->ops->num, (int __user *)p);
+ 	case TIOCSETD:
+ 		return tiocsetd(tty, p);
+ 	/*
+ 	 * Without the real device to which /dev/console is connected,
+ 	 * blogd can not work.
+ 	 *      blogd spawns a pty/tty pair,
+ 	 *      set /dev/console to the tty of that pair (ioctl TIOCCONS),
+ 	 *      then reads in all input from the current /dev/console,
+ 	 *      buffer or write the data read to /var/log/boot.msg
+ 	 *      _and_ to the original real device.
+ 	 */
+ 	case TIOCGDEV:
+ 	{
+ 		unsigned int ret = new_encode_dev(tty_devnum(real_tty));
+ 		return put_user(ret, (unsigned int __user *)p);
+ 	}
+ 
+ 	/*
+ 	 * Break handling
+ 	 */
+ 	case TIOCSBRK:	/* Turn break on, unconditionally */
+ 		if (tty->ops->break_ctl)
+ 			return tty->ops->break_ctl(tty, -1);
+ 		return 0;
+ 	case TIOCCBRK:	/* Turn break off, unconditionally */
+ 		if (tty->ops->break_ctl)
+ 			return tty->ops->break_ctl(tty, 0);
+ 		return 0;
+ 	case TCSBRK:   /* SVID version: non-zero arg --> no break */
+ 		/* non-zero arg means wait for all output data
+ 		 * to be sent (performed above) but don't send break.
+ 		 * This is used by the tcdrain() termios function.
+ 		 */
+ 		if (!arg)
+ 			return send_break(tty, 250);
+ 		return 0;
+ 	case TCSBRKP:	/* support for POSIX tcsendbreak() */
+ 		return send_break(tty, arg ? arg*100 : 250);
+ 
+ 	case TIOCMGET:
+ 		return tty_tiocmget(tty, file, p);
+ 	case TIOCMSET:
+ 	case TIOCMBIC:
+ 	case TIOCMBIS:
+ 		return tty_tiocmset(tty, file, cmd, p);
+ 	case TIOCGICOUNT:
+ 		retval = tty_tiocgicount(tty, p);
+ 		/* For the moment allow fall through to the old method */
+ 		if (retval != -EINVAL)
+ 			return retval;
+ 		break;
+ 	case TCFLSH:
+ 		switch (arg) {
+ 		case TCIFLUSH:
+ 		case TCIOFLUSH:
+ 		/* flush tty buffer and allow ldisc to process ioctl */
+ 			tty_buffer_flush(tty);
+ 			break;
+ 		}
+ 		break;
+ 	}
+ 	/* unhandled: offer it to the driver ... */
+ 	if (tty->ops->ioctl) {
+ 		retval = (tty->ops->ioctl)(tty, file, cmd, arg);
+ 		if (retval != -ENOIOCTLCMD)
+ 			return retval;
+ 	}
+ 	/* ... and finally to the line discipline */
+ 	ld = tty_ldisc_ref_wait(tty);
+ 	retval = -EINVAL;
+ 	if (ld->ops->ioctl) {
+ 		retval = ld->ops->ioctl(tty, file, cmd, arg);
+ 		if (retval == -ENOIOCTLCMD)
+ 			retval = -EINVAL;
+ 	}
+ 	tty_ldisc_deref(ld);
+ 	return retval;
+ }
+
+ #ifdef CONFIG_COMPAT
+ /*
+  * 32-bit compat ioctl entry point: offer the request to the driver's
+  * compat_ioctl method first, then to the line discipline's.
+  */
+ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
+ 				unsigned long arg)
+ {
+ 	struct tty_struct *tty = file_tty(file);
+ 	struct tty_ldisc *ld;
+ 	int retval = -ENOIOCTLCMD;
+ 
+ 	if (tty_paranoia_check(tty, file->f_dentry->d_inode, "tty_ioctl"))
+ 		return -EINVAL;
+ 
+ 	if (tty->ops->compat_ioctl) {
+ 		retval = tty->ops->compat_ioctl(tty, file, cmd, arg);
+ 		if (retval != -ENOIOCTLCMD)
+ 			return retval;
+ 	}
+ 
+ 	ld = tty_ldisc_ref_wait(tty);
+ 	if (ld->ops->compat_ioctl)
+ 		retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
+ 	tty_ldisc_deref(ld);
+ 
+ 	return retval;
+ }
+ #endif
+
+ /*
+  * This implements the "Secure Attention Key" --- the idea is to
+  * prevent trojan horses by killing all processes associated with this
+  * tty when the user hits the "Secure Attention Key".  Required for
+  * super-paranoid applications --- see the Orange Book for more details.
+  *
+  * This code could be nicer; ideally it should send a HUP, wait a few
+  * seconds, then send a INT, and then a KILL signal.  But you then
+  * have to coordinate with the init process, since all processes associated
+  * with the current tty must be dead before the new getty is allowed
+  * to spawn.
+  *
+  * Now, if it would be correct ;-/ The current code has a nasty hole -
+  * it doesn't catch files in flight. We may send the descriptor to ourselves
+  * via AF_UNIX socket, close it and later fetch from socket. FIXME.
+  *
+  * Nasty bug: do_SAK is being called in interrupt context.  This can
+  * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+  */
+ void __do_SAK(struct tty_struct *tty)
+ {
+ #ifdef TTY_SOFT_SAK
+ 	/* soft variant: just hang the tty up */
+ 	tty_hangup(tty);
+ #else
+ 	struct task_struct *g, *p;
+ 	struct pid *session;
+ 	int		i;
+ 	struct file	*filp;
+ 	struct fdtable *fdt;
+ 
+ 	if (!tty)
+ 		return;
+ 	session = tty->session;
+ 
+ 	/* drop any buffered input/output before killing */
+ 	tty_ldisc_flush(tty);
+ 
+ 	tty_driver_flush_buffer(tty);
+ 
+ 	read_lock(&tasklist_lock);
+ 	/* Kill the entire session */
+ 	do_each_pid_task(session, PIDTYPE_SID, p) {
+ 		printk(KERN_NOTICE "SAK: killed process %d"
+ 			" (%s): task_session(p)==tty->session\n",
+ 			task_pid_nr(p), p->comm);
+ 		send_sig(SIGKILL, p, 1);
+ 	} while_each_pid_task(session, PIDTYPE_SID, p);
+ 	/* Now kill any processes that happen to have the
+ 	 * tty open.
+ 	 */
+ 	do_each_thread(g, p) {
+ 		if (p->signal->tty == tty) {
+ 			printk(KERN_NOTICE "SAK: killed process %d"
+ 			    " (%s): task_session(p)==tty->session\n",
+ 			    task_pid_nr(p), p->comm);
+ 			send_sig(SIGKILL, p, 1);
+ 			continue;
+ 		}
+ 		task_lock(p);
+ 		if (p->files) {
+ 			/*
+ 			 * We don't take a ref to the file, so we must
+ 			 * hold ->file_lock instead.
+ 			 */
+ 			spin_lock(&p->files->file_lock);
+ 			fdt = files_fdtable(p->files);
+ 			/* scan every open fd for one referencing this tty */
+ 			for (i = 0; i < fdt->max_fds; i++) {
+ 				filp = fcheck_files(p->files, i);
+ 				if (!filp)
+ 					continue;
+ 				if (filp->f_op->read == tty_read &&
+ 				    file_tty(filp) == tty) {
+ 					printk(KERN_NOTICE "SAK: killed process %d"
+ 					    " (%s): fd#%d opened to the tty\n",
+ 					    task_pid_nr(p), p->comm, i);
+ 					force_sig(SIGKILL, p);
+ 					break;
+ 				}
+ 			}
+ 			spin_unlock(&p->files->file_lock);
+ 		}
+ 		task_unlock(p);
+ 	} while_each_thread(g, p);
+ 	read_unlock(&tasklist_lock);
+ #endif
+ }
+
+ /* workqueue trampoline: runs __do_SAK in process context */
+ static void do_SAK_work(struct work_struct *work)
+ {
+ 	__do_SAK(container_of(work, struct tty_struct, SAK_work));
+ }
+
+ /*
+  * The tq handling here is a little racy - tty->SAK_work may already be queued.
+  * Fortunately we don't need to worry, because if ->SAK_work is already queued,
+  * the values which we write to it will be identical to the values which it
+  * already has. --akpm
+  */
+ void do_SAK(struct tty_struct *tty)
+ {
+ 	if (tty)
+ 		schedule_work(&tty->SAK_work);
+ }
+ 
+ EXPORT_SYMBOL(do_SAK);
+
+ /* class_find_device() match callback: compare a device's devt */
+ static int dev_match_devt(struct device *dev, void *data)
+ {
+ 	const dev_t *devt = data;
+ 
+ 	return dev->devt == *devt;
+ }
+
+ /* Look up the struct device for a tty. Must put_device() after it's unused! */
+ static struct device *tty_get_device(struct tty_struct *tty)
+ {
+ 	dev_t devt = tty_devnum(tty);
+ 
+ 	return class_find_device(tty_class, NULL, &devt, dev_match_devt);
+ }
+
+ 
+ /**
+  *	initialize_tty_struct
+  *	@tty: tty to initialize
+  *	@driver: tty driver the tty belongs to
+  *	@idx: line index within the driver
+  *
+  *	This subroutine initializes a tty structure that has been newly
+  *	allocated.
+  *
+  *	Locking: none - tty in question must not be exposed at this point
+  */
+ 
+ void initialize_tty_struct(struct tty_struct *tty,
+ 		struct tty_driver *driver, int idx)
+ {
+ 	memset(tty, 0, sizeof(struct tty_struct));
+ 	kref_init(&tty->kref);
+ 	tty->magic = TTY_MAGIC;
+ 	tty_ldisc_init(tty);
+ 	tty->session = NULL;
+ 	tty->pgrp = NULL;
+ 	tty->overrun_time = jiffies;
+ 	tty->buf.head = tty->buf.tail = NULL;
+ 	tty_buffer_init(tty);
+ 	/* locks and wait queues */
+ 	mutex_init(&tty->termios_mutex);
+ 	mutex_init(&tty->ldisc_mutex);
+ 	init_waitqueue_head(&tty->write_wait);
+ 	init_waitqueue_head(&tty->read_wait);
+ 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
+ 	mutex_init(&tty->atomic_read_lock);
+ 	mutex_init(&tty->atomic_write_lock);
+ 	mutex_init(&tty->output_lock);
+ 	mutex_init(&tty->echo_lock);
+ 	spin_lock_init(&tty->read_lock);
+ 	spin_lock_init(&tty->ctrl_lock);
+ 	INIT_LIST_HEAD(&tty->tty_files);
+ 	INIT_WORK(&tty->SAK_work, do_SAK_work);
+ 
+ 	/* bind the tty to its driver and line */
+ 	tty->driver = driver;
+ 	tty->ops = driver->ops;
+ 	tty->index = idx;
+ 	tty_line_name(driver, idx, tty->name);
+ 	tty->dev = tty_get_device(tty);
+ }
+
+ /**
+  *	tty_put_char	-	write one character to a tty
+  *	@tty: tty
+  *	@ch: character
+  *
+  *	Write one byte to the tty using the provided put_char method
+  *	if present. Returns the number of characters successfully output.
+  *
+  *	Note: the specific put_char operation in the driver layer may go
+  *	away soon. Don't call it directly, use this method
+  */
+ 
+ int tty_put_char(struct tty_struct *tty, unsigned char ch)
+ {
+ 	/* fall back to a one-byte write if the driver has no put_char */
+ 	if (!tty->ops->put_char)
+ 		return tty->ops->write(tty, &ch, 1);
+ 	return tty->ops->put_char(tty, ch);
+ }
+ EXPORT_SYMBOL_GPL(tty_put_char);
+
+ /* sysfs class for tty devices; used by tty_register_device() and tty_get_device() */
+ struct class *tty_class;
+
+ /**
+  *	tty_register_device - register a tty device
+  *	@driver: the tty driver that describes the tty device
+  *	@index: the index in the tty driver for this tty device
+  *	@device: a struct device that is associated with this tty device.
+  *		This field is optional, if there is no known struct device
+  *		for this tty device it can be set to NULL safely.
+  *
+  *	Returns a pointer to the struct device for this tty device
+  *	(or ERR_PTR(-EFOO) on error).
+  *
+  *	This call is required to be made to register an individual tty device
+  *	if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If
+  *	that bit is not set, this function should not be called by a tty
+  *	driver.
+  *
+  *	Locking: ??
+  */
+ 
+ struct device *tty_register_device(struct tty_driver *driver, unsigned index,
+ 				   struct device *device)
+ {
+ 	char name[64];
+ 	dev_t dev = MKDEV(driver->major, driver->minor_start) + index;
+ 
+ 	if (index >= driver->num) {
+ 		printk(KERN_ERR "Attempt to register invalid tty line number "
+ 		       " (%d).\n", index);
+ 		return ERR_PTR(-EINVAL);
+ 	}
+ 
+ 	/* ptys get their own naming scheme (ptyXX/ttyXX) */
+ 	if (driver->type == TTY_DRIVER_TYPE_PTY)
+ 		pty_line_name(driver, index, name);
+ 	else
+ 		tty_line_name(driver, index, name);
+ 
+ 	return device_create(tty_class, device, dev, NULL, name);
+ }
+ EXPORT_SYMBOL(tty_register_device);
+
+ /**
+ * tty_unregister_device - unregister a tty device
+ * @driver: the tty driver that describes the tty device
+ * @index: the index in the tty driver for this tty device
+ *
+ * If a tty device is registered with a call to tty_register_device() then
+ * this function must be called when the tty device is gone.
+ *
+ * Locking: ??
+ */
+
+ void tty_unregister_device(struct tty_driver *driver, unsigned index)
+ {
+ /* Same dev_t computation as tty_register_device() */
+ device_destroy(tty_class,
+ MKDEV(driver->major, driver->minor_start) + index);
+ }
+ EXPORT_SYMBOL(tty_unregister_device);
+
+ /**
+ * alloc_tty_driver - allocate a tty driver structure
+ * @lines: number of tty lines (minor devices) the driver will manage
+ *
+ * Returns a zeroed, kref-initialised tty_driver, or NULL on allocation
+ * failure. Drop the reference with put_tty_driver()/tty_driver_kref_put().
+ */
+ struct tty_driver *alloc_tty_driver(int lines)
+ {
+ struct tty_driver *driver;
+
+ driver = kzalloc(sizeof(struct tty_driver), GFP_KERNEL);
+ if (driver) {
+ kref_init(&driver->kref);
+ driver->magic = TTY_DRIVER_MAGIC;
+ driver->num = lines;
+ /* later we'll move allocation of tables here */
+ }
+ return driver;
+ }
+ EXPORT_SYMBOL(alloc_tty_driver);
+
+ /*
+ * Final kref release for a tty_driver: tear down per-line state, the
+ * /proc entry and the cdev, then free the driver itself. Only drivers
+ * that completed tty_register_driver() (TTY_DRIVER_INSTALLED) need the
+ * full teardown.
+ */
+ static void destruct_tty_driver(struct kref *kref)
+ {
+ struct tty_driver *driver = container_of(kref, struct tty_driver, kref);
+ int i;
+ struct ktermios *tp;
+ void *p;
+
+ if (driver->flags & TTY_DRIVER_INSTALLED) {
+ /*
+ * Free the termios and termios_locked structures because
+ * we don't want to get memory leaks when modular tty
+ * drivers are removed from the kernel.
+ */
+ for (i = 0; i < driver->num; i++) {
+ tp = driver->termios[i];
+ if (tp) {
+ driver->termios[i] = NULL;
+ kfree(tp);
+ }
+ if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV))
+ tty_unregister_device(driver, i);
+ }
+ /*
+ * ttys and termios point into one allocation made in
+ * tty_register_driver(); freeing ttys releases both.
+ */
+ p = driver->ttys;
+ proc_tty_unregister_driver(driver);
+ driver->ttys = NULL;
+ driver->termios = NULL;
+ kfree(p);
+ cdev_del(&driver->cdev);
+ }
+ kfree(driver);
+ }
+
+ /**
+ * tty_driver_kref_put - drop a reference to a tty driver
+ * @driver: driver to release
+ *
+ * The final reference triggers destruct_tty_driver().
+ */
+ void tty_driver_kref_put(struct tty_driver *driver)
+ {
+ kref_put(&driver->kref, destruct_tty_driver);
+ }
+ EXPORT_SYMBOL(tty_driver_kref_put);
+
+ /**
+ * tty_set_operations - attach a method table to a tty driver
+ * @driver: driver to configure
+ * @op: operations table (stored by reference, not copied)
+ */
+ void tty_set_operations(struct tty_driver *driver,
+ const struct tty_operations *op)
+ {
+ driver->ops = op;
+ }
+ EXPORT_SYMBOL(tty_set_operations);
+
+ /* Legacy wrapper for tty_driver_kref_put() */
+ void put_tty_driver(struct tty_driver *d)
+ {
+ tty_driver_kref_put(d);
+ }
+ EXPORT_SYMBOL(put_tty_driver);
+
+ /*
+ * Called by a tty driver to register itself.
+ *
+ * Allocates the per-line ttys/termios tables, claims a chrdev region
+ * (dynamically if driver->major is 0), adds the cdev, links the driver
+ * into tty_drivers and, unless TTY_DRIVER_DYNAMIC_DEV, registers one
+ * device per line. Returns 0 or a negative errno.
+ */
+ int tty_register_driver(struct tty_driver *driver)
+ {
+ int error;
+ int i;
+ dev_t dev;
+ void **p = NULL;
+ struct device *d;
+
+ /* One combined table: first num entries ttys, next num termios */
+ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM) && driver->num) {
+ p = kzalloc(driver->num * 2 * sizeof(void *), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ /* major == 0 requests dynamic major/minor allocation */
+ if (!driver->major) {
+ error = alloc_chrdev_region(&dev, driver->minor_start,
+ driver->num, driver->name);
+ if (!error) {
+ driver->major = MAJOR(dev);
+ driver->minor_start = MINOR(dev);
+ }
+ } else {
+ dev = MKDEV(driver->major, driver->minor_start);
+ error = register_chrdev_region(dev, driver->num, driver->name);
+ }
+ if (error < 0) {
+ kfree(p);
+ return error;
+ }
+
+ if (p) {
+ driver->ttys = (struct tty_struct **)p;
+ driver->termios = (struct ktermios **)(p + driver->num);
+ } else {
+ driver->ttys = NULL;
+ driver->termios = NULL;
+ }
+
+ cdev_init(&driver->cdev, &tty_fops);
+ driver->cdev.owner = driver->owner;
+ error = cdev_add(&driver->cdev, dev, driver->num);
+ if (error) {
+ unregister_chrdev_region(dev, driver->num);
+ driver->ttys = NULL;
+ driver->termios = NULL;
+ kfree(p);
+ return error;
+ }
+
+ mutex_lock(&tty_mutex);
+ list_add(&driver->tty_drivers, &tty_drivers);
+ mutex_unlock(&tty_mutex);
+
+ if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) {
+ for (i = 0; i < driver->num; i++) {
+ d = tty_register_device(driver, i, NULL);
+ if (IS_ERR(d)) {
+ error = PTR_ERR(d);
+ goto err;
+ }
+ }
+ }
+ proc_tty_register_driver(driver);
+ driver->flags |= TTY_DRIVER_INSTALLED;
+ return 0;
+
+ err:
+ /* Unwind the devices registered before the failing index */
+ for (i--; i >= 0; i--)
+ tty_unregister_device(driver, i);
+
+ mutex_lock(&tty_mutex);
+ list_del(&driver->tty_drivers);
+ mutex_unlock(&tty_mutex);
+
+ /*
+ * NOTE(review): the cdev added above is not removed with
+ * cdev_del() on this error path — verify this is intentional.
+ */
+ unregister_chrdev_region(dev, driver->num);
+ driver->ttys = NULL;
+ driver->termios = NULL;
+ kfree(p);
+ return error;
+ }
+
+ EXPORT_SYMBOL(tty_register_driver);
+
+ /*
+ * Called by a tty driver to unregister itself.
+ *
+ * Releases the chrdev region and unlinks the driver from tty_drivers;
+ * the remaining teardown happens when the last kref is dropped
+ * (destruct_tty_driver). Always returns 0.
+ */
+ int tty_unregister_driver(struct tty_driver *driver)
+ {
+ #if 0
+ /* FIXME */
+ if (driver->refcount)
+ return -EBUSY;
+ #endif
+ unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
+ driver->num);
+ mutex_lock(&tty_mutex);
+ list_del(&driver->tty_drivers);
+ mutex_unlock(&tty_mutex);
+ return 0;
+ }
+
+ EXPORT_SYMBOL(tty_unregister_driver);
+
+ /* Return the dev_t for a tty: the driver's base device plus line index */
+ dev_t tty_devnum(struct tty_struct *tty)
+ {
+ return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
+ }
+ EXPORT_SYMBOL(tty_devnum);
+
+ /*
+ * Clear p->signal->tty under the sighand lock and drop the reference
+ * it held. The kref is released after unlocking.
+ */
+ void proc_clear_tty(struct task_struct *p)
+ {
+ unsigned long flags;
+ struct tty_struct *tty;
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ tty = p->signal->tty;
+ p->signal->tty = NULL;
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ tty_kref_put(tty);
+ }
+
+ /* Called under the sighand lock */
+
+ /*
+ * Make @tty the tty of @tsk's signal struct: set the tty's session and
+ * pgrp from the task, take a tty kref for signal->tty and drop the old
+ * tty_old_pgrp reference. A NULL @tty just clears signal->tty.
+ */
+ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+ {
+ if (tty) {
+ unsigned long flags;
+ /* We should not have a session or pgrp to put here but.... */
+ spin_lock_irqsave(&tty->ctrl_lock, flags);
+ put_pid(tty->session);
+ put_pid(tty->pgrp);
+ tty->pgrp = get_pid(task_pgrp(tsk));
+ spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+ /*
+ * NOTE(review): tty->session is written after ctrl_lock is
+ * dropped, unlike tty->pgrp above — confirm this is intended.
+ */
+ tty->session = get_pid(task_session(tsk));
+ if (tsk->signal->tty) {
+ printk(KERN_DEBUG "tty not NULL!!\n");
+ tty_kref_put(tsk->signal->tty);
+ }
+ }
+ put_pid(tsk->signal->tty_old_pgrp);
+ tsk->signal->tty = tty_kref_get(tty);
+ tsk->signal->tty_old_pgrp = NULL;
+ }
+
+ /* Locked wrapper: take the sighand lock around __proc_set_tty() */
+ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+ {
+ spin_lock_irq(&tsk->sighand->siglock);
+ __proc_set_tty(tsk, tty);
+ spin_unlock_irq(&tsk->sighand->siglock);
+ }
+
+ /**
+ * get_current_tty - get the tty of the current task
+ *
+ * Return the tty referenced by current->signal->tty with a kref taken
+ * (release with tty_kref_put()), or NULL if there is none. The sighand
+ * lock protects signal->tty while the reference is acquired.
+ *
+ * Fix: "&current" had been mangled into the mojibake "¤t" (the
+ * "&curren;" HTML entity) by an encoding pass; restored so the code
+ * compiles.
+ */
+ struct tty_struct *get_current_tty(void)
+ {
+ struct tty_struct *tty;
+ unsigned long flags;
+
+ spin_lock_irqsave(&current->sighand->siglock, flags);
+ tty = tty_kref_get(current->signal->tty);
+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
+ return tty;
+ }
+ EXPORT_SYMBOL_GPL(get_current_tty);
+
+ /* Copy the tty file_operations table out for users outside this file */
+ void tty_default_fops(struct file_operations *fops)
+ {
+ *fops = tty_fops;
+ }
+
+ /*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+ void __init console_init(void)
+ {
+ initcall_t *call;
+
+ /* Setup the default TTY line discipline. */
+ tty_ldisc_begin();
+
+ /*
+ * set up the console device so that later boot sequences can
+ * inform about problems etc..
+ */
+ /* Walk the linker-built __con_initcall section and run each entry */
+ call = __con_initcall_start;
+ while (call < __con_initcall_end) {
+ (*call)();
+ call++;
+ }
+ }
+
+ /*
+ * Class devnode callback: grant world access (0666) to the devices at
+ * (TTYAUX_MAJOR, 0) — /dev/tty, created in tty_init() — and
+ * (TTYAUX_MAJOR, 2) — presumably /dev/ptmx; not created in this file,
+ * verify against the pty code. Always returns NULL (no name override).
+ */
+ static char *tty_devnode(struct device *dev, mode_t *mode)
+ {
+ if (!mode)
+ return NULL;
+ if (dev->devt == MKDEV(TTYAUX_MAJOR, 0) ||
+ dev->devt == MKDEV(TTYAUX_MAJOR, 2))
+ *mode = 0666;
+ return NULL;
+ }
+
+ /* Create the "tty" device class and install the devnode callback */
+ static int __init tty_class_init(void)
+ {
+ tty_class = class_create(THIS_MODULE, "tty");
+ if (IS_ERR(tty_class))
+ return PTR_ERR(tty_class);
+ tty_class->devnode = tty_devnode;
+ return 0;
+ }
+
+ /* Must run before drivers register tty devices against tty_class */
+ postcore_initcall(tty_class_init);
+
+ /* 3/2004 jmc: why do these devices exist? */
+
+ static struct cdev tty_cdev, console_cdev;
+
+ /*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc..
+ */
+ int __init tty_init(void)
+ {
+ /* /dev/tty at (TTYAUX_MAJOR, 0) */
+ cdev_init(&tty_cdev, &tty_fops);
+ if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
+ register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
+ panic("Couldn't register /dev/tty driver\n");
+ device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL,
+ "tty");
+
+ /* /dev/console at (TTYAUX_MAJOR, 1), with its own fops */
+ cdev_init(&console_cdev, &console_fops);
+ if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
+ register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
+ panic("Couldn't register /dev/console driver\n");
+ device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL,
+ "console");
+
+ #ifdef CONFIG_VT
+ vty_init(&console_fops);
+ #endif
+ return 0;
+ }
+
--- /dev/null
+ /*
+ * linux/drivers/char/keyboard.c
+ *
+ * Written for linux by Johan Myreen as a translation from
+ * the assembly version by Linus (with diacriticals added)
+ *
+ * Some additional features added by Christoph Niemann (ChN), March 1993
+ *
+ * Loadable keymaps by Risto Kankkunen, May 1993
+ *
+ * Diacriticals redone & other small changes, aeb@cwi.nl, June 1993
+ * Added decr/incr_console, dynamic keymaps, Unicode support,
+ * dynamic function/string keys, led setting, Sept 1994
+ * `Sticky' modifier keys, 951006.
+ *
+ * 11-11-96: SAK should now work in the raw mode (Martin Mares)
+ *
+ * Modified to provide 'generic' keyboard support by Hamish Macdonald
+ * Merge with the m68k keyboard driver and split-off of the PC low-level
+ * parts by Geert Uytterhoeven, May 1997
+ *
+ * 27-05-97: Added support for the Magic SysRq Key (Martin Mares)
+ * 30-07-98: Dead keys redone, aeb@cwi.nl.
+ * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik)
+ */
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+ #include <linux/consolemap.h>
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/mm.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/irq.h>
+
+ #include <linux/kbd_kern.h>
+ #include <linux/kbd_diacr.h>
+ #include <linux/vt_kern.h>
+ #include <linux/input.h>
+ #include <linux/reboot.h>
+ #include <linux/notifier.h>
+ #include <linux/jiffies.h>
+
+ extern void ctrl_alt_del(void);
+
+ /*
+ * Exported functions/variables
+ */
+
+ #define KBD_DEFMODE ((1 << VC_REPEAT) | (1 << VC_META))
+
+ /*
+ * Some laptops take the 789uiojklm,. keys as number pad when NumLock is on.
+ * This seems a good reason to start with NumLock off. On HIL keyboards
+ * of PARISC machines however there is no NumLock key and everyone expects the keypad
+ * to be used for numbers.
+ */
+
+ #if defined(CONFIG_PARISC) && (defined(CONFIG_KEYBOARD_HIL) || defined(CONFIG_KEYBOARD_HIL_OLD))
+ #define KBD_DEFLEDS (1 << VC_NUMLOCK)
+ #else
+ #define KBD_DEFLEDS 0
+ #endif
+
+ #define KBD_DEFLOCK 0
+
+ void compute_shiftstate(void);
+
+ /*
+ * Handler Tables.
+ */
+
+ #define K_HANDLERS\
+ k_self, k_fn, k_spec, k_pad,\
+ k_dead, k_cons, k_cur, k_shift,\
+ k_meta, k_ascii, k_lock, k_lowercase,\
+ k_slock, k_dead2, k_brl, k_ignore
+
+ typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
+ char up_flag);
+ static k_handler_fn K_HANDLERS;
+ static k_handler_fn *k_handler[16] = { K_HANDLERS };
+
+ #define FN_HANDLERS\
+ fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\
+ fn_show_state, fn_send_intr, fn_lastcons, fn_caps_toggle,\
+ fn_num, fn_hold, fn_scroll_forw, fn_scroll_back,\
+ fn_boot_it, fn_caps_on, fn_compose, fn_SAK,\
+ fn_dec_console, fn_inc_console, fn_spawn_con, fn_bare_num
+
+ typedef void (fn_handler_fn)(struct vc_data *vc);
+ static fn_handler_fn FN_HANDLERS;
+ static fn_handler_fn *fn_handler[] = { FN_HANDLERS };
+
+ /*
+ * Variables exported for vt_ioctl.c
+ */
+
+ /* maximum values each key_handler can handle */
+ const int max_vals[] = {
+ 255, ARRAY_SIZE(func_table) - 1, ARRAY_SIZE(fn_handler) - 1, NR_PAD - 1,
+ NR_DEAD - 1, 255, 3, NR_SHIFT - 1, 255, NR_ASCII - 1, NR_LOCK - 1,
+ 255, NR_LOCK - 1, 255, NR_BRL - 1
+ };
+
+ const int NR_TYPES = ARRAY_SIZE(max_vals);
+
+ struct kbd_struct kbd_table[MAX_NR_CONSOLES];
+ EXPORT_SYMBOL_GPL(kbd_table);
+ static struct kbd_struct *kbd = kbd_table;
+
+ struct vt_spawn_console vt_spawn_con = {
+ .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock),
+ .pid = NULL,
+ .sig = 0,
+ };
+
+ /*
+ * Variables exported for vt.c
+ */
+
+ int shift_state = 0;
+
+ /*
+ * Internal Data.
+ */
+
+ static struct input_handler kbd_handler;
+ static DEFINE_SPINLOCK(kbd_event_lock);
+ static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */
+ static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */
+ static bool dead_key_next;
+ static int npadch = -1; /* -1 or number assembled on pad */
+ static unsigned int diacr;
+ static char rep; /* flag telling character repeat */
+
+ static unsigned char ledstate = 0xff; /* undefined */
+ static unsigned char ledioctl;
+
+ static struct ledptr {
+ unsigned int *addr;
+ unsigned int mask;
+ unsigned char valid:1;
+ } ledptrs[3];
+
+ /*
+ * Notifier list for console keyboard events
+ */
+ static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
+
+ /* Add a listener to the console keyboard event notifier chain */
+ int register_keyboard_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_keyboard_notifier);
+
+ /* Remove a listener from the console keyboard event notifier chain */
+ int unregister_keyboard_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
+
+ /*
+ * Translation of scancodes to keycodes. We set them on only the first
+ * keyboard in the list that accepts the scancode and keycode.
+ * Explanation for not choosing the first attached keyboard anymore:
+ * USB keyboards for example have two event devices: one for all "normal"
+ * keys and one for extra function keys (like "volume up", "make coffee",
+ * etc.). So this means that scancodes for the extra function keys won't
+ * be valid for the first event device, but will be for the second.
+ */
+
+ /* Shared request/result carrier for the get/set keycode helpers below */
+ struct getset_keycode_data {
+ struct input_keymap_entry ke;
+ int error;
+ };
+
+ static int getkeycode_helper(struct input_handle *handle, void *data)
+ {
+ struct getset_keycode_data *d = data;
+
+ d->error = input_get_keycode(handle->dev, &d->ke);
+
+ return d->error == 0; /* stop as soon as we successfully get one */
+ }
+
+ /*
+ * Look @scancode up on each attached keyboard in turn; return the first
+ * keycode found, or a negative error (-ENODEV if no handle answered).
+ */
+ int getkeycode(unsigned int scancode)
+ {
+ struct getset_keycode_data d = {
+ .ke = {
+ .flags = 0,
+ .len = sizeof(scancode),
+ .keycode = 0,
+ },
+ .error = -ENODEV,
+ };
+
+ memcpy(d.ke.scancode, &scancode, sizeof(scancode));
+
+ input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper);
+
+ return d.error ?: d.ke.keycode;
+ }
+
+ static int setkeycode_helper(struct input_handle *handle, void *data)
+ {
+ struct getset_keycode_data *d = data;
+
+ d->error = input_set_keycode(handle->dev, &d->ke);
+
+ return d->error == 0; /* stop as soon as we successfully set one */
+ }
+
+ /*
+ * Set @scancode -> @keycode on the first attached keyboard that accepts
+ * it. Returns 0 on success or a negative error (-ENODEV if none did).
+ */
+ int setkeycode(unsigned int scancode, unsigned int keycode)
+ {
+ struct getset_keycode_data d = {
+ .ke = {
+ .flags = 0,
+ .len = sizeof(scancode),
+ .keycode = keycode,
+ },
+ .error = -ENODEV,
+ };
+
+ memcpy(d.ke.scancode, &scancode, sizeof(scancode));
+
+ input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper);
+
+ return d.error;
+ }
+
+ /*
+ * Making beeps and bells. Note that we prefer beeps to bells, but when
+ * shutting the sound off we do both.
+ */
+
+ /*
+ * Per-handle beep helper: play *data (a frequency in Hz) on a device
+ * that supports sound. Prefers SND_TONE; a zero frequency falls through
+ * so SND_BELL is switched off too (see file comment above).
+ */
+ static int kd_sound_helper(struct input_handle *handle, void *data)
+ {
+ unsigned int *hz = data;
+ struct input_dev *dev = handle->dev;
+
+ if (test_bit(EV_SND, dev->evbit)) {
+ if (test_bit(SND_TONE, dev->sndbit)) {
+ input_inject_event(handle, EV_SND, SND_TONE, *hz);
+ /* non-zero tone played: no need to also ring the bell */
+ if (*hz)
+ return 0;
+ }
+ if (test_bit(SND_BELL, dev->sndbit))
+ input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0);
+ }
+
+ return 0;
+ }
+
+ /* Timer callback: silence all keyboards (frequency 0) */
+ static void kd_nosound(unsigned long ignored)
+ {
+ static unsigned int zero;
+
+ input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper);
+ }
+
+ static DEFINE_TIMER(kd_mksound_timer, kd_nosound, 0, 0);
+
+ /*
+ * Sound a beep of @hz Hz; if @ticks is non-zero, arm a timer to stop
+ * it after that many jiffies. Any previously pending stop is cancelled
+ * first so it cannot cut the new beep short.
+ */
+ void kd_mksound(unsigned int hz, unsigned int ticks)
+ {
+ del_timer_sync(&kd_mksound_timer);
+
+ input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper);
+
+ if (hz && ticks)
+ mod_timer(&kd_mksound_timer, jiffies + ticks);
+ }
+ EXPORT_SYMBOL(kd_mksound);
+
+ /*
+ * Setting the keyboard rate.
+ */
+
+ /*
+ * Per-handle repeat-rate helper. data is a 2-element kbd_repeat array:
+ * rep[0] holds the requested delay/period (0 = leave unchanged),
+ * rep[1] receives the values the device actually ended up with.
+ */
+ static int kbd_rate_helper(struct input_handle *handle, void *data)
+ {
+ struct input_dev *dev = handle->dev;
+ struct kbd_repeat *rep = data;
+
+ if (test_bit(EV_REP, dev->evbit)) {
+
+ if (rep[0].delay > 0)
+ input_inject_event(handle,
+ EV_REP, REP_DELAY, rep[0].delay);
+ if (rep[0].period > 0)
+ input_inject_event(handle,
+ EV_REP, REP_PERIOD, rep[0].period);
+
+ rep[1].delay = dev->rep[REP_DELAY];
+ rep[1].period = dev->rep[REP_PERIOD];
+ }
+
+ return 0;
+ }
+
+ /*
+ * Set the autorepeat rate on all keyboards; *rep is updated in place
+ * with the settings actually in effect afterwards. Always returns 0.
+ */
+ int kbd_rate(struct kbd_repeat *rep)
+ {
+ struct kbd_repeat data[2] = { *rep };
+
+ input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper);
+ *rep = data[1]; /* Copy currently used settings */
+
+ return 0;
+ }
+
+ /*
+ * Helper Functions.
+ */
+ /* Queue one byte to the console's tty and schedule a buffer flip */
+ static void put_queue(struct vc_data *vc, int ch)
+ {
+ struct tty_struct *tty = vc->port.tty;
+
+ if (tty) {
+ tty_insert_flip_char(tty, ch, 0);
+ con_schedule_flip(tty);
+ }
+ }
+
+ /* Queue a NUL-terminated string to the console's tty, then one flip */
+ static void puts_queue(struct vc_data *vc, char *cp)
+ {
+ struct tty_struct *tty = vc->port.tty;
+
+ if (!tty)
+ return;
+
+ while (*cp) {
+ tty_insert_flip_char(tty, *cp, 0);
+ cp++;
+ }
+ con_schedule_flip(tty);
+ }
+
+ /*
+ * Send an escape sequence for an application/cursor key:
+ * ESC 'O' <key> in application mode, ESC '[' <key> otherwise.
+ */
+ static void applkey(struct vc_data *vc, int key, char mode)
+ {
+ static char buf[] = { 0x1b, 'O', 0x00, 0x00 };
+
+ buf[1] = (mode ? 'O' : '[');
+ buf[2] = key;
+ puts_queue(vc, buf);
+ }
+
+ /*
+ * Many other routines do put_queue, but I think either
+ * they produce ASCII, or they produce some user-assigned
+ * string, and in both cases we might assume that it is
+ * in utf-8 already.
+ */
+ /*
+ * UTF-8 encode code point @c (up to U+10FFFF) into the tty queue.
+ * Surrogates (U+D800..U+DFFF) and U+FFFF are silently dropped;
+ * anything >= 0x110000 falls through all branches and is ignored.
+ */
+ static void to_utf8(struct vc_data *vc, uint c)
+ {
+ if (c < 0x80)
+ /* 0******* */
+ put_queue(vc, c);
+ else if (c < 0x800) {
+ /* 110***** 10****** */
+ put_queue(vc, 0xc0 | (c >> 6));
+ put_queue(vc, 0x80 | (c & 0x3f));
+ } else if (c < 0x10000) {
+ if (c >= 0xD800 && c < 0xE000)
+ return;
+ if (c == 0xFFFF)
+ return;
+ /* 1110**** 10****** 10****** */
+ put_queue(vc, 0xe0 | (c >> 12));
+ put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+ put_queue(vc, 0x80 | (c & 0x3f));
+ } else if (c < 0x110000) {
+ /* 11110*** 10****** 10****** 10****** */
+ put_queue(vc, 0xf0 | (c >> 18));
+ put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
+ put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+ put_queue(vc, 0x80 | (c & 0x3f));
+ }
+ }
+
+ /*
+ * Called after returning from RAW mode or when changing consoles - recompute
+ * shift_down[] and shift_state from key_down[] maybe called when keymap is
+ * undefined, so that shiftkey release is seen
+ */
+ void compute_shiftstate(void)
+ {
+ unsigned int i, j, k, sym, val;
+
+ shift_state = 0;
+ memset(shift_down, 0, sizeof(shift_down));
+
+ /* Scan the key_down bitmap one long at a time */
+ for (i = 0; i < ARRAY_SIZE(key_down); i++) {
+
+ if (!key_down[i])
+ continue;
+
+ k = i * BITS_PER_LONG;
+
+ for (j = 0; j < BITS_PER_LONG; j++, k++) {
+
+ if (!test_bit(k, key_down))
+ continue;
+
+ /* Only shift/lock keys contribute to shift state */
+ sym = U(key_maps[0][k]);
+ if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
+ continue;
+
+ val = KVAL(sym);
+ /* CapsShift counts as plain Shift here */
+ if (val == KVAL(K_CAPSSHIFT))
+ val = KVAL(K_SHIFT);
+
+ shift_down[val]++;
+ shift_state |= (1 << val);
+ }
+ }
+ }
+
+ /*
+ * We have a combining character DIACR here, followed by the character CH.
+ * If the combination occurs in the table, return the corresponding value.
+ * Otherwise, if CH is a space or equals DIACR, return DIACR.
+ * Otherwise, conclude that DIACR was not combining after all,
+ * queue it and return CH.
+ */
+ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch)
+ {
+ unsigned int d = diacr;
+ unsigned int i;
+
+ /* Consume the pending dead key regardless of outcome */
+ diacr = 0;
+
+ /* Braille dead patterns combine by OR-ing within the braille row */
+ if ((d & ~0xff) == BRL_UC_ROW) {
+ if ((ch & ~0xff) == BRL_UC_ROW)
+ return d | ch;
+ } else {
+ for (i = 0; i < accent_table_size; i++)
+ if (accent_table[i].diacr == d && accent_table[i].base == ch)
+ return accent_table[i].result;
+ }
+
+ if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d)
+ return d;
+
+ /* Not combining: emit the dead character itself, then pass ch on */
+ if (kbd->kbdmode == VC_UNICODE)
+ to_utf8(vc, d);
+ else {
+ int c = conv_uni_to_8bit(d);
+ if (c != -1)
+ put_queue(vc, c);
+ }
+
+ return ch;
+ }
+
+ /*
+ * Special function handlers
+ */
+ /* Enter key: flush any pending dead key, then emit CR (and LF in CRLF mode) */
+ static void fn_enter(struct vc_data *vc)
+ {
+ if (diacr) {
+ if (kbd->kbdmode == VC_UNICODE)
+ to_utf8(vc, diacr);
+ else {
+ int c = conv_uni_to_8bit(diacr);
+ if (c != -1)
+ put_queue(vc, c);
+ }
+ diacr = 0;
+ }
+
+ put_queue(vc, 13);
+ if (vc_kbd_mode(kbd, VC_CRLF))
+ put_queue(vc, 10);
+ }
+
+ /* Toggle CapsLock; ignored on key autorepeat */
+ static void fn_caps_toggle(struct vc_data *vc)
+ {
+ if (rep)
+ return;
+
+ chg_vc_kbd_led(kbd, VC_CAPSLOCK);
+ }
+
+ /* Switch CapsLock on (never off); ignored on key autorepeat */
+ static void fn_caps_on(struct vc_data *vc)
+ {
+ if (rep)
+ return;
+
+ set_vc_kbd_led(kbd, VC_CAPSLOCK);
+ }
+
+ /* Dump the interrupted context's registers, if we are in one */
+ static void fn_show_ptregs(struct vc_data *vc)
+ {
+ struct pt_regs *regs = get_irq_regs();
+
+ if (regs)
+ show_regs(regs);
+ }
+
+ /* Scroll Lock: toggle output flow on the console tty */
+ static void fn_hold(struct vc_data *vc)
+ {
+ struct tty_struct *tty = vc->port.tty;
+
+ if (rep || !tty)
+ return;
+
+ /*
+ * Note: SCROLLOCK will be set (cleared) by stop_tty (start_tty);
+ * these routines are also activated by ^S/^Q.
+ * (And SCROLLOCK can also be set by the ioctl KDSKBLED.)
+ */
+ if (tty->stopped)
+ start_tty(tty);
+ else
+ stop_tty(tty);
+ }
+
+ /* NumLock: application keypad sends ESC O P, otherwise toggle the flag */
+ static void fn_num(struct vc_data *vc)
+ {
+ if (vc_kbd_mode(kbd, VC_APPLIC))
+ applkey(vc, 'P', 1);
+ else
+ fn_bare_num(vc);
+ }
+
+ /*
+ * Bind this to Shift-NumLock if you work in application keypad mode
+ * but want to be able to change the NumLock flag.
+ * Bind this to NumLock if you prefer that the NumLock key always
+ * changes the NumLock flag.
+ */
+ static void fn_bare_num(struct vc_data *vc)
+ {
+ if (!rep)
+ chg_vc_kbd_led(kbd, VC_NUMLOCK);
+ }
+
+ static void fn_lastcons(struct vc_data *vc)
+ {
+ /* switch to the last used console, ChN */
+ set_console(last_console);
+ }
+
+ /* Switch to the previous allocated console, wrapping below zero */
+ static void fn_dec_console(struct vc_data *vc)
+ {
+ int i, cur = fg_console;
+
+ /* Currently switching? Queue this next switch relative to that. */
+ if (want_console != -1)
+ cur = want_console;
+
+ for (i = cur - 1; i != cur; i--) {
+ if (i == -1)
+ i = MAX_NR_CONSOLES - 1;
+ if (vc_cons_allocated(i))
+ break;
+ }
+ set_console(i);
+ }
+
+ /* Switch to the next allocated console, wrapping past the maximum */
+ static void fn_inc_console(struct vc_data *vc)
+ {
+ int i, cur = fg_console;
+
+ /* Currently switching? Queue this next switch relative to that. */
+ if (want_console != -1)
+ cur = want_console;
+
+ for (i = cur+1; i != cur; i++) {
+ if (i == MAX_NR_CONSOLES)
+ i = 0;
+ if (vc_cons_allocated(i))
+ break;
+ }
+ set_console(i);
+ }
+
+ /* Queue a break (TTY_BREAK) to the console tty */
+ static void fn_send_intr(struct vc_data *vc)
+ {
+ struct tty_struct *tty = vc->port.tty;
+
+ if (!tty)
+ return;
+ tty_insert_flip_char(tty, 0, TTY_BREAK);
+ con_schedule_flip(tty);
+ }
+
+ static void fn_scroll_forw(struct vc_data *vc)
+ {
+ scrollfront(vc, 0);
+ }
+
+ static void fn_scroll_back(struct vc_data *vc)
+ {
+ scrollback(vc, 0);
+ }
+
+ static void fn_show_mem(struct vc_data *vc)
+ {
+ show_mem();
+ }
+
+ static void fn_show_state(struct vc_data *vc)
+ {
+ show_state();
+ }
+
+ /* Ctrl-Alt-Del */
+ static void fn_boot_it(struct vc_data *vc)
+ {
+ ctrl_alt_del();
+ }
+
+ /* Compose key: treat the next character as a dead key */
+ static void fn_compose(struct vc_data *vc)
+ {
+ dead_key_next = true;
+ }
+
+ /* Send the configured signal to the registered spawn-console process */
+ static void fn_spawn_con(struct vc_data *vc)
+ {
+ spin_lock(&vt_spawn_con.lock)_
+ if (vt_spawn_con.pid)
+ if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) {
+ put_pid(vt_spawn_con.pid);
+ vt_spawn_con.pid = NULL;
+ }
+ spin_unlock(&vt_spawn_con.lock);
+ }
+
+ /* Secure Attention Key: schedule the per-console SAK work */
+ static void fn_SAK(struct vc_data *vc)
+ {
+ struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
+ schedule_work(SAK_work);
+ }
+
+ /* No-op binding; resync shift state as a side benefit */
+ static void fn_null(struct vc_data *vc)
+ {
+ compute_shiftstate();
+ }
+
+ /*
+ * Special key handlers
+ */
+ static void k_ignore(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ }
+
+ /* Dispatch a KT_SPEC key to its fn_* handler on key press */
+ static void k_spec(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ if (up_flag)
+ return;
+ if (value >= ARRAY_SIZE(fn_handler))
+ return;
+ if ((kbd->kbdmode == VC_RAW ||
+ kbd->kbdmode == VC_MEDIUMRAW) &&
+ value != KVAL(K_SAK))
+ return; /* SAK is allowed even in raw mode */
+ fn_handler[value](vc);
+ }
+
+ /* Lowercase is resolved before dispatch; reaching here is a bug */
+ static void k_lowercase(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ pr_err("k_lowercase was called - impossible\n");
+ }
+
+ /*
+ * Emit Unicode code point @value: apply a pending dead key first, hold
+ * the value as the new dead key if compose was pressed, otherwise queue
+ * it as UTF-8 (unicode mode) or as its 8-bit conversion.
+ */
+ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag)
+ {
+ if (up_flag)
+ return; /* no action, if this is a key release */
+
+ if (diacr)
+ value = handle_diacr(vc, value);
+
+ if (dead_key_next) {
+ dead_key_next = false;
+ diacr = value;
+ return;
+ }
+ if (kbd->kbdmode == VC_UNICODE)
+ to_utf8(vc, value);
+ else {
+ int c = conv_uni_to_8bit(value);
+ if (c != -1)
+ put_queue(vc, c);
+ }
+ }
+
+ /*
+ * Handle dead key. Note that we now may have several
+ * dead keys modifying the same character. Very useful
+ * for Vietnamese.
+ */
+ static void k_deadunicode(struct vc_data *vc, unsigned int value, char up_flag)
+ {
+ if (up_flag)
+ return;
+
+ /* Stack onto an existing dead key via the accent table */
+ diacr = (diacr ? handle_diacr(vc, value) : value);
+ }
+
+ /* 8-bit self-inserting key: convert to Unicode and emit */
+ static void k_self(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ k_unicode(vc, conv_8bit_to_uni(value), up_flag);
+ }
+
+ static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ k_deadunicode(vc, value, up_flag);
+ }
+
+ /*
+ * Obsolete - for backwards compatibility only
+ */
+ static void k_dead(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ static const unsigned char ret_diacr[NR_DEAD] = {'`', '\'', '^', '~', '"', ',' };
+
+ k_deadunicode(vc, ret_diacr[value], up_flag);
+ }
+
+ /* Direct console-switch key */
+ static void k_cons(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ if (up_flag)
+ return;
+
+ set_console(value);
+ }
+
+ /* Function key: queue its (possibly user-redefined) string */
+ static void k_fn(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ if (up_flag)
+ return;
+
+ if ((unsigned)value < ARRAY_SIZE(func_table)) {
+ if (func_table[value])
+ puts_queue(vc, func_table[value]);
+ } else
+ pr_err("k_fn called with value=%d\n", value);
+ }
+
+ /* Cursor key: ESC [/O + one of B/D/C/A depending on cursor-key mode */
+ static void k_cur(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ static const char cur_chars[] = "BDCA";
+
+ if (up_flag)
+ return;
+
+ applkey(vc, cur_chars[value], vc_kbd_mode(kbd, VC_CKMODE));
+ }
+
+ /*
+ * Numeric keypad key. In application mode (without Shift) it sends an
+ * ESC O sequence; with NumLock off the keys act as cursor/editing keys;
+ * otherwise the literal pad character is queued.
+ */
+ static void k_pad(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ static const char pad_chars[] = "0123456789+-*/\015,.?()#";
+ static const char app_map[] = "pqrstuvwxylSRQMnnmPQS";
+
+ if (up_flag)
+ return; /* no action, if this is a key release */
+
+ /* kludge... shift forces cursor/number keys */
+ if (vc_kbd_mode(kbd, VC_APPLIC) && !shift_down[KG_SHIFT]) {
+ applkey(vc, app_map[value], 1);
+ return;
+ }
+
+ /* NumLock off: map the pad keys to their navigation equivalents */
+ if (!vc_kbd_led(kbd, VC_NUMLOCK)) {
+
+ switch (value) {
+ case KVAL(K_PCOMMA):
+ case KVAL(K_PDOT):
+ k_fn(vc, KVAL(K_REMOVE), 0);
+ return;
+ case KVAL(K_P0):
+ k_fn(vc, KVAL(K_INSERT), 0);
+ return;
+ case KVAL(K_P1):
+ k_fn(vc, KVAL(K_SELECT), 0);
+ return;
+ case KVAL(K_P2):
+ k_cur(vc, KVAL(K_DOWN), 0);
+ return;
+ case KVAL(K_P3):
+ k_fn(vc, KVAL(K_PGDN), 0);
+ return;
+ case KVAL(K_P4):
+ k_cur(vc, KVAL(K_LEFT), 0);
+ return;
+ case KVAL(K_P6):
+ k_cur(vc, KVAL(K_RIGHT), 0);
+ return;
+ case KVAL(K_P7):
+ k_fn(vc, KVAL(K_FIND), 0);
+ return;
+ case KVAL(K_P8):
+ k_cur(vc, KVAL(K_UP), 0);
+ return;
+ case KVAL(K_P9):
+ k_fn(vc, KVAL(K_PGUP), 0);
+ return;
+ case KVAL(K_P5):
+ applkey(vc, 'G', vc_kbd_mode(kbd, VC_APPLIC));
+ return;
+ }
+ }
+
+ put_queue(vc, pad_chars[value]);
+ /* Keypad Enter also gets an LF in CRLF mode */
+ if (value == KVAL(K_PENTER) && vc_kbd_mode(kbd, VC_CRLF))
+ put_queue(vc, 10);
+ }
+
+ /*
+ * Track a shift-class key press/release: maintain the per-key counters
+ * in shift_down[] and the shift_state bitmap, and flush any Alt-numpad
+ * character assembled in npadch when the shift key is released.
+ */
+ static void k_shift(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ int old_state = shift_state;
+
+ if (rep)
+ return;
+ /*
+ * Mimic typewriter:
+ * a CapsShift key acts like Shift but undoes CapsLock
+ */
+ if (value == KVAL(K_CAPSSHIFT)) {
+ value = KVAL(K_SHIFT);
+ if (!up_flag)
+ clr_vc_kbd_led(kbd, VC_CAPSLOCK);
+ }
+
+ if (up_flag) {
+ /*
+ * handle the case that two shift or control
+ * keys are depressed simultaneously
+ */
+ if (shift_down[value])
+ shift_down[value]--;
+ } else
+ shift_down[value]++;
+
+ if (shift_down[value])
+ shift_state |= (1 << value);
+ else
+ shift_state &= ~(1 << value);
+
+ /* kludge */
+ if (up_flag && shift_state != old_state && npadch != -1) {
+ if (kbd->kbdmode == VC_UNICODE)
+ to_utf8(vc, npadch);
+ else
+ put_queue(vc, npadch & 0xff);
+ npadch = -1;
+ }
+ }
+
+ /* Meta+key: ESC prefix in META mode, otherwise set the high bit */
+ static void k_meta(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ if (up_flag)
+ return;
+
+ if (vc_kbd_mode(kbd, VC_META)) {
+ put_queue(vc, '\033');
+ put_queue(vc, value);
+ } else
+ put_queue(vc, value | 0x80);
+ }
+
+ /*
+ * Alt-numpad digit entry: accumulate a character code in npadch,
+ * decimal for Alt (values 0-9), hexadecimal for AltGr (values 10+).
+ * The assembled character is emitted by k_shift on modifier release.
+ */
+ static void k_ascii(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ int base;
+
+ if (up_flag)
+ return;
+
+ if (value < 10) {
+ /* decimal input of code, while Alt depressed */
+ base = 10;
+ } else {
+ /* hexadecimal input of code, while AltGr depressed */
+ value -= 10;
+ base = 16;
+ }
+
+ if (npadch == -1)
+ npadch = value;
+ else
+ npadch = npadch * base + value;
+ }
+
+ /* Toggle a keyboard lock flag on key press (not autorepeat) */
+ static void k_lock(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ if (up_flag || rep)
+ return;
+
+ chg_vc_kbd_lock(kbd, value);
+ }
+
+ /* Sticky (slock) modifier: acts as shift, plus toggles the sticky flag */
+ static void k_slock(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ k_shift(vc, value, up_flag);
+ if (up_flag || rep)
+ return;
+
+ chg_vc_kbd_slock(kbd, value);
+ /* try to make Alt, oops, AltGr and such work */
+ if (!key_maps[kbd->lockstate ^ kbd->slockstate]) {
+ kbd->slockstate = 0;
+ chg_vc_kbd_slock(kbd, value);
+ }
+ }
+
+ /* by default, 300ms interval for combination release */
+ static unsigned brl_timeout = 300;
+ MODULE_PARM_DESC(brl_timeout, "Braille keys release delay in ms (0 for commit on first key release)");
+ module_param(brl_timeout, uint, 0644);
+
+ static unsigned brl_nbchords = 1;
+ MODULE_PARM_DESC(brl_nbchords, "Number of chords that produce a braille pattern (0 for dead chords)");
+ module_param(brl_nbchords, uint, 0644);
+
+ /*
+ * Commit a completed braille pattern: either treat it as a dead key
+ * (brl_nbchords == 0) or accumulate until brl_nbchords chords have been
+ * entered, then emit the combined pattern as a Unicode braille cell.
+ */
+ static void k_brlcommit(struct vc_data *vc, unsigned int pattern, char up_flag)
+ {
+ static unsigned long chords;
+ static unsigned committed;
+
+ if (!brl_nbchords)
+ k_deadunicode(vc, BRL_UC_ROW | pattern, up_flag);
+ else {
+ committed |= pattern;
+ chords++;
+ if (chords == brl_nbchords) {
+ k_unicode(vc, BRL_UC_ROW | committed, up_flag);
+ chords = 0;
+ committed = 0;
+ }
+ }
+ }
+
+ /*
+ * Braille dot key (value 1-8; 0 = blank cell). Dots pressed together
+ * form a pattern; with brl_timeout, releases within the window are
+ * collected into one combination before committing.
+ */
+ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
+ {
+ static unsigned pressed, committing;
+ static unsigned long releasestart;
+
+ if (kbd->kbdmode != VC_UNICODE) {
+ if (!up_flag)
+ pr_warning("keyboard mode must be unicode for braille patterns\n")_
+ return;
+ }
+
+ if (!value) {
+ k_unicode(vc, BRL_UC_ROW, up_flag);
+ return;
+ }
+
+ if (value > 8)
+ return;
+
+ if (!up_flag) {
+ pressed |= 1 << (value - 1);
+ if (!brl_timeout)
+ committing = pressed;
+ } else if (brl_timeout) {
+ /* Release outside the timeout window starts a new combination */
+ if (!committing ||
+ time_after(jiffies,
+ releasestart + msecs_to_jiffies(brl_timeout))) {
+ committing = pressed;
+ releasestart = jiffies;
+ }
+ pressed &= ~(1 << (value - 1));
+ if (!pressed && committing) {
+ k_brlcommit(vc, committing, 0);
+ committing = 0;
+ }
+ } else {
+ if (committing) {
+ k_brlcommit(vc, committing, 0);
+ committing = 0;
+ }
+ pressed &= ~(1 << (value - 1));
+ }
+ }
+
+ /*
+ * The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
+ * or (ii) whatever pattern of lights people want to show using KDSETLED,
+ * or (iii) specified bits of specified words in kernel memory.
+ */
+ /* Return the LED pattern last pushed to the keyboards (0xff = undefined) */
+ unsigned char getledstate(void)
+ {
+ return ledstate;
+ }
+
+ /*
+ * KDSETLED: a value within 0-7 sets that fixed pattern (LED_SHOW_IOCTL);
+ * anything else reverts the console to flag-driven LEDs.
+ */
+ void setledstate(struct kbd_struct *kbd, unsigned int led)
+ {
+ if (!(led & ~7)) {
+ ledioctl = led;
+ kbd->ledmode = LED_SHOW_IOCTL;
+ } else
+ kbd->ledmode = LED_SHOW_FLAGS;
+
+ set_leds();
+ }
+
+ /*
+ * Compute the LED pattern for the foreground console according to its
+ * ledmode: the ioctl-set pattern, the keyboard flag state, or flags
+ * overridden by the kernel-memory bits configured in ledptrs[].
+ */
+ static inline unsigned char getleds(void)
+ {
+ struct kbd_struct *kbd = kbd_table + fg_console;
+ unsigned char leds;
+ int i;
+
+ if (kbd->ledmode == LED_SHOW_IOCTL)
+ return ledioctl;
+
+ leds = kbd->ledflagstate;
+
+ if (kbd->ledmode == LED_SHOW_MEM) {
+ for (i = 0; i < 3; i++)
+ if (ledptrs[i].valid) {
+ if (*ledptrs[i].addr & ledptrs[i].mask)
+ leds |= (1 << i);
+ else
+ leds &= ~(1 << i);
+ }
+ }
+ return leds;
+ }
+
+ /*
+  * Per-handle callback: inject the given LED pattern (bit0=scroll, bit1=num,
+  * bit2=caps) into one input device, if that device has LEDs at all.
+  */
+ static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+ {
+ unsigned char leds = *(unsigned char *)data;
+
+ if (test_bit(EV_LED, handle->dev->evbit)) {
+ input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
+ input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
+ input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
+ input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
+ }
+
+ return 0;
+ }
+
+ /*
+ * This is the tasklet that updates LED state on all keyboards
+ * attached to the box. The reason we use tasklet is that we
+ * need to handle the scenario when keyboard handler is not
+ * registered yet but we are already getting updates from VT to
+ * update led state.
+ */
+ /* Tasklet body: push the current LED pattern to all keyboards if it changed. */
+ static void kbd_bh(unsigned long dummy)
+ {
+ unsigned char leds = getleds();
+
+ if (leds != ledstate) {
+ input_handler_for_each_handle(&kbd_handler, &leds,
+ kbd_update_leds_helper);
+ ledstate = leds;
+ }
+ }
+
+ DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
+
+ #if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_ALPHA) ||\
+ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\
+ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\
+ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) ||\
+ defined(CONFIG_AVR32)
+
+ /* True for AT keyboards behind i8042 that deliver hardware raw scancodes. */
+ #define HW_RAW(dev) (test_bit(EV_MSC, dev->evbit) && test_bit(MSC_RAW, dev->mscbit) &&\
+ ((dev)->id.bustype == BUS_I8042) && ((dev)->id.vendor == 0x0001) && ((dev)->id.product == 0x0001))
+
+ /* Input keycode -> AT set-1 scancode; bit 8 set means an 0xe0 escape prefix. */
+ static const unsigned short x86_keycodes[256] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84,118, 86, 87, 88,115,120,119,121,112,123, 92,
+ 284,285,309, 0,312, 91,327,328,329,331,333,335,336,337,338,339,
+ 367,288,302,304,350, 89,334,326,267,126,268,269,125,347,348,349,
+ 360,261,262,263,268,376,100,101,321,316,373,286,289,102,351,355,
+ 103,104,105,275,287,279,258,106,274,107,294,364,358,363,362,361,
+ 291,108,381,281,290,272,292,305,280, 99,112,257,306,359,113,114,
+ 264,117,271,374,379,265,266, 93, 94, 95, 85,259,375,260, 90,116,
+ 377,109,111,277,278,282,283,295,296,297,299,300,301,293,303,307,
+ 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330,
+ 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 };
+
+ #ifdef CONFIG_SPARC
+ static int sparc_l1_a_state;
+ extern void sun_do_break(void);
+ #endif
+
+ /*
+  * Emulate AT set-1 raw scancodes for a keycode event on architectures with
+  * PC-style keyboards; feeds the bytes into the VC queue. Returns 0 on
+  * success, -1 for keycodes with no scancode equivalent.
+  */
+ static int emulate_raw(struct vc_data *vc, unsigned int keycode,
+ unsigned char up_flag)
+ {
+ int code;
+
+ switch (keycode) {
+
+ case KEY_PAUSE:
+ put_queue(vc, 0xe1);
+ put_queue(vc, 0x1d | up_flag);
+ put_queue(vc, 0x45 | up_flag);
+ break;
+
+ case KEY_HANGEUL:
+ if (!up_flag)
+ put_queue(vc, 0xf2);
+ break;
+
+ case KEY_HANJA:
+ if (!up_flag)
+ put_queue(vc, 0xf1);
+ break;
+
+ case KEY_SYSRQ:
+ /*
+ * Real AT keyboards (that's what we're trying
+ * to emulate here) emit 0xe0 0x2a 0xe0 0x37 when
+ * pressing PrtSc/SysRq alone, but simply 0x54
+ * when pressing Alt+PrtSc/SysRq.
+ */
+ if (test_bit(KEY_LEFTALT, key_down) ||
+ test_bit(KEY_RIGHTALT, key_down)) {
+ put_queue(vc, 0x54 | up_flag);
+ } else {
+ put_queue(vc, 0xe0);
+ put_queue(vc, 0x2a | up_flag);
+ put_queue(vc, 0xe0);
+ put_queue(vc, 0x37 | up_flag);
+ }
+ break;
+
+ default:
+ if (keycode > 255)
+ return -1;
+
+ code = x86_keycodes[keycode];
+ if (!code)
+ return -1;
+
+ /* bit 8 in the table requests the 0xe0 extended-scancode prefix */
+ if (code & 0x100)
+ put_queue(vc, 0xe0);
+ put_queue(vc, (code & 0x7f) | up_flag);
+
+ break;
+ }
+
+ return 0;
+ }
+
+ #else
+
+ #define HW_RAW(dev) 0
+
+ /* Generic fallback: pass the keycode through directly; only 0..127 fit. */
+ static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag)
+ {
+ if (keycode > 127)
+ return -1;
+
+ put_queue(vc, keycode | up_flag);
+ return 0;
+ }
+ #endif
+
+ /* Forward a hardware raw scancode byte to the foreground VC in raw mode. */
+ static void kbd_rawcode(unsigned char data)
+ {
+ struct vc_data *vc = vc_cons[fg_console].d;
+
+ kbd = kbd_table + vc->vc_num;
+ if (kbd->kbdmode == VC_RAW)
+ put_queue(vc, data);
+ }
+
+ /*
+  * Core keycode handler: translate an input-layer keycode event for the
+  * foreground VC. Handles raw and mediumraw modes, key-down bookkeeping,
+  * keymap lookup, the keyboard notifier chain, and final dispatch to the
+  * k_* handler for the resolved keysym.
+  */
+ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
+ {
+ struct vc_data *vc = vc_cons[fg_console].d;
+ unsigned short keysym, *key_map;
+ unsigned char type;
+ bool raw_mode;
+ struct tty_struct *tty;
+ int shift_final;
+ struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
+ int rc;
+
+ tty = vc->port.tty;
+
+ if (tty && (!tty->driver_data)) {
+ /* No driver data? Strange. Okay we fix it then. */
+ tty->driver_data = vc;
+ }
+
+ kbd = kbd_table + vc->vc_num;
+
+ #ifdef CONFIG_SPARC
+ if (keycode == KEY_STOP)
+ sparc_l1_a_state = down;
+ #endif
+
+ rep = (down == 2);
+
+ raw_mode = (kbd->kbdmode == VC_RAW);
+ if (raw_mode && !hw_raw)
+ if (emulate_raw(vc, keycode, !down << 7))
+ if (keycode < BTN_MISC && printk_ratelimit())
+ pr_warning("can't emulate rawmode for keycode %d\n",
+ keycode);
+
++#ifdef CONFIG_BOOTSPLASH
++ /* This code has to be redone for some non-x86 platforms */
++ if (down == 1 && (keycode == 0x3c || keycode == 0x01)) { /* F2 and ESC on PC keyboard */
++ extern int splash_verbose(void);
++ if (splash_verbose())
++ return;
++ }
++#endif
++
+ #ifdef CONFIG_SPARC
+ if (keycode == KEY_A && sparc_l1_a_state) {
+ sparc_l1_a_state = false;
+ sun_do_break();
+ }
+ #endif
+
+ if (kbd->kbdmode == VC_MEDIUMRAW) {
+ /*
+ * This is extended medium raw mode, with keys above 127
+ * encoded as 0, high 7 bits, low 7 bits, with the 0 bearing
+ * the 'up' flag if needed. 0 is reserved, so this shouldn't
+ * interfere with anything else. The two bytes after 0 will
+ * always have the up flag set not to interfere with older
+ * applications. This allows for 16384 different keycodes,
+ * which should be enough.
+ */
+ if (keycode < 128) {
+ put_queue(vc, keycode | (!down << 7));
+ } else {
+ put_queue(vc, !down << 7);
+ put_queue(vc, (keycode >> 7) | 0x80);
+ put_queue(vc, keycode | 0x80);
+ }
+ raw_mode = true;
+ }
+
+ if (down)
+ set_bit(keycode, key_down);
+ else
+ clear_bit(keycode, key_down);
+
+ if (rep &&
+ (!vc_kbd_mode(kbd, VC_REPEAT) ||
+ (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) {
+ /*
+ * Don't repeat a key if the input buffers are not empty and the
+ * characters aren't echoed locally. This makes key repeat
+ * usable with slow applications and under heavy loads.
+ */
+ return;
+ }
+
+ param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
+ param.ledstate = kbd->ledflagstate;
+ key_map = key_maps[shift_final];
+
+ rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+ KBD_KEYCODE, &param);
+ if (rc == NOTIFY_STOP || !key_map) {
+ atomic_notifier_call_chain(&keyboard_notifier_list,
+ KBD_UNBOUND_KEYCODE, &param);
+ compute_shiftstate();
+ kbd->slockstate = 0;
+ return;
+ }
+
+ if (keycode < NR_KEYS)
+ keysym = key_map[keycode];
+ else if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8)
+ keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1));
+ else
+ return;
+
+ type = KTYP(keysym);
+
+ if (type < 0xf0) {
+ /* Plain unicode keysym: deliver as UTF-8 unless in a raw mode. */
+ param.value = keysym;
+ rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+ KBD_UNICODE, &param);
+ if (rc != NOTIFY_STOP)
+ if (down && !raw_mode)
+ to_utf8(vc, keysym);
+ return;
+ }
+
+ type -= 0xf0;
+
+ if (type == KT_LETTER) {
+ /* KT_LETTER obeys CapsLock: re-lookup with shift toggled. */
+ type = KT_LATIN;
+ if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
+ key_map = key_maps[shift_final ^ (1 << KG_SHIFT)];
+ if (key_map)
+ keysym = key_map[keycode];
+ }
+ }
+
+ param.value = keysym;
+ rc = atomic_notifier_call_chain(&keyboard_notifier_list,
+ KBD_KEYSYM, &param);
+ if (rc == NOTIFY_STOP)
+ return;
+
+ if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
+ return;
+
+ (*k_handler[type])(vc, keysym & 0xff, !down);
+
+ param.ledstate = kbd->ledflagstate;
+ atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
+
+ if (type != KT_SLOCK)
+ kbd->slockstate = 0;
+ }
+
+ /*
+  * Input-layer event callback: route raw scancodes and key events to the
+  * VC under kbd_event_lock, then schedule LED refresh and console unblank.
+  */
+ static void kbd_event(struct input_handle *handle, unsigned int event_type,
+ unsigned int event_code, int value)
+ {
+ /* We are called with interrupts disabled, just take the lock */
+ spin_lock(&kbd_event_lock);
+
+ if (event_type == EV_MSC && event_code == MSC_RAW && HW_RAW(handle->dev))
+ kbd_rawcode(value);
+ if (event_type == EV_KEY)
+ kbd_keycode(event_code, value, HW_RAW(handle->dev));
+
+ spin_unlock(&kbd_event_lock);
+
+ tasklet_schedule(&keyboard_tasklet);
+ do_poke_blanked_console = 1;
+ schedule_console_callback();
+ }
+
+ /*
+  * Accept devices that can beep (EV_SND) or that have at least one
+  * keyboard-range key or braille-dot key; skip pure button devices (mice).
+  */
+ static bool kbd_match(struct input_handler *handler, struct input_dev *dev)
+ {
+ int i;
+
+ if (test_bit(EV_SND, dev->evbit))
+ return true;
+
+ if (test_bit(EV_KEY, dev->evbit)) {
+ for (i = KEY_RESERVED; i < BTN_MISC; i++)
+ if (test_bit(i, dev->keybit))
+ return true;
+ for (i = KEY_BRL_DOT1; i <= KEY_BRL_DOT10; i++)
+ if (test_bit(i, dev->keybit))
+ return true;
+ }
+
+ return false;
+ }
+
+ /*
+ * When a keyboard (or other input device) is found, the kbd_connect
+ * function is called. The function then looks at the device, and if it
+ * likes it, it can open it and get events from it. In this (kbd_connect)
+ * function, we should decide which VT to bind that keyboard to initially.
+ */
+ /*
+  * Attach the handler to a newly matched keyboard: allocate a handle,
+  * register it and open the device. Returns 0 or a -errno; on failure the
+  * handle is unwound via the goto cleanup chain.
+  */
+ static int kbd_connect(struct input_handler *handler, struct input_dev *dev,
+ const struct input_device_id *id)
+ {
+ struct input_handle *handle;
+ int error;
+
+ handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->dev = dev;
+ handle->handler = handler;
+ handle->name = "kbd";
+
+ error = input_register_handle(handle);
+ if (error)
+ goto err_free_handle;
+
+ error = input_open_device(handle);
+ if (error)
+ goto err_unregister_handle;
+
+ return 0;
+
+ err_unregister_handle:
+ input_unregister_handle(handle);
+ err_free_handle:
+ kfree(handle);
+ return error;
+ }
+
+ /* Undo kbd_connect(): close, unregister and free the handle. */
+ static void kbd_disconnect(struct input_handle *handle)
+ {
+ input_close_device(handle);
+ input_unregister_handle(handle);
+ kfree(handle);
+ }
+
+ /*
+ * Start keyboard handler on the new keyboard by refreshing LED state to
+ * match the rest of the system.
+ */
+ static void kbd_start(struct input_handle *handle)
+ {
+ /* Pause the LED tasklet so ledstate can't change under us. */
+ tasklet_disable(&keyboard_tasklet);
+
+ /* 0xff means "not yet computed"; skip the initial push in that case. */
+ if (ledstate != 0xff)
+ kbd_update_leds_helper(handle, &ledstate);
+
+ tasklet_enable(&keyboard_tasklet);
+ }
+
+ /* Match any device with keys or a speaker; see kbd_match() for filtering. */
+ static const struct input_device_id kbd_ids[] = {
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+ .evbit = { BIT_MASK(EV_KEY) },
+ },
+
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+ .evbit = { BIT_MASK(EV_SND) },
+ },
+
+ { }, /* Terminating entry */
+ };
+
+ MODULE_DEVICE_TABLE(input, kbd_ids);
+
+ static struct input_handler kbd_handler = {
+ .event = kbd_event,
+ .match = kbd_match,
+ .connect = kbd_connect,
+ .disconnect = kbd_disconnect,
+ .start = kbd_start,
+ .name = "kbd",
+ .id_table = kbd_ids,
+ };
+
+ /*
+  * Initialise per-console keyboard state to the compiled-in defaults,
+  * register the input handler and kick off the LED tasklet.
+  */
+ int __init kbd_init(void)
+ {
+ int i;
+ int error;
+
+ for (i = 0; i < MAX_NR_CONSOLES; i++) {
+ kbd_table[i].ledflagstate = KBD_DEFLEDS;
+ kbd_table[i].default_ledflagstate = KBD_DEFLEDS;
+ kbd_table[i].ledmode = LED_SHOW_FLAGS;
+ kbd_table[i].lockstate = KBD_DEFLOCK;
+ kbd_table[i].slockstate = 0;
+ kbd_table[i].modeflags = KBD_DEFMODE;
+ kbd_table[i].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE;
+ }
+
+ error = input_register_handler(&kbd_handler);
+ if (error)
+ return error;
+
+ tasklet_enable(&keyboard_tasklet);
+ tasklet_schedule(&keyboard_tasklet);
+
+ return 0;
+ }
--- /dev/null
+ /*
+ * linux/drivers/char/vt.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+ /*
+ * Hopefully this will be a rather complete VT102 implementation.
+ *
+ * Beeping thanks to John T Kohl.
+ *
+ * Virtual Consoles, Screen Blanking, Screen Dumping, Color, Graphics
+ * Chars, and VT100 enhancements by Peter MacDonald.
+ *
+ * Copy and paste function by Andrew Haylett,
+ * some enhancements by Alessandro Rubini.
+ *
+ * Code to check for different video-cards mostly by Galen Hunt,
+ * <g-hunt@ee.utah.edu>
+ *
+ * Rudimentary ISO 10646/Unicode/UTF-8 character set support by
+ * Markus Kuhn, <mskuhn@immd4.informatik.uni-erlangen.de>.
+ *
+ * Dynamic allocation of consoles, aeb@cwi.nl, May 1994
+ * Resizing of consoles, aeb, 940926
+ *
+ * Code for xterm like mouse click reporting by Peter Orbaek 20-Jul-94
+ * <poe@daimi.aau.dk>
+ *
+ * User-defined bell sound, new setterm control sequences and printk
+ * redirection by Martin Mares <mj@k332.feld.cvut.cz> 19-Nov-95
+ *
+ * APM screenblank bug fixed Takashi Manabe <manabe@roy.dsl.tutics.tut.jp>
+ *
+ * Merge with the abstract console driver by Geert Uytterhoeven
+ * <geert@linux-m68k.org>, Jan 1997.
+ *
+ * Original m68k console driver modifications by
+ *
+ * - Arno Griffioen <arno@usn.nl>
+ * - David Carter <carter@cs.bris.ac.uk>
+ *
+ * The abstract console driver provides a generic interface for a text
+ * console. It supports VGA text mode, frame buffer based graphical consoles
+ * and special graphics processors that are only accessible through some
+ * registers (e.g. a TMS340x0 GSP).
+ *
+ * The interface to the hardware is specified using a special structure
+ * (struct consw) which contains function pointers to console operations
+ * (see <linux/console.h> for more information).
+ *
+ * Support for changeable cursor shape
+ * by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>, August 1997
+ *
+ * Ported to i386 and con_scrolldelta fixed
+ * by Emmanuel Marty <core@ggi-project.org>, April 1998
+ *
+ * Resurrected character buffers in videoram plus lots of other trickery
+ * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998
+ *
+ * Removed old-style timers, introduced console_timer, made timer
+ * deletion SMP-safe. 17Jun00, Andrew Morton
+ *
+ * Removed console_lock, enabled interrupts across all console operations
+ * 13 March 2001, Andrew Morton
+ *
+ * Fixed UTF-8 mode so alternate charset modes always work according
+ * to control sequences interpreted in do_con_trol function
+ * preserving backward VT100 semigraphics compatibility,
+ * malformed UTF sequences represented as sequences of replacement glyphs,
+ * original codes or '?' as a last resort if replacement glyph is undefined
+ * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
+ */
+
+ #include <linux/module.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/kernel.h>
+ #include <linux/string.h>
+ #include <linux/errno.h>
+ #include <linux/kd.h>
+ #include <linux/slab.h>
+ #include <linux/major.h>
+ #include <linux/mm.h>
+ #include <linux/console.h>
+ #include <linux/init.h>
+ #include <linux/mutex.h>
+ #include <linux/vt_kern.h>
+ #include <linux/selection.h>
+ #include <linux/smp_lock.h>
+ #include <linux/tiocl.h>
+ #include <linux/kbd_kern.h>
+ #include <linux/consolemap.h>
+ #include <linux/timer.h>
+ #include <linux/interrupt.h>
+ #include <linux/workqueue.h>
+ #include <linux/pm.h>
+ #include <linux/font.h>
+ #include <linux/bitops.h>
+ #include <linux/notifier.h>
+ #include <linux/device.h>
+ #include <linux/io.h>
+ #include <asm/system.h>
+ #include <linux/uaccess.h>
+ #include <linux/kdb.h>
+ #include <linux/ctype.h>
+
+ #define MAX_NR_CON_DRIVER 16
+
+ #define CON_DRIVER_FLAG_MODULE 1
+ #define CON_DRIVER_FLAG_INIT 2
+ #define CON_DRIVER_FLAG_ATTR 4
+
+ struct con_driver {
+ const struct consw *con;
+ const char *desc;
+ struct device *dev;
+ int node;
+ int first;
+ int last;
+ int flag;
+ };
+
+ static struct con_driver registered_con_driver[MAX_NR_CON_DRIVER];
+ const struct consw *conswitchp;
+
+ /* A bitmap for codes <32. A bit of 1 indicates that the code
+ * corresponding to that bit number invokes some special action
+ * (such as cursor movement) and should not be displayed as a
+ * glyph unless the disp_ctrl mode is explicitly enabled.
+ */
+ #define CTRL_ACTION 0x0d00ff81
+ #define CTRL_ALWAYS 0x0800f501 /* Cannot be overridden by disp_ctrl */
+
+ /*
+ * Here are the default bell parameters: 750HZ, 1/8th of a second
+ */
+ #define DEFAULT_BELL_PITCH 750
+ #define DEFAULT_BELL_DURATION (HZ/8)
+
+ struct vc vc_cons [MAX_NR_CONSOLES];
+
+ #ifndef VT_SINGLE_DRIVER
+ static const struct consw *con_driver_map[MAX_NR_CONSOLES];
+ #endif
+
+ static int con_open(struct tty_struct *, struct file *);
+ static void vc_init(struct vc_data *vc, unsigned int rows,
+ unsigned int cols, int do_clear);
+ static void gotoxy(struct vc_data *vc, int new_x, int new_y);
+ static void save_cur(struct vc_data *vc);
+ static void reset_terminal(struct vc_data *vc, int do_clear);
+ static void con_flush_chars(struct tty_struct *tty);
+ static int set_vesa_blanking(char __user *p);
+ static void set_cursor(struct vc_data *vc);
+ static void hide_cursor(struct vc_data *vc);
+ static void console_callback(struct work_struct *ignored);
+ static void blank_screen_t(unsigned long dummy);
+ static void set_palette(struct vc_data *vc);
+
+ static int printable; /* Is console ready for printing? */
+ int default_utf8 = true;
+ module_param(default_utf8, int, S_IRUGO | S_IWUSR);
+ int global_cursor_default = -1;
+ module_param(global_cursor_default, int, S_IRUGO | S_IWUSR);
+
+ static int cur_default = CUR_DEFAULT;
+ module_param(cur_default, int, S_IRUGO | S_IWUSR);
+
+ /*
+ * ignore_poke: don't unblank the screen when things are typed. This is
+ * mainly for the privacy of braille terminal users.
+ */
+ static int ignore_poke;
+
+ int do_poke_blanked_console;
+ int console_blanked;
+
+ static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */
+ static int vesa_off_interval;
+ static int blankinterval = 10*60;
+ core_param(consoleblank, blankinterval, int, 0444);
+
+ static DECLARE_WORK(console_work, console_callback);
+
+ /*
+ * fg_console is the current virtual console,
+ * last_console is the last used one,
+ * want_console is the console we want to switch to,
+ * saved_* variants are for save/restore around kernel debugger enter/leave
+ */
+ int fg_console;
+ int last_console;
+ int want_console = -1;
+ static int saved_fg_console;
+ static int saved_last_console;
+ static int saved_want_console;
+ static int saved_vc_mode;
+ static int saved_console_blanked;
+
+ /*
+ * For each existing display, we have a pointer to console currently visible
+ * on that display, allowing consoles other than fg_console to be refreshed
+ * appropriately. Unless the low-level driver supplies its own display_fg
+ * variable, we use this one for the "master display".
+ */
+ static struct vc_data *master_display_fg;
+
+ /*
+ * Unfortunately, we need to delay tty echo when we're currently writing to the
+ * console since the code is (and always was) not re-entrant, so we schedule
+ * all flip requests to process context with schedule-task() and run it from
+ * console_callback().
+ */
+
+ /*
+ * For the same reason, we defer scrollback to the console callback.
+ */
+ static int scrollback_delta;
+
+ /*
+ * Hook so that the power management routines can (un)blank
+ * the console on our behalf.
+ */
+ int (*console_blank_hook)(int);
+
+ static DEFINE_TIMER(console_timer, blank_screen_t, 0, 0);
+ static int blank_state;
+ static int blank_timer_expired;
+ enum {
+ blank_off = 0,
+ blank_normal_wait,
+ blank_vesa_wait,
+ };
+
+ /*
+ * Notifier list for console events.
+ */
+ static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
+
+ /* Subscribe a notifier block to VT events (VT_WRITE, VT_UPDATE, ...). */
+ int register_vt_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_register(&vt_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_vt_notifier);
+
+ /* Remove a notifier block previously added with register_vt_notifier(). */
+ int unregister_vt_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
+ }
+ EXPORT_SYMBOL_GPL(unregister_vt_notifier);
+
+ /* Tell VT notifier subscribers that a character was written to @vc. */
+ static void notify_write(struct vc_data *vc, unsigned int unicode)
+ {
+ struct vt_notifier_param param = { .vc = vc, .c = unicode };
+ atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
+ }
+
+ /* Tell VT notifier subscribers that @vc's display contents changed. */
+ static void notify_update(struct vc_data *vc)
+ {
+ struct vt_notifier_param param = { .vc = vc };
+ atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
+ }
+ /*
+ * Low-Level Functions
+ */
+
+ #define IS_FG(vc) ((vc)->vc_num == fg_console)
+
+ #ifdef VT_BUF_VRAM_ONLY
+ #define DO_UPDATE(vc) 0
+ #else
+ #define DO_UPDATE(vc) (CON_IS_VISIBLE(vc) && !console_blanked)
+ #endif
+
+ /*
+  * Map a byte offset into the screen buffer to a cell pointer; when @viewed
+  * the position is relative to what's visible (scrollback-adjusted or
+  * driver-resolved), otherwise to the backing origin.
+  */
+ static inline unsigned short *screenpos(struct vc_data *vc, int offset, int viewed)
+ {
+ unsigned short *p;
+
+ if (!viewed)
+ p = (unsigned short *)(vc->vc_origin + offset);
+ else if (!vc->vc_sw->con_screen_pos)
+ p = (unsigned short *)(vc->vc_visible_origin + offset);
+ else
+ p = vc->vc_sw->con_screen_pos(vc, offset);
+ return p;
+ }
+
+ /* Called from the keyboard irq path.. */
+ /* Accumulate a scrollback request; actual scrolling happens later in the
+  * console callback, since this runs in interrupt context. */
+ static inline void scrolldelta(int lines)
+ {
+ /* FIXME */
+ /* scrolldelta needs some kind of consistency lock, but the BKL was
+ and still is not protecting versus the scheduled back end */
+ scrollback_delta += lines;
+ schedule_console_callback();
+ }
+
+ /* Defer console work (flips, scrollback, switches) to process context. */
+ void schedule_console_callback(void)
+ {
+ schedule_work(&console_work);
+ }
+
+ /*
+  * Scroll rows [t, b) up by @nr lines, clearing the vacated bottom rows.
+  * Gives the console driver first refusal via con_scroll when visible.
+  */
+ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+ {
+ unsigned short *d, *s;
+
+ if (t+nr >= b)
+ nr = b - t - 1;
+ if (b > vc->vc_rows || t >= b || nr < 1)
+ return;
+ if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_UP, nr))
+ return;
+ d = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
+ s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * (t + nr));
+ scr_memmovew(d, s, (b - t - nr) * vc->vc_size_row);
+ scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_video_erase_char,
+ vc->vc_size_row * nr);
+ }
+
+ /*
+  * Scroll rows [t, b) down by @nr lines, clearing the vacated top rows.
+  * Mirror of scrup(); the driver may handle it via con_scroll when visible.
+  */
+ static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr)
+ {
+ unsigned short *s;
+ unsigned int step;
+
+ if (t+nr >= b)
+ nr = b - t - 1;
+ if (b > vc->vc_rows || t >= b || nr < 1)
+ return;
+ if (CON_IS_VISIBLE(vc) && vc->vc_sw->con_scroll(vc, t, b, SM_DOWN, nr))
+ return;
+ s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t);
+ step = vc->vc_cols * nr;
+ scr_memmovew(s + step, s, (b - t - nr) * vc->vc_size_row);
+ scr_memsetw(s, vc->vc_video_erase_char, 2 * step);
+ }
+
+ /*
+  * Repaint @count cells starting at buffer address @start by calling the
+  * driver's con_putcs in runs of identical attribute bytes; wraps across
+  * rows and lets con_getxy (if present) resolve non-linear buffers.
+  */
+ static void do_update_region(struct vc_data *vc, unsigned long start, int count)
+ {
+ #ifndef VT_BUF_VRAM_ONLY
+ unsigned int xx, yy, offset;
+ u16 *p;
+
+ p = (u16 *) start;
+ if (!vc->vc_sw->con_getxy) {
+ offset = (start - vc->vc_origin) / 2;
+ xx = offset % vc->vc_cols;
+ yy = offset / vc->vc_cols;
+ } else {
+ int nxx, nyy;
+ start = vc->vc_sw->con_getxy(vc, start, &nxx, &nyy);
+ xx = nxx; yy = nyy;
+ }
+ for(;;) {
+ u16 attrib = scr_readw(p) & 0xff00;
+ int startx = xx;
+ u16 *q = p;
+ while (xx < vc->vc_cols && count) {
+ if (attrib != (scr_readw(p) & 0xff00)) {
+ if (p > q)
+ vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
+ startx = xx;
+ q = p;
+ attrib = scr_readw(p) & 0xff00;
+ }
+ p++;
+ xx++;
+ count--;
+ }
+ if (p > q)
+ vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
+ if (!count)
+ break;
+ xx = 0;
+ yy++;
+ if (vc->vc_sw->con_getxy) {
+ p = (u16 *)start;
+ start = vc->vc_sw->con_getxy(vc, start, NULL, NULL);
+ }
+ }
+ #endif
+ }
+
+ /* Repaint a region with cursor hidden/restored; no-op when not displayed. */
+ void update_region(struct vc_data *vc, unsigned long start, int count)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (DO_UPDATE(vc)) {
+ hide_cursor(vc);
+ do_update_region(vc, start, count);
+ set_cursor(vc);
+ }
+ }
+
+ /* Structure of attributes is hardware-dependent */
+
+ /*
+  * Build the hardware attribute byte for a cell from the logical attributes,
+  * deferring to the driver's con_build_attr when provided. The monochrome
+  * encoding is a plain bit vector (see comment below); the color encoding is
+  * classic VGA fg/bg nibbles with substitutions for italic/underline/dim.
+  */
+ static u8 build_attr(struct vc_data *vc, u8 _color, u8 _intensity, u8 _blink,
+ u8 _underline, u8 _reverse, u8 _italic)
+ {
+ if (vc->vc_sw->con_build_attr)
+ return vc->vc_sw->con_build_attr(vc, _color, _intensity,
+ _blink, _underline, _reverse, _italic);
+
+ #ifndef VT_BUF_VRAM_ONLY
+ /*
+ * ++roman: I completely changed the attribute format for monochrome
+ * mode (!can_do_color). The formerly used MDA (monochrome display
+ * adapter) format didn't allow the combination of certain effects.
+ * Now the attribute is just a bit vector:
+ * Bit 0..1: intensity (0..2)
+ * Bit 2 : underline
+ * Bit 3 : reverse
+ * Bit 7 : blink
+ */
+ {
+ u8 a = _color;
+ if (!vc->vc_can_do_color)
+ return _intensity |
+ (_italic ? 2 : 0) |
+ (_underline ? 4 : 0) |
+ (_reverse ? 8 : 0) |
+ (_blink ? 0x80 : 0);
+ if (_italic)
+ a = (a & 0xF0) | vc->vc_itcolor;
+ else if (_underline)
+ a = (a & 0xf0) | vc->vc_ulcolor;
+ else if (_intensity == 0)
+ /* half intensity uses its own color, not the underline color */
+ a = (a & 0xf0) | vc->vc_halfcolor;
+ if (_reverse)
+ a = ((a) & 0x88) | ((((a) >> 4) | ((a) << 4)) & 0x77);
+ if (_blink)
+ a ^= 0x80;
+ if (_intensity == 2)
+ a ^= 0x08;
+ if (vc->vc_hi_font_mask == 0x100)
+ a <<= 1;
+ return a;
+ }
+ #else
+ return 0;
+ #endif
+ }
+
+ /* Recompute the cached attribute byte and the erase character (attr+space). */
+ static void update_attr(struct vc_data *vc)
+ {
+ vc->vc_attr = build_attr(vc, vc->vc_color, vc->vc_intensity,
+ vc->vc_blink, vc->vc_underline,
+ vc->vc_reverse ^ vc->vc_decscnm, vc->vc_italic);
+ vc->vc_video_erase_char = (build_attr(vc, vc->vc_color, 1, vc->vc_blink, 0, vc->vc_decscnm, 0) << 8) | ' ';
+ }
+
+ /* Note: inverting the screen twice should revert to the original state */
+ /* Invert the attributes of @count bytes of screen starting at @offset,
+  * either via the driver's con_invert_region or by toggling/swap-ping the
+  * attribute bits directly (mono, 512-glyph, and standard color layouts). */
+ void invert_screen(struct vc_data *vc, int offset, int count, int viewed)
+ {
+ unsigned short *p;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ count /= 2;
+ p = screenpos(vc, offset, viewed);
+ if (vc->vc_sw->con_invert_region)
+ vc->vc_sw->con_invert_region(vc, p, count);
+ #ifndef VT_BUF_VRAM_ONLY
+ else {
+ u16 *q = p;
+ int cnt = count;
+ u16 a;
+
+ if (!vc->vc_can_do_color) {
+ while (cnt--) {
+ a = scr_readw(q);
+ a ^= 0x0800;
+ scr_writew(a, q);
+ q++;
+ }
+ } else if (vc->vc_hi_font_mask == 0x100) {
+ while (cnt--) {
+ a = scr_readw(q);
+ a = ((a) & 0x11ff) | (((a) & 0xe000) >> 4) | (((a) & 0x0e00) << 4);
+ scr_writew(a, q);
+ q++;
+ }
+ } else {
+ while (cnt--) {
+ a = scr_readw(q);
+ a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4);
+ scr_writew(a, q);
+ q++;
+ }
+ }
+ }
+ #endif
+ if (DO_UPDATE(vc))
+ do_update_region(vc, (unsigned long) p, count);
+ }
+
+ /* used by selection: complement pointer position */
+ /* Restore the previously complemented cell (if any), then complement the
+  * cell at @offset; offset == -1 just clears the current highlight. */
+ void complement_pos(struct vc_data *vc, int offset)
+ {
+ static int old_offset = -1;
+ static unsigned short old;
+ static unsigned short oldx, oldy;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ if (old_offset != -1 && old_offset >= 0 &&
+ old_offset < vc->vc_screenbuf_size) {
+ scr_writew(old, screenpos(vc, old_offset, 1));
+ if (DO_UPDATE(vc))
+ vc->vc_sw->con_putc(vc, old, oldy, oldx);
+ }
+
+ old_offset = offset;
+
+ if (offset != -1 && offset >= 0 &&
+ offset < vc->vc_screenbuf_size) {
+ unsigned short new;
+ unsigned short *p;
+ p = screenpos(vc, offset, 1);
+ old = scr_readw(p);
+ new = old ^ vc->vc_complement_mask;
+ scr_writew(new, p);
+ if (DO_UPDATE(vc)) {
+ oldx = (offset >> 1) % vc->vc_cols;
+ oldy = (offset >> 1) / vc->vc_cols;
+ vc->vc_sw->con_putc(vc, new, oldy, oldx);
+ }
+ }
+
+ }
+
+ /*
+  * Insert @nr blank cells at the cursor, shifting the rest of the line
+  * right; mirrors the change on screen via con_bmove/con_putc if visible.
+  */
+ static void insert_char(struct vc_data *vc, unsigned int nr)
+ {
+ unsigned short *p, *q = (unsigned short *)vc->vc_pos;
+
+ p = q + vc->vc_cols - nr - vc->vc_x;
+ while (--p >= q)
+ scr_writew(scr_readw(p), p + nr);
+ scr_memsetw(q, vc->vc_video_erase_char, nr * 2);
+ vc->vc_need_wrap = 0;
+ if (DO_UPDATE(vc)) {
+ unsigned short oldattr = vc->vc_attr;
+ vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x, vc->vc_y, vc->vc_x + nr, 1,
+ vc->vc_cols - vc->vc_x - nr);
+ vc->vc_attr = vc->vc_video_erase_char >> 8;
+ while (nr--)
+ vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y, vc->vc_x + nr);
+ vc->vc_attr = oldattr;
+ }
+ }
+
+ /*
+  * Delete @nr cells at the cursor, shifting the rest of the line left and
+  * blanking the tail; mirrors the change on screen if visible.
+  */
+ static void delete_char(struct vc_data *vc, unsigned int nr)
+ {
+ unsigned int i = vc->vc_x;
+ unsigned short *p = (unsigned short *)vc->vc_pos;
+
+ while (++i <= vc->vc_cols - nr) {
+ scr_writew(scr_readw(p+nr), p);
+ p++;
+ }
+ scr_memsetw(p, vc->vc_video_erase_char, nr * 2);
+ vc->vc_need_wrap = 0;
+ if (DO_UPDATE(vc)) {
+ unsigned short oldattr = vc->vc_attr;
+ vc->vc_sw->con_bmove(vc, vc->vc_y, vc->vc_x + nr, vc->vc_y, vc->vc_x, 1,
+ vc->vc_cols - vc->vc_x - nr);
+ vc->vc_attr = vc->vc_video_erase_char >> 8;
+ while (nr--)
+ vc->vc_sw->con_putc(vc, vc->vc_video_erase_char, vc->vc_y,
+ vc->vc_cols - 1 - nr);
+ vc->vc_attr = oldattr;
+ }
+ }
+
+ /* Cell value saved under the software cursor; -1 means no cursor drawn. */
+ static int softcursor_original;
+
+ /*
+  * Draw the software cursor by mangling the cell under it according to the
+  * encoded vc_cursor_type (set/change masks in bits 8..23, flags 0x10/0x20/
+  * 0x40); the original cell is remembered for hide_softcursor().
+  */
+ static void add_softcursor(struct vc_data *vc)
+ {
+ int i = scr_readw((u16 *) vc->vc_pos);
+ u32 type = vc->vc_cursor_type;
+
+ if (! (type & 0x10)) return;
+ if (softcursor_original != -1) return;
+ softcursor_original = i;
+ i |= ((type >> 8) & 0xff00 );
+ i ^= ((type) & 0xff00 );
+ if ((type & 0x20) && ((softcursor_original & 0x7000) == (i & 0x7000))) i ^= 0x7000;
+ if ((type & 0x40) && ((i & 0x700) == ((i & 0x7000) >> 4))) i ^= 0x0700;
+ scr_writew(i, (u16 *) vc->vc_pos);
+ if (DO_UPDATE(vc))
+ vc->vc_sw->con_putc(vc, i, vc->vc_y, vc->vc_x);
+ }
+
+ /* Restore the cell saved by add_softcursor(), if a soft cursor is shown. */
+ static void hide_softcursor(struct vc_data *vc)
+ {
+ if (softcursor_original != -1) {
+ scr_writew(softcursor_original, (u16 *)vc->vc_pos);
+ if (DO_UPDATE(vc))
+ vc->vc_sw->con_putc(vc, softcursor_original,
+ vc->vc_y, vc->vc_x);
+ softcursor_original = -1;
+ }
+ }
+
+ /* Remove both hardware and software cursors; drop any active selection. */
+ static void hide_cursor(struct vc_data *vc)
+ {
+ if (vc == sel_cons)
+ clear_selection();
+ vc->vc_sw->con_cursor(vc, CM_ERASE);
+ hide_softcursor(vc);
+ }
+
+ /*
+  * Show the cursor on the foreground, unblanked, text-mode console;
+  * honours DECCM (cursor enable) and cursor type "none" (low nibble == 1).
+  */
+ static void set_cursor(struct vc_data *vc)
+ {
+ if (!IS_FG(vc) || console_blanked ||
+ vc->vc_mode == KD_GRAPHICS)
+ return;
+ if (vc->vc_deccm) {
+ if (vc == sel_cons)
+ clear_selection();
+ add_softcursor(vc);
+ if ((vc->vc_cursor_type & 0x0f) != 1)
+ vc->vc_sw->con_cursor(vc, CM_DRAW);
+ } else
+ hide_cursor(vc);
+ }
+
+ /*
+  * Re-derive origin/visible-origin/end/pos pointers. The driver may supply
+  * its own origin (e.g. VRAM) via con_set_origin; otherwise the in-kernel
+  * screen buffer is used.
+  */
+ static void set_origin(struct vc_data *vc)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (!CON_IS_VISIBLE(vc) ||
+ !vc->vc_sw->con_set_origin ||
+ !vc->vc_sw->con_set_origin(vc))
+ vc->vc_origin = (unsigned long)vc->vc_screenbuf;
+ vc->vc_visible_origin = vc->vc_origin;
+ vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size;
+ vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->vc_y + 2 * vc->vc_x;
+ }
+
+ /* Let the driver snapshot screen contents before a console switch. */
+ static inline void save_screen(struct vc_data *vc)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (vc->vc_sw->con_save_screen)
+ vc->vc_sw->con_save_screen(vc);
+ }
+
+ /*
+ * Redrawing of screen
+ */
+
+ /* Reset every cell's attribute byte to the erase attribute, keeping glyphs. */
+ static void clear_buffer_attributes(struct vc_data *vc)
+ {
+ unsigned short *p = (unsigned short *)vc->vc_origin;
+ int count = vc->vc_screenbuf_size / 2;
+ int mask = vc->vc_hi_font_mask | 0xff;
+
+ for (; count > 0; count--, p++) {
+ scr_writew((scr_readw(p)&mask) | (vc->vc_video_erase_char & ~mask), p);
+ }
+ }
+
+ /*
+  * Redraw @vc, optionally as part of a console switch (@is_switch): saves
+  * the outgoing console, makes @vc the foreground console, asks the driver
+  * to switch, and repaints unless the driver says the contents survived.
+  */
+ void redraw_screen(struct vc_data *vc, int is_switch)
+ {
+ int redraw = 0;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ if (!vc) {
+ /* strange ... */
+ /* printk("redraw_screen: tty %d not allocated ??\n", new_console+1); */
+ return;
+ }
+
+ if (is_switch) {
+ struct vc_data *old_vc = vc_cons[fg_console].d;
+ if (old_vc == vc)
+ return;
+ if (!CON_IS_VISIBLE(vc))
+ redraw = 1;
+ *vc->vc_display_fg = vc;
+ fg_console = vc->vc_num;
+ hide_cursor(old_vc);
+ if (!CON_IS_VISIBLE(old_vc)) {
+ save_screen(old_vc);
+ set_origin(old_vc);
+ }
+ } else {
+ hide_cursor(vc);
+ redraw = 1;
+ }
+
+ if (redraw) {
+ int update;
+ int old_was_color = vc->vc_can_do_color;
+
+ set_origin(vc);
+ update = vc->vc_sw->con_switch(vc);
+ set_palette(vc);
+ /*
+ * If console changed from mono<->color, the best we can do
+ * is to clear the buffer attributes. As it currently stands,
+ * rebuilding new attributes from the old buffer is not doable
+ * without overly complex code.
+ */
+ if (old_was_color != vc->vc_can_do_color) {
+ update_attr(vc);
+ clear_buffer_attributes(vc);
+ }
+
+ /* Forcibly update if we're panicing */
+ if ((update && vc->vc_mode != KD_GRAPHICS) ||
+ vt_force_oops_output(vc))
+ do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
+ }
+ set_cursor(vc);
+ if (is_switch) {
+ set_leds();
+ compute_shiftstate();
+ notify_update(vc);
+ }
+ }
+
+ /*
+ * Allocation, freeing and resizing of VTs.
+ */
+
+ /* Nonzero iff console @i is in range and its vc_data has been allocated. */
+ int vc_cons_allocated(unsigned int i)
+ {
+ return (i < MAX_NR_CONSOLES && vc_cons[i].d);
+ }
+
/*
 * Bind console @num to its display driver and let the driver size it.
 * Drops the reference on any previously bound driver before taking one
 * on the new driver, so the refcounts stay balanced across rebinds.
 */
static void visual_init(struct vc_data *vc, int num, int init)
{
	/* ++Geert: vc->vc_sw->con_init determines console size */
	if (vc->vc_sw)
		module_put(vc->vc_sw->owner);
	vc->vc_sw = conswitchp;
#ifndef VT_SINGLE_DRIVER
	/* per-console driver override (con_driver_map) wins over the default */
	if (con_driver_map[num])
		vc->vc_sw = con_driver_map[num];
#endif
	__module_get(vc->vc_sw->owner);
	vc->vc_num = num;
	vc->vc_display_fg = &master_display_fg;
	vc->vc_uni_pagedir_loc = &vc->vc_uni_pagedir;
	vc->vc_uni_pagedir = 0;
	vc->vc_hi_font_mask = 0;
	vc->vc_complement_mask = 0;
	vc->vc_can_do_color = 0;
	vc->vc_panic_force_write = false;
	/* con_init() fills in vc_rows/vc_cols and may set the masks above */
	vc->vc_sw->con_init(vc, init);
	if (!vc->vc_complement_mask)
		vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
	vc->vc_s_complement_mask = vc->vc_complement_mask;
	vc->vc_size_row = vc->vc_cols << 1;	/* two bytes per cell */
	vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;
}
+
+ int vc_allocate(unsigned int currcons) /* return 0 on success */
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (currcons >= MAX_NR_CONSOLES)
+ return -ENXIO;
+ if (!vc_cons[currcons].d) {
+ struct vc_data *vc;
+ struct vt_notifier_param param;
+
+ /* prevent users from taking too much memory */
+ if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ /* due to the granularity of kmalloc, we waste some memory here */
+ /* the alloc is done in two steps, to optimize the common situation
+ of a 25x80 console (structsize=216, screenbuf_size=4000) */
+ /* although the numbers above are not valid since long ago, the
+ point is still up-to-date and the comment still has its value
+ even if only as a historical artifact. --mj, July 1998 */
+ param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
+ if (!vc)
+ return -ENOMEM;
+ vc_cons[currcons].d = vc;
+ tty_port_init(&vc->port);
+ INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
+ visual_init(vc, currcons, 1);
+ if (!*vc->vc_uni_pagedir_loc)
+ con_set_default_unimap(vc);
+ vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
+ if (!vc->vc_screenbuf) {
+ kfree(vc);
+ vc_cons[currcons].d = NULL;
+ return -ENOMEM;
+ }
+
+ /* If no drivers have overridden us and the user didn't pass a
+ boot option, default to displaying the cursor */
+ if (global_cursor_default == -1)
+ global_cursor_default = 1;
+
+ vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
+ vcs_make_sysfs(currcons);
+ atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, ¶m);
+ }
+ return 0;
+ }
+
+ static inline int resize_screen(struct vc_data *vc, int width, int height,
+ int user)
+ {
+ /* Resizes the resolution of the display adapater */
+ int err = 0;
+
+ if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
+ err = vc->vc_sw->con_resize(vc, width, height, user);
+
+ return err;
+ }
+
+ /*
+ * Change # of rows and columns (0 means unchanged/the size of fg_console)
+ * [this is to be used together with some user program
+ * like resize that changes the hardware videomode]
+ */
/* Hard sanity limits on a requested console geometry (fits in a short) */
#define VC_RESIZE_MAXCOL (32767)
#define VC_RESIZE_MAXROW (32767)
+
+ /**
+ * vc_do_resize - resizing method for the tty
+ * @tty: tty being resized
+ * @real_tty: real tty (different to tty if a pty/tty pair)
+ * @vc: virtual console private data
+ * @cols: columns
+ * @lines: lines
+ *
+ * Resize a virtual console, clipping according to the actual constraints.
+ * If the caller passes a tty structure then update the termios winsize
+ * information and perform any necessary signal handling.
+ *
+ * Caller must hold the console semaphore. Takes the termios mutex and
+ * ctrl_lock of the tty IFF a tty is passed.
+ */
+
+ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
+ unsigned int cols, unsigned int lines)
+ {
+ unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
+ unsigned long end;
+ unsigned int old_cols, old_rows, old_row_size, old_screen_size;
+ unsigned int new_cols, new_rows, new_row_size, new_screen_size;
+ unsigned int user;
+ unsigned short *newscreen;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ if (!vc)
+ return -ENXIO;
+
+ user = vc->vc_resize_user;
+ vc->vc_resize_user = 0;
+
+ if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW)
+ return -EINVAL;
+
+ new_cols = (cols ? cols : vc->vc_cols);
+ new_rows = (lines ? lines : vc->vc_rows);
+ new_row_size = new_cols << 1;
+ new_screen_size = new_row_size * new_rows;
+
+ if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
+ return 0;
+
+ newscreen = kmalloc(new_screen_size, GFP_USER);
+ if (!newscreen)
+ return -ENOMEM;
+
+ old_rows = vc->vc_rows;
+ old_cols = vc->vc_cols;
+ old_row_size = vc->vc_size_row;
+ old_screen_size = vc->vc_screenbuf_size;
+
+ err = resize_screen(vc, new_cols, new_rows, user);
+ if (err) {
+ kfree(newscreen);
+ return err;
+ }
+
+ vc->vc_rows = new_rows;
+ vc->vc_cols = new_cols;
+ vc->vc_size_row = new_row_size;
+ vc->vc_screenbuf_size = new_screen_size;
+
+ rlth = min(old_row_size, new_row_size);
+ rrem = new_row_size - rlth;
+ old_origin = vc->vc_origin;
+ new_origin = (long) newscreen;
+ new_scr_end = new_origin + new_screen_size;
+
+ if (vc->vc_y > new_rows) {
+ if (old_rows - vc->vc_y < new_rows) {
+ /*
+ * Cursor near the bottom, copy contents from the
+ * bottom of buffer
+ */
+ old_origin += (old_rows - new_rows) * old_row_size;
+ } else {
+ /*
+ * Cursor is in no man's land, copy 1/2 screenful
+ * from the top and bottom of cursor position
+ */
+ old_origin += (vc->vc_y - new_rows/2) * old_row_size;
+ }
+ }
+
+ end = old_origin + old_row_size * min(old_rows, new_rows);
+
+ update_attr(vc);
+
+ while (old_origin < end) {
+ scr_memcpyw((unsigned short *) new_origin,
+ (unsigned short *) old_origin, rlth);
+ if (rrem)
+ scr_memsetw((void *)(new_origin + rlth),
+ vc->vc_video_erase_char, rrem);
+ old_origin += old_row_size;
+ new_origin += new_row_size;
+ }
+ if (new_scr_end > new_origin)
+ scr_memsetw((void *)new_origin, vc->vc_video_erase_char,
+ new_scr_end - new_origin);
+ kfree(vc->vc_screenbuf);
+ vc->vc_screenbuf = newscreen;
+ vc->vc_screenbuf_size = new_screen_size;
+ set_origin(vc);
+
+ /* do part of a reset_terminal() */
+ vc->vc_top = 0;
+ vc->vc_bottom = vc->vc_rows;
+ gotoxy(vc, vc->vc_x, vc->vc_y);
+ save_cur(vc);
+
+ if (tty) {
+ /* Rewrite the requested winsize data with the actual
+ resulting sizes */
+ struct winsize ws;
+ memset(&ws, 0, sizeof(ws));
+ ws.ws_row = vc->vc_rows;
+ ws.ws_col = vc->vc_cols;
+ ws.ws_ypixel = vc->vc_scan_lines;
+ tty_do_resize(tty, &ws);
+ }
+
+ if (CON_IS_VISIBLE(vc))
+ update_screen(vc);
+ vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num);
+ return err;
+ }
+
+ /**
+ * vc_resize - resize a VT
+ * @vc: virtual console
+ * @cols: columns
+ * @rows: rows
+ *
+ * Resize a virtual console as seen from the console end of things. We
+ * use the common vc_do_resize methods to update the structures. The
+ * caller must hold the console sem to protect console internals and
+ * vc->port.tty
+ */
+
+ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
+ {
+ return vc_do_resize(vc->port.tty, vc, cols, rows);
+ }
+
+ /**
+ * vt_resize - resize a VT
+ * @tty: tty to resize
+ * @ws: winsize attributes
+ *
+ * Resize a virtual terminal. This is called by the tty layer as we
+ * register our own handler for resizing. The mutual helper does all
+ * the actual work.
+ *
+ * Takes the console sem and the called methods then take the tty
+ * termios_mutex and the tty ctrl_lock in that order.
+ */
+ static int vt_resize(struct tty_struct *tty, struct winsize *ws)
+ {
+ struct vc_data *vc = tty->driver_data;
+ int ret;
+
+ acquire_console_sem();
+ ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row);
+ release_console_sem();
+ return ret;
+ }
+
+ void vc_deallocate(unsigned int currcons)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (vc_cons_allocated(currcons)) {
+ struct vc_data *vc = vc_cons[currcons].d;
+ struct vt_notifier_param param = { .vc = vc };
+
+ atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, ¶m);
+ vcs_remove_sysfs(currcons);
+ vc->vc_sw->con_deinit(vc);
+ put_pid(vc->vt_pid);
+ module_put(vc->vc_sw->owner);
+ kfree(vc->vc_screenbuf);
+ if (currcons >= MIN_NR_CONSOLES)
+ kfree(vc);
+ vc_cons[currcons].d = NULL;
+ }
+ }
+
+ /*
+ * VT102 emulator
+ */
+
/* Helpers to set/clear/test keyboard mode bits for a console's kbd state */
#define set_kbd(vc, x) set_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
#define clr_kbd(vc, x) clr_vc_kbd_mode(kbd_table + (vc)->vc_num, (x))
#define is_kbd(vc, x) vc_kbd_mode(kbd_table + (vc)->vc_num, (x))

/* DEC private mode names mapped onto the keyboard mode bits */
#define decarm VC_REPEAT
#define decckm VC_CKMODE
#define kbdapplic VC_APPLIC
#define lnm VC_CRLF

/*
 * this is what the terminal answers to a ESC-Z or csi0c query.
 */
#define VT100ID "\033[?1;2c"
#define VT102ID "\033[?6c"
+
/* Map ANSI colour numbers (0..15) to EGA/VGA hardware attribute values;
 * the ANSI and hardware red/blue bit positions are swapped. */
unsigned char color_table[] = { 0, 4, 2, 6, 1, 5, 3, 7,
                                       8,12,10,14, 9,13,11,15 };

/* the default colour table, for VGA+ colour systems */
int default_red[] = {0x00,0xaa,0x00,0xaa,0x00,0xaa,0x00,0xaa,
    0x55,0xff,0x55,0xff,0x55,0xff,0x55,0xff};
int default_grn[] = {0x00,0x00,0xaa,0x55,0x00,0x00,0xaa,0xaa,
    0x55,0x55,0xff,0xff,0x55,0x55,0xff,0xff};
int default_blu[] = {0x00,0x00,0x00,0x00,0xaa,0xaa,0xaa,0xaa,
    0x55,0x55,0x55,0x55,0xff,0xff,0xff,0xff};

/* allow the default palette to be overridden/inspected via module params */
module_param_array(default_red, int, NULL, S_IRUGO | S_IWUSR);
module_param_array(default_grn, int, NULL, S_IRUGO | S_IWUSR);
module_param_array(default_blu, int, NULL, S_IRUGO | S_IWUSR);
+
+ /*
+ * gotoxy() must verify all boundaries, because the arguments
+ * might also be negative. If the given position is out of
+ * bounds, the cursor is placed at the nearest margin.
+ */
+ static void gotoxy(struct vc_data *vc, int new_x, int new_y)
+ {
+ int min_y, max_y;
+
+ if (new_x < 0)
+ vc->vc_x = 0;
+ else {
+ if (new_x >= vc->vc_cols)
+ vc->vc_x = vc->vc_cols - 1;
+ else
+ vc->vc_x = new_x;
+ }
+
+ if (vc->vc_decom) {
+ min_y = vc->vc_top;
+ max_y = vc->vc_bottom;
+ } else {
+ min_y = 0;
+ max_y = vc->vc_rows;
+ }
+ if (new_y < min_y)
+ vc->vc_y = min_y;
+ else if (new_y >= max_y)
+ vc->vc_y = max_y - 1;
+ else
+ vc->vc_y = new_y;
+ vc->vc_pos = vc->vc_origin + vc->vc_y * vc->vc_size_row + (vc->vc_x<<1);
+ vc->vc_need_wrap = 0;
+ }
+
+ /* for absolute user moves, when decom is set */
+ static void gotoxay(struct vc_data *vc, int new_x, int new_y)
+ {
+ gotoxy(vc, new_x, vc->vc_decom ? (vc->vc_top + new_y) : new_y);
+ }
+
+ void scrollback(struct vc_data *vc, int lines)
+ {
+ if (!lines)
+ lines = vc->vc_rows / 2;
+ scrolldelta(-lines);
+ }
+
+ void scrollfront(struct vc_data *vc, int lines)
+ {
+ if (!lines)
+ lines = vc->vc_rows / 2;
+ scrolldelta(lines);
+ }
+
/* Line feed: move the cursor down one line, scrolling the region up when
 * the cursor sits on the region's last line. */
static void lf(struct vc_data *vc)
{
	/* don't scroll if above bottom of scrolling region, or
	 * if below scrolling region
	 */
	if (vc->vc_y + 1 == vc->vc_bottom)
		scrup(vc, vc->vc_top, vc->vc_bottom, 1);
	else if (vc->vc_y < vc->vc_rows - 1) {
		vc->vc_y++;
		vc->vc_pos += vc->vc_size_row;
	}
	vc->vc_need_wrap = 0;
	notify_write(vc, '\n');
}
+
/* Reverse index: move the cursor up one line, scrolling the region down
 * when the cursor sits on the region's first line. */
static void ri(struct vc_data *vc)
{
	/* don't scroll if below top of scrolling region, or
	 * if above scrolling region
	 */
	if (vc->vc_y == vc->vc_top)
		scrdown(vc, vc->vc_top, vc->vc_bottom, 1);
	else if (vc->vc_y > 0) {
		vc->vc_y--;
		vc->vc_pos -= vc->vc_size_row;
	}
	vc->vc_need_wrap = 0;
}
+
/* Carriage return: move the cursor to column 0 of the current line. */
static inline void cr(struct vc_data *vc)
{
	vc->vc_pos -= vc->vc_x << 1;
	vc->vc_need_wrap = vc->vc_x = 0;
	notify_write(vc, '\r');
}
+
/* Backspace: move the cursor one column left, stopping at column 0. */
static inline void bs(struct vc_data *vc)
{
	if (vc->vc_x) {
		vc->vc_pos -= 2;
		vc->vc_x--;
		vc->vc_need_wrap = 0;
		notify_write(vc, '\b');
	}
}
+
/* DEL (0x7f) is deliberately a no-op on the console. */
static inline void del(struct vc_data *vc)
{
	/* ignored */
}
+
/* CSI J (ED): erase in display.  @vpar selects cursor-to-end (0),
 * start-to-cursor (1) or the whole screen (2).  The visible display is
 * cleared through the driver, the backing buffer via scr_memsetw(). */
static void csi_J(struct vc_data *vc, int vpar)
{
	unsigned int count;
	unsigned short * start;

	switch (vpar) {
		case 0:	/* erase from cursor to end of display */
			count = (vc->vc_scr_end - vc->vc_pos) >> 1;
			start = (unsigned short *)vc->vc_pos;
			if (DO_UPDATE(vc)) {
				/* do in two stages */
				vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
					      vc->vc_cols - vc->vc_x);
				vc->vc_sw->con_clear(vc, vc->vc_y + 1, 0,
					      vc->vc_rows - vc->vc_y - 1,
					      vc->vc_cols);
			}
			break;
		case 1:	/* erase from start to cursor */
			count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
			start = (unsigned short *)vc->vc_origin;
			if (DO_UPDATE(vc)) {
				/* do in two stages */
				vc->vc_sw->con_clear(vc, 0, 0, vc->vc_y,
					      vc->vc_cols);
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
					      vc->vc_x + 1);
			}
			break;
		case 2: /* erase whole display */
			count = vc->vc_cols * vc->vc_rows;
			start = (unsigned short *)vc->vc_origin;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, 0, 0,
					      vc->vc_rows,
					      vc->vc_cols);
			break;
		default:
			return;
	}
	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
	vc->vc_need_wrap = 0;
}
+
/* CSI K (EL): erase in line.  @vpar selects cursor-to-end (0),
 * start-to-cursor (1) or the whole line (2). */
static void csi_K(struct vc_data *vc, int vpar)
{
	unsigned int count;
	unsigned short * start;

	switch (vpar) {
		case 0:	/* erase from cursor to end of line */
			count = vc->vc_cols - vc->vc_x;
			start = (unsigned short *)vc->vc_pos;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1,
						     vc->vc_cols - vc->vc_x);
			break;
		case 1:	/* erase from start of line to cursor */
			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
			count = vc->vc_x + 1;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
						     vc->vc_x + 1);
			break;
		case 2: /* erase whole line */
			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
			count = vc->vc_cols;
			if (DO_UPDATE(vc))
				vc->vc_sw->con_clear(vc, vc->vc_y, 0, 1,
					      vc->vc_cols);
			break;
		default:
			return;
	}
	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
	vc->vc_need_wrap = 0;
}
+
+ static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */
+ { /* not vt100? */
+ int count;
+
+ if (!vpar)
+ vpar++;
+ count = (vpar > vc->vc_cols - vc->vc_x) ? (vc->vc_cols - vc->vc_x) : vpar;
+
+ scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count);
+ if (DO_UPDATE(vc))
+ vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1, count);
+ vc->vc_need_wrap = 0;
+ }
+
+ static void default_attr(struct vc_data *vc)
+ {
+ vc->vc_intensity = 1;
+ vc->vc_italic = 0;
+ vc->vc_underline = 0;
+ vc->vc_reverse = 0;
+ vc->vc_blink = 0;
+ vc->vc_color = vc->vc_def_color;
+ }
+
/* console_sem is held */
/* CSI m (SGR): apply each collected parameter as a rendition change,
 * then rebuild the packed attribute byte via update_attr(). */
static void csi_m(struct vc_data *vc)
{
	int i;

	for (i = 0; i <= vc->vc_npar; i++)
		switch (vc->vc_par[i]) {
			case 0:	/* all attributes off */
				default_attr(vc);
				break;
			case 1:	/* bold */
				vc->vc_intensity = 2;
				break;
			case 2:	/* dim */
				vc->vc_intensity = 0;
				break;
			case 3:
				vc->vc_italic = 1;
				break;
			case 4:
				vc->vc_underline = 1;
				break;
			case 5:
				vc->vc_blink = 1;
				break;
			case 7:
				vc->vc_reverse = 1;
				break;
			case 10: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select primary font, don't display
				  * control chars if defined, don't set
				  * bit 8 on output.
				  */
				vc->vc_translate = set_translate(vc->vc_charset == 0
						? vc->vc_G0_charset
						: vc->vc_G1_charset, vc);
				vc->vc_disp_ctrl = 0;
				vc->vc_toggle_meta = 0;
				break;
			case 11: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select first alternate font, lets
				  * chars < 32 be displayed as ROM chars.
				  */
				vc->vc_translate = set_translate(IBMPC_MAP, vc);
				vc->vc_disp_ctrl = 1;
				vc->vc_toggle_meta = 0;
				break;
			case 12: /* ANSI X3.64-1979 (SCO-ish?)
				  * Select second alternate font, toggle
				  * high bit before displaying as ROM char.
				  */
				vc->vc_translate = set_translate(IBMPC_MAP, vc);
				vc->vc_disp_ctrl = 1;
				vc->vc_toggle_meta = 1;
				break;
			case 21: /* historically treated the same as 22 here */
			case 22: /* normal intensity */
				vc->vc_intensity = 1;
				break;
			case 23:
				vc->vc_italic = 0;
				break;
			case 24:
				vc->vc_underline = 0;
				break;
			case 25:
				vc->vc_blink = 0;
				break;
			case 27:
				vc->vc_reverse = 0;
				break;
			case 38: /* ANSI X3.64-1979 (SCO-ish?)
				  * Enables underscore, white foreground
				  * with white underscore (Linux - use
				  * default foreground).
				  */
				vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
				vc->vc_underline = 1;
				break;
			case 39: /* ANSI X3.64-1979 (SCO-ish?)
				  * Disable underline option.
				  * Reset colour to default? It did this
				  * before...
				  */
				vc->vc_color = (vc->vc_def_color & 0x0f) | (vc->vc_color & 0xf0);
				vc->vc_underline = 0;
				break;
			case 49: /* default background colour */
				vc->vc_color = (vc->vc_def_color & 0xf0) | (vc->vc_color & 0x0f);
				break;
			default: /* 30-37: foreground, 40-47: background */
				if (vc->vc_par[i] >= 30 && vc->vc_par[i] <= 37)
					vc->vc_color = color_table[vc->vc_par[i] - 30]
						| (vc->vc_color & 0xf0);
				else if (vc->vc_par[i] >= 40 && vc->vc_par[i] <= 47)
					vc->vc_color = (color_table[vc->vc_par[i] - 40] << 4)
						| (vc->vc_color & 0x0f);
				break;
		}
	update_attr(vc);
}
+
/* Queue the NUL-terminated reply @p into the tty input stream and kick
 * the flip-buffer worker so the reader sees it. */
static void respond_string(const char *p, struct tty_struct *tty)
{
	for (; *p; p++)
		tty_insert_flip_char(tty, *p, 0);
	con_schedule_flip(tty);
}
+
+ static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
+ {
+ char buf[40];
+
+ sprintf(buf, "\033[%d;%dR", vc->vc_y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->vc_x + 1);
+ respond_string(buf, tty);
+ }
+
/* DSR reply: report "terminal ok" to a CSI 5 n status query. */
static inline void status_report(struct tty_struct *tty)
{
	respond_string("\033[0n", tty);	/* Terminal ok */
}
+
/* DA reply: we always identify ourselves as a VT102. */
static inline void respond_ID(struct tty_struct * tty)
{
	respond_string(VT102ID, tty);
}
+
/* Send an X10-style mouse report: ESC [ M followed by the button and
 * the 1-based coordinates, each offset-encoded into a single byte. */
void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry)
{
	char buf[8];

	buf[0] = '\033';
	buf[1] = '[';
	buf[2] = 'M';
	buf[3] = (char)(' ' + butt);
	buf[4] = (char)('!' + mrx);
	buf[5] = (char)('!' + mry);
	buf[6] = '\0';
	respond_string(buf, tty);
}
+
/* invoked via ioctl(TIOCLINUX) and through set_selection */
/* Returns the mouse-reporting mode of the foreground console
 * (0 = off, 1 = X10 mode, 2 = 1000 mode — see set_mode()). */
int mouse_reporting(void)
{
	return vc_cons[fg_console].d->vc_report_mouse;
}
+
/* console_sem is held */
/* CSI h / CSI l: set (@on_off non-zero) or reset terminal modes.  With a
 * leading '?' (vc_ques) the parameters are DEC private modes, otherwise
 * ANSI modes. */
static void set_mode(struct vc_data *vc, int on_off)
{
	int i;

	for (i = 0; i <= vc->vc_npar; i++)
		if (vc->vc_ques) {
			switch(vc->vc_par[i]) {	/* DEC private modes set/reset */
			case 1:			/* Cursor keys send ^[Ox/^[[x */
				if (on_off)
					set_kbd(vc, decckm);
				else
					clr_kbd(vc, decckm);
				break;
			case 3:	/* 80/132 mode switch unimplemented */
				vc->vc_deccolm = on_off;
#if 0
				vc_resize(deccolm ? 132 : 80, vc->vc_rows);
				/* this alone does not suffice; some user mode
				   utility has to change the hardware regs */
#endif
				break;
			case 5:			/* Inverted screen on/off */
				if (vc->vc_decscnm != on_off) {
					vc->vc_decscnm = on_off;
					invert_screen(vc, 0, vc->vc_screenbuf_size, 0);
					update_attr(vc);
				}
				break;
			case 6:			/* Origin relative/absolute */
				vc->vc_decom = on_off;
				gotoxay(vc, 0, 0);
				break;
			case 7:			/* Autowrap on/off */
				vc->vc_decawm = on_off;
				break;
			case 8:			/* Autorepeat on/off */
				if (on_off)
					set_kbd(vc, decarm);
				else
					clr_kbd(vc, decarm);
				break;
			case 9:			/* X10 mouse reporting on/off */
				vc->vc_report_mouse = on_off ? 1 : 0;
				break;
			case 25:		/* Cursor on/off */
				vc->vc_deccm = on_off;
				break;
			case 1000:		/* xterm-style mouse reporting */
				vc->vc_report_mouse = on_off ? 2 : 0;
				break;
			}
		} else {
			switch(vc->vc_par[i]) {	/* ANSI modes set/reset */
			case 3:			/* Monitor (display ctrls) */
				vc->vc_disp_ctrl = on_off;
				break;
			case 4:			/* Insert Mode on/off */
				vc->vc_decim = on_off;
				break;
			case 20:		/* Lf, Enter == CrLf/Lf */
				if (on_off)
					set_kbd(vc, lnm);
				else
					clr_kbd(vc, lnm);
				break;
			}
		}
}
+
/* console_sem is held */
/* ESC ] <n> ... : Linux-private "setterm" commands dispatched on the
 * first parameter; see console_codes(4). */
static void setterm_command(struct vc_data *vc)
{
	switch(vc->vc_par[0]) {
		case 1:	/* set color for underline mode */
			if (vc->vc_can_do_color &&
					vc->vc_par[1] < 16) {
				vc->vc_ulcolor = color_table[vc->vc_par[1]];
				if (vc->vc_underline)
					update_attr(vc);
			}
			break;
		case 2:	/* set color for half intensity mode */
			if (vc->vc_can_do_color &&
					vc->vc_par[1] < 16) {
				vc->vc_halfcolor = color_table[vc->vc_par[1]];
				if (vc->vc_intensity == 0)
					update_attr(vc);
			}
			break;
		case 8:	/* store colors as defaults */
			vc->vc_def_color = vc->vc_attr;
			if (vc->vc_hi_font_mask == 0x100)
				vc->vc_def_color >>= 1;
			default_attr(vc);
			update_attr(vc);
			break;
		case 9:	/* set blanking interval */
			/* parameter is in minutes, capped at 60; stored in seconds */
			blankinterval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60;
			poke_blanked_console();
			break;
		case 10: /* set bell frequency in Hz */
			if (vc->vc_npar >= 1)
				vc->vc_bell_pitch = vc->vc_par[1];
			else
				vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
			break;
		case 11: /* set bell duration in msec */
			if (vc->vc_npar >= 1)
				vc->vc_bell_duration = (vc->vc_par[1] < 2000) ?
					vc->vc_par[1] * HZ / 1000 : 0;
			else
				vc->vc_bell_duration = DEFAULT_BELL_DURATION;
			break;
		case 12: /* bring specified console to the front */
			if (vc->vc_par[1] >= 1 && vc_cons_allocated(vc->vc_par[1] - 1))
				set_console(vc->vc_par[1] - 1);
			break;
		case 13: /* unblank the screen */
			poke_blanked_console();
			break;
		case 14: /* set vesa powerdown interval */
			/* parameter is in minutes, capped at 60; stored in jiffies */
			vesa_off_interval = ((vc->vc_par[1] < 60) ? vc->vc_par[1] : 60) * 60 * HZ;
			break;
		case 15: /* activate the previous console */
			set_console(last_console);
			break;
	}
}
+
+ /* console_sem is held */
+ static void csi_at(struct vc_data *vc, unsigned int nr)
+ {
+ if (nr > vc->vc_cols - vc->vc_x)
+ nr = vc->vc_cols - vc->vc_x;
+ else if (!nr)
+ nr = 1;
+ insert_char(vc, nr);
+ }
+
+ /* console_sem is held */
+ static void csi_L(struct vc_data *vc, unsigned int nr)
+ {
+ if (nr > vc->vc_rows - vc->vc_y)
+ nr = vc->vc_rows - vc->vc_y;
+ else if (!nr)
+ nr = 1;
+ scrdown(vc, vc->vc_y, vc->vc_bottom, nr);
+ vc->vc_need_wrap = 0;
+ }
+
+ /* console_sem is held */
+ static void csi_P(struct vc_data *vc, unsigned int nr)
+ {
+ if (nr > vc->vc_cols - vc->vc_x)
+ nr = vc->vc_cols - vc->vc_x;
+ else if (!nr)
+ nr = 1;
+ delete_char(vc, nr);
+ }
+
+ /* console_sem is held */
+ static void csi_M(struct vc_data *vc, unsigned int nr)
+ {
+ if (nr > vc->vc_rows - vc->vc_y)
+ nr = vc->vc_rows - vc->vc_y;
+ else if (!nr)
+ nr=1;
+ scrup(vc, vc->vc_y, vc->vc_bottom, nr);
+ vc->vc_need_wrap = 0;
+ }
+
/* console_sem is held (except via vc_init()->reset_terminal()) */
/* DECSC: save cursor position, rendition attributes and charset state
 * so a later restore_cur() (DECRC) can bring them back. */
static void save_cur(struct vc_data *vc)
{
	vc->vc_saved_x		= vc->vc_x;
	vc->vc_saved_y		= vc->vc_y;
	vc->vc_s_intensity	= vc->vc_intensity;
	vc->vc_s_italic         = vc->vc_italic;
	vc->vc_s_underline	= vc->vc_underline;
	vc->vc_s_blink		= vc->vc_blink;
	vc->vc_s_reverse	= vc->vc_reverse;
	vc->vc_s_charset	= vc->vc_charset;
	vc->vc_s_color		= vc->vc_color;
	vc->vc_saved_G0		= vc->vc_G0_charset;
	vc->vc_saved_G1		= vc->vc_G1_charset;
}
+
/* console_sem is held */
/* DECRC: restore the state captured by save_cur() and rebuild the
 * translation table and attribute byte from it. */
static void restore_cur(struct vc_data *vc)
{
	gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y);
	vc->vc_intensity	= vc->vc_s_intensity;
	vc->vc_italic		= vc->vc_s_italic;
	vc->vc_underline	= vc->vc_s_underline;
	vc->vc_blink		= vc->vc_s_blink;
	vc->vc_reverse		= vc->vc_s_reverse;
	vc->vc_charset		= vc->vc_s_charset;
	vc->vc_color		= vc->vc_s_color;
	vc->vc_G0_charset	= vc->vc_saved_G0;
	vc->vc_G1_charset	= vc->vc_saved_G1;
	vc->vc_translate	= set_translate(vc->vc_charset ? vc->vc_G1_charset : vc->vc_G0_charset, vc);
	update_attr(vc);
	vc->vc_need_wrap = 0;
}
+
/* States of the escape-sequence parser in do_con_trol() */
enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey,
	EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd,
	ESpalette };
+
/* console_sem is held (except via vc_init()) */
/* RIS (ESC c) / initial setup: put the terminal emulation, keyboard
 * state and tab stops back to power-on defaults; optionally clear the
 * screen (@do_clear). */
static void reset_terminal(struct vc_data *vc, int do_clear)
{
	vc->vc_top		= 0;
	vc->vc_bottom		= vc->vc_rows;
	vc->vc_state		= ESnormal;
	vc->vc_ques		= 0;
	vc->vc_translate	= set_translate(LAT1_MAP, vc);
	vc->vc_G0_charset	= LAT1_MAP;
	vc->vc_G1_charset	= GRAF_MAP;
	vc->vc_charset		= 0;
	vc->vc_need_wrap	= 0;
	vc->vc_report_mouse	= 0;
	vc->vc_utf              = default_utf8;
	vc->vc_utf_count	= 0;

	vc->vc_disp_ctrl	= 0;
	vc->vc_toggle_meta	= 0;

	vc->vc_decscnm		= 0;
	vc->vc_decom		= 0;
	vc->vc_decawm		= 1;
	vc->vc_deccm		= global_cursor_default;
	vc->vc_decim		= 0;

	set_kbd(vc, decarm);
	clr_kbd(vc, decckm);
	clr_kbd(vc, kbdapplic);
	clr_kbd(vc, lnm);
	kbd_table[vc->vc_num].lockstate = 0;
	kbd_table[vc->vc_num].slockstate = 0;
	kbd_table[vc->vc_num].ledmode = LED_SHOW_FLAGS;
	kbd_table[vc->vc_num].ledflagstate = kbd_table[vc->vc_num].default_ledflagstate;
	/* do not do set_leds here because this causes an endless tasklet loop
	   when the keyboard hasn't been initialized yet */

	vc->vc_cursor_type = cur_default;
	vc->vc_complement_mask = vc->vc_s_complement_mask;

	default_attr(vc);
	update_attr(vc);

	/* default tab stops every 8 columns (bitmap, one bit per column) */
	vc->vc_tab_stop[0]	= 0x01010100;
	vc->vc_tab_stop[1]	=
	vc->vc_tab_stop[2]	=
	vc->vc_tab_stop[3]	=
	vc->vc_tab_stop[4]	=
	vc->vc_tab_stop[5]	=
	vc->vc_tab_stop[6]	=
	vc->vc_tab_stop[7]	= 0x01010101;

	vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
	vc->vc_bell_duration = DEFAULT_BELL_DURATION;

	gotoxy(vc, 0, 0);
	save_cur(vc);
	if (do_clear)
	    csi_J(vc, 2);
}
+
/* console_sem is held */
/* The VT102/console_codes(4) emulation state machine.  Each byte @c is
 * first checked for control characters (which act even in the middle of
 * an escape sequence), then handled according to vc->vc_state. */
static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
{
	/*
	 *  Control characters can be used in the _middle_
	 *  of an escape sequence.
	 */
	switch (c) {
	case 0:
		return;
	case 7:		/* BEL: ring the bell */
		if (vc->vc_bell_duration)
			kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration);
		return;
	case 8:		/* BS */
		bs(vc);
		return;
	case 9:		/* HT: advance to next tab stop (or last column) */
		vc->vc_pos -= (vc->vc_x << 1);
		while (vc->vc_x < vc->vc_cols - 1) {
			vc->vc_x++;
			if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31)))
				break;
		}
		vc->vc_pos += (vc->vc_x << 1);
		notify_write(vc, '\t');
		return;
	case 10: case 11: case 12:	/* LF, VT, FF */
		lf(vc);
		if (!is_kbd(vc, lnm))
			return;
		/* fall through: LNM mode makes newline imply CR as well */
	case 13:	/* CR */
		cr(vc);
		return;
	case 14:	/* SO: shift to G1 charset */
		vc->vc_charset = 1;
		vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
		vc->vc_disp_ctrl = 1;
		return;
	case 15:	/* SI: shift back to G0 charset */
		vc->vc_charset = 0;
		vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
		vc->vc_disp_ctrl = 0;
		return;
	case 24: case 26:	/* CAN, SUB: abort any escape sequence */
		vc->vc_state = ESnormal;
		return;
	case 27:	/* ESC */
		vc->vc_state = ESesc;
		return;
	case 127:	/* DEL (ignored) */
		del(vc);
		return;
	case 128+27:	/* 8-bit CSI (0x9b) */
		vc->vc_state = ESsquare;
		return;
	}
	switch(vc->vc_state) {
	case ESesc:
		vc->vc_state = ESnormal;
		switch (c) {
		case '[':
			vc->vc_state = ESsquare;
			return;
		case ']':
			vc->vc_state = ESnonstd;
			return;
		case '%':
			vc->vc_state = ESpercent;
			return;
		case 'E':	/* NEL: next line */
			cr(vc);
			lf(vc);
			return;
		case 'M':	/* RI: reverse index */
			ri(vc);
			return;
		case 'D':	/* IND: index (line feed) */
			lf(vc);
			return;
		case 'H':	/* HTS: set tab stop at current column */
			vc->vc_tab_stop[vc->vc_x >> 5] |= (1 << (vc->vc_x & 31));
			return;
		case 'Z':	/* DECID: identify terminal */
			respond_ID(tty);
			return;
		case '7':	/* DECSC: save cursor */
			save_cur(vc);
			return;
		case '8':	/* DECRC: restore cursor */
			restore_cur(vc);
			return;
		case '(':
			vc->vc_state = ESsetG0;
			return;
		case ')':
			vc->vc_state = ESsetG1;
			return;
		case '#':
			vc->vc_state = EShash;
			return;
		case 'c':	/* RIS: full reset */
			reset_terminal(vc, 1);
			return;
		case '>':  /* Numeric keypad */
			clr_kbd(vc, kbdapplic);
			return;
		case '=':  /* Appl. keypad */
			set_kbd(vc, kbdapplic);
			return;
		}
		return;
	case ESnonstd:
		if (c=='P') {   /* palette escape sequence */
			for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
				vc->vc_par[vc->vc_npar] = 0;
			vc->vc_npar = 0;
			vc->vc_state = ESpalette;
			return;
		} else if (c=='R') {   /* reset palette */
			reset_palette(vc);
			vc->vc_state = ESnormal;
		} else
			vc->vc_state = ESnormal;
		return;
	case ESpalette:
		/* seven hex digits: index plus RGB, two digits per channel */
		if (isxdigit(c)) {
			vc->vc_par[vc->vc_npar++] = hex_to_bin(c);
			if (vc->vc_npar == 7) {
				int i = vc->vc_par[0] * 3, j = 1;
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i++] += vc->vc_par[j++];
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i++] += vc->vc_par[j++];
				vc->vc_palette[i] = 16 * vc->vc_par[j++];
				vc->vc_palette[i] += vc->vc_par[j];
				set_palette(vc);
				vc->vc_state = ESnormal;
			}
		} else
			vc->vc_state = ESnormal;
		return;
	case ESsquare:
		for (vc->vc_npar = 0; vc->vc_npar < NPAR; vc->vc_npar++)
			vc->vc_par[vc->vc_npar] = 0;
		vc->vc_npar = 0;
		vc->vc_state = ESgetpars;
		if (c == '[') { /* Function key */
			vc->vc_state=ESfunckey;
			return;
		}
		vc->vc_ques = (c == '?');
		if (vc->vc_ques)
			return;
		/* fall through: non-'?' characters start parameter parsing */
	case ESgetpars:
		if (c == ';' && vc->vc_npar < NPAR - 1) {
			vc->vc_npar++;
			return;
		} else if (c>='0' && c<='9') {
			vc->vc_par[vc->vc_npar] *= 10;
			vc->vc_par[vc->vc_npar] += c - '0';
			return;
		} else
			vc->vc_state = ESgotpars;
		/* fall through: the final byte of the CSI sequence arrived */
	case ESgotpars:
		vc->vc_state = ESnormal;
		switch(c) {
		case 'h':	/* SM: set mode */
			set_mode(vc, 1);
			return;
		case 'l':	/* RM: reset mode */
			set_mode(vc, 0);
			return;
		case 'c':	/* CSI ? c: set cursor shape (Linux private) */
			if (vc->vc_ques) {
				if (vc->vc_par[0])
					vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16);
				else
					vc->vc_cursor_type = cur_default;
				return;
			}
			break;
		case 'm':	/* CSI ? m: set complement mask (Linux private) */
			if (vc->vc_ques) {
				clear_selection();
				if (vc->vc_par[0])
					vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1];
				else
					vc->vc_complement_mask = vc->vc_s_complement_mask;
				return;
			}
			break;
		case 'n':	/* DSR: device status / cursor position report */
			if (!vc->vc_ques) {
				if (vc->vc_par[0] == 5)
					status_report(tty);
				else if (vc->vc_par[0] == 6)
					cursor_report(vc, tty);
			}
			return;
		}
		if (vc->vc_ques) {
			vc->vc_ques = 0;
			return;
		}
		switch(c) {
		case 'G': case '`':	/* CHA/HPA: cursor to column */
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			gotoxy(vc, vc->vc_par[0], vc->vc_y);
			return;
		case 'A':	/* CUU: cursor up */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x, vc->vc_y - vc->vc_par[0]);
			return;
		case 'B': case 'e':	/* CUD/VPR: cursor down */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x, vc->vc_y + vc->vc_par[0]);
			return;
		case 'C': case 'a':	/* CUF/HPR: cursor right */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x + vc->vc_par[0], vc->vc_y);
			return;
		case 'D':	/* CUB: cursor left */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, vc->vc_x - vc->vc_par[0], vc->vc_y);
			return;
		case 'E':	/* CNL: cursor to start of line, n down */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, 0, vc->vc_y + vc->vc_par[0]);
			return;
		case 'F':	/* CPL: cursor to start of line, n up */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			gotoxy(vc, 0, vc->vc_y - vc->vc_par[0]);
			return;
		case 'd':	/* VPA: cursor to row */
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			gotoxay(vc, vc->vc_x ,vc->vc_par[0]);
			return;
		case 'H': case 'f':	/* CUP/HVP: cursor to row;col */
			if (vc->vc_par[0])
				vc->vc_par[0]--;
			if (vc->vc_par[1])
				vc->vc_par[1]--;
			gotoxay(vc, vc->vc_par[1], vc->vc_par[0]);
			return;
		case 'J':
			csi_J(vc, vc->vc_par[0]);
			return;
		case 'K':
			csi_K(vc, vc->vc_par[0]);
			return;
		case 'L':
			csi_L(vc, vc->vc_par[0]);
			return;
		case 'M':
			csi_M(vc, vc->vc_par[0]);
			return;
		case 'P':
			csi_P(vc, vc->vc_par[0]);
			return;
		case 'c':	/* DA: device attributes */
			if (!vc->vc_par[0])
				respond_ID(tty);
			return;
		case 'g':	/* TBC: clear tab stop(s) */
			if (!vc->vc_par[0])
				vc->vc_tab_stop[vc->vc_x >> 5] &= ~(1 << (vc->vc_x & 31));
			else if (vc->vc_par[0] == 3) {
				vc->vc_tab_stop[0] =
					vc->vc_tab_stop[1] =
					vc->vc_tab_stop[2] =
					vc->vc_tab_stop[3] =
					vc->vc_tab_stop[4] =
					vc->vc_tab_stop[5] =
					vc->vc_tab_stop[6] =
					vc->vc_tab_stop[7] = 0;
			}
			return;
		case 'm':	/* SGR: select graphic rendition */
			csi_m(vc);
			return;
		case 'q': /* DECLL - but only 3 leds */
			/* map 0,1,2,3 to 0,1,2,4 */
			if (vc->vc_par[0] < 4)
				setledstate(kbd_table + vc->vc_num,
					    (vc->vc_par[0] < 3) ? vc->vc_par[0] : 4);
			return;
		case 'r':	/* DECSTBM: set scrolling region */
			if (!vc->vc_par[0])
				vc->vc_par[0]++;
			if (!vc->vc_par[1])
				vc->vc_par[1] = vc->vc_rows;
			/* Minimum allowed region is 2 lines */
			if (vc->vc_par[0] < vc->vc_par[1] &&
			    vc->vc_par[1] <= vc->vc_rows) {
				vc->vc_top = vc->vc_par[0] - 1;
				vc->vc_bottom = vc->vc_par[1];
				gotoxay(vc, 0, 0);
			}
			return;
		case 's':	/* save cursor (ANSI.SYS style) */
			save_cur(vc);
			return;
		case 'u':	/* restore cursor */
			restore_cur(vc);
			return;
		case 'X':
			csi_X(vc, vc->vc_par[0]);
			return;
		case '@':
			csi_at(vc, vc->vc_par[0]);
			return;
		case ']': /* setterm functions */
			setterm_command(vc);
			return;
		}
		return;
	case ESpercent:		/* UTF-8 mode selection */
		vc->vc_state = ESnormal;
		switch (c) {
		case '@':  /* defined in ISO 2022 */
			vc->vc_utf = 0;
			return;
		case 'G':  /* prelim official escape code */
		case '8':  /* retained for compatibility */
			vc->vc_utf = 1;
			return;
		}
		return;
	case ESfunckey:
		vc->vc_state = ESnormal;
		return;
	case EShash:
		vc->vc_state = ESnormal;
		if (c == '8') {
			/* DEC screen alignment test. kludge :-) */
			vc->vc_video_erase_char =
				(vc->vc_video_erase_char & 0xff00) | 'E';
			csi_J(vc, 2);
			vc->vc_video_erase_char =
				(vc->vc_video_erase_char & 0xff00) | ' ';
			do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
		}
		return;
	case ESsetG0:		/* SCS: designate G0 character set */
		if (c == '0')
			vc->vc_G0_charset = GRAF_MAP;
		else if (c == 'B')
			vc->vc_G0_charset = LAT1_MAP;
		else if (c == 'U')
			vc->vc_G0_charset = IBMPC_MAP;
		else if (c == 'K')
			vc->vc_G0_charset = USER_MAP;
		if (vc->vc_charset == 0)
			vc->vc_translate = set_translate(vc->vc_G0_charset, vc);
		vc->vc_state = ESnormal;
		return;
	case ESsetG1:		/* SCS: designate G1 character set */
		if (c == '0')
			vc->vc_G1_charset = GRAF_MAP;
		else if (c == 'B')
			vc->vc_G1_charset = LAT1_MAP;
		else if (c == 'U')
			vc->vc_G1_charset = IBMPC_MAP;
		else if (c == 'K')
			vc->vc_G1_charset = USER_MAP;
		if (vc->vc_charset == 1)
			vc->vc_translate = set_translate(vc->vc_G1_charset, vc);
		vc->vc_state = ESnormal;
		return;
	default:
		vc->vc_state = ESnormal;
	}
}
+
/* This is a temporary buffer used to prepare a tty console write
 * so that we can easily avoid touching user space while holding the
 * console spinlock. It is allocated in con_init and is shared by
 * this code and the vc_screen read/write tty calls.
 *
 * We have to allocate this statically in the kernel data section
 * since console_init (and thus con_init) are called before any
 * kernel memory allocation is available.
 */
char con_buf[CON_BUF_SIZE];
/* Serializes all users of con_buf. */
DEFINE_MUTEX(con_buf_mtx);
+
/* is_double_width() is based on the wcwidth() implementation by
 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
 */
struct interval {
	uint32_t first;
	uint32_t last;
};

/*
 * Binary search for @ucs in @table, which holds sorted, non-overlapping
 * [first, last] ranges; @max is the index of the last entry.
 * Returns 1 if @ucs falls inside any range, 0 otherwise.
 */
static int bisearch(uint32_t ucs, const struct interval *table, int max)
{
	int lo = 0;
	int hi = max;

	/* Fast reject: outside the whole table. */
	if (ucs < table[0].first || ucs > table[max].last)
		return 0;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (ucs > table[mid].last)
			lo = mid + 1;
		else if (ucs < table[mid].first)
			hi = mid - 1;
		else
			return 1;
	}

	return 0;
}

/*
 * Returns 1 when the code point @ucs occupies two display cells
 * (CJK and other East Asian wide/fullwidth ranges), 0 otherwise.
 */
static int is_double_width(uint32_t ucs)
{
	static const struct interval wide[] = {
		{ 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E },
		{ 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF },
		{ 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 },
		{ 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }
	};

	return bisearch(ucs, wide, sizeof(wide) / sizeof(wide[0]) - 1);
}
+
+ /* acquires console_sem */
+ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
+ {
+ #ifdef VT_BUF_VRAM_ONLY
+ #define FLUSH do { } while(0);
+ #else
+ #define FLUSH if (draw_x >= 0) { \
+ vc->vc_sw->con_putcs(vc, (u16 *)draw_from, (u16 *)draw_to - (u16 *)draw_from, vc->vc_y, draw_x); \
+ draw_x = -1; \
+ }
+ #endif
+
+ int c, tc, ok, n = 0, draw_x = -1;
+ unsigned int currcons;
+ unsigned long draw_from = 0, draw_to = 0;
+ struct vc_data *vc;
+ unsigned char vc_attr;
+ struct vt_notifier_param param;
+ uint8_t rescan;
+ uint8_t inverse;
+ uint8_t width;
+ u16 himask, charmask;
+
+ if (in_interrupt())
+ return count;
+
+ might_sleep();
+
+ acquire_console_sem();
+ vc = tty->driver_data;
+ if (vc == NULL) {
+ printk(KERN_ERR "vt: argh, driver_data is NULL !\n");
+ release_console_sem();
+ return 0;
+ }
+
+ currcons = vc->vc_num;
+ if (!vc_cons_allocated(currcons)) {
+ /* could this happen? */
+ printk_once("con_write: tty %d not allocated\n", currcons+1);
+ release_console_sem();
+ return 0;
+ }
+
+ himask = vc->vc_hi_font_mask;
+ charmask = himask ? 0x1ff : 0xff;
+
+ /* undraw cursor first */
+ if (IS_FG(vc))
+ hide_cursor(vc);
+
+ param.vc = vc;
+
+ while (!tty->stopped && count) {
+ int orig = *buf;
+ c = orig;
+ buf++;
+ n++;
+ count--;
+ rescan = 0;
+ inverse = 0;
+ width = 1;
+
+ /* Do no translation at all in control states */
+ if (vc->vc_state != ESnormal) {
+ tc = c;
+ } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
+ /* Combine UTF-8 into Unicode in vc_utf_char.
+ * vc_utf_count is the number of continuation bytes still
+ * expected to arrive.
+ * vc_npar is the number of continuation bytes arrived so
+ * far
+ */
+ rescan_last_byte:
+ if ((c & 0xc0) == 0x80) {
+ /* Continuation byte received */
+ static const uint32_t utf8_length_changes[] = { 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff };
+ if (vc->vc_utf_count) {
+ vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
+ vc->vc_npar++;
+ if (--vc->vc_utf_count) {
+ /* Still need some bytes */
+ continue;
+ }
+ /* Got a whole character */
+ c = vc->vc_utf_char;
+ /* Reject overlong sequences */
+ if (c <= utf8_length_changes[vc->vc_npar - 1] ||
+ c > utf8_length_changes[vc->vc_npar])
+ c = 0xfffd;
+ } else {
+ /* Unexpected continuation byte */
+ vc->vc_utf_count = 0;
+ c = 0xfffd;
+ }
+ } else {
+ /* Single ASCII byte or first byte of a sequence received */
+ if (vc->vc_utf_count) {
+ /* Continuation byte expected */
+ rescan = 1;
+ vc->vc_utf_count = 0;
+ c = 0xfffd;
+ } else if (c > 0x7f) {
+ /* First byte of a multibyte sequence received */
+ vc->vc_npar = 0;
+ if ((c & 0xe0) == 0xc0) {
+ vc->vc_utf_count = 1;
+ vc->vc_utf_char = (c & 0x1f);
+ } else if ((c & 0xf0) == 0xe0) {
+ vc->vc_utf_count = 2;
+ vc->vc_utf_char = (c & 0x0f);
+ } else if ((c & 0xf8) == 0xf0) {
+ vc->vc_utf_count = 3;
+ vc->vc_utf_char = (c & 0x07);
+ } else if ((c & 0xfc) == 0xf8) {
+ vc->vc_utf_count = 4;
+ vc->vc_utf_char = (c & 0x03);
+ } else if ((c & 0xfe) == 0xfc) {
+ vc->vc_utf_count = 5;
+ vc->vc_utf_char = (c & 0x01);
+ } else {
+ /* 254 and 255 are invalid */
+ c = 0xfffd;
+ }
+ if (vc->vc_utf_count) {
+ /* Still need some bytes */
+ continue;
+ }
+ }
+ /* Nothing to do if an ASCII byte was received */
+ }
+ /* End of UTF-8 decoding. */
+ /* c is the received character, or U+FFFD for invalid sequences. */
+ /* Replace invalid Unicode code points with U+FFFD too */
+ if ((c >= 0xd800 && c <= 0xdfff) || c == 0xfffe || c == 0xffff)
+ c = 0xfffd;
+ tc = c;
+ } else { /* no utf or alternate charset mode */
+ tc = vc_translate(vc, c);
+ }
+
+ param.c = tc;
+ if (atomic_notifier_call_chain(&vt_notifier_list, VT_PREWRITE,
+ ¶m) == NOTIFY_STOP)
+ continue;
+
+ /* If the original code was a control character we
+ * only allow a glyph to be displayed if the code is
+ * not normally used (such as for cursor movement) or
+ * if the disp_ctrl mode has been explicitly enabled.
+ * Certain characters (as given by the CTRL_ALWAYS
+ * bitmap) are always displayed as control characters,
+ * as the console would be pretty useless without
+ * them; to display an arbitrary font position use the
+ * direct-to-font zone in UTF-8 mode.
+ */
+ ok = tc && (c >= 32 ||
+ !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
+ vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
+ && (c != 127 || vc->vc_disp_ctrl)
+ && (c != 128+27);
+
+ if (vc->vc_state == ESnormal && ok) {
+ if (vc->vc_utf && !vc->vc_disp_ctrl) {
+ if (is_double_width(c))
+ width = 2;
+ }
+ /* Now try to find out how to display it */
+ tc = conv_uni_to_pc(vc, tc);
+ if (tc & ~charmask) {
+ if (tc == -1 || tc == -2) {
+ continue; /* nothing to display */
+ }
+ /* Glyph not found */
+ if ((!(vc->vc_utf && !vc->vc_disp_ctrl) || c < 128) && !(c & ~charmask)) {
+ /* In legacy mode use the glyph we get by a 1:1 mapping.
+ This would make absolutely no sense with Unicode in mind,
+ but do this for ASCII characters since a font may lack
+ Unicode mapping info and we don't want to end up with
+ having question marks only. */
+ tc = c;
+ } else {
+ /* Display U+FFFD. If it's not found, display an inverse question mark. */
+ tc = conv_uni_to_pc(vc, 0xfffd);
+ if (tc < 0) {
+ inverse = 1;
+ tc = conv_uni_to_pc(vc, '?');
+ if (tc < 0) tc = '?';
+ }
+ }
+ }
+
+ if (!inverse) {
+ vc_attr = vc->vc_attr;
+ } else {
+ /* invert vc_attr */
+ if (!vc->vc_can_do_color) {
+ vc_attr = (vc->vc_attr) ^ 0x08;
+ } else if (vc->vc_hi_font_mask == 0x100) {
+ vc_attr = ((vc->vc_attr) & 0x11) | (((vc->vc_attr) & 0xe0) >> 4) | (((vc->vc_attr) & 0x0e) << 4);
+ } else {
+ vc_attr = ((vc->vc_attr) & 0x88) | (((vc->vc_attr) & 0x70) >> 4) | (((vc->vc_attr) & 0x07) << 4);
+ }
+ FLUSH
+ }
+
+ while (1) {
+ if (vc->vc_need_wrap || vc->vc_decim)
+ FLUSH
+ if (vc->vc_need_wrap) {
+ cr(vc);
+ lf(vc);
+ }
+ if (vc->vc_decim)
+ insert_char(vc, 1);
+ scr_writew(himask ?
+ ((vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
+ (vc_attr << 8) + tc,
+ (u16 *) vc->vc_pos);
+ if (DO_UPDATE(vc) && draw_x < 0) {
+ draw_x = vc->vc_x;
+ draw_from = vc->vc_pos;
+ }
+ if (vc->vc_x == vc->vc_cols - 1) {
+ vc->vc_need_wrap = vc->vc_decawm;
+ draw_to = vc->vc_pos + 2;
+ } else {
+ vc->vc_x++;
+ draw_to = (vc->vc_pos += 2);
+ }
+
+ if (!--width) break;
+
+ tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
+ if (tc < 0) tc = ' ';
+ }
+ notify_write(vc, c);
+
+ if (inverse) {
+ FLUSH
+ }
+
+ if (rescan) {
+ rescan = 0;
+ inverse = 0;
+ width = 1;
+ c = orig;
+ goto rescan_last_byte;
+ }
+ continue;
+ }
+ FLUSH
+ do_con_trol(tty, vc, orig);
+ }
+ FLUSH
+ console_conditional_schedule();
+ release_console_sem();
+ notify_update(vc);
+ return n;
+ #undef FLUSH
+ }
+
+ /*
+ * This is the console switching callback.
+ *
+ * Doing console switching in a process context allows
+ * us to do the switches asynchronously (needed when we want
+ * to switch due to a keyboard interrupt). Synchronization
+ * with other console code and prevention of re-entrancy is
+ * ensured with console_sem.
+ */
+ static void console_callback(struct work_struct *ignored)
+ {
+ acquire_console_sem();
+
+ if (want_console >= 0) {
+ if (want_console != fg_console &&
+ vc_cons_allocated(want_console)) {
+ hide_cursor(vc_cons[fg_console].d);
+ change_console(vc_cons[want_console].d);
+ /* we only changed when the console had already
+ been allocated - a new console is not created
+ in an interrupt routine */
+ }
+ want_console = -1;
+ }
+ if (do_poke_blanked_console) { /* do not unblank for a LED change */
+ do_poke_blanked_console = 0;
+ poke_blanked_console();
+ }
+ if (scrollback_delta) {
+ struct vc_data *vc = vc_cons[fg_console].d;
+ clear_selection();
+ if (vc->vc_mode == KD_TEXT)
+ vc->vc_sw->con_scrolldelta(vc, scrollback_delta);
+ scrollback_delta = 0;
+ }
+ if (blank_timer_expired) {
+ do_blank_screen(0);
+ blank_timer_expired = 0;
+ }
+ notify_update(vc_cons[fg_console].d);
+
+ release_console_sem();
+ }
+
+ int set_console(int nr)
+ {
+ struct vc_data *vc = vc_cons[fg_console].d;
+
+ if (!vc_cons_allocated(nr) || vt_dont_switch ||
+ (vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) {
+
+ /*
+ * Console switch will fail in console_callback() or
+ * change_console() so there is no point scheduling
+ * the callback
+ *
+ * Existing set_console() users don't check the return
+ * value so this shouldn't break anything
+ */
+ return -EINVAL;
+ }
+
+ want_console = nr;
+ schedule_console_callback();
+
+ return 0;
+ }
+
+ struct tty_driver *console_driver;
+
+ #ifdef CONFIG_VT_CONSOLE
+
+ /**
+ * vt_kmsg_redirect() - Sets/gets the kernel message console
+ * @new: The new virtual terminal number or -1 if the console should stay
+ * unchanged
+ *
+ * By default, the kernel messages are always printed on the current virtual
+ * console. However, the user may modify that default with the
+ * TIOCL_SETKMSGREDIRECT ioctl call.
+ *
+ * This function sets the kernel message console to be @new. It returns the old
+ * virtual console number. The virtual terminal number 0 (both as parameter and
+ * return value) means no redirection (i.e. always printed on the currently
+ * active console).
+ *
+ * The parameter -1 means that only the current console is returned, but the
+ * value is not modified. You may use the macro vt_get_kmsg_redirect() in that
+ * case to make the code more understandable.
+ *
+ * When the kernel is compiled without CONFIG_VT_CONSOLE, this function ignores
+ * the parameter and always returns 0.
+ */
int vt_kmsg_redirect(int new)
{
	/* 0 means "no redirection"; guarded by xchg for atomic swap. */
	static int kmsg_con;

	if (new == -1)
		return kmsg_con;

	/* Install the new target and hand back the previous one. */
	return xchg(&kmsg_con, new);
}
+
+ /*
+ * Console on virtual terminal
+ *
+ * The console must be locked when we get here.
+ */
+
+ static void vt_console_print(struct console *co, const char *b, unsigned count)
+ {
+ struct vc_data *vc = vc_cons[fg_console].d;
+ unsigned char c;
+ static DEFINE_SPINLOCK(printing_lock);
+ const ushort *start;
+ ushort cnt = 0;
+ ushort myx;
+ int kmsg_console;
+
+ /* console busy or not yet initialized */
+ if (!printable)
+ return;
+ if (!spin_trylock(&printing_lock))
+ return;
+
+ kmsg_console = vt_get_kmsg_redirect();
+ if (kmsg_console && vc_cons_allocated(kmsg_console - 1))
+ vc = vc_cons[kmsg_console - 1].d;
+
+ /* read `x' only after setting currcons properly (otherwise
+ the `x' macro will read the x of the foreground console). */
+ myx = vc->vc_x;
+
+ if (!vc_cons_allocated(fg_console)) {
+ /* impossible */
+ /* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */
+ goto quit;
+ }
+
+ if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+ goto quit;
+
+ /* undraw cursor first */
+ if (IS_FG(vc))
+ hide_cursor(vc);
+
+ start = (ushort *)vc->vc_pos;
+
+ /* Contrived structure to try to emulate original need_wrap behaviour
+ * Problems caused when we have need_wrap set on '\n' character */
+ while (count--) {
+ c = *b++;
+ if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) {
+ if (cnt > 0) {
+ if (CON_IS_VISIBLE(vc))
+ vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
+ vc->vc_x += cnt;
+ if (vc->vc_need_wrap)
+ vc->vc_x--;
+ cnt = 0;
+ }
+ if (c == 8) { /* backspace */
+ bs(vc);
+ start = (ushort *)vc->vc_pos;
+ myx = vc->vc_x;
+ continue;
+ }
+ if (c != 13)
+ lf(vc);
+ cr(vc);
+ start = (ushort *)vc->vc_pos;
+ myx = vc->vc_x;
+ if (c == 10 || c == 13)
+ continue;
+ }
+ scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
+ notify_write(vc, c);
+ cnt++;
+ if (myx == vc->vc_cols - 1) {
+ vc->vc_need_wrap = 1;
+ continue;
+ }
+ vc->vc_pos += 2;
+ myx++;
+ }
+ if (cnt > 0) {
+ if (CON_IS_VISIBLE(vc))
+ vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x);
+ vc->vc_x += cnt;
+ if (vc->vc_x == vc->vc_cols) {
+ vc->vc_x--;
+ vc->vc_need_wrap = 1;
+ }
+ }
+ set_cursor(vc);
+ notify_update(vc);
+
+ quit:
+ spin_unlock(&printing_lock);
+ }
+
+ static struct tty_driver *vt_console_device(struct console *c, int *index)
+ {
+ *index = c->index ? c->index-1 : fg_console;
+ return console_driver;
+ }
+
/* The "tty" console registered with the printk machinery (register_console
 * in con_init); index -1 lets the core pick/parse the preferred console. */
static struct console vt_console_driver = {
	.name		= "tty",
	.write		= vt_console_print,
	.device		= vt_console_device,
	.unblank	= unblank_screen,
	.flags		= CON_PRINTBUFFER,
	.index		= -1,
};
+ #endif
+
+ /*
+ * Handling of Linux-specific VC ioctls
+ */
+
+ /*
+ * Generally a bit racy with respect to console_sem().
+ *
+ * There are some functions which don't need it.
+ *
+ * There are some functions which can sleep for arbitrary periods
+ * (paste_selection) but we don't need the lock there anyway.
+ *
+ * set_selection has locking, and definitely needs it
+ */
+
/* Dispatch the TIOCLINUX ioctl: @arg points at a user-space buffer whose
 * first byte selects the sub-function.  Returns 0/positive data on
 * success or a negative errno. */
int tioclinux(struct tty_struct *tty, unsigned long arg)
{
	char type, data;
	char __user *p = (char __user *)arg;
	int lines;
	int ret;

	/* only the owning tty or a privileged caller may poke the console */
	if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (get_user(type, p))
		return -EFAULT;
	ret = 0;

	switch (type)
	{
		case TIOCL_SETSEL:
			acquire_console_sem();
			ret = set_selection((struct tiocl_selection __user *)(p+1), tty);
			release_console_sem();
			break;
		case TIOCL_PASTESEL:
			/* may sleep waiting for room in the tty */
			ret = paste_selection(tty);
			break;
		case TIOCL_UNBLANKSCREEN:
			acquire_console_sem();
			unblank_screen();
			release_console_sem();
			break;
		case TIOCL_SELLOADLUT:
			ret = sel_loadlut(p);
			break;
		case TIOCL_GETSHIFTSTATE:

	/*
	 * Make it possible to react to Shift+Mousebutton.
	 * Note that 'shift_state' is an undocumented
	 * kernel-internal variable; programs not closely
	 * related to the kernel should not use this.
	 */
	 		data = shift_state;
			ret = __put_user(data, p);
			break;
		case TIOCL_GETMOUSEREPORTING:
			data = mouse_reporting();
			ret = __put_user(data, p);
			break;
		case TIOCL_SETVESABLANK:
			ret = set_vesa_blanking(p);
			break;
		case TIOCL_GETKMSGREDIRECT:
			data = vt_get_kmsg_redirect();
			ret = __put_user(data, p);
			break;
		case TIOCL_SETKMSGREDIRECT:
			/* redirecting kernel messages is privileged */
			if (!capable(CAP_SYS_ADMIN)) {
				ret = -EPERM;
			} else {
				if (get_user(data, p+1))
					ret = -EFAULT;
				else
					vt_kmsg_redirect(data);
			}
			break;
		case TIOCL_GETFGCONSOLE:
			ret = fg_console;
			break;
		case TIOCL_SCROLLCONSOLE:
			/* line count lives 4 bytes into the user buffer */
			if (get_user(lines, (s32 __user *)(p+4))) {
				ret = -EFAULT;
			} else {
				scrollfront(vc_cons[fg_console].d, lines);
				ret = 0;
			}
			break;
		case TIOCL_BLANKSCREEN:	/* until explicitly unblanked, not only poked */
			acquire_console_sem();
			ignore_poke = 1;
			do_blank_screen(0);
			release_console_sem();
			break;
		case TIOCL_BLANKEDSCREEN:
			ret = console_blanked;
			break;
		default:
			ret = -EINVAL;
			break;
	}
	return ret;
}
+
+ /*
+ * /dev/ttyN handling
+ */
+
/* tty->write: emit @count bytes, then redraw the cursor. */
static int con_write(struct tty_struct *tty, const unsigned char *buf, int count)
{
	int written = do_con_write(tty, buf, count);

	con_flush_chars(tty);
	return written;
}
+
/* tty->put_char: write a single byte to the console. */
static int con_put_char(struct tty_struct *tty, unsigned char ch)
{
	/* n_r3964 calls put_char() from interrupt context */
	if (in_interrupt())
		return 0;

	return do_con_write(tty, &ch, 1);
}
+
+ static int con_write_room(struct tty_struct *tty)
+ {
+ if (tty->stopped)
+ return 0;
+ return 32768; /* No limit, really; we're not buffering */
+ }
+
/* tty->chars_in_buffer: consoles never queue output. */
static int con_chars_in_buffer(struct tty_struct *tty)
{
	return 0;		/* we're not buffering */
}
+
+ /*
+ * con_throttle and con_unthrottle are only used for
+ * paste_selection(), which has to stuff in a large number of
+ * characters...
+ */
+ static void con_throttle(struct tty_struct *tty)
+ {
+ }
+
+ static void con_unthrottle(struct tty_struct *tty)
+ {
+ struct vc_data *vc = tty->driver_data;
+
+ wake_up_interruptible(&vc->paste_wait);
+ }
+
+ /*
+ * Turn the Scroll-Lock LED on when the tty is stopped
+ */
+ static void con_stop(struct tty_struct *tty)
+ {
+ int console_num;
+ if (!tty)
+ return;
+ console_num = tty->index;
+ if (!vc_cons_allocated(console_num))
+ return;
+ set_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
+ set_leds();
+ }
+
+ /*
+ * Turn the Scroll-Lock LED off when the console is started
+ */
+ static void con_start(struct tty_struct *tty)
+ {
+ int console_num;
+ if (!tty)
+ return;
+ console_num = tty->index;
+ if (!vc_cons_allocated(console_num))
+ return;
+ clr_vc_kbd_led(kbd_table + console_num, VC_SCROLLOCK);
+ set_leds();
+ }
+
+ static void con_flush_chars(struct tty_struct *tty)
+ {
+ struct vc_data *vc;
+
+ if (in_interrupt()) /* from flush_to_ldisc */
+ return;
+
+ /* if we race with con_close(), vt may be null */
+ acquire_console_sem();
+ vc = tty->driver_data;
+ if (vc)
+ set_cursor(vc);
+ release_console_sem();
+ }
+
+ /*
+ * Allocate the console screen memory.
+ */
+ static int con_open(struct tty_struct *tty, struct file *filp)
+ {
+ unsigned int currcons = tty->index;
+ int ret = 0;
+
+ acquire_console_sem();
+ if (tty->driver_data == NULL) {
+ ret = vc_allocate(currcons);
+ if (ret == 0) {
+ struct vc_data *vc = vc_cons[currcons].d;
+
+ /* Still being freed */
+ if (vc->port.tty) {
+ release_console_sem();
+ return -ERESTARTSYS;
+ }
+ tty->driver_data = vc;
+ vc->port.tty = tty;
+
+ if (!tty->winsize.ws_row && !tty->winsize.ws_col) {
+ tty->winsize.ws_row = vc_cons[currcons].d->vc_rows;
+ tty->winsize.ws_col = vc_cons[currcons].d->vc_cols;
+ }
+ if (vc->vc_utf)
+ tty->termios->c_iflag |= IUTF8;
+ else
+ tty->termios->c_iflag &= ~IUTF8;
+ release_console_sem();
+ return ret;
+ }
+ }
+ release_console_sem();
+ return ret;
+ }
+
/* tty->close: teardown happens in con_shutdown() instead. */
static void con_close(struct tty_struct *tty, struct file *filp)
{
	/* Nothing to do - we defer to shutdown */
}
+
+ static void con_shutdown(struct tty_struct *tty)
+ {
+ struct vc_data *vc = tty->driver_data;
+ BUG_ON(vc == NULL);
+ acquire_console_sem();
+ vc->port.tty = NULL;
+ release_console_sem();
+ tty_shutdown(tty);
+ }
+
/* Initial values for vc_itcolor/vc_ulcolor (see vc_init); tunable at
 * runtime through the "italic" and "underline" module parameters. */
static int default_italic_color    = 2; // green (ASCII)
static int default_underline_color = 3; // cyan (ASCII)
module_param_named(italic, default_italic_color, int, S_IRUGO | S_IWUSR);
module_param_named(underline, default_underline_color, int, S_IRUGO | S_IWUSR);
+
+ static void vc_init(struct vc_data *vc, unsigned int rows,
+ unsigned int cols, int do_clear)
+ {
+ int j, k ;
+
+ vc->vc_cols = cols;
+ vc->vc_rows = rows;
+ vc->vc_size_row = cols << 1;
+ vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row;
+
+ set_origin(vc);
+ vc->vc_pos = vc->vc_origin;
+ reset_vc(vc);
+ for (j=k=0; j<16; j++) {
+ vc->vc_palette[k++] = default_red[j] ;
+ vc->vc_palette[k++] = default_grn[j] ;
+ vc->vc_palette[k++] = default_blu[j] ;
+ }
+ vc->vc_def_color = 0x07; /* white */
+ vc->vc_ulcolor = default_underline_color;
+ vc->vc_itcolor = default_italic_color;
+ vc->vc_halfcolor = 0x08; /* grey */
+ init_waitqueue_head(&vc->paste_wait);
+ reset_terminal(vc, do_clear);
+ }
+
+ /*
+ * This routine initializes console interrupts, and does nothing
+ * else. If you want the screen to clear, call tty_write with
+ * the appropriate escape-sequence.
+ */
+
+ static int __init con_init(void)
+ {
+ const char *display_desc = NULL;
+ struct vc_data *vc;
+ unsigned int currcons = 0, i;
+
+ acquire_console_sem();
+
+ if (conswitchp)
+ display_desc = conswitchp->con_startup();
+ if (!display_desc) {
+ fg_console = 0;
+ release_console_sem();
+ return 0;
+ }
+
+ for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ struct con_driver *con_driver = ®istered_con_driver[i];
+
+ if (con_driver->con == NULL) {
+ con_driver->con = conswitchp;
+ con_driver->desc = display_desc;
+ con_driver->flag = CON_DRIVER_FLAG_INIT;
+ con_driver->first = 0;
+ con_driver->last = MAX_NR_CONSOLES - 1;
+ break;
+ }
+ }
+
+ for (i = 0; i < MAX_NR_CONSOLES; i++)
+ con_driver_map[i] = conswitchp;
+
+ if (blankinterval) {
+ blank_state = blank_normal_wait;
+ mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+ }
+
+ for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
+ vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT);
+ INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
+ tty_port_init(&vc->port);
+ visual_init(vc, currcons, 1);
+ vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT);
+ vc_init(vc, vc->vc_rows, vc->vc_cols,
+ currcons || !vc->vc_sw->con_save_screen);
+ }
+ currcons = fg_console = 0;
+ master_display_fg = vc = vc_cons[currcons].d;
+ set_origin(vc);
+ save_screen(vc);
+ gotoxy(vc, vc->vc_x, vc->vc_y);
+ csi_J(vc, 0);
+ update_screen(vc);
+ printk("Console: %s %s %dx%d",
+ vc->vc_can_do_color ? "colour" : "mono",
+ display_desc, vc->vc_cols, vc->vc_rows);
+ printable = 1;
+ printk("\n");
+
+ release_console_sem();
+
+ #ifdef CONFIG_VT_CONSOLE
+ register_console(&vt_console_driver);
+ #endif
+ return 0;
+ }
+ console_initcall(con_init);
+
/* tty operations for virtual consoles; installed in vty_init(). */
static const struct tty_operations con_ops = {
	.open = con_open,
	.close = con_close,
	.write = con_write,
	.write_room = con_write_room,
	.put_char = con_put_char,
	.flush_chars = con_flush_chars,
	.chars_in_buffer = con_chars_in_buffer,
	.ioctl = vt_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = vt_compat_ioctl,
#endif
	.stop = con_stop,
	.start = con_start,
	.throttle = con_throttle,
	.unthrottle = con_unthrottle,
	.resize = vt_resize,
	.shutdown = con_shutdown
};
+
+ static struct cdev vc0_cdev;
+
/*
 * Register /dev/tty0 and the /dev/ttyN driver, then initialize the
 * keyboard and console-map layers.  @console_fops provides the fops for
 * the tty0 node.  Panics on failure (no console without it).
 */
int __init vty_init(const struct file_operations *console_fops)
{
	cdev_init(&vc0_cdev, console_fops);
	if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) ||
	    register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
		panic("Couldn't register /dev/tty0 driver\n");
	device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");

	vcs_init();

	console_driver = alloc_tty_driver(MAX_NR_CONSOLES);
	if (!console_driver)
		panic("Couldn't allocate console driver\n");
	console_driver->owner = THIS_MODULE;
	console_driver->name = "tty";
	console_driver->name_base = 1;		/* first node is tty1 */
	console_driver->major = TTY_MAJOR;
	console_driver->minor_start = 1;
	console_driver->type = TTY_DRIVER_TYPE_CONSOLE;
	console_driver->init_termios = tty_std_termios;
	if (default_utf8)
		console_driver->init_termios.c_iflag |= IUTF8;
	console_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
	tty_set_operations(console_driver, &con_ops);
	if (tty_register_driver(console_driver))
		panic("Couldn't register console driver\n");
	kbd_init();
	console_map_init();
#ifdef CONFIG_MDA_CONSOLE
	mda_console_init();
#endif
	return 0;
}
+
+ #ifndef VT_SINGLE_DRIVER
+
+ static struct class *vtconsole_class;
+
+ static int bind_con_driver(const struct consw *csw, int first, int last,
+ int deflt)
+ {
+ struct module *owner = csw->owner;
+ const char *desc = NULL;
+ struct con_driver *con_driver;
+ int i, j = -1, k = -1, retval = -ENODEV;
+
+ if (!try_module_get(owner))
+ return -ENODEV;
+
+ acquire_console_sem();
+
+ /* check if driver is registered */
+ for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ con_driver = ®istered_con_driver[i];
+
+ if (con_driver->con == csw) {
+ desc = con_driver->desc;
+ retval = 0;
+ break;
+ }
+ }
+
+ if (retval)
+ goto err;
+
+ if (!(con_driver->flag & CON_DRIVER_FLAG_INIT)) {
+ csw->con_startup();
+ con_driver->flag |= CON_DRIVER_FLAG_INIT;
+ }
+
+ if (deflt) {
+ if (conswitchp)
+ module_put(conswitchp->owner);
+
+ __module_get(owner);
+ conswitchp = csw;
+ }
+
+ first = max(first, con_driver->first);
+ last = min(last, con_driver->last);
+
+ for (i = first; i <= last; i++) {
+ int old_was_color;
+ struct vc_data *vc = vc_cons[i].d;
+
+ if (con_driver_map[i])
+ module_put(con_driver_map[i]->owner);
+ __module_get(owner);
+ con_driver_map[i] = csw;
+
+ if (!vc || !vc->vc_sw)
+ continue;
+
+ j = i;
+
+ if (CON_IS_VISIBLE(vc)) {
+ k = i;
+ save_screen(vc);
+ }
+
+ old_was_color = vc->vc_can_do_color;
+ vc->vc_sw->con_deinit(vc);
+ vc->vc_origin = (unsigned long)vc->vc_screenbuf;
+ visual_init(vc, i, 0);
+ set_origin(vc);
+ update_attr(vc);
+
+ /* If the console changed between mono <-> color, then
+ * the attributes in the screenbuf will be wrong. The
+ * following resets all attributes to something sane.
+ */
+ if (old_was_color != vc->vc_can_do_color)
+ clear_buffer_attributes(vc);
+ }
+
+ printk("Console: switching ");
+ if (!deflt)
+ printk("consoles %d-%d ", first+1, last+1);
+ if (j >= 0) {
+ struct vc_data *vc = vc_cons[j].d;
+
+ printk("to %s %s %dx%d\n",
+ vc->vc_can_do_color ? "colour" : "mono",
+ desc, vc->vc_cols, vc->vc_rows);
+
+ if (k >= 0) {
+ vc = vc_cons[k].d;
+ update_screen(vc);
+ }
+ } else
+ printk("to %s\n", desc);
+
+ retval = 0;
+ err:
+ release_console_sem();
+ module_put(owner);
+ return retval;
+ };
+
+ #ifdef CONFIG_VT_HW_CONSOLE_BINDING
+ static int con_is_graphics(const struct consw *csw, int first, int last)
+ {
+ int i, retval = 0;
+
+ for (i = first; i <= last; i++) {
+ struct vc_data *vc = vc_cons[i].d;
+
+ if (vc && vc->vc_mode == KD_GRAPHICS) {
+ retval = 1;
+ break;
+ }
+ }
+
+ return retval;
+ }
+
+ /**
+ * unbind_con_driver - unbind a console driver
+ * @csw: pointer to console driver to unregister
+ * @first: first in range of consoles that @csw should be unbound from
+ * @last: last in range of consoles that @csw should be unbound from
+ * @deflt: should next bound console driver be default after @csw is unbound?
+ *
+ * To unbind a driver from all possible consoles, pass 0 as @first and
+ * %MAX_NR_CONSOLES as @last.
+ *
+ * @deflt controls whether the console that ends up replacing @csw should be
+ * the default console.
+ *
+ * RETURNS:
+ * -ENODEV if @csw isn't a registered console driver or can't be unregistered
+ * or 0 on success.
+ */
+ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
+ {
+ struct module *owner = csw->owner;
+ const struct consw *defcsw = NULL;
+ struct con_driver *con_driver = NULL, *con_back = NULL;
+ int i, retval = -ENODEV;
+
+ if (!try_module_get(owner))
+ return -ENODEV;
+
+ acquire_console_sem();
+
+ /* check if driver is registered and if it is unbindable */
+ for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ con_driver = ®istered_con_driver[i];
+
+ if (con_driver->con == csw &&
+ con_driver->flag & CON_DRIVER_FLAG_MODULE) {
+ retval = 0;
+ break;
+ }
+ }
+
+ if (retval) {
+ release_console_sem();
+ goto err;
+ }
+
+ retval = -ENODEV;
+
+ /* check if backup driver exists */
+ for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ con_back = ®istered_con_driver[i];
+
+ if (con_back->con &&
+ !(con_back->flag & CON_DRIVER_FLAG_MODULE)) {
+ defcsw = con_back->con;
+ retval = 0;
+ break;
+ }
+ }
+
+ if (retval) {
+ release_console_sem();
+ goto err;
+ }
+
+ if (!con_is_bound(csw)) {
+ release_console_sem();
+ goto err;
+ }
+
+ first = max(first, con_driver->first);
+ last = min(last, con_driver->last);
+
+ for (i = first; i <= last; i++) {
+ if (con_driver_map[i] == csw) {
+ module_put(csw->owner);
+ con_driver_map[i] = NULL;
+ }
+ }
+
+ if (!con_is_bound(defcsw)) {
+ const struct consw *defconsw = conswitchp;
+
+ defcsw->con_startup();
+ con_back->flag |= CON_DRIVER_FLAG_INIT;
+ /*
+ * vgacon may change the default driver to point
+ * to dummycon, we restore it here...
+ */
+ conswitchp = defconsw;
+ }
+
+ if (!con_is_bound(csw))
+ con_driver->flag &= ~CON_DRIVER_FLAG_INIT;
+
+ release_console_sem();
+ /* ignore return value, binding should not fail */
+ bind_con_driver(defcsw, first, last, deflt);
+ err:
+ module_put(owner);
+ return retval;
+
+ }
+ EXPORT_SYMBOL(unbind_con_driver);
+
+ static int vt_bind(struct con_driver *con)
+ {
+ const struct consw *defcsw = NULL, *csw = NULL;
+ int i, more = 1, first = -1, last = -1, deflt = 0;
+
+ if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
+ con_is_graphics(con->con, con->first, con->last))
+ goto err;
+
+ csw = con->con;
+
+ for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ struct con_driver *con = ®istered_con_driver[i];
+
+ if (con->con && !(con->flag & CON_DRIVER_FLAG_MODULE)) {
+ defcsw = con->con;
+ break;
+ }
+ }
+
+ if (!defcsw)
+ goto err;
+
+ while (more) {
+ more = 0;
+
+ for (i = con->first; i <= con->last; i++) {
+ if (con_driver_map[i] == defcsw) {
+ if (first == -1)
+ first = i;
+ last = i;
+ more = 1;
+ } else if (first != -1)
+ break;
+ }
+
+ if (first == 0 && last == MAX_NR_CONSOLES -1)
+ deflt = 1;
+
+ if (first != -1)
+ bind_con_driver(csw, first, last, deflt);
+
+ first = -1;
+ last = -1;
+ deflt = 0;
+ }
+
+ err:
+ return 0;
+ }
+
+ static int vt_unbind(struct con_driver *con)
+ {
+ const struct consw *csw = NULL;
+ int i, more = 1, first = -1, last = -1, deflt = 0;
+
+ if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
+ con_is_graphics(con->con, con->first, con->last))
+ goto err;
+
+ csw = con->con;
+
+ while (more) {
+ more = 0;
+
+ for (i = con->first; i <= con->last; i++) {
+ if (con_driver_map[i] == csw) {
+ if (first == -1)
+ first = i;
+ last = i;
+ more = 1;
+ } else if (first != -1)
+ break;
+ }
+
+ if (first == 0 && last == MAX_NR_CONSOLES -1)
+ deflt = 1;
+
+ if (first != -1)
+ unbind_con_driver(csw, first, last, deflt);
+
+ first = -1;
+ last = -1;
+ deflt = 0;
+ }
+
+ err:
+ return 0;
+ }
#else
/* Without CONFIG_VT_HW_CONSOLE_BINDING, bind/unbind are no-ops. */
static inline int vt_bind(struct con_driver *con)
{
	return 0;
}
static inline int vt_unbind(struct con_driver *con)
{
	return 0;
}
#endif /* CONFIG_VT_HW_CONSOLE_BINDING */
+
+ static ssize_t store_bind(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ struct con_driver *con = dev_get_drvdata(dev);
+ int bind = simple_strtoul(buf, NULL, 0);
+
+ if (bind)
+ vt_bind(con);
+ else
+ vt_unbind(con);
+
+ return count;
+ }
+
+ static ssize_t show_bind(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ struct con_driver *con = dev_get_drvdata(dev);
+ int bind = con_is_bound(con->con);
+
+ return snprintf(buf, PAGE_SIZE, "%i\n", bind);
+ }
+
+ static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ struct con_driver *con = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%s %s\n",
+ (con->flag & CON_DRIVER_FLAG_MODULE) ? "(M)" : "(S)",
+ con->desc);
+
+ }
+
/* Per-driver sysfs attributes: "bind" (rw) and "name" (ro); see
 * show_bind/store_bind/show_name above. */
static struct device_attribute device_attrs[] = {
	__ATTR(bind, S_IRUGO|S_IWUSR, show_bind, store_bind),
	__ATTR(name, S_IRUGO, show_name, NULL),
};
+
+ /*
+  * Create the sysfs attribute files for a console driver's device and set
+  * CON_DRIVER_FLAG_ATTR.  On partial failure the files created so far are
+  * removed and the flag is cleared again.
+  * Returns 0 on success or the failing device_create_file() error.
+  */
+ static int vtconsole_init_device(struct con_driver *con)
+ {
+ int i;
+ int error = 0;
+
+ con->flag |= CON_DRIVER_FLAG_ATTR;
+ dev_set_drvdata(con->dev, con);
+ for (i = 0; i < ARRAY_SIZE(device_attrs); i++) {
+ error = device_create_file(con->dev, &device_attrs[i]);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ /* roll back the attributes that were already created */
+ while (--i >= 0)
+ device_remove_file(con->dev, &device_attrs[i]);
+ con->flag &= ~CON_DRIVER_FLAG_ATTR;
+ }
+
+ return error;
+ }
+
+ /*
+  * Remove the sysfs attribute files added by vtconsole_init_device(),
+  * if any, and clear CON_DRIVER_FLAG_ATTR.
+  */
+ static void vtconsole_deinit_device(struct con_driver *con)
+ {
+ int i;
+
+ if (con->flag & CON_DRIVER_FLAG_ATTR) {
+ for (i = 0; i < ARRAY_SIZE(device_attrs); i++)
+ device_remove_file(con->dev, &device_attrs[i]);
+ con->flag &= ~CON_DRIVER_FLAG_ATTR;
+ }
+ }
+
+ /**
+  * con_is_bound - checks if driver is bound to the console
+  * @csw: console driver
+  *
+  * RETURNS: zero if unbound, nonzero if bound
+  *
+  * Drivers can call this and if zero, they should release
+  * all resources allocated on con_startup()
+  */
+ int con_is_bound(const struct consw *csw)
+ {
+ int i, bound = 0;
+
+ /* bound as soon as any console is mapped to this driver */
+ for (i = 0; i < MAX_NR_CONSOLES; i++) {
+ if (con_driver_map[i] == csw) {
+ bound = 1;
+ break;
+ }
+ }
+
+ return bound;
+ }
+ EXPORT_SYMBOL(con_is_bound);
+
+ /**
+  * con_debug_enter - prepare the console for the kernel debugger
+  * @vc: virtual console the debugger will use
+  *
+  * Called when the console is taken over by the kernel debugger, this
+  * function needs to save the current console state, then put the console
+  * into a state suitable for the kernel debugger.
+  *
+  * RETURNS:
+  * Zero on success, nonzero if a failure occurred when trying to prepare
+  * the console for the debugger.
+  */
+ int con_debug_enter(struct vc_data *vc)
+ {
+ int ret = 0;
+
+ /* save global console state so con_debug_leave() can restore it */
+ saved_fg_console = fg_console;
+ saved_last_console = last_console;
+ saved_want_console = want_console;
+ saved_vc_mode = vc->vc_mode;
+ saved_console_blanked = console_blanked;
+ vc->vc_mode = KD_TEXT;
+ console_blanked = 0;
+ if (vc->vc_sw->con_debug_enter)
+ ret = vc->vc_sw->con_debug_enter(vc);
+ #ifdef CONFIG_KGDB_KDB
+ /* Set the initial LINES variable if it is not already set */
+ if (vc->vc_rows < 999) {
+ int linecount;
+ char lns[4];
+ const char *setargs[3] = {
+ "set",
+ "LINES",
+ lns,
+ };
+ /* kdbgetintenv() failing means LINES is unset; lns fits
+  * "999\0" because of the vc_rows < 999 guard above */
+ if (kdbgetintenv(setargs[0], &linecount)) {
+ snprintf(lns, 4, "%i", vc->vc_rows);
+ kdb_set(2, setargs);
+ }
+ }
+ #endif /* CONFIG_KGDB_KDB */
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(con_debug_enter);
+
+ /**
+  * con_debug_leave - restore console state
+  *
+  * Restore the console state to what it was before the kernel debugger
+  * was invoked (saved by con_debug_enter()).
+  *
+  * RETURNS:
+  * Zero on success, nonzero if a failure occurred when trying to restore
+  * the console.
+  */
+ int con_debug_leave(void)
+ {
+ struct vc_data *vc;
+ int ret = 0;
+
+ fg_console = saved_fg_console;
+ last_console = saved_last_console;
+ want_console = saved_want_console;
+ console_blanked = saved_console_blanked;
+ vc_cons[fg_console].d->vc_mode = saved_vc_mode;
+
+ vc = vc_cons[fg_console].d;
+ if (vc->vc_sw->con_debug_leave)
+ ret = vc->vc_sw->con_debug_leave(vc);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(con_debug_leave);
+
+ /**
+  * register_con_driver - register console driver to console layer
+  * @csw: console driver
+  * @first: the first console to take over, minimum value is 0
+  * @last: the last console to take over, maximum value is MAX_NR_CONSOLES -1
+  *
+  * DESCRIPTION: This function registers a console driver which can later
+  * bind to a range of consoles specified by @first and @last. It will
+  * also initialize the console driver by calling con_startup().
+  *
+  * RETURNS: zero on success, negative errno on failure.  Must be called
+  * with the console semaphore released (it is taken here).
+  */
+ int register_con_driver(const struct consw *csw, int first, int last)
+ {
+ 	struct module *owner = csw->owner;
+ 	struct con_driver *con_driver;
+ 	const char *desc;
+ 	int i, retval = 0;
+
+ 	if (!try_module_get(owner))
+ 		return -ENODEV;
+
+ 	acquire_console_sem();
+
+ 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ 		con_driver = &registered_con_driver[i];
+
+ 		/* already registered */
+ 		if (con_driver->con == csw)
+ 			retval = -EINVAL;
+ 	}
+
+ 	if (retval)
+ 		goto err;
+
+ 	desc = csw->con_startup();
+
+ 	if (!desc) {
+ 		/* con_startup() failed; previously this path fell through
+ 		 * with retval == 0 and wrongly reported success */
+ 		retval = -EINVAL;
+ 		goto err;
+ 	}
+
+ 	retval = -EINVAL;
+
+ 	/* claim the first free driver slot */
+ 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ 		con_driver = &registered_con_driver[i];
+
+ 		if (con_driver->con == NULL) {
+ 			con_driver->con = csw;
+ 			con_driver->desc = desc;
+ 			con_driver->node = i;
+ 			con_driver->flag = CON_DRIVER_FLAG_MODULE |
+ 			                   CON_DRIVER_FLAG_INIT;
+ 			con_driver->first = first;
+ 			con_driver->last = last;
+ 			retval = 0;
+ 			break;
+ 		}
+ 	}
+
+ 	if (retval)
+ 		goto err;
+
+ 	/* sysfs device creation failure is non-fatal: registration stands */
+ 	con_driver->dev = device_create(vtconsole_class, NULL,
+ 					MKDEV(0, con_driver->node),
+ 					NULL, "vtcon%i",
+ 					con_driver->node);
+
+ 	if (IS_ERR(con_driver->dev)) {
+ 		printk(KERN_WARNING "Unable to create device for %s; "
+ 		       "errno = %ld\n", con_driver->desc,
+ 		       PTR_ERR(con_driver->dev));
+ 		con_driver->dev = NULL;
+ 	} else {
+ 		vtconsole_init_device(con_driver);
+ 	}
+
+ err:
+ 	release_console_sem();
+ 	module_put(owner);
+ 	return retval;
+ }
+ EXPORT_SYMBOL(register_con_driver);
+
+ /**
+  * unregister_con_driver - unregister console driver from console layer
+  * @csw: console driver
+  *
+  * DESCRIPTION: All drivers that registers to the console layer must
+  * call this function upon exit, or if the console driver is in a state
+  * where it won't be able to handle console services, such as the
+  * framebuffer console without loaded framebuffer drivers.
+  *
+  * The driver must unbind first prior to unregistration.
+  *
+  * RETURNS: zero on success, -ENODEV if the driver is still bound or
+  * was not found among the registered modular drivers.
+  */
+ int unregister_con_driver(const struct consw *csw)
+ {
+ 	int i, retval = -ENODEV;
+
+ 	acquire_console_sem();
+
+ 	/* cannot unregister a bound driver */
+ 	if (con_is_bound(csw))
+ 		goto err;
+
+ 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ 		struct con_driver *con_driver = &registered_con_driver[i];
+
+ 		if (con_driver->con == csw &&
+ 		    con_driver->flag & CON_DRIVER_FLAG_MODULE) {
+ 			/* tear down sysfs, then free the slot for reuse */
+ 			vtconsole_deinit_device(con_driver);
+ 			device_destroy(vtconsole_class,
+ 				       MKDEV(0, con_driver->node));
+ 			con_driver->con = NULL;
+ 			con_driver->desc = NULL;
+ 			con_driver->dev = NULL;
+ 			con_driver->node = 0;
+ 			con_driver->flag = 0;
+ 			con_driver->first = 0;
+ 			con_driver->last = 0;
+ 			retval = 0;
+ 			break;
+ 		}
+ 	}
+ err:
+ 	release_console_sem();
+ 	return retval;
+ }
+ EXPORT_SYMBOL(unregister_con_driver);
+
+ /*
+  * If we support more console drivers, this function is used
+  * when a driver wants to take over some existing consoles
+  * and become default driver for newly opened ones.
+  *
+  * take_over_console is basically a register followed by a bind
+  * (bind_con_driver displaces whatever driver was bound before).
+  */
+ int take_over_console(const struct consw *csw, int first, int last, int deflt)
+ {
+ int err;
+
+ err = register_con_driver(csw, first, last);
+
+ if (!err)
+ bind_con_driver(csw, first, last, deflt);
+
+ return err;
+ }
+
+ /*
+  * give_up_console is a wrapper to unregister_con_driver. It will only
+  * work if driver is fully unbound.
+  *
+  * NOTE(review): the unregister_con_driver() return value is dropped,
+  * so failure (driver still bound) is silent here.
+  */
+ void give_up_console(const struct consw *csw)
+ {
+ unregister_con_driver(csw);
+ }
+
+ /*
+  * Create the "vtconsole" device class and add sysfs devices for any
+  * console drivers registered before the class existed (the system
+  * drivers).  Class creation failure is logged but non-fatal.
+  */
+ static int __init vtconsole_class_init(void)
+ {
+ 	int i;
+
+ 	vtconsole_class = class_create(THIS_MODULE, "vtconsole");
+ 	if (IS_ERR(vtconsole_class)) {
+ 		printk(KERN_WARNING "Unable to create vt console class; "
+ 		       "errno = %ld\n", PTR_ERR(vtconsole_class));
+ 		vtconsole_class = NULL;
+ 	}
+
+ 	/* Add system drivers to sysfs */
+ 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
+ 		struct con_driver *con = &registered_con_driver[i];
+
+ 		if (con->con && !con->dev) {
+ 			con->dev = device_create(vtconsole_class, NULL,
+ 						 MKDEV(0, con->node),
+ 						 NULL, "vtcon%i",
+ 						 con->node);
+
+ 			if (IS_ERR(con->dev)) {
+ 				printk(KERN_WARNING "Unable to create "
+ 				       "device for %s; errno = %ld\n",
+ 				       con->desc, PTR_ERR(con->dev));
+ 				con->dev = NULL;
+ 			} else {
+ 				vtconsole_init_device(con);
+ 			}
+ 		}
+ 	}
+
+ 	return 0;
+ }
+ postcore_initcall(vtconsole_class_init);
+
+ #endif
+
+ /*
+ * Screen blanking
+ */
+
+ /*
+  * TIOCLINUX set-vesa-blanking handler: read the requested mode byte
+  * from userspace (at p + 1) and store it; modes >= 4 fall back to 0.
+  */
+ static int set_vesa_blanking(char __user *p)
+ {
+ unsigned int mode;
+
+ if (get_user(mode, p + 1))
+ return -EFAULT;
+
+ vesa_blank_mode = (mode < 4) ? mode : 0;
+ return 0;
+ }
+
+ /*
+  * Blank the foreground console.
+  * @entering_gfx: nonzero when called because the console is switching
+  * to graphics mode (blank unconditionally, no timers).
+  *
+  * Must be called with the console lock held (WARN_CONSOLE_UNLOCKED).
+  * Sets console_blanked to fg_console + 1 on success.
+  */
+ void do_blank_screen(int entering_gfx)
+ {
+ struct vc_data *vc = vc_cons[fg_console].d;
+ int i;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ if (console_blanked) {
+ /* already blanked; escalate to the vesa power-off level if
+  * the vesa-off timer fired */
+ if (blank_state == blank_vesa_wait) {
+ blank_state = blank_off;
+ vc->vc_sw->con_blank(vc, vesa_blank_mode + 1, 0);
+ }
+ return;
+ }
+
+ /* entering graphics mode? */
+ if (entering_gfx) {
+ hide_cursor(vc);
+ save_screen(vc);
+ vc->vc_sw->con_blank(vc, -1, 1);
+ console_blanked = fg_console + 1;
+ blank_state = blank_off;
+ set_origin(vc);
+ return;
+ }
+
+ if (blank_state != blank_normal_wait)
+ return;
+ blank_state = blank_off;
+
+ /* don't blank graphics */
+ if (vc->vc_mode != KD_TEXT) {
+ console_blanked = fg_console + 1;
+ return;
+ }
+
+ hide_cursor(vc);
+ del_timer_sync(&console_timer);
+ blank_timer_expired = 0;
+
+ save_screen(vc);
+ /* In case we need to reset origin, blanking hook returns 1 */
+ i = vc->vc_sw->con_blank(vc, vesa_off_interval ? 1 : (vesa_blank_mode + 1), 0);
+ console_blanked = fg_console + 1;
+ if (i)
+ set_origin(vc);
+
+ if (console_blank_hook && console_blank_hook(1))
+ return;
+
+ /* arm a second timer for the deeper vesa power-off stage */
+ if (vesa_off_interval && vesa_blank_mode) {
+ blank_state = blank_vesa_wait;
+ mod_timer(&console_timer, jiffies + vesa_off_interval);
+ }
+ vt_event_post(VT_EVENT_BLANK, vc->vc_num, vc->vc_num);
+ }
+ EXPORT_SYMBOL(do_blank_screen);
+
+ /*
+  * Called by timer as well as from vt_console_driver
+  */
+ void do_unblank_screen(int leaving_gfx)
+ {
+ struct vc_data *vc;
+
+ /* This should now always be called from a "sane" (read: can schedule)
+  * context for the sake of the low level drivers, except in the special
+  * case of oops_in_progress
+  */
+ if (!oops_in_progress)
+ might_sleep();
+
+ WARN_CONSOLE_UNLOCKED();
+
+ ignore_poke = 0;
+ if (!console_blanked)
+ return;
+ if (!vc_cons_allocated(fg_console)) {
+ /* impossible */
+ printk("unblank_screen: tty %d not allocated ??\n", fg_console+1);
+ return;
+ }
+ vc = vc_cons[fg_console].d;
+ /* Try to unblank in oops case too */
+ if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+ return; /* but leave console_blanked != 0 */
+
+ /* re-arm the idle-blank timer for the next cycle */
+ if (blankinterval) {
+ mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+ blank_state = blank_normal_wait;
+ }
+
+ console_blanked = 0;
+ if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || vt_force_oops_output(vc))
+ /* Low-level driver cannot restore -> do it ourselves */
+ update_screen(vc);
+ if (console_blank_hook)
+ console_blank_hook(0);
+ set_palette(vc);
+ set_cursor(vc);
+ vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num);
+ }
+ EXPORT_SYMBOL(do_unblank_screen);
+
+ /*
+  * This is called by the outside world to cause a forced unblank, mostly for
+  * oopses. Currently, I just call do_unblank_screen(0), but we could eventually
+  * call it with 1 as an argument and so force a mode restore... that may kill
+  * X or at least garbage the screen but would also make the Oops visible...
+  */
+ void unblank_screen(void)
+ {
+ do_unblank_screen(0);
+ }
+
+ /*
+  * We defer the timer blanking to work queue so it can take the console mutex
+  * (console operations can still happen at irq time, but only from printk which
+  * has the console mutex. Not perfect yet, but better than no locking
+  */
+ static void blank_screen_t(unsigned long dummy)
+ {
+ /* keventd not running yet (early boot): just re-arm and retry later */
+ if (unlikely(!keventd_up())) {
+ mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+ return;
+ }
+ blank_timer_expired = 1;
+ schedule_work(&console_work);
+ }
+
+ /*
+  * User-activity hook: unblank a blanked console, or push back the
+  * idle-blank timer.  Requires the console lock.
+  */
+ void poke_blanked_console(void)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ /* Add this so we quickly catch whoever might call us in a non
+  * safe context. Nowadays, unblank_screen() isn't to be called in
+  * atomic contexts and is allowed to schedule (with the special case
+  * of oops_in_progress, but that isn't of any concern for this
+  * function. --BenH.
+  */
+ might_sleep();
+
+ /* This isn't perfectly race free, but a race here would be mostly harmless,
+  * at worst, we'll do a spurious blank and it's unlikely
+  */
+ del_timer(&console_timer);
+ blank_timer_expired = 0;
+
+ if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS)
+ return;
+ if (console_blanked)
+ unblank_screen();
+ else if (blankinterval) {
+ mod_timer(&console_timer, jiffies + (blankinterval * HZ));
+ blank_state = blank_normal_wait;
+ }
+ }
+
+ /*
+ * Palettes
+ */
+
+ /*
+  * Push the console's palette to the driver; skipped while the console
+  * is in graphics mode.  Requires the console lock.
+  */
+ static void set_palette(struct vc_data *vc)
+ {
+ WARN_CONSOLE_UNLOCKED();
+
+ if (vc->vc_mode != KD_GRAPHICS)
+ vc->vc_sw->con_set_palette(vc, color_table);
+ }
+
+ /*
+  * Copy the 16-entry default colour map to/from userspace.
+  * @arg: user buffer of 48 bytes (16 x R,G,B)
+  * @set: nonzero to load the map from userspace and apply it to every
+  *       allocated console; zero to store the current map to userspace.
+  *
+  * Requires the console lock.  Returns 0 on success or -EFAULT if the
+  * user copy faults (previously get_user/put_user results were ignored
+  * and a faulting buffer silently reported success).
+  */
+ static int set_get_cmap(unsigned char __user *arg, int set)
+ {
+ 	int i, j, k;
+
+ 	WARN_CONSOLE_UNLOCKED();
+
+ 	for (i = 0; i < 16; i++)
+ 		if (set) {
+ 			if (get_user(default_red[i], arg++) ||
+ 			    get_user(default_grn[i], arg++) ||
+ 			    get_user(default_blu[i], arg++))
+ 				return -EFAULT;
+ 		} else {
+ 			if (put_user(default_red[i], arg++) ||
+ 			    put_user(default_grn[i], arg++) ||
+ 			    put_user(default_blu[i], arg++))
+ 				return -EFAULT;
+ 		}
+ 	if (set) {
+ 		/* propagate the new defaults to every live console */
+ 		for (i = 0; i < MAX_NR_CONSOLES; i++)
+ 			if (vc_cons_allocated(i)) {
+ 				for (j = k = 0; j < 16; j++) {
+ 					vc_cons[i].d->vc_palette[k++] = default_red[j];
+ 					vc_cons[i].d->vc_palette[k++] = default_grn[j];
+ 					vc_cons[i].d->vc_palette[k++] = default_blu[j];
+ 				}
+ 				set_palette(vc_cons[i].d);
+ 			}
+ 	}
+ 	return 0;
+ }
+
+ /*
+ * Load palette into the DAC registers. arg points to a colour
+ * map, 3 bytes per colour, 16 colours, range from 0 to 255.
+ */
+
+ /* Locked wrapper: load the colour map from userspace. */
+ int con_set_cmap(unsigned char __user *arg)
+ {
+ int rc;
+
+ acquire_console_sem();
+ rc = set_get_cmap (arg,1);
+ release_console_sem();
+
+ return rc;
+ }
+
+ /* Locked wrapper: store the colour map to userspace. */
+ int con_get_cmap(unsigned char __user *arg)
+ {
+ int rc;
+
+ acquire_console_sem();
+ rc = set_get_cmap (arg,0);
+ release_console_sem();
+
+ return rc;
+ }
+
+ /* Reset one console's palette to the default R,G,B tables and apply it. */
+ void reset_palette(struct vc_data *vc)
+ {
+ int j, k;
+ for (j=k=0; j<16; j++) {
+ vc->vc_palette[k++] = default_red[j];
+ vc->vc_palette[k++] = default_grn[j];
+ vc->vc_palette[k++] = default_blu[j];
+ }
+ set_palette(vc);
+ }
+
+ /*
+  * Font switching
+  *
+  * Currently we only support fonts up to 32 pixels wide, at a maximum height
+  * of 32 pixels. Userspace fontdata is stored with 32 bytes (shorts/ints,
+  * depending on width) reserved for each character, which is somewhat
+  * wasteful, but this is done in order to maintain compatibility with the
+  * EGA/VGA fonts. It is up to the actual low-level console driver to convert
+  * the data into its favorite format (maybe we should add a `fontoffset'
+  * field to the `display' structure so we won't have to convert the fontdata
+  * all the time).
+  * /Jes
+  */
+
+ #define max_font_size 65536
+
+ /*
+  * KD_FONT_OP_GET: fetch the current font from the driver into a kernel
+  * buffer, validate it against the caller-supplied limits, and copy it to
+  * op->data (if non-NULL).  The KD_FONT_FLAG_OLD path enforces the legacy
+  * 8-pixel-wide, max-32-high constraints.
+  */
+ static int con_font_get(struct vc_data *vc, struct console_font_op *op)
+ {
+ struct console_font font;
+ int rc = -EINVAL;
+ int c;
+
+ if (vc->vc_mode != KD_TEXT)
+ return -EINVAL;
+
+ if (op->data) {
+ font.data = kmalloc(max_font_size, GFP_KERNEL);
+ if (!font.data)
+ return -ENOMEM;
+ } else
+ font.data = NULL;
+
+ acquire_console_sem();
+ if (vc->vc_sw->con_font_get)
+ rc = vc->vc_sw->con_font_get(vc, &font);
+ else
+ rc = -ENOSYS;
+ release_console_sem();
+
+ if (rc)
+ goto out;
+
+ /* bytes of font data: 32 rows per glyph, width rounded up to bytes */
+ c = (font.width+7)/8 * 32 * font.charcount;
+
+ if (op->data && font.charcount > op->charcount)
+ rc = -ENOSPC;
+ if (!(op->flags & KD_FONT_FLAG_OLD)) {
+ if (font.width > op->width || font.height > op->height)
+ rc = -ENOSPC;
+ } else {
+ if (font.width != 8)
+ rc = -EIO;
+ else if ((op->height && font.height > op->height) ||
+ font.height > 32)
+ rc = -ENOSPC;
+ }
+ if (rc)
+ goto out;
+
+ op->height = font.height;
+ op->width = font.width;
+ op->charcount = font.charcount;
+
+ if (op->data && copy_to_user(op->data, font.data, c))
+ rc = -EFAULT;
+
+ out:
+ kfree(font.data);
+ return rc;
+ }
+
+ /*
+  * KD_FONT_OP_SET: validate the userspace font description, copy the
+  * glyph data into the kernel, and hand it to the driver's con_font_set.
+  * A zero op->height triggers the legacy height-guessing scan (only
+  * allowed via the old interface, KD_FONT_FLAG_OLD).
+  */
+ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
+ {
+ struct console_font font;
+ int rc = -EINVAL;
+ int size;
+
+ if (vc->vc_mode != KD_TEXT)
+ return -EINVAL;
+ if (!op->data)
+ return -EINVAL;
+ if (op->charcount > 512)
+ return -EINVAL;
+ if (!op->height) { /* Need to guess font height [compat] */
+ int h, i;
+ u8 __user *charmap = op->data;
+ u8 tmp;
+
+ /* If from KDFONTOP ioctl, don't allow things which can be done in userland,
+ so that we can get rid of this soon */
+ if (!(op->flags & KD_FONT_FLAG_OLD))
+ return -EINVAL;
+ /* height = highest row with a nonzero byte in any glyph */
+ for (h = 32; h > 0; h--)
+ for (i = 0; i < op->charcount; i++) {
+ if (get_user(tmp, &charmap[32*i+h-1]))
+ return -EFAULT;
+ if (tmp)
+ goto nonzero;
+ }
+ return -EINVAL;
+ nonzero:
+ op->height = h;
+ }
+ if (op->width <= 0 || op->width > 32 || op->height > 32)
+ return -EINVAL;
+ /* 32 rows reserved per glyph, width rounded up to whole bytes */
+ size = (op->width+7)/8 * 32 * op->charcount;
+ if (size > max_font_size)
+ return -ENOSPC;
+ font.charcount = op->charcount;
+ font.height = op->height;
+ font.width = op->width;
+ font.data = memdup_user(op->data, size);
+ if (IS_ERR(font.data))
+ return PTR_ERR(font.data);
+ acquire_console_sem();
+ if (vc->vc_sw->con_font_set)
+ rc = vc->vc_sw->con_font_set(vc, &font, op->flags);
+ else
+ rc = -ENOSYS;
+ release_console_sem();
+ kfree(font.data);
+ return rc;
+ }
+
+ /*
+  * KD_FONT_OP_SET_DEFAULT: restore the driver's default font, optionally
+  * selecting one by name copied from op->data.  On success the resulting
+  * dimensions are reported back through op->width/op->height.
+  */
+ static int con_font_default(struct vc_data *vc, struct console_font_op *op)
+ {
+ struct console_font font = {.width = op->width, .height = op->height};
+ char name[MAX_FONT_NAME];
+ char *s = name;
+ int rc;
+
+ if (vc->vc_mode != KD_TEXT)
+ return -EINVAL;
+
+ if (!op->data)
+ s = NULL;
+ else if (strncpy_from_user(name, op->data, MAX_FONT_NAME - 1) < 0)
+ return -EFAULT;
+ else
+ name[MAX_FONT_NAME - 1] = 0;
+
+ acquire_console_sem();
+ if (vc->vc_sw->con_font_default)
+ rc = vc->vc_sw->con_font_default(vc, &font, s);
+ else
+ rc = -ENOSYS;
+ release_console_sem();
+ if (!rc) {
+ op->width = font.width;
+ op->height = font.height;
+ }
+ return rc;
+ }
+
+ /*
+  * KD_FONT_OP_COPY: copy the font from another console (its number is
+  * passed in op->height) to this one via the driver's con_font_copy.
+  */
+ static int con_font_copy(struct vc_data *vc, struct console_font_op *op)
+ {
+ int con = op->height;
+ int rc;
+
+ if (vc->vc_mode != KD_TEXT)
+ return -EINVAL;
+
+ acquire_console_sem();
+ if (!vc->vc_sw->con_font_copy)
+ rc = -ENOSYS;
+ else if (con < 0 || !vc_cons_allocated(con))
+ rc = -ENOTTY;
+ else if (con == vc->vc_num) /* nothing to do */
+ rc = 0;
+ else
+ rc = vc->vc_sw->con_font_copy(vc, con);
+ release_console_sem();
+ return rc;
+ }
+
+ /* Dispatch a KDFONTOP request to the matching con_font_* handler. */
+ int con_font_op(struct vc_data *vc, struct console_font_op *op)
+ {
+ switch (op->op) {
+ case KD_FONT_OP_SET:
+ return con_font_set(vc, op);
+ case KD_FONT_OP_GET:
+ return con_font_get(vc, op);
+ case KD_FONT_OP_SET_DEFAULT:
+ return con_font_default(vc, op);
+ case KD_FONT_OP_COPY:
+ return con_font_copy(vc, op);
+ }
+ return -ENOSYS;
+ }
+
+ /*
+ * Interface exported to selection and vcs.
+ */
+
+ /* used by selection */
+ u16 screen_glyph(struct vc_data *vc, int offset)
+ {
+ u16 w = scr_readw(screenpos(vc, offset, 1));
+ u16 c = w & 0xff;
+
+ /* high-font bit extends the glyph index beyond 255 */
+ if (w & vc->vc_hi_font_mask)
+ c |= 0x100;
+ return c;
+ }
+ EXPORT_SYMBOL_GPL(screen_glyph);
+
+ /* used by vcs - note the word offset (converted to bytes here) */
+ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
+ {
+ return screenpos(vc, 2 * w_offset, viewed);
+ }
+
+ /* Store the cursor position as two bytes: p[0] = column, p[1] = row. */
+ void getconsxy(struct vc_data *vc, unsigned char *p)
+ {
+ p[0] = vc->vc_x;
+ p[1] = vc->vc_y;
+ }
+
+ /* Move the cursor to p[0], p[1] (column, row) and redraw it. */
+ void putconsxy(struct vc_data *vc, unsigned char *p)
+ {
+ hide_cursor(vc);
+ gotoxy(vc, p[0], p[1]);
+ set_cursor(vc);
+ }
+
+ /*
+  * Read a screen word for /dev/vcs, returning the character saved under
+  * the software cursor rather than the cursor glyph itself.
+  */
+ u16 vcs_scr_readw(struct vc_data *vc, const u16 *org)
+ {
+ if ((unsigned long)org == vc->vc_pos && softcursor_original != -1)
+ return softcursor_original;
+ return scr_readw(org);
+ }
+
+ /*
+  * Write a screen word for /dev/vcs; if it lands on the cursor position
+  * the software cursor is invalidated and redrawn over the new value.
+  */
+ void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org)
+ {
+ scr_writew(val, org);
+ if ((unsigned long)org == vc->vc_pos) {
+ softcursor_original = -1;
+ add_softcursor(vc);
+ }
+ }
+
+ /* Notify listeners that /dev/vcs modified the screen buffer. */
+ void vcs_scr_updated(struct vc_data *vc)
+ {
+ notify_update(vc);
+ }
+
++#ifdef CONFIG_BOOTSPLASH
++/*
++ * con_remap_def_color - change the console's default colour in place
++ * @vc: virtual console whose screen buffer is rewritten
++ * @new_color: new default attribute (foreground/background nibbles)
++ *
++ * Walks the screen buffer and, for each cell whose background or
++ * foreground nibble still matches the old default, flips it to the new
++ * default.  Finally updates vc_def_color/vc_color and recomputes the
++ * current attribute.
++ */
++void con_remap_def_color(struct vc_data *vc, int new_color)
++{
++	unsigned short *sbuf = screenpos(vc, 0, 1);
++	unsigned c, len = vc->vc_screenbuf_size >> 1;
++	int old_color;
++
++	if (sbuf) {
++		old_color = vc->vc_def_color << 8;
++		new_color <<= 8;
++		while (len--) {
++			c = scr_readw(sbuf);
++			/*
++			 * Write only through scr_writew() (the previous code
++			 * also XORed *sbuf directly afterwards, which both
++			 * bypassed the accessor and re-applied the mask,
++			 * undoing the update on normal memory).  c is updated
++			 * in place so the foreground test below sees the
++			 * background change.
++			 */
++			if (((c ^ old_color) & 0xf000) == 0)
++				scr_writew(c ^= (old_color ^ new_color) & 0xf000, sbuf);
++			if (((c ^ old_color) & 0x0f00) == 0)
++				scr_writew(c ^= (old_color ^ new_color) & 0x0f00, sbuf);
++			sbuf++;
++		}
++		new_color >>= 8;
++	}
++	vc->vc_def_color = vc->vc_color = new_color;
++	update_attr(vc);
++}
++#endif
++
+ /*
+ * Visible symbols for modules
+ */
+
+ EXPORT_SYMBOL(color_table);
+ EXPORT_SYMBOL(default_red);
+ EXPORT_SYMBOL(default_grn);
+ EXPORT_SYMBOL(default_blu);
+ EXPORT_SYMBOL(update_region);
+ EXPORT_SYMBOL(redraw_screen);
+ EXPORT_SYMBOL(vc_resize);
+ EXPORT_SYMBOL(fg_console);
+ EXPORT_SYMBOL(console_blank_hook);
+ EXPORT_SYMBOL(console_blanked);
+ EXPORT_SYMBOL(vc_cons);
+ EXPORT_SYMBOL(global_cursor_default);
+ #ifndef VT_SINGLE_DRIVER
+ EXPORT_SYMBOL(take_over_console);
+ EXPORT_SYMBOL(give_up_console);
+ #endif