UBUNTU: Ubuntu-2.6.38-12.51

[linux-flexiantxendom0-natty.git] / block / blk-core.c
diff --git a/block/blk-core.c b/block/blk-core.c

index 4b1b29e..518dd42 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
  
  #include "blk.h"
  
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
  
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
                 return;
  
         cpu = part_stat_lock();
-       part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
  
-       if (!new_io)
+       if (!new_io) {
+               part = rq->part;
                 part_stat_inc(cpu, part, merges[rw]);
-       else {
+       } else {
+               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+               if (!hd_struct_try_get(part)) {
+                       /*
+                        * The partition is already being removed, so
+                        * the request will be accounted on the disk only.
+                        *
+                        * We take a reference on disk->part0 although that
+                        * partition will never be deleted, so we can treat
+                        * it as any other partition.
+                        */
+                       part = &rq->rq_disk->part0;
+                       hd_struct_get(part);
+               }
                 part_round_stats(cpu, part);
                 part_inc_in_flight(part, rw);
+               rq->part = part;
         }
  
         part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
         rq->ref_count = 1;
         rq->start_time = jiffies;
         set_start_time_ns(rq);
+       rq->part = NULL;
  }
  EXPORT_SYMBOL(blk_rq_init);
  
@@ -136,7 +151,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
  {
         struct request_queue *q = rq->q;
  
-       if (&q->bar_rq != rq) {
+       if (&q->flush_rq != rq) {
                 if (error)
                         clear_bit(BIO_UPTODATE, &bio->bi_flags);
                 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
@@ -160,13 +175,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
                 if (bio->bi_size == 0)
                         bio_endio(bio, error);
         } else {
-
                 /*
-                * Okay, this is the barrier request in progress, just
-                * record the error;
+                * Okay, this is the sequenced flush request in
+                * progress, just record the error;
                  */
-               if (error && !q->orderr)
-                       q->orderr = error;
+               if (error && !q->flush_err)
+                       q->flush_err = error;
         }
  }
  
@@ -184,7 +198,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
         printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
                rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
  
-       if (blk_pc_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                 printk(KERN_INFO "  cdb: ");
                 for (bit = 0; bit < BLK_MAX_CDB; bit++)
                         printk("%02x ", rq->cmd[bit]);
@@ -338,7 +352,7 @@ void blk_start_queue(struct request_queue *q)
         WARN_ON(!irqs_disabled());
  
         queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-       __blk_run_queue(q);
+       __blk_run_queue(q, false);
  }
  EXPORT_SYMBOL(blk_start_queue);
  
@@ -382,19 +396,21 @@ void blk_sync_queue(struct request_queue *q)
         del_timer_sync(&q->unplug_timer);
         del_timer_sync(&q->timeout);
         cancel_work_sync(&q->unplug_work);
+       throtl_shutdown_timer_wq(q);
  }
  EXPORT_SYMBOL(blk_sync_queue);
  
  /**
   * __blk_run_queue - run a single device queue
   * @q: The queue to run
+ * @force_kblockd: Don't run @q->request_fn directly.  Use kblockd.
   *
   * Description:
   *    See @blk_run_queue. This variant must be called with the queue lock
   *    held and interrupts disabled.
   *
   */
-void __blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q, bool force_kblockd)
  {
         blk_remove_plug(q);
  
@@ -408,7 +424,7 @@ void __blk_run_queue(struct request_queue *q)
          * Only recurse once to avoid overrunning the stack, let the unplug
          * handling reinvoke the handler shortly if we already got there.
          */
-       if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+       if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                 q->request_fn(q);
                 queue_flag_clear(QUEUE_FLAG_REENTER, q);
         } else {
@@ -431,7 +447,7 @@ void blk_run_queue(struct request_queue *q)
         unsigned long flags;
  
         spin_lock_irqsave(q->queue_lock, flags);
-       __blk_run_queue(q);
+       __blk_run_queue(q, false);
         spin_unlock_irqrestore(q->queue_lock, flags);
  }
  EXPORT_SYMBOL(blk_run_queue);
@@ -467,6 +483,9 @@ static int blk_init_free_list(struct request_queue *q)
  {
         struct request_list *rl = &q->rq;
  
+       if (unlikely(rl->rq_pool))
+               return 0;
+
         rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
         rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
         rl->elvpriv = 0;
@@ -512,11 +531,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
                 return NULL;
         }
  
+       if (blk_throtl_init(q)) {
+               kmem_cache_free(blk_requestq_cachep, q);
+               return NULL;
+       }
+
         setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                     laptop_mode_timer_fn, (unsigned long) q);
         init_timer(&q->unplug_timer);
         setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
         INIT_LIST_HEAD(&q->timeout_list);
+       INIT_LIST_HEAD(&q->pending_flushes);
         INIT_WORK(&q->unplug_work, blk_unplug_work);
  
         kobject_init(&q->kobj, &blk_queue_ktype);
@@ -570,19 +595,42 @@ EXPORT_SYMBOL(blk_init_queue);
  struct request_queue *
  blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
  {
-       struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+       struct request_queue *uninit_q, *q;
+
+       uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+       if (!uninit_q)
+               return NULL;
+
+       q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+       if (!q)
+               blk_cleanup_queue(uninit_q);
+
+       return q;
+}
+EXPORT_SYMBOL(blk_init_queue_node);
+
+struct request_queue *
+blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
+                        spinlock_t *lock)
+{
+       return blk_init_allocated_queue_node(q, rfn, lock, -1);
+}
+EXPORT_SYMBOL(blk_init_allocated_queue);
  
+struct request_queue *
+blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
+                             spinlock_t *lock, int node_id)
+{
         if (!q)
                 return NULL;
  
         q->node = node_id;
-       if (blk_init_free_list(q)) {
-               kmem_cache_free(blk_requestq_cachep, q);
+       if (blk_init_free_list(q))
                 return NULL;
-       }
  
         q->request_fn           = rfn;
         q->prep_rq_fn           = NULL;
+       q->unprep_rq_fn         = NULL;
         q->unplug_fn            = generic_unplug_device;
         q->queue_flags          = QUEUE_FLAG_DEFAULT;
         q->queue_lock           = lock;
@@ -602,10 +650,9 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
                 return q;
         }
  
-       blk_put_queue(q);
         return NULL;
  }
-EXPORT_SYMBOL(blk_init_queue_node);
+EXPORT_SYMBOL(blk_init_allocated_queue_node);
  
  int blk_get_queue(struct request_queue *q)
  {
@@ -1007,27 +1054,11 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
  
         drive_stat_acct(rq, 1);
         __elv_add_request(q, rq, where, 0);
-       __blk_run_queue(q);
+       __blk_run_queue(q, false);
         spin_unlock_irqrestore(q->queue_lock, flags);
  }
  EXPORT_SYMBOL(blk_insert_request);
  
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
-       drive_stat_acct(req, 1);
-
-       /*
-        * elevator indicated where it wants this request to be
-        * inserted at elevator_merge time
-        */
-       __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
  static void part_round_stats_single(int cpu, struct hd_struct *part,
                                     unsigned long now)
  {
@@ -1111,33 +1142,46 @@ void blk_put_request(struct request *req)
  }
  EXPORT_SYMBOL(blk_put_request);
  
+/**
+ * blk_add_request_payload - add a payload to a request
+ * @rq: request to update; rq->bio is dereferenced without a NULL check
+ * @page: page backing the payload
+ * @len: length of the payload in bytes
+ *
+ * This allows a block driver to attach a single-segment payload to a request
+ * that has already been submitted.  The driver needs to take care of freeing
+ * the payload itself.
+ *
+ * Note that this is a quite horrible hack and nothing but handling of
+ * discard requests should ever use it.
+ */
+void blk_add_request_payload(struct request *rq, struct page *page,
+               unsigned int len)
+{
+       struct bio *bio = rq->bio;
+
+       bio->bi_io_vec->bv_page = page;
+       bio->bi_io_vec->bv_offset = 0;
+       bio->bi_io_vec->bv_len = len;
+
+       bio->bi_size = len;
+       bio->bi_vcnt = 1;
+       bio->bi_phys_segments = 1;
+
+       rq->__data_len = rq->resid_len = len;
+       rq->nr_phys_segments = 1;
+       rq->buffer = bio_data(bio);
+}
+EXPORT_SYMBOL_GPL(blk_add_request_payload);
+
  void init_request_from_bio(struct request *req, struct bio *bio)
  {
         req->cpu = bio->bi_comp_cpu;
         req->cmd_type = REQ_TYPE_FS;
  
-       /*
-        * Inherit FAILFAST from bio (for read-ahead, and explicit
-        * FAILFAST).  FAILFAST flags are identical for req and bio.
-        */
-       if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+       req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
+       if (bio->bi_rw & REQ_RAHEAD)
                 req->cmd_flags |= REQ_FAILFAST_MASK;
-       else
-               req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
-
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
-               req->cmd_flags |= REQ_DISCARD;
-               if (bio_rw_flagged(bio, BIO_RW_BARRIER))
-                       req->cmd_flags |= REQ_SOFTBARRIER;
-       } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
-               req->cmd_flags |= REQ_HARDBARRIER;
-
-       if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
-               req->cmd_flags |= REQ_RW_SYNC;
-       if (bio_rw_flagged(bio, BIO_RW_META))
-               req->cmd_flags |= REQ_RW_META;
-       if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
-               req->cmd_flags |= REQ_NOIDLE;
  
         req->errors = 0;
         req->__sector = bio->bi_sector;
@@ -1160,16 +1204,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
         int el_ret;
         unsigned int bytes = bio->bi_size;
         const unsigned short prio = bio_prio(bio);
-       const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
-       const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
-       const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+       const bool sync = !!(bio->bi_rw & REQ_SYNC);
+       const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
+       const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
+       int where = ELEVATOR_INSERT_SORT;
         int rw_flags;
  
-       if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
-           (q->next_ordered == QUEUE_ORDERED_NONE)) {
-               bio_endio(bio, -EOPNOTSUPP);
-               return 0;
-       }
         /*
          * low level driver can indicate that it wants pages above a
          * certain limit bounced to low memory (ie for highmem, or even
@@ -1179,7 +1219,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
  
         spin_lock_irq(q->queue_lock);
  
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
+       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+               where = ELEVATOR_INSERT_FRONT;
+               goto get_rq;
+       }
+
+       if (elv_queue_empty(q))
                 goto get_rq;
  
         el_ret = elv_merge(q, &req, bio);
@@ -1202,6 +1247,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                 if (!blk_rq_cpu_valid(req))
                         req->cpu = bio->bi_comp_cpu;
                 drive_stat_acct(req, 0);
+               elv_bio_merged(q, req, bio);
                 if (!attempt_back_merge(q, req))
                         elv_merged_request(q, req, el_ret);
                 goto out;
@@ -1235,6 +1281,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                 if (!blk_rq_cpu_valid(req))
                         req->cpu = bio->bi_comp_cpu;
                 drive_stat_acct(req, 0);
+               elv_bio_merged(q, req, bio);
                 if (!attempt_front_merge(q, req))
                         elv_merged_request(q, req, el_ret);
                 goto out;
@@ -1252,7 +1299,7 @@ get_rq:
          */
         rw_flags = bio_data_dir(bio);
         if (sync)
-               rw_flags |= REQ_RW_SYNC;
+               rw_flags |= REQ_SYNC;
  
         /*
          * Grab a free request. This is might sleep but can not fail.
@@ -1274,7 +1321,10 @@ get_rq:
                 req->cpu = blk_cpu_to_group(smp_processor_id());
         if (queue_should_plug(q) && elv_queue_empty(q))
                 blk_plug_device(q);
-       add_request(q, req);
+
+       /* insert the request into the elevator */
+       drive_stat_acct(req, 1);
+       __elv_add_request(q, req, where, 0);
  out:
         if (unplug || !queue_should_plug(q))
                 __generic_unplug_device(q);
@@ -1295,9 +1345,9 @@ static inline void blk_partition_remap(struct bio *bio)
                 bio->bi_sector += p->start_sect;
                 bio->bi_bdev = bdev->bd_contains;
  
-               trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-                                   bdev->bd_dev,
-                                   bio->bi_sector - p->start_sect);
+               trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+                                     bdev->bd_dev,
+                                     bio->bi_sector - p->start_sect);
         }
  }
  
@@ -1310,7 +1360,7 @@ static void handle_bad_sector(struct bio *bio)
                         bdevname(bio->bi_bdev, b),
                         bio->bi_rw,
                         (unsigned long long)bio->bi_sector + bio_sectors(bio),
-                       (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
+                       (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
  
         set_bit(BIO_EOF, &bio->bi_flags);
  }
@@ -1363,7 +1413,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
                 return 0;
  
         /* Test device or partition size, when known. */
-       maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+       maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
         if (maxsector) {
                 sector_t sector = bio->bi_sector;
  
@@ -1441,7 +1491,7 @@ static inline void __generic_make_request(struct bio *bio)
                         goto end_io;
                 }
  
-               if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+               if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
                              nr_sectors > queue_max_hw_sectors(q))) {
                         printk(KERN_ERR "bio too big device %s (%u > %u)\n",
                                bdevname(bio->bi_bdev, b),
@@ -1466,7 +1516,7 @@ static inline void __generic_make_request(struct bio *bio)
                         goto end_io;
  
                 if (old_sector != -1)
-                       trace_block_remap(q, bio, old_dev, old_sector);
+                       trace_block_bio_remap(q, bio, old_dev, old_sector);
  
                 old_sector = bio->bi_sector;
                 old_dev = bio->bi_bdev->bd_dev;
@@ -1474,12 +1524,36 @@ static inline void __generic_make_request(struct bio *bio)
                 if (bio_check_eod(bio, nr_sectors))
                         goto end_io;
  
-               if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-                   !blk_queue_discard(q)) {
+               /*
+                * Filter flush bio's early so that make_request based
+                * drivers without flush support don't have to worry
+                * about them.
+                */
+               if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+                       bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+                       if (!nr_sectors) {
+                               err = 0;
+                               goto end_io;
+                       }
+               }
+
+               if ((bio->bi_rw & REQ_DISCARD) &&
+                   (!blk_queue_discard(q) ||
+                    ((bio->bi_rw & REQ_SECURE) &&
+                     !blk_queue_secdiscard(q)))) {
                         err = -EOPNOTSUPP;
                         goto end_io;
                 }
  
+               blk_throtl_bio(q, &bio);
+
+               /*
+                * If bio = NULL, bio has been throttled and will be submitted
+                * later.
+                */
+               if (!bio)
+                       break;
+
                 trace_block_bio_queue(q, bio);
  
                 ret = q->make_request_fn(q, bio);
@@ -1560,7 +1634,7 @@ void submit_bio(int rw, struct bio *bio)
          * If it's a regular read/write or a barrier with data attached,
          * go through the normal accounting stuff before submission.
          */
-       if (bio_has_data(bio)) {
+       if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
                 if (rw & WRITE) {
                         count_vm_events(PGPGOUT, count);
                 } else {
@@ -1570,11 +1644,12 @@ void submit_bio(int rw, struct bio *bio)
  
                 if (unlikely(block_dump)) {
                         char b[BDEVNAME_SIZE];
-                       printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+                       printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
                         current->comm, task_pid_nr(current),
                                 (rw & WRITE) ? "WRITE" : "READ",
                                 (unsigned long long)bio->bi_sector,
-                               bdevname(bio->bi_bdev, b));
+                               bdevname(bio->bi_bdev, b),
+                               count);
                 }
         }
  
@@ -1595,7 +1670,7 @@ EXPORT_SYMBOL(submit_bio);
   *    the insertion using this generic function.
   *
   *    This function should also be useful for request stacking drivers
- *    in some cases below, so export this fuction.
+ *    in some cases below, so export this function.
   *    Request stacking drivers like request-based dm may change the queue
   *    limits while requests are in the queue (e.g. dm's table swapping).
   *    Such request stacking drivers should check those requests agaist
@@ -1605,6 +1680,9 @@ EXPORT_SYMBOL(submit_bio);
   */
  int blk_rq_check_limits(struct request_queue *q, struct request *rq)
  {
+       if (rq->cmd_flags & REQ_DISCARD)
+               return 0;
+
         if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
             blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
                 printk(KERN_ERR "%s: over max size limit.\n", __func__);
@@ -1714,7 +1792,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
                 int cpu;
  
                 cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+               part = req->part;
                 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
                 part_stat_unlock();
         }
@@ -1723,24 +1801,25 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
  static void blk_account_io_done(struct request *req)
  {
         /*
-        * Account IO completion.  bar_rq isn't accounted as a normal
-        * IO on queueing nor completion.  Accounting the containing
-        * request is enough.
+        * Account IO completion.  flush_rq isn't accounted as a
+        * normal IO on queueing nor completion.  Accounting the
+        * containing request is enough.
          */
-       if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
+       if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
                 unsigned long duration = jiffies - req->start_time;
                 const int rw = rq_data_dir(req);
                 struct hd_struct *part;
                 int cpu;
  
                 cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+               part = req->part;
  
                 part_stat_inc(cpu, part, ios[rw]);
                 part_stat_add(cpu, part, ticks[rw], duration);
                 part_round_stats(cpu, part);
                 part_dec_in_flight(part, rw);
  
+               hd_struct_put(part);
                 part_stat_unlock();
         }
  }
@@ -1773,7 +1852,7 @@ struct request *blk_peek_request(struct request_queue *q)
                          * sees this request (possibly after
                          * requeueing).  Notify IO scheduler.
                          */
-                       if (blk_sorted_rq(rq))
+                       if (rq->cmd_flags & REQ_SORTED)
                                 elv_activate_rq(q, rq);
  
                         /*
@@ -1961,10 +2040,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
          * TODO: tj: This is too subtle.  It would be better to let
          * low level drivers do what they see fit.
          */
-       if (blk_fs_request(req))
+       if (req->cmd_type == REQ_TYPE_FS)
                 req->errors = 0;
  
-       if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
+       if (error && req->cmd_type == REQ_TYPE_FS &&
+           !(req->cmd_flags & REQ_QUIET)) {
                 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
                                 req->rq_disk ? req->rq_disk->disk_name : "?",
                                 (unsigned long long)blk_rq_pos(req));
@@ -2051,7 +2131,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
         req->buffer = bio_data(req->bio);
  
         /* update sector only for requests with clear definition of sector */
-       if (blk_fs_request(req) || blk_discard_rq(req))
+       if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
                 req->__sector += total_bytes >> 9;
  
         /* mixed attributes always follow the first bio */
@@ -2088,11 +2168,32 @@ static bool blk_update_bidi_request(struct request *rq, int error,
             blk_update_request(rq->next_rq, error, bidi_bytes))
                 return true;
  
-       add_disk_randomness(rq->rq_disk);
+       if (blk_queue_add_random(rq->q))
+               add_disk_randomness(rq->rq_disk);
  
         return false;
  }
  
+/**
+ * blk_unprep_request - unprepare a request
+ * @req:       the request; REQ_DONTPREP is cleared from its cmd_flags
+ *
+ * This function makes a request ready for complete resubmission (or
+ * completion).  It happens only after all error handling is complete,
+ * so it is the appropriate moment to deallocate any resources that were
+ * allocated to the request in the prep_rq_fn; the queue's unprep_rq_fn,
+ * if set, is called to do so.  The queue lock is held when calling this.
+ */
+void blk_unprep_request(struct request *req)
+{
+       struct request_queue *q = req->q;
+
+       req->cmd_flags &= ~REQ_DONTPREP;
+       if (q->unprep_rq_fn)
+               q->unprep_rq_fn(q, req);
+}
+EXPORT_SYMBOL_GPL(blk_unprep_request);
+
  /*
   * queue lock must be held
   */
@@ -2103,11 +2204,15 @@ static void blk_finish_request(struct request *req, int error)
  
         BUG_ON(blk_queued_rq(req));
  
-       if (unlikely(laptop_mode) && blk_fs_request(req))
+       if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
                 laptop_io_completion(&req->q->backing_dev_info);
  
         blk_delete_timer(req);
  
+       if (req->cmd_flags & REQ_DONTPREP)
+               blk_unprep_request(req);
+
+
         blk_account_io_done(req);
  
         if (req->end_io)
@@ -2340,7 +2445,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                      struct bio *bio)
  {
         /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
-       rq->cmd_flags |= bio->bi_rw & REQ_RW;
+       rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
  
         if (bio_has_data(bio)) {
                 rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -2426,7 +2531,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
  static void __blk_rq_prep_clone(struct request *dst, struct request *src)
  {
         dst->cpu = src->cpu;
-       dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+       dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
         dst->cmd_type = src->cmd_type;
         dst->__sector = blk_rq_pos(src);
         dst->__data_len = blk_rq_bytes(src);
@@ -2511,7 +2616,9 @@ int __init blk_dev_init(void)
         BUILD_BUG_ON(__REQ_NR_BITS > 8 *
                         sizeof(((struct request *)0)->cmd_flags));
  
-       kblockd_workqueue = create_workqueue("kblockd");
+       /* used for unplugging and affects IO latency/throughput - HIGHPRI */
+       kblockd_workqueue = alloc_workqueue("kblockd",
+                                           WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
         if (!kblockd_workqueue)
                 panic("Failed to create kblockd\n");