Update to 3.4-final.
diff --git a/block/blk-core.c b/block/blk-core.c
index b5ed4f4..1f61b74 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
 void blk_drain_queue(struct request_queue *q, bool drain_all)
 {
        while (true) {
-               int nr_rqs;
+               bool drain = false;
+               int i;
 
                spin_lock_irq(q->queue_lock);
 
@@ -366,16 +369,34 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
                if (drain_all)
                        blk_throtl_drain(q);
 
-               __blk_run_queue(q);
+               /*
+                * This function might be called on a queue which failed
+                * driver init after queue creation.  Some drivers
+                * (e.g. fd) get unhappy in such cases.  Kick queue iff
+                * dispatch queue has something on it.
+                */
+               if (!list_empty(&q->queue_head))
+                       __blk_run_queue(q);
 
-               if (drain_all)
-                       nr_rqs = q->rq.count[0] + q->rq.count[1];
-               else
-                       nr_rqs = q->rq.elvpriv;
+               drain |= q->rq.elvpriv;
+
+               /*
+                * Unfortunately, requests are queued at and tracked from
+                * multiple places and there's no single counter which can
+                * be drained.  Check all the queues and counters.
+                */
+               if (drain_all) {
+                       drain |= !list_empty(&q->queue_head);
+                       for (i = 0; i < 2; i++) {
+                               drain |= q->rq.count[i];
+                               drain |= q->in_flight[i];
+                               drain |= !list_empty(&q->flush_queue[i]);
+                       }
+               }
 
                spin_unlock_irq(q->queue_lock);
 
-               if (!nr_rqs)
+               if (!drain)
                        break;
                msleep(10);
        }
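
A minimal userspace sketch of the polling pattern the reworked drain loop uses, with a pthread mutex and plain counters standing in for the kernel's request_queue and queue_lock (an illustration of the loop's shape, not the kernel code itself): every source of pending work is OR-ed into a single flag under the lock, and the caller sleeps and retries until the flag stays false.

#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

struct mock_queue {
        pthread_mutex_t lock;
        int elvpriv;            /* stands in for q->rq.elvpriv */
        int rq_count[2];        /* stands in for q->rq.count[] */
        int in_flight[2];       /* stands in for q->in_flight[] */
        int queued;             /* stands in for !list_empty(&q->queue_head) */
};

static void drain_queue(struct mock_queue *q, bool drain_all)
{
        while (true) {
                bool drain = false;
                int i;

                pthread_mutex_lock(&q->lock);

                /* kick the queue only if something is actually queued */
                if (q->queued) {
                        /* __blk_run_queue(q) would go here */
                }

                drain |= q->elvpriv;
                if (drain_all) {
                        /* the real code also checks the flush_queue lists */
                        drain |= q->queued;
                        for (i = 0; i < 2; i++) {
                                drain |= q->rq_count[i];
                                drain |= q->in_flight[i];
                        }
                }

                pthread_mutex_unlock(&q->lock);

                if (!drain)
                        break;
                usleep(10 * 1000);      /* roughly msleep(10) */
        }
}

int main(void)
{
        struct mock_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };
        drain_queue(&q, true);          /* returns at once: nothing pending */
        return 0;
}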
@@ -462,27 +483,29 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (!q)
                return NULL;
 
+       q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
+       if (q->id < 0)
+               goto fail_q;
+
        q->backing_dev_info.ra_pages =
                        (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
        q->backing_dev_info.state = 0;
        q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
        q->backing_dev_info.name = "block";
+       q->node = node_id;
 
        err = bdi_init(&q->backing_dev_info);
-       if (err) {
-               kmem_cache_free(blk_requestq_cachep, q);
-               return NULL;
-       }
+       if (err)
+               goto fail_id;
 
-       if (blk_throtl_init(q)) {
-               kmem_cache_free(blk_requestq_cachep, q);
-               return NULL;
-       }
+       if (blk_throtl_init(q))
+               goto fail_id;
 
        setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
        INIT_LIST_HEAD(&q->timeout_list);
+       INIT_LIST_HEAD(&q->icq_list);
        INIT_LIST_HEAD(&q->flush_queue[0]);
        INIT_LIST_HEAD(&q->flush_queue[1]);
        INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -500,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        q->queue_lock = &q->__queue_lock;
 
        return q;
+
+fail_id:
+       ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+       kmem_cache_free(blk_requestq_cachep, q);
+       return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
@@ -551,7 +580,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
        if (!uninit_q)
                return NULL;
 
-       q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+       q = blk_init_allocated_queue(uninit_q, rfn, lock);
        if (!q)
                blk_cleanup_queue(uninit_q);
 
@@ -563,18 +592,9 @@ struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                         spinlock_t *lock)
 {
-       return blk_init_allocated_queue_node(q, rfn, lock, -1);
-}
-EXPORT_SYMBOL(blk_init_allocated_queue);
-
-struct request_queue *
-blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
-                             spinlock_t *lock, int node_id)
-{
        if (!q)
                return NULL;
 
-       q->node = node_id;
        if (blk_init_free_list(q))
                return NULL;
 
@@ -604,28 +624,33 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 
        return NULL;
 }
-EXPORT_SYMBOL(blk_init_allocated_queue_node);
+EXPORT_SYMBOL(blk_init_allocated_queue);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
        if (likely(!blk_queue_dead(q))) {
-               kobject_get(&q->kobj);
-               return 0;
+               __blk_get_queue(q);
+               return true;
        }
 
-       return 1;
+       return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-       if (rq->cmd_flags & REQ_ELVPRIV)
+       if (rq->cmd_flags & REQ_ELVPRIV) {
                elv_put_request(q, rq);
+               if (rq->elv.icq)
+                       put_io_context(rq->elv.icq->ioc);
+       }
+
        mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+                 unsigned int flags, gfp_t gfp_mask)
 {
        struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -636,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
        rq->cmd_flags = flags | REQ_ALLOCED;
 
-       if ((flags & REQ_ELVPRIV) &&
-           unlikely(elv_set_request(q, rq, gfp_mask))) {
-               mempool_free(rq, q->rq.rq_pool);
-               return NULL;
+       if (flags & REQ_ELVPRIV) {
+               rq->elv.icq = icq;
+               if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+                       mempool_free(rq, q->rq.rq_pool);
+                       return NULL;
+               }
+               /* @rq->elv.icq holds on to io_context until @rq is freed */
+               if (icq)
+                       get_io_context(icq->ioc);
        }
 
        return rq;
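
The new icq handling pins the io_context for as long as a request carries an icq: get_io_context() when the request is set up, put_io_context() when it is freed. A rough sketch of that pairing, using simplified stand-in types rather than the kernel's io_cq/io_context/request:

#include <stdatomic.h>
#include <stdlib.h>

struct mock_ioc { atomic_int ref; };
struct mock_icq { struct mock_ioc *ioc; };
struct mock_rq  { struct mock_icq *icq; };

static void ioc_get(struct mock_ioc *ioc) { atomic_fetch_add(&ioc->ref, 1); }
static void ioc_put(struct mock_ioc *ioc)
{
        if (atomic_fetch_sub(&ioc->ref, 1) == 1)
                free(ioc);              /* last reference dropped */
}

/* mirrors the shape of blk_alloc_request(): associate icq, pin its ioc */
static struct mock_rq *rq_alloc(struct mock_icq *icq)
{
        struct mock_rq *rq = malloc(sizeof(*rq));
        if (!rq)
                return NULL;
        rq->icq = icq;
        if (icq)
                ioc_get(icq->ioc);      /* held until the request is freed */
        return rq;
}

/* mirrors the shape of blk_free_request(): drop the pinned reference */
static void rq_free(struct mock_rq *rq)
{
        if (rq->icq)
                ioc_put(rq->icq->ioc);
        free(rq);
}

int main(void)
{
        struct mock_ioc *ioc = malloc(sizeof(*ioc));
        if (!ioc)
                return 1;
        atomic_init(&ioc->ref, 1);      /* caller's own reference */

        struct mock_icq icq = { .ioc = ioc };
        struct mock_rq *rq = rq_alloc(&icq);
        rq_free(rq);                    /* ioc survives: caller still holds a ref */
        ioc_put(ioc);                   /* last ref: frees the mock io_context */
        return 0;
}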
@@ -751,9 +781,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
        struct request *rq = NULL;
        struct request_list *rl = &q->rq;
-       struct io_context *ioc = NULL;
+       struct elevator_type *et;
+       struct io_context *ioc;
+       struct io_cq *icq = NULL;
        const bool is_sync = rw_is_sync(rw_flags) != 0;
+       bool retried = false;
        int may_queue;
+retry:
+       et = q->elevator->type;
+       ioc = current->io_context;
 
        if (unlikely(blk_queue_dead(q)))
                return NULL;
@@ -764,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
        if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
                if (rl->count[is_sync]+1 >= q->nr_requests) {
-                       ioc = current_io_context(GFP_ATOMIC, q->node);
+                       /*
+                        * We want ioc to record batching state.  If it's
+                        * not already there, creating a new one requires
+                        * dropping queue_lock, which in turn requires
+                        * retesting conditions to avoid queue hang.
+                        */
+                       if (!ioc && !retried) {
+                               spin_unlock_irq(q->queue_lock);
+                               create_io_context(current, gfp_mask, q->node);
+                               spin_lock_irq(q->queue_lock);
+                               retried = true;
+                               goto retry;
+                       }
+
                        /*
                         * The queue will fill after this allocation, so set
                         * it as full, and mark this process as "batching".
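
The retry above exists because creating an io_context can sleep, which forces queue_lock to be dropped and every condition to be re-tested afterwards. A compact userspace sketch of that unlock/allocate/relock/recheck pattern, with mocked types and a pthread mutex standing in for queue_lock:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct mock_ioc { int batching; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mock_ioc *current_ioc;    /* stands in for current->io_context */
static int nr_requests;
static const int q_limit = 128;

/* may sleep, so it must never be called with queue_lock held */
static void create_ioc(void)
{
        if (!current_ioc)
                current_ioc = calloc(1, sizeof(*current_ioc));
}

static void get_request_slot(void)
{
        bool retried = false;

        pthread_mutex_lock(&queue_lock);
retry:
        if (nr_requests + 1 >= q_limit) {
                if (!current_ioc && !retried) {
                        /*
                         * The allocation can sleep: drop the lock, create,
                         * relock, then re-run the checks from scratch --
                         * the queue state may have changed in between.
                         */
                        pthread_mutex_unlock(&queue_lock);
                        create_ioc();
                        pthread_mutex_lock(&queue_lock);
                        retried = true;
                        goto retry;
                }
                if (current_ioc)
                        current_ioc->batching = 1;      /* mark "batching" */
        }
        nr_requests++;                  /* take the slot under the lock */
        pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
        get_request_slot();
        return 0;
}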
@@ -800,17 +849,38 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        rl->count[is_sync]++;
        rl->starved[is_sync] = 0;
 
+       /*
+        * Decide whether the new request will be managed by elevator.  If
+        * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+        * prevent the current elevator from being destroyed until the new
+        * request is freed.  This guarantees icq's won't be destroyed and
+        * makes creating new ones safe.
+        *
+        * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+        * it will be created after releasing queue_lock.
+        */
        if (blk_rq_should_init_elevator(bio) &&
            !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
                rw_flags |= REQ_ELVPRIV;
                rl->elvpriv++;
+               if (et->icq_cache && ioc)
+                       icq = ioc_lookup_icq(ioc, q);
        }
 
        if (blk_queue_io_stat(q))
                rw_flags |= REQ_IO_STAT;
        spin_unlock_irq(q->queue_lock);
 
-       rq = blk_alloc_request(q, rw_flags, gfp_mask);
+       /* create icq if missing */
+       if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
+               icq = ioc_create_icq(q, gfp_mask);
+               if (!icq)
+                       goto fail_icq;
+       }
+
+       rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
+fail_icq:
        if (unlikely(!rq)) {
                /*
                 * Allocation failed presumably due to memory. Undo anything
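
The icq is looked up while queue_lock is held but only created after the lock is dropped, since the allocation may sleep. The sketch below models that split with stand-in types; the double-checked install in icq_create() loosely mirrors how a racing creator is expected to fall back to an icq somebody else installed (the exact race handling lives in ioc_create_icq(), outside this diff).

#include <pthread.h>
#include <stdlib.h>

struct mock_icq { int dummy; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mock_icq *cached_icq;     /* stands in for the ioc->q icq lookup */

static struct mock_icq *icq_lookup(void)        /* called with queue_lock held */
{
        return cached_icq;
}

static struct mock_icq *icq_create(void)        /* called without queue_lock */
{
        struct mock_icq *new_icq = calloc(1, sizeof(*new_icq));
        struct mock_icq *ret;

        if (!new_icq)
                return NULL;

        pthread_mutex_lock(&queue_lock);
        if (!cached_icq)
                cached_icq = new_icq;   /* we won the race: install ours */
        else
                free(new_icq);          /* someone beat us: use theirs */
        ret = cached_icq;
        pthread_mutex_unlock(&queue_lock);
        return ret;
}

static struct mock_icq *get_icq(void)
{
        struct mock_icq *icq;

        pthread_mutex_lock(&queue_lock);
        icq = icq_lookup();             /* cheap lookup under the lock */
        pthread_mutex_unlock(&queue_lock);

        if (!icq)
                icq = icq_create();     /* sleeping allocation outside it */
        return icq;
}

int main(void)
{
        return get_icq() ? 0 : 1;
}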
@@ -872,7 +942,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
        rq = get_request(q, rw_flags, bio, GFP_NOIO);
        while (!rq) {
                DEFINE_WAIT(wait);
-               struct io_context *ioc;
                struct request_list *rl = &q->rq;
 
                if (unlikely(blk_queue_dead(q)))
@@ -892,8 +961,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
                 * up to a big batch of them for a small period of time.
                 * See ioc_batching, ioc_set_batching
                 */
-               ioc = current_io_context(GFP_NOIO, q->node);
-               ioc_set_batching(q, ioc);
+               create_io_context(current, GFP_NOIO, q->node);
+               ioc_set_batching(q, current->io_context);
 
                spin_lock_irq(q->queue_lock);
                finish_wait(&rl->wait[is_sync], &wait);
@@ -1143,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
        req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
        drive_stat_acct(req, 0);
-       elv_bio_merged(q, req, bio);
        return true;
 }
 
@@ -1174,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
        req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
        drive_stat_acct(req, 0);
-       elv_bio_merged(q, req, bio);
        return true;
 }
 
@@ -1188,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * on %current's plugged list.  Returns %true if merge was successful,
  * otherwise %false.
  *
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock.  Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock.  As such it's more of an issuing mechanism
+ * than scheduling, and the request, while it may have elvpriv data, is not
+ * added on the elevator at this point.  In addition, we don't have
+ * reliable access to the elevator outside queue lock.  Only check basic
+ * merging parameters without querying the elevator.
  */
 static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
                               unsigned int *request_count)
@@ -1211,12 +1277,13 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
        list_for_each_entry_reverse(rq, &plug->list, queuelist) {
                int el_ret;
 
-               (*request_count)++;
+               if (rq->q == q)
+                       (*request_count)++;
 
-               if (rq->q != q)
+               if (rq->q != q || !blk_rq_merge_ok(rq, bio))
                        continue;
 
-               el_ret = elv_try_merge(rq, bio);
+               el_ret = blk_try_merge(rq, bio);
                if (el_ret == ELEVATOR_BACK_MERGE) {
                        ret = bio_attempt_back_merge(q, rq, bio);
                        if (ret)
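
Per the rewritten comment, plug-list merging deliberately avoids the elevator: it walks the task-private list, counts only requests for the same queue, and tests plain merge compatibility. A simplified stand-in sketch of that rule (sector contiguity only, no scheduler callbacks, types are not the kernel's):

#include <stdbool.h>
#include <stddef.h>

struct mock_rq {
        const void *q;                  /* which queue the request belongs to */
        unsigned long sector;
        unsigned long nr_sectors;
        struct mock_rq *next;
};

struct mock_bio {
        unsigned long sector;
        unsigned long nr_sectors;
};

static bool try_plug_merge(struct mock_rq *plug_list, const void *q,
                           struct mock_bio *bio, unsigned int *request_count)
{
        struct mock_rq *rq;

        *request_count = 0;
        for (rq = plug_list; rq; rq = rq->next) {
                if (rq->q != q)
                        continue;       /* only count requests for this queue */
                (*request_count)++;

                /* back merge: bio starts right where rq ends */
                if (rq->sector + rq->nr_sectors == bio->sector) {
                        rq->nr_sectors += bio->nr_sectors;
                        return true;
                }
                /* front merge: bio ends right where rq starts */
                if (bio->sector + bio->nr_sectors == rq->sector) {
                        rq->sector = bio->sector;
                        rq->nr_sectors += bio->nr_sectors;
                        return true;
                }
        }
        return false;
}

int main(void)
{
        int queue_id;                   /* identity of a queue */
        struct mock_rq rq = { .q = &queue_id, .sector = 0, .nr_sectors = 8 };
        struct mock_bio bio = { .sector = 8, .nr_sectors = 8 };
        unsigned int count;

        return try_plug_merge(&rq, &queue_id, &bio, &count) ? 0 : 1;
}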
@@ -1278,12 +1345,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
        el_ret = elv_merge(q, &req, bio);
        if (el_ret == ELEVATOR_BACK_MERGE) {
                if (bio_attempt_back_merge(q, req, bio)) {
+                       elv_bio_merged(q, req, bio);
                        if (!attempt_back_merge(q, req))
                                elv_merged_request(q, req, el_ret);
                        goto out_unlock;
                }
        } else if (el_ret == ELEVATOR_FRONT_MERGE) {
                if (bio_attempt_front_merge(q, req, bio)) {
+                       elv_bio_merged(q, req, bio);
                        if (!attempt_front_merge(q, req))
                                elv_merged_request(q, req, el_ret);
                        goto out_unlock;
@@ -1719,6 +1788,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
                return -EIO;
 
        spin_lock_irqsave(q->queue_lock, flags);
+       if (unlikely(blk_queue_dead(q))) {
+               spin_unlock_irqrestore(q->queue_lock, flags);
+               return -ENODEV;
+       }
 
        /*
         * Submitting request must be dequeued before calling this function
@@ -2693,6 +2766,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
        trace_block_unplug(q, depth, !from_schedule);
 
        /*
+        * Don't mess with dead queue.
+        */
+       if (unlikely(blk_queue_dead(q))) {
+               spin_unlock(q->queue_lock);
+               return;
+       }
+
+       /*
         * If we are punting this to kblockd, then we can safely drop
         * the queue_lock before waking kblockd (which needs to take
         * this lock).
@@ -2768,6 +2849,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                        depth = 0;
                        spin_lock(q->queue_lock);
                }
+
+               /*
+                * Short-circuit if @q is dead
+                */
+               if (unlikely(blk_queue_dead(q))) {
+                       __blk_end_request_all(rq, -ENODEV);
+                       continue;
+               }
+
                /*
                 * rq is already accounted, so use raw insert
                 */