/******************************************************************************
 * arch/xen/drivers/blkif/backend/main.c
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  arch/xen/drivers/blkif/frontend
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <xen/balloon.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <asm/hypervisor.h>
#include "common.h"

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
        blkif_t       *blkif;
        u64            id;
        atomic_t       pendcnt;
        unsigned short nr_pages;
        unsigned short operation;
        struct list_head free_list;
} pending_req_t;

static pending_req_t *pending_reqs;
static struct list_head pending_free;
static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);

#define BLKBACK_INVALID_HANDLE (~0)

static struct page **pending_pages;
static grant_handle_t *pending_grant_handles;

static inline int vaddr_pagenr(pending_req_t *req, int seg)
{
        return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]

static inline unsigned long vaddr(pending_req_t *req, int seg)
{
        unsigned long pfn = page_to_pfn(pending_page(req, seg));
        return (unsigned long)pfn_to_kaddr(pfn);
}

#define pending_handle(_req, _seg) \
        (pending_grant_handles[vaddr_pagenr(_req, _seg)])


static int do_block_io_op(blkif_t *blkif);
static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
                                 pending_req_t *pending_req);
static void make_response(blkif_t *blkif, u64 id,
                          unsigned short op, int st);

/******************************************************************
 * misc small helpers
 */
static pending_req_t *alloc_req(void)
{
        pending_req_t *req = NULL;
        unsigned long flags;

        spin_lock_irqsave(&pending_free_lock, flags);
        if (!list_empty(&pending_free)) {
                req = list_entry(pending_free.next, pending_req_t, free_list);
                list_del(&req->free_list);
        }
        spin_unlock_irqrestore(&pending_free_lock, flags);
        return req;
}

static void free_req(pending_req_t *req)
{
        unsigned long flags;
        int was_empty;

        spin_lock_irqsave(&pending_free_lock, flags);
        was_empty = list_empty(&pending_free);
        list_add(&req->free_list, &pending_free);
        spin_unlock_irqrestore(&pending_free_lock, flags);
        if (was_empty)
                wake_up(&pending_free_wq);
}

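/*
 * Queue bookkeeping: plug_queue() takes a kobject reference on the backing
 * device's request queue (dropping any previously held one) and remembers it
 * in blkif->plug; unplug_queue() releases that reference once a batch of
 * ring requests has been dispatched.
 */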
static void unplug_queue(blkif_t *blkif)
{
        if (blkif->plug == NULL)
                return;
        kobject_put(&blkif->plug->kobj);
        blkif->plug = NULL;
}

static void plug_queue(blkif_t *blkif, struct block_device *bdev)
{
        struct request_queue *q = bdev_get_queue(bdev);

        if (q == blkif->plug)
                return;
        unplug_queue(blkif);
        WARN_ON(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags));
        kobject_get(&q->kobj);
        blkif->plug = q;
}

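/*
 * Unmap every grant that is still mapped for this request in a single
 * GNTTABOP_unmap_grant_ref batch and invalidate the stored handles, so the
 * pending pages can be reused by the next request.
 */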
static void fast_flush_area(pending_req_t *req)
{
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
        grant_handle_t handle;
        int ret;

        for (i = 0; i < req->nr_pages; i++) {
                handle = pending_handle(req, i);
                if (handle == BLKBACK_INVALID_HANDLE)
                        continue;
                blkback_pagemap_clear(pending_page(req, i));
                gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
                                    GNTMAP_host_map, handle);
                pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
                invcount++;
        }

        ret = HYPERVISOR_grant_table_op(
                GNTTABOP_unmap_grant_ref, unmap, invcount);
        BUG_ON(ret);
}

/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static void print_stats(blkif_t *blkif)
{
        printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d"
               "  |  fl %4d  |  ds %4d  |  pk %4d\n",
               current->comm, blkif->st_oo_req,
               blkif->st_rd_req, blkif->st_wr_req,
               blkif->st_br_req, blkif->st_fl_req,
               blkif->st_ds_req, blkif->st_pk_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
        blkif->st_br_req = 0;
        blkif->st_fl_req = 0;
        blkif->st_ds_req = 0;
        blkif->st_pk_req = 0;
}

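/*
 * Per-interface kernel thread.  It sleeps until the frontend has queued work
 * and a free pending_req is available, then drains the ring via
 * do_block_io_op().  It also picks up VBD size changes and, when log_stats
 * is set, prints periodic statistics.
 */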
int blkif_schedule(void *arg)
{
        blkif_t *blkif = arg;
        struct vbd *vbd = &blkif->vbd;

        blkif_get(blkif);

        if (debug_lvl)
                printk(KERN_DEBUG "%s: started\n", current->comm);

        while (!kthread_should_stop()) {
                if (try_to_freeze())
                        continue;
                if (unlikely(vbd->size != vbd_size(vbd)))
                        vbd_resize(blkif);

                wait_event_interruptible(
                        blkif->wq,
                        blkif->waiting_reqs || kthread_should_stop());
                wait_event_interruptible(
                        pending_free_wq,
                        !list_empty(&pending_free) || kthread_should_stop());

                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */

                if (do_block_io_op(blkif))
                        blkif->waiting_reqs = 1;
                unplug_queue(blkif);

                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
        }

        if (log_stats)
                print_stats(blkif);
        if (debug_lvl)
                printk(KERN_DEBUG "%s: exiting\n", current->comm);

        blkif->xenblkd = NULL;
        blkif_put(blkif);

        return 0;
}

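/*
 * Handle a BLKIF_OP_DISCARD request: forward it to the backing device with
 * blkdev_issue_discard(), honouring the secure-discard flag when the VBD
 * supports it, and queue the resulting status for the frontend.
 */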
static void do_discard(blkif_t *blkif, struct blkif_request_discard *req)
{
        int err = -EOPNOTSUPP;
        int status = BLKIF_RSP_OKAY;
        struct block_device *bdev = blkif->vbd.bdev;

        if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
            blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
                unsigned long secure = (blkif->vbd.discard_secure &&
                        (req->flag & BLKIF_DISCARD_SECURE)) ?
                        BLKDEV_DISCARD_SECURE : 0;

                err = blkdev_issue_discard(bdev, req->sector_number,
                                           req->nr_sectors, GFP_KERNEL,
                                           secure);
        }

        if (err == -EOPNOTSUPP) {
                DPRINTK("discard op failed, not supported\n");
                status = BLKIF_RSP_EOPNOTSUPP;
        } else if (err)
                status = BLKIF_RSP_ERROR;

        make_response(blkif, req->id, req->operation, status);
}

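/*
 * Block until every bio previously submitted for this interface has
 * completed.  Used before issuing a barrier write so that ordering is
 * preserved.
 */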
static void drain_io(blkif_t *blkif)
{
        atomic_set(&blkif->drain, 1);
        do {
                /* The refcount starts at one, and blkif_schedule() holds one
                 * more, so a count of two means no I/O is in flight. */
                if (atomic_read(&blkif->refcnt) <= 2)
                        break;

                wait_for_completion_interruptible_timeout(
                                &blkif->drain_complete, HZ);

                if (!atomic_read(&blkif->drain))
                        break;
        } while (!kthread_should_stop());
        atomic_set(&blkif->drain, 0);
}

/******************************************************************
 * COMPLETION CALLBACK -- Called as bio->bi_end_io()
 */

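/*
 * Common completion path, called once per finished bio.  When the last
 * outstanding bio of a pending_req completes, unmap its grants, queue the
 * response for the frontend and, if a drain is waiting, signal it.
 */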
static void __end_block_io_op(pending_req_t *pending_req, int error)
{
        blkif_t *blkif = pending_req->blkif;
        int status = BLKIF_RSP_OKAY;

        /* An error fails the entire request. */
        if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
            (error == -EOPNOTSUPP)) {
                DPRINTK("blkback: write barrier op failed, not supported\n");
                blkback_barrier(XBT_NIL, blkif->be, 0);
                status = BLKIF_RSP_EOPNOTSUPP;
        } else if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
                   (error == -EOPNOTSUPP)) {
                DPRINTK("blkback: flush diskcache op failed, not supported\n");
                blkback_flush_diskcache(XBT_NIL, blkif->be, 0);
                status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                DPRINTK("Buffer not up-to-date at end of operation, "
                        "error=%d\n", error);
                status = BLKIF_RSP_ERROR;
        }

        if (atomic_dec_and_test(&pending_req->pendcnt)) {
                fast_flush_area(pending_req);
                make_response(blkif, pending_req->id,
                              pending_req->operation, status);
                blkif_put(blkif);
                free_req(pending_req);
                if (atomic_read(&blkif->drain)
                    && atomic_read(&blkif->refcnt) <= 2)
                        complete(&blkif->drain_complete);
        }
}

static void end_block_io_op(struct bio *bio, int error)
{
        __end_block_io_op(bio->bi_private, error);
        bio_put(bio);
}


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

static void blkif_notify_work(blkif_t *blkif)
{
        blkif->waiting_reqs = 1;
        wake_up(&blkif->wq);
}

irqreturn_t blkif_be_int(int irq, void *dev_id)
{
        blkif_notify_work(dev_id);
        return IRQ_HANDLED;
}



/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */

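/*
 * Pull requests off the shared ring, copying each one into a private
 * blkif_request_t before it is validated or dispatched (the frontend could
 * otherwise modify it under us).  Returns nonzero if requests remain on the
 * ring, e.g. because no pending_req was free.
 */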
static int _do_block_io_op(blkif_t *blkif)
{
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
        blkif_request_t req;
        pending_req_t *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0;

        rc = blk_rings->common.req_cons;
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */

        while (rc != rp) {

                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
                        break;

                if (kthread_should_stop()) {
                        more_to_do = 1;
                        break;
                }

                pending_req = alloc_req();
                if (NULL == pending_req) {
                        blkif->st_oo_req++;
                        more_to_do = 1;
                        break;
                }

                switch (blkif->blk_protocol) {
                case BLKIF_PROTOCOL_NATIVE:
                        memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
                        break;
                case BLKIF_PROTOCOL_X86_32:
                        blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
                        break;
                case BLKIF_PROTOCOL_X86_64:
                        blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
                        break;
                default:
                        BUG();
                }
                blk_rings->common.req_cons = ++rc; /* before make_response() */

                /* Apply all sanity checks to /private copy/ of request. */
                barrier();

                switch (req.operation) {
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                case BLKIF_OP_WRITE_BARRIER:
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_DISCARD:
                        dispatch_rw_block_io(blkif, &req, pending_req);
                        break;
                case BLKIF_OP_PACKET:
                        DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
                        blkif->st_pk_req++;
                        make_response(blkif, req.id, req.operation,
                                      BLKIF_RSP_ERROR);
                        free_req(pending_req);
                        break;
                default:
                        /* A good sign something is wrong: sleep for a while to
                         * avoid excessive CPU consumption by a bad guest. */
                        msleep(1);
                        DPRINTK("error: unknown block io operation [%d]\n",
                                req.operation);
                        make_response(blkif, req.id, req.operation,
                                      BLKIF_RSP_ERROR);
                        free_req(pending_req);
                        break;
                }

                /* Yield point for this unbounded loop. */
                cond_resched();
        }

        return more_to_do;
}

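/*
 * Keep processing the ring until RING_FINAL_CHECK_FOR_REQUESTS() confirms
 * that no new requests arrived while we were working.
 */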
static int
do_block_io_op(blkif_t *blkif)
{
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
        int more_to_do;

        do {
                more_to_do = _do_block_io_op(blkif);
                if (more_to_do)
                        break;

                RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
        } while (more_to_do);

        return more_to_do;
}

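/*
 * Translate a ring request into bios on the backing device: map the granted
 * segments into our address space, validate the extent against the VBD with
 * vbd_translate(), then build and submit the bios.  Flush, barrier and
 * discard operations are special-cased along the way.
 */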
static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
                                 pending_req_t *pending_req)
{
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct phys_req preq;
        struct {
                unsigned long buf; unsigned int nsec;
        } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int nseg = req->nr_segments;
        struct bio *bio = NULL;
        uint32_t flags;
        int ret, i;
        int operation;

        switch (req->operation) {
        case BLKIF_OP_READ:
                blkif->st_rd_req++;
                operation = READ;
                break;
        case BLKIF_OP_WRITE:
                blkif->st_wr_req++;
                operation = WRITE;
                break;
        case BLKIF_OP_WRITE_BARRIER:
                blkif->st_br_req++;
                operation = WRITE_FLUSH_FUA;
                break;
        case BLKIF_OP_FLUSH_DISKCACHE:
                blkif->st_fl_req++;
                operation = WRITE_FLUSH;
                break;
        case BLKIF_OP_DISCARD:
                blkif->st_ds_req++;
                operation = REQ_DISCARD;
                nseg = 0;
                break;
        default:
                operation = 0; /* make gcc happy */
                BUG();
        }

        /* Check that number of segments is sane. */
        if (unlikely(nseg == 0 && !(operation & (REQ_FLUSH|REQ_DISCARD))) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                DPRINTK("Bad number of segments in request (%d)\n", nseg);
                goto fail_response;
        }

        preq.dev           = req->handle;
        preq.sector_number = req->sector_number;
        preq.nr_sects      = 0;

        pending_req->blkif     = blkif;
        pending_req->id        = req->id;
        pending_req->operation = req->operation;
        pending_req->nr_pages  = nseg;

        flags = GNTMAP_host_map;
        if (operation != READ)
                flags |= GNTMAP_readonly;

        for (i = 0; i < nseg; i++) {
                seg[i].nsec = req->seg[i].last_sect -
                        req->seg[i].first_sect + 1;

                if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
                    (req->seg[i].last_sect < req->seg[i].first_sect))
                        goto fail_response;
                preq.nr_sects += seg[i].nsec;

                gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
                                  req->seg[i].gref, blkif->domid);
        }

        ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
        BUG_ON(ret);

        for (i = 0; i < nseg; i++) {
                if (unlikely(map[i].status == GNTST_eagain))
                        gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map[i]);
                if (unlikely(map[i].status != GNTST_okay)) {
                        DPRINTK("invalid buffer -- could not remap it\n");
                        map[i].handle = BLKBACK_INVALID_HANDLE;
                        ret = 1;
                } else {
                        blkback_pagemap_set(vaddr_pagenr(pending_req, i),
                                            pending_page(pending_req, i),
                                            blkif->domid, req->handle,
                                            req->seg[i].gref);
                }

                pending_handle(pending_req, i) = map[i].handle;

                if (ret)
                        continue;

                set_phys_to_machine(
                        page_to_pfn(pending_page(pending_req, i)),
                        FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
                seg[i].buf  = map[i].dev_bus_addr |
                        (req->seg[i].first_sect << 9);
        }

        if (ret)
                goto fail_flush;

        if (vbd_translate(&preq, blkif, operation) != 0) {
                DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
                        operation == READ ? "read" : "write",
                        preq.sector_number,
                        preq.sector_number + preq.nr_sects, preq.dev);
                goto fail_flush;
        }

        /* For a barrier request, wait for all outstanding I/O to complete,
         * then issue the barrier write (WRITE_FLUSH_FUA).
         */
        if (req->operation == BLKIF_OP_WRITE_BARRIER)
                drain_io(blkif);

        plug_queue(blkif, preq.bdev);
        atomic_set(&pending_req->pendcnt, 1);
        blkif_get(blkif);

        for (i = 0; i < nseg; i++) {
                if (((int)preq.sector_number|(int)seg[i].nsec) &
                    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
                        DPRINTK("Misaligned I/O request from domain %d",
                                blkif->domid);
                        goto fail_put_bio;
                }

                while ((bio == NULL) ||
                       (bio_add_page(bio,
                                     pending_page(pending_req, i),
                                     seg[i].nsec << 9,
                                     seg[i].buf & ~PAGE_MASK) == 0)) {
                        if (bio) {
                                atomic_inc(&pending_req->pendcnt);
                                submit_bio(operation, bio);
                        }

                        bio = bio_alloc(GFP_KERNEL, nseg-i);
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;

                        bio->bi_bdev    = preq.bdev;
                        bio->bi_private = pending_req;
                        bio->bi_end_io  = end_block_io_op;
                        bio->bi_sector  = preq.sector_number;
                }

                preq.sector_number += seg[i].nsec;
        }

        if (!bio) {
                if (operation == REQ_DISCARD) {
                        do_discard(blkif, (void *)req);
                        blkif_put(blkif);
                        free_req(pending_req);
                        return;
                }

                BUG_ON(!(operation & (REQ_FLUSH|REQ_FUA)));
                bio = bio_alloc(GFP_KERNEL, 0);
                if (unlikely(bio == NULL))
                        goto fail_put_bio;

                bio->bi_bdev    = preq.bdev;
                bio->bi_private = pending_req;
                bio->bi_end_io  = end_block_io_op;
                bio->bi_sector  = -1;
        }

        submit_bio(operation, bio);

        if (operation == READ)
                blkif->st_rd_sect += preq.nr_sects;
        else
                blkif->st_wr_sect += preq.nr_sects;

        return;

 fail_flush:
        fast_flush_area(pending_req);
 fail_response:
        make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
        free_req(pending_req);
        msleep(1); /* back off a bit */
        return;

 fail_put_bio:
        __end_block_io_op(pending_req, -EINVAL);
        if (bio)
                bio_put(bio);
        unplug_queue(blkif);
        msleep(1); /* back off a bit */
        return;
}



/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */


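/*
 * Queue a response on the ring in whichever layout (native, 32-bit or 64-bit
 * x86) the frontend speaks, and notify it over the event channel if needed.
 */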
static void make_response(blkif_t *blkif, u64 id,
                          unsigned short op, int st)
{
        blkif_response_t  resp;
        unsigned long     flags;
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
        int notify;

        resp.id        = id;
        resp.operation = op;
        resp.status    = st;

        spin_lock_irqsave(&blkif->blk_ring_lock, flags);
        /* Place on the response ring for the relevant domain. */
        switch (blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        case BLKIF_PROTOCOL_X86_32:
                memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        case BLKIF_PROTOCOL_X86_64:
                memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        default:
                BUG();
        }
        blk_rings->common.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
        spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

        if (notify)
                notify_remote_via_irq(blkif->irq);
}

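/*
 * Module initialisation: allocate the pending_req pool, the grant-handle
 * array and the pages used as grant-mapping targets, then initialise the
 * interface and xenbus machinery.
 */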
static int __init blkif_init(void)
{
        int i, mmap_pages;

        if (!is_running_on_xen())
                return -ENODEV;

        mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

        pending_reqs          = kzalloc(sizeof(pending_reqs[0]) *
                                        blkif_reqs, GFP_KERNEL);
        pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);

        if (blkback_pagemap_init(mmap_pages))
                goto out_of_memory;

        if (!pending_reqs || !pending_grant_handles || !pending_pages)
                goto out_of_memory;

        for (i = 0; i < mmap_pages; i++)
                pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;

        blkif_interface_init();

        INIT_LIST_HEAD(&pending_free);

        for (i = 0; i < blkif_reqs; i++)
                list_add_tail(&pending_reqs[i].free_list, &pending_free);

        blkif_xenbus_init();

        return 0;

 out_of_memory:
        kfree(pending_reqs);
        kfree(pending_grant_handles);
        free_empty_pages_and_pagevec(pending_pages, mmap_pages);
        pr_warning("%s: out of memory\n", __FUNCTION__);
        return -ENOMEM;
}

module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");