/******************************************************************************
 * XenLinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/version.h>
#include "block.h"
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/scatterlist.h>
#include <scsi/scsi.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/gnttab.h>
#include <asm/hypervisor.h>
#include <asm/maddr.h>

#ifdef HAVE_XEN_PLATFORM_COMPAT_H
#include <xen/platform-compat.h>
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
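
/*
 * Note: MAXIMUM_OUTSTANDING_BLOCK_REQS bounds in-flight *segments*, not
 * requests: each of the BLK_RING_SIZE ring slots may carry up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST scatter-gather segments.
 */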

static void connect(struct blkfront_info *);
static void blkfront_closing(struct blkfront_info *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);

static void kick_pending_request_queues(struct blkfront_info *);

static irqreturn_t blkif_int(int irq, void *dev_id);
static void blkif_restart_queue(struct work_struct *arg);
static int blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

#ifndef CONFIG_XEN /* For HVM guests, do not take over CDROM devices. */
	char *type;

	type = xenbus_read(XBT_NIL, dev->nodename, "device-type", NULL);
	if (IS_ERR(type)) {
		xenbus_dev_fatal(dev, PTR_ERR(type), "reading dev type");
		return PTR_ERR(type);
	}
	if (!strncmp(type, "cdrom", 5)) {
		/*
		 * We are handed a cdrom device in an HVM guest; let the
		 * native cdrom driver handle this device.
		 */
		kfree(type);
		pr_notice("blkfront: ignoring CDROM %s\n", dev->nodename);
		return -ENXIO;
	}
	kfree(type);
#endif

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		/* go looking in the extended area instead */
		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
				   "%i", &vdevice);
		if (err != 1) {
			xenbus_dev_fatal(dev, err, "reading virtual-device");
			return err;
		}
	}

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
		return -ENOMEM;
	}

	spin_lock_init(&info->io_lock);
	mutex_init(&info->mutex);
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;
	INIT_WORK(&info->work, blkif_restart_queue);
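
	/*
	 * Thread the shadow free list through the otherwise-unused req.id
	 * fields; 0x0fffffff terminates the list (see GET_ID_FROM_FREELIST
	 * and ADD_ID_TO_FREELIST below).
	 */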
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
	dev_set_drvdata(&dev->dev, info);

	err = talk_to_backend(dev, info);
	if (err) {
		kfree(info);
		dev_set_drvdata(&dev->dev, NULL);
		return err;
	}

	return 0;
}

/*
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

	err = talk_to_backend(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		err = blkif_recover(info);

	return err;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
			   struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
			    irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
			    XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}

	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}
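
/*
 * Illustrative sketch (not from this file) of the frontend xenstore area
 * after talk_to_backend() succeeds; exact paths depend on the toolstack:
 *
 *   .../device/vbd/<handle>/ring-ref      = "<grant reference>"
 *   .../device/vbd/<handle>/event-channel = "<event-channel port>"
 *   .../device/vbd/<handle>/protocol      = XEN_IO_PROTO_ABI_NATIVE
 */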

static int setup_blkring(struct xenbus_device *dev,
			 struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH);
	if (!sring) {
		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
		return -ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
	if (err < 0) {
		free_page((unsigned long)sring);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;
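	/*
	 * On success xenbus_grant_ring() returns the grant reference for
	 * the shared page; talk_to_backend() publishes it as "ring-ref"
	 * and blkif_free() revokes it on teardown.
	 */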

	err = bind_listening_port_to_irqhandler(
		dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_listening_port_to_irqhandler");
		goto fail;
	}
	info->irq = err;

	return 0;
fail:
	blkif_free(info, 0);
	return err;
}

/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    enum xenbus_state backend_state)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
	struct block_device *bd;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateReconfiguring:
	case XenbusStateReconfigured:
	case XenbusStateUnknown:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		mutex_lock(&info->mutex);
		if (dev->state == XenbusStateClosing) {
			mutex_unlock(&info->mutex);
			break;
		}

		bd = info->gd ? bdget_disk(info->gd, 0) : NULL;

		mutex_unlock(&info->mutex);

		if (bd == NULL) {
			xenbus_frontend_closed(dev);
			break;
		}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
		down(&bd->bd_sem);
#else
		mutex_lock(&bd->bd_mutex);
#endif
		if (bd->bd_openers) {
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
			xenbus_switch_state(dev, XenbusStateClosing);
		} else
			blkfront_closing(info);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
		up(&bd->bd_sem);
#else
		mutex_unlock(&bd->bd_mutex);
#endif
		bdput(bd);
		break;
	}
}

/* ** Connection ** */

static void blkfront_setup_discard(struct blkfront_info *info)
{
	int err;
	char *type;
	unsigned int discard_granularity;
	unsigned int discard_alignment;
	int discard_secure;

	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
	if (IS_ERR(type))
		return;

	info->feature_secdiscard = 0;
	if (strncmp(type, "phy", 3) == 0) {
		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			"discard-granularity", "%u", &discard_granularity,
			"discard-alignment", "%u", &discard_alignment,
			NULL);
		if (!err) {
			info->feature_discard = 1;
			info->discard_granularity = discard_granularity;
			info->discard_alignment = discard_alignment;
		}
		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				   "discard-secure", "%d", &discard_secure);
		if (err > 0)
			info->feature_secdiscard = discard_secure;
	} else if (strncmp(type, "file", 4) == 0)
		info->feature_discard = 1;

	kfree(type);
}

/*
 * Invoked when the backend is finally 'ready' (and has produced the
 * details about the physical device - #sectors, size, etc).
 */
static void connect(struct blkfront_info *info)
{
	unsigned long long sectors;
	unsigned long sector_size;
	unsigned int binfo;
	int err, barrier, flush, discard;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
		/*
		 * Potentially, the back-end may be signalling
		 * a capacity change; update the capacity.
		 */
		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				   "sectors", "%Lu", &sectors);
		if (err != 1)
			return;
		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				   "sector-size", "%lu", &sector_size);
		if (err != 1)
			sector_size = 0;
		if (sector_size)
			blk_queue_logical_block_size(info->gd->queue,
						     sector_size);
		pr_info("Setting capacity to %Lu\n", sectors);
		set_capacity(info->gd, sectors);
		revalidate_disk(info->gd);

		/* fall through */
	case BLKIF_STATE_SUSPENDED:
		return;
	}

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%Lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}

	info->feature_flush = 0;
	info->flush_op = 0;

	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
			   "feature-barrier", "%d", &barrier);
	/*
	 * If there's no "feature-barrier" defined, then it means
	 * we're dealing with a very old backend which writes
	 * synchronously; nothing to do.
	 *
	 * If there are barriers, then we use flush.
	 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
	if (err > 0 && barrier) {
		info->feature_flush = REQ_FLUSH | REQ_FUA;
		info->flush_op = BLKIF_OP_WRITE_BARRIER;
	}
	/*
	 * And if there is "feature-flush-cache" use that above
	 * barriers.
	 */
	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
			   "feature-flush-cache", "%d", &flush);
	if (err > 0 && flush) {
		info->feature_flush = REQ_FLUSH;
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}
#else
	if (err <= 0)
		info->feature_flush = QUEUE_ORDERED_DRAIN;
	else if (barrier)
		info->feature_flush = QUEUE_ORDERED_TAG;
	else
		info->feature_flush = QUEUE_ORDERED_NONE;
#endif

	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
			   "feature-discard", "%d", &discard);
	if (err > 0 && discard)
		blkfront_setup_discard(info);

	err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
				 info->xbdev->otherend);
		return;
	}

	err = xlvbd_sysfs_addif(info);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s",
				 info->xbdev->otherend);
		return;
	}

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	spin_lock_irq(&info->io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	spin_unlock_irq(&info->io_lock);

	add_disk(info->gd);

	info->is_ready = 1;

	register_vcd(info);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct blkfront_info *info)
{
	unsigned long flags;

	DPRINTK("blkfront_closing: %d removed\n", info->vdevice);

	if (info->rq == NULL)
		goto out;

	spin_lock_irqsave(&info->io_lock, flags);
	/* No more blkif_request(). */
	blk_stop_queue(info->rq);
	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	spin_unlock_irqrestore(&info->io_lock, flags);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_work_sync(&info->work);

	xlvbd_sysfs_delif(info);

	unregister_vcd(info);

	xlvbd_del(info);

 out:
	xenbus_frontend_closed(info->xbdev);
}

static int blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
	struct block_device *bd;
	struct gendisk *disk;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	mutex_lock(&info->mutex);

	disk = info->gd;
	bd = disk ? bdget_disk(disk, 0) : NULL;

	info->xbdev = NULL;
	mutex_unlock(&info->mutex);

	if (!bd) {
		kfree(info);
		return 0;
	}

	/*
	 * The xbdev was removed before we reached the Closed
	 * state.  See if it's safe to remove the disk.  If the bdev
	 * isn't closed yet, we let release take care of it.
	 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
	down(&bd->bd_sem);
#else
	mutex_lock(&bd->bd_mutex);
#endif
	info = disk->private_data;

	dev_warn(disk_to_dev(disk),
		 "%s was hot-unplugged, %d stale handles\n",
		 dev->nodename, bd->bd_openers);

	if (info && !bd->bd_openers) {
		blkfront_closing(info);
		disk->private_data = NULL;
		kfree(info);
	}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
	up(&bd->bd_sem);
#else
	mutex_unlock(&bd->bd_mutex);
#endif
	bdput(bd);

	return 0;
}
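
/*
 * The shadow free list is threaded through the otherwise-unused req.id
 * fields: GET_ID_FROM_FREELIST pops the head and poisons the slot's id
 * (0x0fffffee) so stale uses are easy to spot; ADD_ID_TO_FREELIST pushes
 * an id back and drops the request pointer.
 */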
static inline int GET_ID_FROM_FREELIST(
	struct blkfront_info *info)
{
	unsigned long free = info->shadow_free;
	BUG_ON(free >= BLK_RING_SIZE);
	info->shadow_free = info->shadow[free].req.id;
	info->shadow[free].req.id = 0x0fffffee; /* debug */
	return free;
}

static inline void ADD_ID_TO_FREELIST(
	struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id = info->shadow_free;
	info->shadow[id].request = NULL;
	info->shadow_free = id;
}
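
/*
 * Push newly produced requests to the shared ring, and kick the backend's
 * event channel only when RING_PUSH_REQUESTS_AND_CHECK_NOTIFY reports
 * that the backend may not have seen them yet.
 */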
static inline void flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void kick_pending_request_queues(struct blkfront_info *info)
{
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
}

static void blkif_restart_queue(struct work_struct *arg)
{
	struct blkfront_info *info = container_of(arg, struct blkfront_info, work);

	spin_lock_irq(&info->io_lock);
	if (info->connected == BLKIF_STATE_CONNECTED)
		kick_pending_request_queues(info);
	spin_unlock_irq(&info->io_lock);
}

static void blkif_restart_queue_callback(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	schedule_work(&info->work);
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_open(struct inode *inode, struct file *filep)
{
	struct block_device *bd = inode->i_bdev;
#else
int blkif_open(struct block_device *bd, fmode_t mode)
{
#endif
	struct blkfront_info *info = bd->bd_disk->private_data;
	int err = 0;

	if (!info)
		/* xbdev gone */
		return -ERESTARTSYS;

	mutex_lock(&info->mutex);

	if (!info->gd)
		/* xbdev is closed */
		err = -ERESTARTSYS;

	mutex_unlock(&info->mutex);

	return err;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_release(struct inode *inode, struct file *filep)
{
	struct gendisk *disk = inode->i_bdev->bd_disk;
#else
int blkif_release(struct gendisk *disk, fmode_t mode)
{
#endif
	struct blkfront_info *info = disk->private_data;
	struct xenbus_device *xbdev;
	struct block_device *bd = bdget_disk(disk, 0);

	bdput(bd);

	if (bd->bd_openers)
		return 0;

	/*
	 * Check if we have been instructed to close.  We will have
	 * deferred this request, because the bdev was still open.
	 */
	mutex_lock(&info->mutex);
	xbdev = info->xbdev;

	if (xbdev && xbdev->state == XenbusStateClosing) {
		/* pending switch to state closed */
		dev_info(disk_to_dev(disk), "releasing disk\n");
		blkfront_closing(info);
	}

	mutex_unlock(&info->mutex);

	if (!xbdev) {
		/* sudden device removal */
		dev_info(disk_to_dev(disk), "releasing disk\n");
		blkfront_closing(info);
		disk->private_data = NULL;
		kfree(info);
	}

	return 0;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_ioctl(struct inode *inode, struct file *filep,
		unsigned command, unsigned long argument)
{
	struct block_device *bd = inode->i_bdev;
#else
int blkif_ioctl(struct block_device *bd, fmode_t mode,
		unsigned command, unsigned long argument)
{
#endif
	struct blkfront_info *info = bd->bd_disk->private_data;
	int i;

	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
		      command, (long)argument, inode->i_rdev);

	switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
	case HDIO_GETGEO: {
		struct hd_geometry geo;
		int ret;

		if (!argument)
			return -EINVAL;

		geo.start = get_start_sect(bd);
		ret = blkif_getgeo(bd, &geo);
		if (ret)
			return ret;

		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
				 sizeof(geo)))
			return -EFAULT;

		return 0;
	}
#endif
	case CDROMMULTISESSION:
		DPRINTK("FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case CDROM_GET_CAPABILITY:
		if (info->gd && (info->gd->flags & GENHD_FL_CD))
			return 0;
		return -EINVAL;

	default:
		if (info->mi && info->gd && info->rq) {
			switch (info->mi->major) {
			case SCSI_DISK0_MAJOR:
			case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
			case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
			case SCSI_CDROM_MAJOR:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
				return scsi_cmd_ioctl(filep, info->gd, command,
						      (void __user *)argument);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
				return scsi_cmd_ioctl(filep, info->rq,
						      info->gd, command,
						      (void __user *)argument);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
				return scsi_cmd_ioctl(info->rq, info->gd,
						      mode, command,
						      (void __user *)argument);
#else
				return scsi_cmd_blk_ioctl(bd, mode, command,
							  (void __user *)argument);
#endif
			}
		}

		return -EINVAL; /* same return as native Linux */
	}

	return 0;
}

int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	/* We don't have real geometry info, but let's at least return
	   values consistent with the size of the device */
	sector_t nsect = get_capacity(bd->bd_disk);
	sector_t cylinders = nsect;

	hg->heads = 0xff;
	hg->sectors = 0x3f;
	sector_div(cylinders, hg->heads * hg->sectors);
	hg->cylinders = cylinders;
	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
		hg->cylinders = 0xffff;
	return 0;
}
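
/*
 * Illustrative arithmetic (not from this file): a 16 GiB disk has
 * 33554432 512-byte sectors; with the fake 255-head/63-sector geometry
 * above that is 33554432 / (255 * 63) = 2088 cylinders, and the 0xffff
 * clamp only triggers once the true size exceeds what 65535 cylinders
 * can represent.
 */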

/*
 * Generate a Xen blkfront IO request from a blk layer request.  Reads
 * and writes are handled as expected.
 *
 * @req: a request struct
 */
static int blkif_queue_request(struct request *req)
{
	struct blkfront_info *info = req->rq_disk->private_data;
	unsigned long buffer_mfn;
	blkif_request_t *ring_req;
	unsigned long id;
	unsigned int fsect, lsect;
	int i, ref;
	grant_ref_t gref_head;
	struct scatterlist *sg;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = req;

	ring_req->id = id;
	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
	ring_req->handle = info->handle;

	ring_req->operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
#else
	if (req->cmd_flags & REQ_HARDBARRIER)
#endif
		ring_req->operation = info->flush_op;
	if (req->cmd_type == REQ_TYPE_BLOCK_PC)
		ring_req->operation = BLKIF_OP_PACKET;

	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
		struct blkif_request_discard *discard = (void *)ring_req;

		/* id, sector_number and handle are set above. */
		discard->operation = BLKIF_OP_DISCARD;
		discard->flag = 0;
		discard->nr_sectors = blk_rq_sectors(req);
		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
			discard->flag = BLKIF_DISCARD_SECURE;
	} else {
		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
		for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
			buffer_mfn = page_to_phys(sg_page(sg)) >> PAGE_SHIFT;
			fsect = sg->offset >> 9;
			lsect = fsect + (sg->length >> 9) - 1;
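			/* fsect/lsect address 512-byte units within the
			 * granted page; the segment spans [fsect, lsect]
			 * inclusive. */
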
			/* install a grant reference. */
			ref = gnttab_claim_grant_reference(&gref_head);
			BUG_ON(ref == -ENOSPC);

			gnttab_grant_foreign_access_ref(
				ref,
				info->xbdev->otherend_id,
				buffer_mfn,
				rq_data_dir(req) ? GTF_readonly : 0);

			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
			ring_req->seg[i] =
				(struct blkif_request_segment) {
					.gref       = ref,
					.first_sect = fsect,
					.last_sect  = lsect };
		}
	}

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(struct request_queue *rq)
{
	struct blkfront_info *info = NULL;
	struct request *req;
	int queued;

	DPRINTK("Entered do_blkif_request\n");

	queued = 0;

	while ((req = blk_peek_request(rq)) != NULL) {
		info = req->rq_disk->private_data;

		if (RING_FULL(&info->ring))
			goto wait;

		blk_start_request(req);

		if ((req->cmd_type != REQ_TYPE_FS &&
		     (req->cmd_type != REQ_TYPE_BLOCK_PC || req->cmd_len)) ||
		    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
		     !info->flush_op)) {
			req->errors = (DID_ERROR << 16) |
				      (DRIVER_INVALID << 24);
			__blk_end_request_all(req, -EIO);
			continue;
		}

		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
			"(%u/%u) buffer:%p [%s]\n",
			req, req->cmd, (long long)blk_rq_pos(req),
			blk_rq_cur_sectors(req), blk_rq_sectors(req),
			req->buffer, rq_data_dir(req) ? "write" : "read");

		if (blkif_queue_request(req)) {
			blk_requeue_request(rq, req);
		wait:
			/* Avoid pointless unplugs. */
			blk_stop_queue(rq);
			break;
		}

		queued++;
	}

	if (queued != 0)
		flush_requests(info);
}
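
/*
 * Interrupt handler: drain completed responses from the shared ring, end
 * the corresponding block-layer requests, and use
 * RING_FINAL_CHECK_FOR_RESPONSES to close the race with the backend
 * posting further responses after the loop has finished.
 */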
static irqreturn_t blkif_int(int irq, void *dev_id)
{
	struct request *req;
	blkif_response_t *bret;
	RING_IDX i, rp;
	unsigned long flags;
	struct blkfront_info *info = (struct blkfront_info *)dev_id;

	spin_lock_irqsave(&info->io_lock, flags);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		spin_unlock_irqrestore(&info->io_lock, flags);
		return IRQ_HANDLED;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;
		int ret;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		req  = info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		ret = bret->status == BLKIF_RSP_OKAY ? 0 : -EIO;
		switch (bret->operation) {
			const char *what;

		case BLKIF_OP_FLUSH_DISKCACHE:
		case BLKIF_OP_WRITE_BARRIER:
			what = bret->operation == BLKIF_OP_WRITE_BARRIER ?
			       "write barrier" : "flush disk cache";
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				pr_warn("blkfront: %s: %s op failed\n",
					what, info->gd->disk_name);
				ret = -EOPNOTSUPP;
			}
			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
				     info->shadow[id].req.nr_segments == 0)) {
				pr_warn("blkfront: %s: empty %s op failed\n",
					what, info->gd->disk_name);
				ret = -EOPNOTSUPP;
			}
			if (unlikely(ret)) {
				if (ret == -EOPNOTSUPP)
					ret = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
				info->feature_flush = 0;
#else
				info->feature_flush = QUEUE_ORDERED_NONE;
#endif
			}
			/* fall through */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
		case BLKIF_OP_PACKET:
			if (unlikely(bret->status != BLKIF_RSP_OKAY))
				DPRINTK("Bad return from blkdev data "
					"request: %x\n", bret->status);

			__blk_end_request_all(req, ret);
			break;
		case BLKIF_OP_DISCARD:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				struct request_queue *rq = info->rq;

				pr_warn("blkfront: %s: discard op failed\n",
					info->gd->disk_name);
				ret = -EOPNOTSUPP;
				info->feature_discard = 0;
				info->feature_secdiscard = 0;
				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
			}
			__blk_end_request_all(req, ret);
			break;
		default:
			BUG();
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;

		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else
		info->ring.sring->rsp_event = i + 1;

	kick_pending_request_queues(info);

	spin_unlock_irqrestore(&info->io_lock, flags);

	return IRQ_HANDLED;
}

static void blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	spin_lock_irq(&info->io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	/* No more blkif_request(). */
	if (info->rq)
		blk_stop_queue(info->rq);
	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	spin_unlock_irq(&info->io_lock);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_work_sync(&info->work);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  (unsigned long)info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

static void blkif_completion(struct blk_shadow *s)
{
	int i;

	if (s->req.operation == BLKIF_OP_DISCARD)
		return;
	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}
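
/*
 * Rebuild ring state after a suspend/resume or backend restart: the saved
 * shadow copy records which requests were in flight, so that they can be
 * re-granted and reissued on the fresh ring before we reconnect.
 */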
static int blkif_recover(struct blkfront_info *info)
{
	int i;
	blkif_request_t *req;
	struct blk_shadow *copy;
	int j;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = kmemdup(info->shadow, sizeof(info->shadow),
		       GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
	if (!copy)
		return -ENOMEM;

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (!copy[i].request)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by susp/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				rq_data_dir(info->shadow[req->id].request) ?
				GTF_readonly : 0);
		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	kfree(copy);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	spin_lock_irq(&info->io_lock);

	/* Now safe for us to use the shared ring */
	info->connected = BLKIF_STATE_CONNECTED;

	/* Send off requeued requests */
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);

	spin_unlock_irq(&info->io_lock);

	return 0;
}

int blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);

	return info->is_ready && info->xbdev;
}

/* ** Driver Registration ** */

static const struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};
MODULE_ALIAS("xen:vbd");

static DEFINE_XENBUS_DRIVER(blkfront, ,
	.probe = blkfront_probe,
	.remove = blkfront_remove,
	.resume = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready = blkfront_is_ready,
);

static int __init xlblk_init(void)
{
	if (!is_running_on_xen())
		return -ENODEV;

	return xenbus_register_frontend(&blkfront_driver);
}
module_init(xlblk_init);

static void __exit xlblk_exit(void)
{
	xenbus_unregister_driver(&blkfront_driver);
	xlbd_release_major_info();
}
module_exit(xlblk_exit);

MODULE_LICENSE("Dual BSD/GPL");