- Update Xen patches to 3.3-rc5 and c/s 1157.
[linux-flexiantxendom0-3.2.10.git] / drivers / xen / blktap2-new / device.c
1 #include <linux/fs.h>
2 #include <linux/blkdev.h>
3 #include <linux/cdrom.h>
4 #include <linux/hdreg.h>
5 #include <linux/module.h>
6 #include <scsi/scsi.h>
7 #include <scsi/scsi_ioctl.h>
8
9 #include "blktap.h"
10
/* Dynamically allocated block-device major; set by blktap_device_init(). */
int blktap_device_major;

/* Map an embedded struct blktap_device back to its containing struct blktap. */
#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
14
15 static int
16 blktap_device_open(struct block_device *bdev, fmode_t mode)
17 {
18         struct gendisk *disk = bdev->bd_disk;
19         struct blktap_device *tapdev = disk->private_data;
20
21         if (!tapdev)
22                 return -ENXIO;
23
24         /* NB. we might have bounced a bd trylock by tapdisk. when
25          * failing for reasons not !tapdev, make sure to kick tapdisk
26          * out of destroy wait state again. */
27
28         return 0;
29 }
30
31 static int
32 blktap_device_release(struct gendisk *disk, fmode_t mode)
33 {
34         struct blktap_device *tapdev = disk->private_data;
35         struct block_device *bdev = bdget_disk(disk, 0);
36         struct blktap *tap = dev_to_blktap(tapdev);
37
38         bdput(bdev);
39
40         if (!bdev->bd_openers) {
41                 set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
42                 blktap_ring_kick_user(tap);
43         }
44
45         return 0;
46 }
47
48 static int
49 blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
50 {
51         /* We don't have real geometry info, but let's at least return
52            values consistent with the size of the device */
53         sector_t nsect = get_capacity(bd->bd_disk);
54         sector_t cylinders = nsect;
55
56         hg->heads = 0xff;
57         hg->sectors = 0x3f;
58         sector_div(cylinders, hg->heads * hg->sectors);
59         hg->cylinders = cylinders;
60         if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
61                 hg->cylinders = 0xffff;
62         return 0;
63 }
64
65 static int
66 blktap_device_ioctl(struct block_device *bd, fmode_t mode,
67                     unsigned command, unsigned long argument)
68 {
69         int i;
70
71         switch (command) {
72         case CDROMMULTISESSION:
73                 BTDBG("FIXME: support multisession CDs later\n");
74                 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
75                         if (put_user(0, (char __user *)(argument + i)))
76                                 return -EFAULT;
77                 return 0;
78
79         case SCSI_IOCTL_GET_IDLUN:
80                 if (!access_ok(VERIFY_WRITE, argument, 
81                         sizeof(struct scsi_idlun)))
82                         return -EFAULT;
83
84                 /* return 0 for now. */
85                 __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
86                 __put_user(0, 
87                         &((struct scsi_idlun __user *)argument)->host_unique_id);
88                 return 0;
89
90         default:
91                 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
92                   command);*/
93                 return -EINVAL; /* same return as native Linux */
94         }
95
96         return 0;
97 }
98
/* Block-device method table for tapdev disks. */
static const struct block_device_operations blktap_device_file_operations = {
	.owner     = THIS_MODULE,
	.open      = blktap_device_open,
	.release   = blktap_device_release,
	.ioctl     = blktap_device_ioctl,
	.getgeo    = blktap_device_getgeo
};
106
/* NB. __blktap holding the queue lock; blktap where unlocked */

/* Peek the next queued request without dequeuing it (queue lock held). */
static inline struct request*
__blktap_next_queued_rq(struct request_queue *q)
{
	return blk_peek_request(q);
}
114
/* Take a peeked request off the queue for processing (queue lock held). */
static inline void
__blktap_dequeue_rq(struct request *rq)
{
	blk_start_request(rq);
}
120
/* NB. err == 0 indicates success, failures < 0 */

/* Dequeue a still-queued request and complete it in one step
 * (queue lock held). */
static inline void
__blktap_end_queued_rq(struct request *rq, int err)
{
	blk_start_request(rq);
	__blk_end_request(rq, err, blk_rq_bytes(rq));
}
129
/* Complete an already-dequeued request in full (queue lock held). */
static inline void
__blktap_end_rq(struct request *rq, int err)
{
	__blk_end_request(rq, err, blk_rq_bytes(rq));
}
135
/* Unlocked variant of __blktap_end_rq: takes the queue lock itself. */
static inline void
blktap_end_rq(struct request *rq, int err)
{
	struct request_queue *q = rq->q;

	spin_lock_irq(q->queue_lock);
	__blktap_end_rq(rq, err);
	spin_unlock_irq(q->queue_lock);
}
145
/*
 * Complete a ring request: unmap and free the blktap request, then end
 * the underlying block-layer request with @error (0 = success, < 0 =
 * failure). Called unlocked; blktap_end_rq takes the queue lock.
 */
void
blktap_device_end_request(struct blktap *tap,
			  struct blktap_request *request,
			  int error)
{
	struct blktap_device *tapdev = &tap->device;
	struct request *rq = request->rq;

	/* Release ring resources before completing rq; request->rq is
	 * saved above since request is freed here. */
	blktap_ring_unmap_request(tap, request);

	blktap_ring_free_request(tap, request);

	dev_dbg(disk_to_dev(tapdev->gd),
		"end_request: op=%d error=%d bytes=%d\n",
		rq_data_dir(rq), error, blk_rq_bytes(rq));

	blktap_end_rq(rq, error);
}
164
/*
 * Translate one block-layer request into a blktap ring request and
 * submit it to userland.
 *
 * Returns 0 on success. Returns -EBUSY when the ring is out of space
 * or memory (the "stop" path) so the caller can stop the queue and
 * retry later; other negative errors indicate a hard failure and are
 * logged (rate-limited). In both non-zero cases any partially set up
 * ring request is freed. Called unlocked (see run_queue).
 */
int
blktap_device_make_request(struct blktap *tap, struct request *rq)
{
	struct blktap_device *tapdev = &tap->device;
	struct blktap_request *request;
	int write, nsegs;
	int err;

	request = blktap_ring_make_request(tap);
	if (IS_ERR(request)) {
		err = PTR_ERR(request);
		request = NULL;	/* nothing to free on the _out path */

		/* Ring full / no memory: back off rather than fail. */
		if (err == -ENOSPC || err == -ENOMEM)
			goto stop;

		goto fail;
	}

	write = rq_data_dir(rq) == WRITE;
	nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);

	dev_dbg(disk_to_dev(tapdev->gd),
		"make_request: op=%c bytes=%d nsegs=%d\n",
		write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);

	request->rq = rq;
	request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
	/* Packet-mode (SCSI passthrough) commands get their own op. */
	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
		request->operation = BLKIF_OP_PACKET;

	err = blktap_request_get_pages(tap, request, nsegs);
	if (err)
		goto stop;

	err = blktap_ring_map_request(tap, request);
	if (err)
		goto fail;

	blktap_ring_submit_request(tap, request);

	return 0;

stop:
	/* Transient resource shortage: count it and report -EBUSY. */
	tap->stats.st_oo_req++;
	err = -EBUSY;

_out:
	if (request)
		blktap_ring_free_request(tap, request);

	return err;
fail:
	/* Hard failure: log (rate-limited), then share the _out cleanup. */
	if (printk_ratelimit())
		dev_warn(disk_to_dev(tapdev->gd),
			 "make request: %d, failing\n", err);
	goto _out;
}
223
/*
 * called from tapdisk context
 */
/*
 * Drain the request queue, handing each filesystem request to
 * blktap_device_make_request(). Non-FS requests are failed with
 * -EOPNOTSUPP. On -EBUSY (ring full) the queue is stopped and the
 * request left queued for a later retry; other errors complete the
 * request with that error.
 *
 * Locking: tapdev->lock is the queue lock. It is dropped around
 * make_request (which may sleep/allocate) and retaken before touching
 * the queue again — the statement order here is load-bearing.
 */
void
blktap_device_run_queue(struct blktap *tap)
{
	struct blktap_device *tapdev = &tap->device;
	struct request_queue *q;
	struct request *rq;
	int err;

	/* Device may already have been torn down. */
	if (!tapdev->gd)
		return;

	q = tapdev->gd->queue;

	spin_lock_irq(&tapdev->lock);
	queue_flag_clear(QUEUE_FLAG_STOPPED, q);

	do {
		rq = __blktap_next_queued_rq(q);
		if (!rq)
			break;

		if (rq->cmd_type != REQ_TYPE_FS) {
			/* Reject non-filesystem requests outright. */
			rq->errors = (DID_ERROR << 16) |
				     (DRIVER_INVALID << 24);
			__blktap_end_queued_rq(rq, -EOPNOTSUPP);
			continue;
		}

		/* make_request may block; rq is still queued (only
		 * peeked), so it is not lost if we must stop. */
		spin_unlock_irq(&tapdev->lock);

		err = blktap_device_make_request(tap, rq);

		spin_lock_irq(&tapdev->lock);

		if (err == -EBUSY) {
			/* Ring full: leave rq queued and stop the queue. */
			blk_stop_queue(q);
			break;
		}

		__blktap_dequeue_rq(rq);

		if (unlikely(err))
			__blktap_end_rq(rq, err);
	} while (1);

	spin_unlock_irq(&tapdev->lock);
}
274
275 static void
276 blktap_device_do_request(struct request_queue *rq)
277 {
278         struct blktap_device *tapdev = rq->queuedata;
279         struct blktap *tap = dev_to_blktap(tapdev);
280
281         blktap_ring_kick_user(tap);
282 }
283
284 static void
285 blktap_device_configure(struct blktap *tap,
286                         struct blktap_params *params)
287 {
288         struct request_queue *rq;
289         struct blktap_device *dev = &tap->device;
290
291         dev = &tap->device;
292         rq  = dev->gd->queue;
293
294         spin_lock_irq(&dev->lock);
295
296         set_capacity(dev->gd, params->capacity);
297
298         /* Hard sector size and max sectors impersonate the equiv. hardware. */
299         blk_queue_logical_block_size(rq, params->sector_size);
300         blk_queue_max_hw_sectors(rq, 512);
301
302         /* Each segment in a request is up to an aligned page in size. */
303         blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
304         blk_queue_max_segment_size(rq, PAGE_SIZE);
305
306         /* Ensure a merged request will fit in a single I/O ring slot. */
307         blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
308
309         /* Make sure buffer addresses are sector-aligned. */
310         blk_queue_dma_alignment(rq, 511);
311
312         spin_unlock_irq(&dev->lock);
313 }
314
315 static int
316 blktap_device_validate_params(struct blktap *tap,
317                               struct blktap_params *params)
318 {
319         struct device *dev = tap->ring.dev;
320         int sector_order, name_sz;
321
322         sector_order = ffs(params->sector_size) - 1;
323
324         if (sector_order <  9 ||
325             sector_order > 12 ||
326             params->sector_size != 1U<<sector_order)
327                 goto fail;
328
329         if (!params->capacity ||
330             (params->capacity > ULLONG_MAX >> sector_order))
331                 goto fail;
332
333         name_sz = min(sizeof(params->name), sizeof(tap->name));
334         if (strnlen(params->name, name_sz) >= name_sz)
335                 goto fail;
336
337         return 0;
338
339 fail:
340         params->name[name_sz-1] = 0;
341         dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n",
342                 params->capacity, params->sector_size, params->name);
343         return -EINVAL;
344 }
345
/*
 * Tear down the tapdev gendisk, if any. Returns 0 on success (or if
 * there is nothing to destroy), -EBUSY if the bdev is locked or still
 * open — callers may retry (see blktap_device_destroy_sync).
 *
 * NOTE(review): bdget_disk() can return NULL; the result is used
 * unchecked here — confirm whether that is reachable in this driver.
 */
int
blktap_device_destroy(struct blktap *tap)
{
	struct blktap_device *tapdev = &tap->device;
	struct block_device *bdev;
	struct gendisk *gd;
	int err;

	gd = tapdev->gd;
	if (!gd)
		return 0;	/* never created, or already destroyed */

	bdev = bdget_disk(gd, 0);

	/* trylock, not lock: see the deadlock note below. */
	err = !mutex_trylock(&bdev->bd_mutex);
	if (err) {
		/* NB. avoid a deadlock. the last opener syncs the
		 * bdev holding bd_mutex. */
		err = -EBUSY;
		goto out_nolock;
	}

	if (bdev->bd_openers) {
		err = -EBUSY;	/* still open; caller retries later */
		goto out;
	}

	/* Unpublish, detach from the driver, then release everything. */
	del_gendisk(gd);
	gd->private_data = NULL;

	blk_cleanup_queue(gd->queue);

	put_disk(gd);
	tapdev->gd = NULL;

	clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
	err = 0;
out:
	mutex_unlock(&bdev->bd_mutex);
out_nolock:
	bdput(bdev);

	return err;
}
390
391 static void
392 blktap_device_fail_queue(struct blktap *tap)
393 {
394         struct blktap_device *tapdev = &tap->device;
395         struct request_queue *q = tapdev->gd->queue;
396
397         spin_lock_irq(&tapdev->lock);
398         queue_flag_clear(QUEUE_FLAG_STOPPED, q);
399
400         do {
401                 struct request *rq = __blktap_next_queued_rq(q);
402                 if (!rq)
403                         break;
404
405                 __blktap_end_queued_rq(rq, -EIO);
406         } while (1);
407
408         spin_unlock_irq(&tapdev->lock);
409 }
410
/*
 * One destroy attempt; if it fails (device busy), flush the queue with
 * -EIO so pending I/O doesn't stall the retry loop.
 */
static int
blktap_device_try_destroy(struct blktap *tap)
{
	int rc = blktap_device_destroy(tap);

	if (rc)
		blktap_device_fail_queue(tap);

	return rc;
}
422
/*
 * Destroy the device, blocking until it succeeds. Retries on every
 * poll_wait wakeup — presumably triggered when the last opener
 * releases the bdev (see blktap_device_release's kick) — TODO confirm
 * against the ring code.
 */
void
blktap_device_destroy_sync(struct blktap *tap)
{
	wait_event(tap->ring.poll_wait,
		   !blktap_device_try_destroy(tap));
}
429
/* gendisk ->devnode: name the node "<BLKTAP2_DEV_DIR>tapdev<minor>". */
static char *blktap_devnode(struct gendisk *gd, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
			 gd->first_minor);
}
435
436 int
437 blktap_device_create(struct blktap *tap, struct blktap_params *params)
438 {
439         int minor, err;
440         struct gendisk *gd;
441         struct request_queue *rq;
442         struct blktap_device *tapdev;
443
444         gd     = NULL;
445         rq     = NULL;
446         tapdev = &tap->device;
447         minor  = tap->minor;
448
449         if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
450                 return -EEXIST;
451
452         if (blktap_device_validate_params(tap, params))
453                 return -EINVAL;
454
455         gd = alloc_disk(1);
456         if (!gd) {
457                 err = -ENOMEM;
458                 goto fail;
459         }
460
461         if (minor < 26) {
462                 sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
463         } else if (minor < (26 + 1) * 26) {
464                 sprintf(gd->disk_name, "td%c%c",
465                         'a' + minor / 26 - 1,'a' + minor % 26);
466         } else {
467                 const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
468                 const unsigned int m2 = (minor / 26 - 1) % 26;
469                 const unsigned int m3 =  minor % 26;
470                 sprintf(gd->disk_name, "td%c%c%c",
471                         'a' + m1, 'a' + m2, 'a' + m3);
472         }
473
474         gd->major = blktap_device_major;
475         gd->first_minor = minor;
476         gd->devnode = blktap_devnode;
477         gd->fops = &blktap_device_file_operations;
478         gd->private_data = tapdev;
479
480         spin_lock_init(&tapdev->lock);
481         rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
482         if (!rq) {
483                 err = -ENOMEM;
484                 goto fail;
485         }
486         elevator_init(rq, "noop");
487
488         gd->queue     = rq;
489         rq->queuedata = tapdev;
490         tapdev->gd    = gd;
491
492         blktap_device_configure(tap, params);
493         add_disk(gd);
494
495         strlcpy(tap->name, params->name, ARRAY_SIZE(tap->name));
496
497         set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
498
499         dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n",
500                  queue_logical_block_size(rq),
501                  (unsigned long long)get_capacity(gd));
502
503         return 0;
504
505 fail:
506         if (gd)
507                 del_gendisk(gd);
508         if (rq)
509                 blk_cleanup_queue(rq);
510
511         return err;
512 }
513
/*
 * Dump device state (capacity, sector size, queue flags, opener count)
 * into @buf for debugging. Returns the number of bytes written; 0 if
 * no disk exists. @size bounds the output via the end pointer.
 */
size_t
blktap_device_debug(struct blktap *tap, char *buf, size_t size)
{
	struct gendisk *disk = tap->device.gd;
	struct request_queue *q;
	struct block_device *bdev;
	char *s = buf, *end = buf + size;

	if (!disk)
		return 0;

	q = disk->queue;

	s += snprintf(s, end - s,
		      "disk capacity:%llu sector size:%u\n",
		      (unsigned long long)get_capacity(disk),
		      queue_logical_block_size(q));

	s += snprintf(s, end - s,
		      "queue flags:%#lx stopped:%d\n",
		      q->queue_flags,
		      blk_queue_stopped(q));

	/* bdget_disk may fail; the bdev section is best-effort. */
	bdev = bdget_disk(disk, 0);
	if (bdev) {
		s += snprintf(s, end - s,
			      "bdev openers:%d closed:%d\n",
			      bdev->bd_openers,
			      test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
		bdput(bdev);
	}

	return s - buf;
}
548
549 int __init
550 blktap_device_init()
551 {
552         int major;
553
554         /* Dynamically allocate a major for this device */
555         major = register_blkdev(0, "tapdev");
556         if (major < 0) {
557                 BTERR("Couldn't register blktap device\n");
558                 return -ENOMEM;
559         }
560
561         blktap_device_major = major;
562         BTINFO("blktap device major %d\n", major);
563
564         return 0;
565 }
566
567 void
568 blktap_device_exit(void)
569 {
570         if (blktap_device_major)
571                 unregister_blkdev(blktap_device_major, "tapdev");
572 }