/*
 * drivers/xen/blktap2-new/ring.c
 * (from linux-flexiantxendom0-3.2.10.git, "Update to 3.4-final")
 */
1 #include <linux/module.h>
2 #include <linux/device.h>
3 #include <linux/signal.h>
4 #include <linux/sched.h>
5 #include <linux/poll.h>
6 #include <linux/blkdev.h>
7
8 #include "blktap.h"
9
/* Chardev major for the per-tap ring devices; assigned in blktap_ring_init(). */
int blktap_ring_major;

/*
 * BLKTAP - immediately before the mmap area,
 * we have a bunch of pages reserved for shared memory rings.
 */
#define RING_PAGES 1
17
18 static void
19 blktap_ring_read_response(struct blktap *tap,
20                      const struct blkif_response *rsp)
21 {
22         struct blktap_ring *ring = &tap->ring;
23         struct blktap_request *request;
24         int usr_idx, err;
25
26         request = NULL;
27
28         usr_idx = rsp->id;
29         if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) {
30                 err = -ERANGE;
31                 goto invalid;
32         }
33
34         request = ring->pending[usr_idx];
35
36         if (!request) {
37                 err = -ESRCH;
38                 goto invalid;
39         }
40
41         if (rsp->operation != request->operation) {
42                 err = -EINVAL;
43                 goto invalid;
44         }
45
46         dev_dbg(ring->dev,
47                 "request %d [%p] response: %d\n",
48                 request->usr_idx, request, rsp->status);
49
50         err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO;
51 end_request:
52         blktap_device_end_request(tap, request, err);
53         return;
54
55 invalid:
56         dev_warn(ring->dev,
57                  "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
58                  usr_idx, rsp->status,
59                  rsp->operation, request->operation,
60                  err);
61         if (request)
62                 goto end_request;
63 }
64
/*
 * Drain all responses userspace has produced on the shared ring and
 * complete the corresponding block requests.
 *
 * mmap_sem is read-held to stabilize ring->vma while consuming the
 * ring; NOTE(review): this presumably excludes concurrent teardown of
 * the mapping (vm_close under munmap) — confirm against callers.
 */
static void
blktap_read_ring(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;
	struct blkif_response rsp;
	RING_IDX rc, rp;

	down_read(&current->mm->mmap_sem);
	if (!ring->vma) {
		up_read(&current->mm->mmap_sem);
		return;
	}

	/* for each outstanding message on the ring  */
	rp = ring->ring.sring->rsp_prod;
	rmb(); /* read rsp_prod before reading any response payloads */

	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
		/* copy out first: the slot lives in shared, writable memory */
		memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
		blktap_ring_read_response(tap, &rsp);
	}

	ring->ring.rsp_cons = rc;

	up_read(&current->mm->mmap_sem);
}
91
/*
 * Fault handler for the ring vma. All valid pages in this vma are
 * inserted explicitly (ring page in mmap, data pages via
 * blktap_ring_map_segment), so any fault means userspace touched an
 * unpopulated address: answer with SIGBUS.
 */
static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
96
97 static void
98 blktap_ring_fail_pending(struct blktap *tap)
99 {
100         struct blktap_ring *ring = &tap->ring;
101         struct blktap_request *request;
102         int usr_idx;
103
104         for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
105                 request = ring->pending[usr_idx];
106                 if (!request)
107                         continue;
108
109                 blktap_device_end_request(tap, request, -EIO);
110         }
111 }
112
/*
 * vma close callback: userspace dropped the ring mapping. Fail all
 * outstanding requests, release the shared ring page, and finish a
 * deferred tap destruction if one was requested.
 */
static void
blktap_ring_vm_close(struct vm_area_struct *vma)
{
	struct blktap *tap = vma->vm_private_data;
	struct blktap_ring *ring = &tap->ring;
	struct page *page = virt_to_page(ring->ring.sring);

	/* nobody is left to answer them */
	blktap_ring_fail_pending(tap);

	/* unmap and free the shared ring page (first page of the vma) */
	zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
	ClearPageReserved(page);
	__free_page(page);

	ring->vma = NULL;

	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_control_destroy_tap(tap);
}
131
/* vma ops for the ring mapping: unmap triggers teardown, faults SIGBUS. */
static struct vm_operations_struct blktap_ring_vm_operations = {
	.close    = blktap_ring_vm_close,
	.fault    = blktap_ring_fault,
};
136
137 int
138 blktap_ring_map_segment(struct blktap *tap,
139                         struct blktap_request *request,
140                         int seg)
141 {
142         struct blktap_ring *ring = &tap->ring;
143         unsigned long uaddr;
144
145         uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
146         return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
147 }
148
149 int
150 blktap_ring_map_request(struct blktap *tap,
151                         struct blktap_request *request)
152 {
153         int seg, err = 0;
154         int write;
155
156         write = request->operation != BLKIF_OP_READ;
157
158         for (seg = 0; seg < request->nr_pages; seg++) {
159                 if (write)
160                         blktap_request_bounce(tap, request, seg, 1);
161
162                 err = blktap_ring_map_segment(tap, request, seg);
163                 if (err)
164                         break;
165         }
166
167         if (err)
168                 blktap_ring_unmap_request(tap, request);
169
170         return err;
171 }
172
173 void
174 blktap_ring_unmap_request(struct blktap *tap,
175                           struct blktap_request *request)
176 {
177         struct blktap_ring *ring = &tap->ring;
178         unsigned long uaddr;
179         unsigned size;
180         int seg, read;
181
182         uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
183         size  = request->nr_pages << PAGE_SHIFT;
184         read  = request->operation != BLKIF_OP_WRITE;
185
186         if (read)
187                 for (seg = 0; seg < request->nr_pages; seg++)
188                         blktap_request_bounce(tap, request, seg, 0);
189
190         zap_page_range(ring->vma, uaddr, size, NULL);
191 }
192
193 void
194 blktap_ring_free_request(struct blktap *tap,
195                          struct blktap_request *request)
196 {
197         struct blktap_ring *ring = &tap->ring;
198
199         ring->pending[request->usr_idx] = NULL;
200         ring->n_pending--;
201
202         blktap_request_free(tap, request);
203 }
204
205 struct blktap_request*
206 blktap_ring_make_request(struct blktap *tap)
207 {
208         struct blktap_ring *ring = &tap->ring;
209         struct blktap_request *request;
210         int usr_idx;
211
212         if (RING_FULL(&ring->ring))
213                 return ERR_PTR(-ENOSPC);
214
215         request = blktap_request_alloc(tap);
216         if (!request)
217                 return ERR_PTR(-ENOMEM);
218
219         for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++)
220                 if (!ring->pending[usr_idx])
221                         break;
222
223         BUG_ON(usr_idx >= BLK_RING_SIZE);
224
225         request->tap     = tap;
226         request->usr_idx = usr_idx;
227
228         ring->pending[usr_idx] = request;
229         ring->n_pending++;
230
231         return request;
232 }
233
/*
 * Queue one request on the shared ring for the userspace backend.
 * Builds the blkif_request from the scatterlist (512-byte sector
 * granularity), advances the private producer index (the visible push
 * happens later via RING_PUSH_REQUESTS in blktap_ring_poll),
 * timestamps the request and updates the per-tap statistics.
 */
void
blktap_ring_submit_request(struct blktap *tap,
			   struct blktap_request *request)
{
	struct blktap_ring *ring = &tap->ring;
	struct blkif_request *breq;
	struct scatterlist *sg;
	int i, nsecs = 0;

	dev_dbg(ring->dev,
		"request %d [%p] submit\n", request->usr_idx, request);

	breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);

	/* id echoes back in the response; see blktap_ring_read_response() */
	breq->id            = request->usr_idx;
	breq->sector_number = blk_rq_pos(request->rq);
	breq->handle        = 0;
	breq->operation     = request->operation;
	breq->nr_segments   = request->nr_pages;

	blktap_for_each_sg(sg, request, i) {
		struct blkif_request_segment *seg = &breq->seg[i];
		int first, count;

		/* byte offset/length -> 512-byte sectors within the page */
		count = sg->length >> 9;
		first = sg->offset >> 9;

		seg->first_sect = first;
		seg->last_sect  = first + count - 1;

		nsecs += count;
	}

	ring->ring.req_prod_pvt++;

	do_gettimeofday(&request->time);


	switch (request->operation) {
	case BLKIF_OP_WRITE:
		tap->stats.st_wr_sect += nsecs;
		tap->stats.st_wr_req++;
		break;

	case BLKIF_OP_READ:
		tap->stats.st_rd_sect += nsecs;
		tap->stats.st_rd_req++;
		break;

	case BLKIF_OP_PACKET:
		tap->stats.st_pk_req++;
		break;
	}
}
288
289 static int
290 blktap_ring_open(struct inode *inode, struct file *filp)
291 {
292         struct blktap *tap = NULL;
293         int minor;
294
295         minor = iminor(inode);
296
297         if (minor < blktap_max_minor)
298                 tap = blktaps[minor];
299
300         if (!tap)
301                 return -ENXIO;
302
303         if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
304                 return -ENXIO;
305
306         if (tap->ring.task)
307                 return -EBUSY;
308
309         filp->private_data = tap;
310         tap->ring.task = current;
311
312         return 0;
313 }
314
/*
 * Close the ring chardev: synchronously tear down the block device,
 * release ring ownership, and finish a deferred tap destruction if
 * one was requested while the ring was open.
 */
static int
blktap_ring_release(struct inode *inode, struct file *filp)
{
	struct blktap *tap = filp->private_data;

	blktap_device_destroy_sync(tap);

	tap->ring.task = NULL;

	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_control_destroy_tap(tap);

	return 0;
}
329
/*
 * mmap handler for the ring chardev. Allocates the shared ring page,
 * inserts it at the start of the vma and initializes the front ring.
 * The rest of the vma (from user_vstart on) is populated later with
 * request data pages by blktap_ring_map_request(). Only one mapping
 * per tap is allowed at a time.
 */
static int
blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct blktap *tap = filp->private_data;
	struct blktap_ring *ring = &tap->ring;
	struct blkif_sring *sring;
	struct page *page = NULL;
	int err;

	if (ring->vma)
		return -EBUSY;

	page = alloc_page(GFP_KERNEL|__GFP_ZERO);
	if (!page)
		return -ENOMEM;

	/* mark reserved while the page is handed out via vm_insert_page */
	SetPageReserved(page);

	err = vm_insert_page(vma, vma->vm_start, page);
	if (err)
		goto fail;

	sring = page_address(page);
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);

	ring->ring_vstart = vma->vm_start;
	ring->user_vstart = ring->ring_vstart + PAGE_SIZE;

	vma->vm_private_data = tap;

	/* mapping must not survive fork nor be treated as normal memory */
	vma->vm_flags |= VM_DONTCOPY;
	vma->vm_flags |= VM_RESERVED;

	vma->vm_ops = &blktap_ring_vm_operations;

	ring->vma = vma;
	return 0;

fail:
	if (page) {
		zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
		ClearPageReserved(page);
		__free_page(page);
	}

	return err;
}
378
379 static long
380 blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
381 {
382         struct blktap *tap = filp->private_data;
383         struct blktap_ring *ring = &tap->ring;
384
385         BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
386
387         if (!ring->vma || ring->vma->vm_mm != current->mm)
388                 return -EACCES;
389
390         switch(cmd) {
391         case BLKTAP2_IOCTL_KICK_FE:
392
393                 blktap_read_ring(tap);
394                 return 0;
395
396         case BLKTAP2_IOCTL_CREATE_DEVICE: {
397                 struct blktap_params params;
398                 void __user *ptr = (void *)arg;
399
400                 if (!arg)
401                         return -EINVAL;
402
403                 if (copy_from_user(&params, ptr, sizeof(params)))
404                         return -EFAULT;
405
406                 return blktap_device_create(tap, &params);
407         }
408
409         case BLKTAP2_IOCTL_REMOVE_DEVICE:
410
411                 return blktap_device_destroy(tap);
412         }
413
414         return -ENOIOCTLCMD;
415 }
416
/*
 * Poll handler for the ring chardev. Runs the device queue to fill
 * the ring, pushes privately queued requests out to the shared ring,
 * and reports POLLIN when userspace has work: new requests, a pending
 * sring message, or a device-closed event.
 */
static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
{
	struct blktap *tap = filp->private_data;
	struct blktap_ring *ring = &tap->ring;
	int work;

	poll_wait(filp, &tap->pool->wait, wait);
	poll_wait(filp, &ring->poll_wait, wait);

	/* mmap_sem read-held to keep ring->vma stable across the queue run */
	down_read(&current->mm->mmap_sem);
	if (ring->vma && tap->device.gd)
		blktap_device_run_queue(tap);
	up_read(&current->mm->mmap_sem);

	/* requests produced since the last visible push */
	work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
	RING_PUSH_REQUESTS(&ring->ring);

	if (work ||
	    ring->ring.sring->private.tapif_user.msg ||
	    test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
		return POLLIN | POLLRDNORM;

	return 0;
}
441
/* File operations for the per-tap ring chardev. */
static const struct file_operations blktap_ring_file_operations = {
	.owner    = THIS_MODULE,
	.open     = blktap_ring_open,
	.release  = blktap_ring_release,
	.unlocked_ioctl = blktap_ring_ioctl,
	.mmap     = blktap_ring_mmap,
	.poll     = blktap_ring_poll,
};
450
/* Wake any task polling the ring chardev. */
void
blktap_ring_kick_user(struct blktap *tap)
{
	wake_up(&tap->ring.poll_wait);
}
456
457 int
458 blktap_ring_destroy(struct blktap *tap)
459 {
460         struct blktap_ring *ring = &tap->ring;
461
462         if (ring->task || ring->vma)
463                 return -EBUSY;
464
465         return 0;
466 }
467
468 int
469 blktap_ring_create(struct blktap *tap)
470 {
471         struct blktap_ring *ring = &tap->ring;
472
473         init_waitqueue_head(&ring->poll_wait);
474         ring->devno = MKDEV(blktap_ring_major, tap->minor);
475
476         return 0;
477 }
478
479 size_t
480 blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
481 {
482         struct blktap_ring *ring = &tap->ring;
483         char *s = buf, *end = buf + size;
484         int usr_idx;
485
486         s += snprintf(s, end - s,
487                       "begin pending:%d\n", ring->n_pending);
488
489         for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
490                 struct blktap_request *request;
491                 struct timeval *time;
492                 char op = '?';
493
494                 request = ring->pending[usr_idx];
495                 if (!request)
496                         continue;
497
498                 switch (request->operation) {
499                 case BLKIF_OP_WRITE:  op = 'W'; break;
500                 case BLKIF_OP_READ:   op = 'R'; break;
501                 case BLKIF_OP_PACKET: op = 'P'; break;
502                 }
503                 time  = &request->time;
504
505                 s += snprintf(s, end - s,
506                               "%02d: usr_idx:%02d "
507                               "op:%c nr_pages:%02d time:%lu.%09lu\n",
508                               usr_idx, request->usr_idx,
509                               op, request->nr_pages,
510                               time->tv_sec, time->tv_usec);
511         }
512
513         s += snprintf(s, end - s, "end pending\n");
514
515         return s - buf;
516 }
517
518
519 int __init
520 blktap_ring_init(void)
521 {
522         int err;
523
524         err = __register_chrdev(0, 0, CONFIG_XEN_NR_TAP2_DEVICES, "blktap2",
525                                 &blktap_ring_file_operations);
526         if (err < 0) {
527                 BTERR("error registering ring devices: %d\n", err);
528                 return err;
529         }
530
531         blktap_ring_major = err;
532         BTINFO("blktap ring major: %d\n", blktap_ring_major);
533
534         return 0;
535 }
536
537 void
538 blktap_ring_exit(void)
539 {
540         if (!blktap_ring_major)
541                 return;
542
543         __unregister_chrdev(blktap_ring_major, 0, CONFIG_XEN_NR_TAP2_DEVICES,
544                             "blktap2");
545
546         blktap_ring_major = 0;
547 }