- Update Xen patches to 3.3-rc5 and c/s 1157.
[linux-flexiantxendom0-3.2.10.git] / drivers / xen / blktap2 / ring.c
1 #include <linux/module.h>
2 #include <linux/signal.h>
3
4 #include "blktap.h"
5
/* Major number of the blktap2 ring chrdev; assigned in blktap_ring_init(). */
static int blktap_ring_major;
7
8 static inline struct blktap *
9 vma_to_blktap(struct vm_area_struct *vma)
10 {
11         struct vm_foreign_map *m = vma->vm_private_data;
12         struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
13         return container_of(r, struct blktap, ring);
14 }
15
/*
 * BLKTAP - immediately before the mmap area,
 * we have a bunch of pages reserved for shared memory rings.
 */
#define RING_PAGES 1	/* number of shared-ring pages at the start of the mmap */
22 static int
23 blktap_read_ring(struct blktap *tap)
24 {
25         /* This is called to read responses from the ring. */
26         int usr_idx;
27         RING_IDX rc, rp;
28         blkif_response_t res;
29         struct blktap_ring *ring;
30         struct blktap_request *request;
31
32         down_read(&tap->tap_sem);
33
34         ring = &tap->ring;
35         if (!ring->vma) {
36                 up_read(&tap->tap_sem);
37                 return 0;
38         }
39
40         /* for each outstanding message on the ring  */
41         rp = ring->ring.sring->rsp_prod;
42         rmb();
43
44         for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
45                 memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
46                 mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
47                 ++ring->ring.rsp_cons;
48
49                 usr_idx = (int)res.id;
50                 if (usr_idx >= MAX_PENDING_REQS ||
51                     !tap->pending_requests[usr_idx]) {
52                         BTWARN("Request %d/%d invalid [%x], tapdisk %d%p\n",
53                                rc, rp, usr_idx, tap->pid, ring->vma);
54                         continue;
55                 }
56
57                 request = tap->pending_requests[usr_idx];
58                 BTDBG("request %p response #%d id %x\n", request, rc, usr_idx);
59                 blktap_device_finish_request(tap, &res, request);
60         }
61
62         up_read(&tap->tap_sem);
63
64         blktap_run_deferred();
65
66         return 0;
67 }
68
/*
 * Fault handler for the ring VMA.
 *
 * All valid pages in this mapping are installed explicitly by the
 * driver (the ring page at mmap time, data pages via grant mapping),
 * so a fault here means userspace touched an address the driver never
 * mapped: deliver SIGBUS.
 */
static int
blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/*
	 * if the page has not been mapped in by the driver then return
	 * VM_FAULT_SIGBUS to the domain.
	 */

	return VM_FAULT_SIGBUS;
}
79
/*
 * zap_pte callback for the ring VMA: tear down the grant mapping
 * backing the page at @uvaddr before its PTE goes away.
 *
 * Addresses below ring->user_vstart belong to the shared ring page
 * itself (an ordinary kernel page), so their PTEs are simply cleared.
 * Anything above maps a request data segment: the kernel-side and
 * user-side grant handles for that segment are unmapped with a single
 * hypercall, the p2m entry is invalidated, and both handles are reset
 * to INVALID_GRANT_HANDLE.
 *
 * Returns the previous PTE value (or a copy of it when the user grant
 * unmap clears the PTE on our behalf via GNTMAP_contains_pte).
 */
static pte_t
blktap_ring_clear_pte(struct vm_area_struct *vma,
                      unsigned long uvaddr,
                      pte_t *ptep, int is_fullmm)
{
        pte_t copy;
        struct blktap *tap;
        unsigned long kvaddr;
        struct page **map, *page;
        struct blktap_ring *ring;
        struct blktap_request *request;
        struct grant_handle_pair *khandle;
        struct gnttab_unmap_grant_ref unmap[2];  /* kernel + user handle */
        int offset, seg, usr_idx, count = 0;

        tap  = vma_to_blktap(vma);
        ring = &tap->ring;
        map  = ring->foreign_map.map;
        BUG_ON(!map);   /* TODO Should this be changed to if statement? */

        /*
         * Zap entry if the address is before the start of the grant
         * mapped region.
         */
        if (uvaddr < ring->user_vstart)
                return xen_ptep_get_and_clear_full(vma, uvaddr,
                                                   ptep, is_fullmm);

        /* Page offset within the data area -> (request, segment) pair. */
        offset  = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
        usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
        seg     = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;

        /* Drop this page from the VM_FOREIGN translation map. */
        offset  = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
        page    = map[offset];
        if (page && PageBlkback(page)) {
                ClearPageBlkback(page);
                set_page_private(page, 0);
        }
        map[offset] = NULL;

        request = tap->pending_requests[usr_idx];
        kvaddr  = request_to_kaddr(request, seg);
        khandle = request->handles + seg;

        /* Queue the kernel-side grant unmap, and invalidate the p2m slot. */
        if (khandle->kernel != INVALID_GRANT_HANDLE) {
                gnttab_set_unmap_op(&unmap[count], kvaddr, 
                                    GNTMAP_host_map, khandle->kernel);
                count++;

                set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, 
                                    INVALID_P2M_ENTRY);
        }


        /*
         * Queue the user-side grant unmap; with GNTMAP_contains_pte the
         * hypervisor clears the PTE itself, so save its value first.
         * Otherwise clear the PTE ourselves.
         */
        if (khandle->user != INVALID_GRANT_HANDLE) {
                BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

                copy = *ptep;
                gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep), 
                                    GNTMAP_host_map 
                                    | GNTMAP_application_map 
                                    | GNTMAP_contains_pte,
                                    khandle->user);
                count++;
        } else
                copy = xen_ptep_get_and_clear_full(vma, uvaddr, ptep,
                                                   is_fullmm);

        /* One hypercall for both unmaps; failure here is unrecoverable. */
        if (count)
                if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                              unmap, count))
                        BUG();

        khandle->kernel = INVALID_GRANT_HANDLE;
        khandle->user   = INVALID_GRANT_HANDLE;

        return copy;
}
158
159 static void
160 blktap_ring_vm_unmap(struct vm_area_struct *vma)
161 {
162         struct blktap *tap = vma_to_blktap(vma);
163
164         down_write(&tap->tap_sem);
165         clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
166         clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
167         clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
168         up_write(&tap->tap_sem);
169 }
170
/*
 * VMA close callback: final teardown of the ring mapping.
 *
 * Ordering matters: first mark the VMA gone (blktap_ring_vm_unmap) so
 * new requests fail, then fail everything pending or deferred, and
 * only then — under tap_sem — unmap the remaining PTEs and free the
 * foreign map and the shared ring page.
 */
static void
blktap_ring_vm_close(struct vm_area_struct *vma)
{
        struct blktap *tap = vma_to_blktap(vma);
        struct blktap_ring *ring = &tap->ring;

        blktap_ring_vm_unmap(vma);                 /* fail future requests */
        blktap_device_fail_pending_requests(tap);  /* fail pending requests */
        blktap_device_restart(tap);                /* fail deferred requests */

        down_write(&tap->tap_sem);

        /* Remaining grant mappings are torn down via the ->zap_pte hook. */
        zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);

        kfree(ring->foreign_map.map);
        ring->foreign_map.map = NULL;

        /* Free the ring page. */
        ClearPageReserved(virt_to_page(ring->ring.sring));
        free_page((unsigned long)ring->ring.sring);

        BTINFO("unmapping ring %d\n", tap->minor);
        ring->ring.sring = NULL;
        ring->vma = NULL;

        up_write(&tap->tap_sem);

        /* Wake waiters (e.g. pause/resume/destroy paths) to re-check state. */
        wake_up(&tap->wq);
}
200
/*
 * VM operations for the tapdisk ring mapping.  .zap_pte tears down
 * grant mappings as PTEs are cleared; .fault always SIGBUSes since
 * all valid pages are installed explicitly by the driver.
 */
static struct vm_operations_struct blktap_ring_vm_operations = {
        .close    = blktap_ring_vm_close,
        .unmap    = blktap_ring_vm_unmap,
        .fault    = blktap_ring_fault,
        .zap_pte  = blktap_ring_clear_pte,
};
207
208 static int
209 blktap_ring_open(struct inode *inode, struct file *filp)
210 {
211         int idx;
212         struct blktap *tap;
213
214         idx = iminor(inode);
215         if (idx < 0 || idx >= CONFIG_XEN_NR_TAP2_DEVICES || !blktaps[idx]) {
216                 BTERR("unable to open device blktap%d\n", idx);
217                 return -ENODEV;
218         }
219
220         tap = blktaps[idx];
221
222         BTINFO("opening device blktap%d\n", idx);
223
224         if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
225                 return -ENODEV;
226
227         /* Only one process can access ring at a time */
228         if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
229                 return -EBUSY;
230
231         filp->private_data = tap;
232         BTINFO("opened device %d\n", tap->minor);
233
234         return 0;
235 }
236
237 static int
238 blktap_ring_release(struct inode *inode, struct file *filp)
239 {
240         struct blktap *tap = filp->private_data;
241
242         BTINFO("freeing device %d\n", tap->minor);
243         clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
244         filp->private_data = NULL;
245         wake_up(&tap->wq);      
246         return 0;
247 }
248
249 /* Note on mmap:
 * We need to map pages to user space in a way that will allow the block
 * subsystem to set up direct IO to them.  This couldn't be done before, because
252  * there isn't really a sane way to translate a user virtual address down to a 
253  * physical address when the page belongs to another domain.
254  *
255  * My first approach was to map the page in to kernel memory, add an entry
256  * for it in the physical frame list (using alloc_lomem_region as in blkback)
257  * and then attempt to map that page up to user space.  This is disallowed
258  * by xen though, which realizes that we don't really own the machine frame
259  * underlying the physical page.
260  *
261  * The new approach is to provide explicit support for this in xen linux.
262  * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
263  * mapped from other vms.  vma->vm_private_data is set up as a mapping 
264  * from pages to actual page structs.  There is a new clause in get_user_pages
265  * that does the right thing for this sort of mapping.
266  */
267 static int
268 blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
269 {
270         int size, err;
271         struct page **map;
272         struct blktap *tap;
273         blkif_sring_t *sring;
274         struct blktap_ring *ring;
275
276         tap   = filp->private_data;
277         ring  = &tap->ring;
278         map   = NULL;
279         sring = NULL;
280
281         if (!tap || test_and_set_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
282                 return -ENOMEM;
283
284         size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
285         if (size != (MMAP_PAGES + RING_PAGES)) {
286                 BTERR("you _must_ map exactly %lu pages!\n",
287                       MMAP_PAGES + RING_PAGES);
288                 return -EAGAIN;
289         }
290
291         /* Allocate the fe ring. */
292         sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
293         if (!sring) {
294                 BTERR("Couldn't alloc sring.\n");
295                 goto fail_mem;
296         }
297
298         map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
299         if (!map) {
300                 BTERR("Couldn't alloc VM_FOREIGN map.\n");
301                 goto fail_mem;
302         }
303
304         SetPageReserved(virt_to_page(sring));
305     
306         SHARED_RING_INIT(sring);
307         FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
308
309         ring->ring_vstart = vma->vm_start;
310         ring->user_vstart = ring->ring_vstart + (RING_PAGES << PAGE_SHIFT);
311
312         /* Map the ring pages to the start of the region and reserve it. */
313         if (xen_feature(XENFEAT_auto_translated_physmap))
314                 err = vm_insert_page(vma, vma->vm_start,
315                                      virt_to_page(ring->ring.sring));
316         else
317                 err = remap_pfn_range(vma, vma->vm_start,
318                                       __pa(ring->ring.sring) >> PAGE_SHIFT,
319                                       PAGE_SIZE, vma->vm_page_prot);
320         if (err) {
321                 BTERR("Mapping user ring failed: %d\n", err);
322                 goto fail;
323         }
324
325         /* Mark this VM as containing foreign pages, and set up mappings. */
326         ring->foreign_map.map = map;
327         vma->vm_private_data = &ring->foreign_map;
328         vma->vm_flags |= VM_FOREIGN;
329         vma->vm_flags |= VM_DONTCOPY;
330         vma->vm_flags |= VM_RESERVED;
331         vma->vm_ops = &blktap_ring_vm_operations;
332
333 #ifdef CONFIG_X86
334         vma->vm_mm->context.has_foreign_mappings = 1;
335 #endif
336
337         tap->pid = current->pid;
338         BTINFO("blktap: mapping pid is %d\n", tap->pid);
339
340         ring->vma = vma;
341         return 0;
342
343  fail:
344         /* Clear any active mappings. */
345         zap_page_range(vma, vma->vm_start, 
346                        vma->vm_end - vma->vm_start, NULL);
347         ClearPageReserved(virt_to_page(sring));
348  fail_mem:
349         free_page((unsigned long)sring);
350         kfree(map);
351
352         return -ENOMEM;
353 }
354
355 static inline void
356 blktap_ring_set_message(struct blktap *tap, int msg)
357 {
358         struct blktap_ring *ring = &tap->ring;
359
360         down_read(&tap->tap_sem);
361         if (ring->ring.sring)
362                 ring->ring.sring->private.tapif_user.msg = msg;
363         up_read(&tap->tap_sem);
364 }
365
366 static long
367 blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
368 {
369         struct blktap_params params;
370         struct blktap *tap = filp->private_data;
371
372         BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
373
374         switch(cmd) {
375         case BLKTAP2_IOCTL_KICK_FE:
376                 /* There are fe messages to process. */
377                 return blktap_read_ring(tap);
378
379         case BLKTAP2_IOCTL_CREATE_DEVICE:
380                 if (!arg)
381                         return -EINVAL;
382
383                 if (copy_from_user(&params, (struct blktap_params __user *)arg,
384                                    sizeof(params))) {
385                         BTERR("failed to get params\n");
386                         return -EFAULT;
387                 }
388
389                 if (blktap_validate_params(tap, &params)) {
390                         BTERR("invalid params\n");
391                         return -EINVAL;
392                 }
393
394                 tap->params = params;
395                 return blktap_device_create(tap);
396
397         case BLKTAP2_IOCTL_SET_PARAMS:
398                 if (!arg)
399                         return -EINVAL;
400
401                 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
402                         return -EINVAL;
403
404                 if (copy_from_user(&params, (struct blktap_params __user *)arg,
405                                    sizeof(params))) {
406                         BTERR("failed to get params\n");
407                         return -EFAULT;
408                 }
409
410                 if (blktap_validate_params(tap, &params)) {
411                         BTERR("invalid params\n");
412                         return -EINVAL;
413                 }
414
415                 tap->params = params;
416                 return 0;
417
418         case BLKTAP2_IOCTL_PAUSE:
419                 if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
420                         return -EINVAL;
421
422                 set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
423                 clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
424
425                 blktap_ring_set_message(tap, 0);
426                 wake_up_interruptible(&tap->wq);
427
428                 return 0;
429
430
431         case BLKTAP2_IOCTL_REOPEN:
432                 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
433                         return -EINVAL;
434
435                 if (!arg)
436                         return -EINVAL;
437
438                 if (copy_to_user((char __user *)arg,
439                                  tap->params.name,
440                                  strlen(tap->params.name) + 1))
441                         return -EFAULT;
442
443                 blktap_ring_set_message(tap, 0);
444                 wake_up_interruptible(&tap->wq);
445
446                 return 0;
447
448         case BLKTAP2_IOCTL_RESUME:
449                 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
450                         return -EINVAL;
451
452                 tap->ring.response = (int)arg;
453                 if (!tap->ring.response)
454                         clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
455
456                 blktap_ring_set_message(tap, 0);
457                 wake_up_interruptible(&tap->wq);
458
459                 return 0;
460         }
461
462         return -ENOIOCTLCMD;
463 }
464
465 static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
466 {
467         struct blktap *tap = filp->private_data;
468         struct blktap_ring *ring = &tap->ring;
469
470         poll_wait(filp, &ring->poll_wait, wait);
471         if (ring->ring.sring->private.tapif_user.msg ||
472             ring->ring.req_prod_pvt != ring->ring.sring->req_prod) {
473                 RING_PUSH_REQUESTS(&ring->ring);
474                 return POLLIN | POLLRDNORM;
475         }
476
477         return 0;
478 }
479
/* File operations for the blktap ring character device (tapdisk side). */
static const struct file_operations blktap_ring_file_operations = {
        .owner    = THIS_MODULE,
        .open     = blktap_ring_open,
        .release  = blktap_ring_release,
        .unlocked_ioctl = blktap_ring_ioctl,
        .mmap     = blktap_ring_mmap,
        .poll     = blktap_ring_poll,
};
488
/* Wake userspace (tapdisk) blocked in poll() on the ring fd. */
void
blktap_ring_kick_user(struct blktap *tap)
{
        wake_up_interruptible(&tap->ring.poll_wait);
}
494
/*
 * Ask tapdisk to resume a paused device and wait for its answer.
 *
 * Posts BLKTAP2_RING_MESSAGE_RESUME in the shared ring and wakes the
 * poller; tapdisk replies via BLKTAP2_IOCTL_RESUME, which records an
 * error in ring->response and/or clears BLKTAP_PAUSED, then wakes
 * tap->wq.
 *
 * Returns 0 on success, -ENODEV if no ring is mapped, -EINVAL if the
 * device is not paused, a tapdisk-supplied error code, or -EAGAIN if
 * the device is still paused afterwards (e.g. the wait was cut short).
 */
int
blktap_ring_resume(struct blktap *tap)
{
        int err;
        struct blktap_ring *ring = &tap->ring;

        if (!blktap_active(tap))
                return -ENODEV;

        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EINVAL;

        /* set shared flag for resume */
        ring->response = 0;

        blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
        blktap_ring_kick_user(tap);

        /* NOTE(review): interruptible wait — a signal can end it with
         * response unset and PAUSED still set, yielding -EAGAIN below. */
        wait_event_interruptible(tap->wq, ring->response ||
                                 !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));

        err = ring->response;
        ring->response = 0;

        BTDBG("err: %d\n", err);

        if (err)
                return err;

        if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EAGAIN;

        return 0;
}
529
/*
 * Ask tapdisk to pause the device and wait until it acknowledges.
 *
 * First drains in-flight requests, then posts
 * BLKTAP2_RING_MESSAGE_PAUSE and wakes the poller; tapdisk confirms
 * via BLKTAP2_IOCTL_PAUSE, which sets BLKTAP_PAUSED and wakes tap->wq.
 *
 * Returns 0 once paused, -ENODEV if no ring is mapped, -EINVAL if no
 * pause was requested, or -EAGAIN if draining or the acknowledgement
 * did not complete (e.g. the interruptible wait was signalled).
 */
int
blktap_ring_pause(struct blktap *tap)
{
        if (!blktap_active(tap))
                return -ENODEV;

        if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
                return -EINVAL;

        BTDBG("draining queue\n");
        wait_event_interruptible(tap->wq, !tap->pending_cnt);
        if (tap->pending_cnt)
                return -EAGAIN;

        blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
        blktap_ring_kick_user(tap);

        BTDBG("waiting for tapdisk response\n");
        wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EAGAIN;

        return 0;
}
554
555 int
556 blktap_ring_destroy(struct blktap *tap)
557 {
558         if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
559             !test_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
560                 return 0;
561
562         BTDBG("sending tapdisk close message\n");
563         blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_CLOSE);
564         blktap_ring_kick_user(tap);
565
566         return -EAGAIN;
567 }
568
569 static void
570 blktap_ring_initialize(struct blktap_ring *ring, int minor)
571 {
572         memset(ring, 0, sizeof(*ring));
573         init_waitqueue_head(&ring->poll_wait);
574         ring->devno = MKDEV(blktap_ring_major, minor);
575 }
576
577 int
578 blktap_ring_create(struct blktap *tap)
579 {
580         struct blktap_ring *ring = &tap->ring;
581         blktap_ring_initialize(ring, tap->minor);
582         return blktap_sysfs_create(tap);
583 }
584
585 int __init
586 blktap_ring_init(int *major)
587 {
588         int err;
589
590         err = __register_chrdev(0, 0, CONFIG_XEN_NR_TAP2_DEVICES, "blktap2",
591                                 &blktap_ring_file_operations);
592         if (err < 0) {
593                 BTERR("error registering blktap ring device: %d\n", err);
594                 return err;
595         }
596
597         blktap_ring_major = *major = err;
598         BTINFO("blktap ring major: %d\n", blktap_ring_major);
599         return 0;
600 }
601
602 int
603 blktap_ring_free(void)
604 {
605         if (blktap_ring_major)
606                 __unregister_chrdev(blktap_ring_major, 0,
607                                     CONFIG_XEN_NR_TAP2_DEVICES, "blktap2");
608
609         return 0;
610 }