/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

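/*
 * This file backs /proc/xen/privcmd, the interface used by domain-0
 * management tools.  It exposes a raw hypercall ioctl, three
 * foreign-memory mapping ioctls (MMAP, MMAPBATCH, MMAPBATCH_V2) and a
 * read-only "capabilities" node that reports "control_d" in dom0.
 *
 * Illustrative sketch (not part of this file) of how a userspace tool
 * such as libxc might drive the batch-mapping path; npages, domid and
 * mfn_array are placeholders and error handling is omitted:
 *
 *   int fd = open("/proc/xen/privcmd", O_RDWR);
 *   void *va = mmap(NULL, npages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
 *                   MAP_SHARED, fd, 0);
 *   privcmd_mmapbatch_t m = {
 *           .num  = npages,
 *           .dom  = domid,
 *           .addr = (unsigned long)va,
 *           .arr  = mfn_array,
 *   };
 *   ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m);
 */
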
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <asm/hypervisor.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/hypervisor.h>
#include <xen/public/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/xen_proc.h>
#include <xen/features.h>

static struct proc_dir_entry *privcmd_intf;
static struct proc_dir_entry *capabilities_intf;

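/*
 * Unless the architecture supplies its own check (HAVE_ARCH_PRIVCMD_MMAP),
 * enforce_singleshot_mapping() walks the page table entries of the target
 * range with apply_to_page_range() and reports failure if any PTE is
 * already present, so each privcmd VMA range can be populated at most once.
 */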
#ifndef CONFIG_XEN_PRIVILEGED_GUEST
#define HAVE_ARCH_PRIVCMD_MMAP
#endif
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int enforce_singleshot_mapping_fn(pte_t *pte, struct page *pmd_page,
                                         unsigned long addr, void *data)
{
        return pte_none(*pte) ? 0 : -EBUSY;
}

static inline int enforce_singleshot_mapping(struct vm_area_struct *vma,
                                             unsigned long addr,
                                             unsigned long npages)
{
        return apply_to_page_range(vma->vm_mm, addr, npages << PAGE_SHIFT,
                                   enforce_singleshot_mapping_fn, NULL) == 0;
}
#else
#define enforce_singleshot_mapping(vma, addr, npages) \
        privcmd_enforce_singleshot_mapping(vma)
#endif

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        long ret;
        void __user *udata = (void __user *) data;
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        unsigned long i, addr, nr, nr_pages;
        int paged_out;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        LIST_HEAD(pagelist);
        struct list_head *l, *l2;
#endif

        switch (cmd) {
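        /*
         * IOCTL_PRIVCMD_HYPERCALL: forward a raw, up-to-five-argument
         * hypercall from userspace.  On x86 the op number indexes the
         * hypercall page, which holds 32-byte stubs, so anything at or
         * beyond PAGE_SIZE >> 5 is rejected with -ENOSYS.
         */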
        case IOCTL_PRIVCMD_HYPERCALL: {
                privcmd_hypercall_t hypercall;

                if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                        return -EFAULT;

#ifdef CONFIG_X86
                ret = -ENOSYS;
                if (hypercall.op >= (PAGE_SIZE >> 5))
                        break;
                ret = _hypercall(long, (unsigned int)hypercall.op,
                                 (unsigned long)hypercall.arg[0],
                                 (unsigned long)hypercall.arg[1],
                                 (unsigned long)hypercall.arg[2],
                                 (unsigned long)hypercall.arg[3],
                                 (unsigned long)hypercall.arg[4]);
#else
                ret = privcmd_hypercall(&hypercall);
#endif
        }
        break;

#ifdef CONFIG_XEN_PRIVILEGED_GUEST

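        /*
         * IOCTL_PRIVCMD_MMAP: map a caller-supplied array of
         * privcmd_mmap_entry_t chunks (va, mfn, npages) into the VMA that
         * starts at the first entry's va.  The array is copied in
         * page-sized batches, validated for contiguity and overflow, and
         * then handed to direct_remap_pfn_range() one chunk at a time.
         */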
        case IOCTL_PRIVCMD_MMAP: {
#define MMAP_NR_PER_PAGE \
        (unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*msg))
                privcmd_mmap_t mmapcmd;
                privcmd_mmap_entry_t *msg;
                privcmd_mmap_entry_t __user *p;

                if (!is_initial_xendomain())
                        return -EPERM;

                if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                        return -EFAULT;

                if (mmapcmd.num <= 0)
                        return -EINVAL;

                p = mmapcmd.entry;
                for (i = 0; i < mmapcmd.num;) {
                        nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);

                        ret = -ENOMEM;
                        l = (struct list_head *)__get_free_page(GFP_KERNEL);
                        if (l == NULL)
                                goto mmap_free;

                        INIT_LIST_HEAD(l);
                        list_add_tail(l, &pagelist);
                        msg = (privcmd_mmap_entry_t *)(l + 1);

                        ret = -EFAULT;
                        if (copy_from_user(msg, p, nr * sizeof(*msg)))
                                goto mmap_free;
                        i += nr;
                        p += nr;
                }

                l = pagelist.next;
                msg = (privcmd_mmap_entry_t *)(l + 1);

                down_write(&mm->mmap_sem);

                vma = find_vma(mm, msg->va);
                ret = -EINVAL;
                if (!vma || (msg->va != vma->vm_start))
                        goto mmap_out;

                addr = vma->vm_start;

                i = 0;
                list_for_each(l, &pagelist) {
                        nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);

                        msg = (privcmd_mmap_entry_t *)(l + 1);
                        while (i < nr) {
                                /* Do not allow range to wrap the address space. */
                                if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
                                    (((unsigned long)msg->npages << PAGE_SHIFT) >= -addr))
                                        goto mmap_out;

                                /* Range chunks must be contiguous in va space. */
                                if ((msg->va != addr) ||
                                    ((msg->va + (msg->npages << PAGE_SHIFT)) > vma->vm_end))
                                        goto mmap_out;

                                addr += msg->npages << PAGE_SHIFT;
                                msg++;
                                i++;
                        }
                }

                if (!enforce_singleshot_mapping(vma, vma->vm_start,
                                                (addr - vma->vm_start) >> PAGE_SHIFT))
                        goto mmap_out;

                addr = vma->vm_start;
                i = 0;
                list_for_each(l, &pagelist) {
                        nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);

                        msg = (privcmd_mmap_entry_t *)(l + 1);
                        while (i < nr) {
                                if ((ret = direct_remap_pfn_range(
                                             vma,
                                             msg->va & PAGE_MASK,
                                             msg->mfn,
                                             msg->npages << PAGE_SHIFT,
                                             vma->vm_page_prot,
                                             mmapcmd.dom)) < 0)
                                        goto mmap_out;

                                addr += msg->npages << PAGE_SHIFT;
                                msg++;
                                i++;
                        }
                }

                ret = 0;

        mmap_out:
                up_write(&mm->mmap_sem);
                /*
                 * Errors hit before mmap_sem was taken jump straight to
                 * mmap_free so that up_write() is only called while the
                 * semaphore is actually held.
                 */
        mmap_free:
                list_for_each_safe(l, l2, &pagelist)
                        free_page((unsigned long)l);
        }
#undef MMAP_NR_PER_PAGE
        break;

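        /*
         * IOCTL_PRIVCMD_MMAPBATCH: map an array of individual frames into
         * a single contiguous range starting at m.addr.  Failures are
         * reported in place by or-ing marker bits into the corresponding
         * mfn entry (0x80000000 for a paged-out frame, 0xf0000000 for any
         * other error) and copying the array back to userspace; the ioctl
         * returns -ENOENT if any frame was paged out.
         */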
        case IOCTL_PRIVCMD_MMAPBATCH: {
#define MMAPBATCH_NR_PER_PAGE \
        (unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*mfn))
                privcmd_mmapbatch_t m;
                xen_pfn_t __user *p;
                xen_pfn_t *mfn;

                if (!is_initial_xendomain())
                        return -EPERM;

                if (copy_from_user(&m, udata, sizeof(m)))
                        return -EFAULT;

                nr_pages = m.num;
                addr = m.addr;
                if (m.num <= 0 || nr_pages > (LONG_MAX >> PAGE_SHIFT) ||
                    addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT))
                        return -EINVAL;

                p = m.arr;
                for (i = 0; i < nr_pages; ) {
                        nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);

                        ret = -ENOMEM;
                        l = (struct list_head *)__get_free_page(GFP_KERNEL);
                        if (l == NULL)
                                goto mmapbatch_out;

                        INIT_LIST_HEAD(l);
                        list_add_tail(l, &pagelist);

                        mfn = (unsigned long *)(l + 1);
                        ret = -EFAULT;
                        if (copy_from_user(mfn, p, nr * sizeof(*mfn)))
                                goto mmapbatch_out;

                        i += nr; p += nr;
                }

                down_write(&mm->mmap_sem);

                vma = find_vma(mm, addr);
                ret = -EINVAL;
                if (!vma ||
                    addr < vma->vm_start ||
                    addr + (nr_pages << PAGE_SHIFT) > vma->vm_end ||
                    !enforce_singleshot_mapping(vma, addr, nr_pages)) {
                        up_write(&mm->mmap_sem);
                        goto mmapbatch_out;
                }

                i = 0;
                ret = 0;
                paged_out = 0;
                list_for_each(l, &pagelist) {
                        nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
                        mfn = (unsigned long *)(l + 1);

                        while (i < nr) {
                                int rc;

                                rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
                                                            *mfn, PAGE_SIZE,
                                                            vma->vm_page_prot, m.dom);
                                if (rc < 0) {
                                        if (rc == -ENOENT) {
                                                *mfn |= 0x80000000U;
                                                paged_out = 1;
                                        } else
                                                *mfn |= 0xf0000000U;
                                        ret++;
                                }
                                mfn++; i++; addr += PAGE_SIZE;
                        }
                }

                up_write(&mm->mmap_sem);
                if (ret > 0) {
                        p = m.arr;
                        i = 0;
                        if (paged_out)
                                ret = -ENOENT;
                        else
                                ret = 0;
                        list_for_each(l, &pagelist) {
                                nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
                                mfn = (unsigned long *)(l + 1);
                                if (copy_to_user(p, mfn, nr * sizeof(*mfn)))
                                        ret = -EFAULT;
                                i += nr; p += nr;
                        }
                }
        mmapbatch_out:
                list_for_each_safe(l, l2, &pagelist)
                        free_page((unsigned long)l);
        }
        break;

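        /*
         * IOCTL_PRIVCMD_MMAPBATCH_V2: like MMAPBATCH, but per-frame status
         * is returned through the separate m.err array instead of being
         * or-ed into the mfn values, so the caller's frame list is left
         * intact.  On complete success the error array is cleared.
         */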
        case IOCTL_PRIVCMD_MMAPBATCH_V2: {
                privcmd_mmapbatch_v2_t m;
                const xen_pfn_t __user *p;
                xen_pfn_t *mfn;
                int *err;

                if (!is_initial_xendomain())
                        return -EPERM;

                if (copy_from_user(&m, udata, sizeof(m)))
                        return -EFAULT;

                nr_pages = m.num;
                addr = m.addr;
                if (m.num <= 0 || nr_pages > (ULONG_MAX >> PAGE_SHIFT) ||
                    addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT))
                        return -EINVAL;

                p = m.arr;
                for (i = 0; i < nr_pages; i += nr, p += nr) {
                        nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);

                        ret = -ENOMEM;
                        l = (struct list_head *)__get_free_page(GFP_KERNEL);
                        if (l == NULL)
                                goto mmapbatch_v2_out;

                        INIT_LIST_HEAD(l);
                        list_add_tail(l, &pagelist);

                        mfn = (void *)(l + 1);
                        ret = -EFAULT;
                        if (copy_from_user(mfn, p, nr * sizeof(*mfn)))
                                goto mmapbatch_v2_out;
                }

                down_write(&mm->mmap_sem);

                vma = find_vma(mm, addr);
                ret = -EINVAL;
                if (!vma ||
                    addr < vma->vm_start ||
                    addr + (nr_pages << PAGE_SHIFT) > vma->vm_end ||
                    !enforce_singleshot_mapping(vma, addr, nr_pages)) {
                        up_write(&mm->mmap_sem);
                        goto mmapbatch_v2_out;
                }

                i = 0;
                ret = 0;
                paged_out = 0;
                list_for_each(l, &pagelist) {
                        nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
                        mfn = (void *)(l + 1);
                        err = (void *)(l + 1);
                        BUILD_BUG_ON(sizeof(*err) > sizeof(*mfn));

                        while (i < nr) {
                                int rc;

                                rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
                                                            *mfn, PAGE_SIZE,
                                                            vma->vm_page_prot, m.dom);
                                if (rc < 0) {
                                        if (rc == -ENOENT)
                                                paged_out = 1;
                                        ret++;
                                } else
                                        BUG_ON(rc > 0);
                                *err++ = rc;
                                mfn++; i++; addr += PAGE_SIZE;
                        }
                }

                up_write(&mm->mmap_sem);

                if (ret > 0) {
                        int __user *p = m.err;

                        ret = paged_out ? -ENOENT : 0;
                        i = 0;
                        list_for_each(l, &pagelist) {
                                nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
                                err = (void *)(l + 1);
                                if (copy_to_user(p, err, nr * sizeof(*err)))
                                        ret = -EFAULT;
                                i += nr; p += nr;
                        }
                } else if (clear_user(m.err, nr_pages * sizeof(*m.err)))
                        ret = -EFAULT;

        mmapbatch_v2_out:
                list_for_each_safe(l, l2, &pagelist)
                        free_page((unsigned long)l);
#undef MMAPBATCH_NR_PER_PAGE
        }
        break;

#endif /* CONFIG_XEN_PRIVILEGED_GUEST */

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

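/*
 * mmap() on /proc/xen/privcmd only reserves the VMA; the real translations
 * are installed later by the ioctls above via direct_remap_pfn_range().
 * A fault on a page that was never populated therefore delivers SIGBUS.
 */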
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Unsupported for auto-translate guests. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        /* DONTCOPY is essential for Xen as copy_page_range is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}
#endif

static const struct file_operations privcmd_file_ops = {
        .unlocked_ioctl = privcmd_ioctl,
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        .mmap = privcmd_mmap,
#endif
};

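/*
 * /proc/xen/capabilities is a one-line, read-only node: it reports
 * "control_d" when running as the initial (control) domain and is empty
 * otherwise, which is how userspace tools detect dom0.
 */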
static int capabilities_read(char *page, char **start, off_t off,
                             int count, int *eof, void *data)
{
        int len = 0;
        *page = 0;

        if (is_initial_xendomain())
                len = sprintf(page, "control_d\n");

        *eof = 1;
        return len;
}

static int __init privcmd_init(void)
{
        if (!is_running_on_xen())
                return -ENODEV;

        privcmd_intf = create_xen_proc_entry("privcmd", 0400);
        if (privcmd_intf != NULL)
                privcmd_intf->proc_fops = &privcmd_file_ops;

        capabilities_intf = create_xen_proc_entry("capabilities", 0400);
        if (capabilities_intf != NULL)
                capabilities_intf->read_proc = capabilities_read;

        return 0;
}

__initcall(privcmd_init);