/******************************************************************************
 * gnttab.c
 *
 * Granting foreign access to our memory reservation.
 *
 * Copyright (c) 2005-2006, Christopher Clark
 * Copyright (c) 2004-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/seqlock.h>
#include <linux/timer.h>
#include <xen/interface/xen.h>
#include <xen/gnttab.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <xen/interface/memory.h>
#include <asm/gnttab_dma.h>

#ifdef HAVE_XEN_PLATFORM_COMPAT_H
#include <xen/platform-compat.h>
#endif

/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))

static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);

static struct grant_entry *shared;

static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])

#define nr_freelist_frames(grant_frames)                                \
        (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP)
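
/*
 * The free list is threaded through the grant entries themselves:
 * gnttab_list is an array of pointers to page-sized blocks of
 * grant_ref_t, and gnttab_entry(ref) holds the reference of the next
 * free entry (or GNTTAB_LIST_END for the tail).  All free-list
 * manipulation happens under gnttab_list_lock.
 */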

static int get_free_entries(int count)
{
        unsigned long flags;
        int ref, rc;
        grant_ref_t head;

        spin_lock_irqsave(&gnttab_list_lock, flags);

        if ((gnttab_free_count < count) &&
            ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
                spin_unlock_irqrestore(&gnttab_list_lock, flags);
                return rc;
        }

        ref = head = gnttab_free_head;
        gnttab_free_count -= count;
        while (count-- > 1)
                head = gnttab_entry(head);
        gnttab_free_head = gnttab_entry(head);
        gnttab_entry(head) = GNTTAB_LIST_END;

        spin_unlock_irqrestore(&gnttab_list_lock, flags);

        return ref;
}

#define get_free_entry() get_free_entries(1)

static void do_free_callbacks(void)
{
        struct gnttab_free_callback *callback, *next;

        callback = gnttab_free_callback_list;
        gnttab_free_callback_list = NULL;

        while (callback != NULL) {
                next = callback->next;
                if (gnttab_free_count >= callback->count) {
                        callback->next = NULL;
                        callback->queued = 0;
                        callback->fn(callback->arg);
                } else {
                        callback->next = gnttab_free_callback_list;
                        gnttab_free_callback_list = callback;
                }
                callback = next;
        }
}

static inline void check_free_callbacks(void)
{
        if (unlikely(gnttab_free_callback_list))
                do_free_callbacks();
}

static void put_free_entry(grant_ref_t ref)
{
        unsigned long flags;
        spin_lock_irqsave(&gnttab_list_lock, flags);
        gnttab_entry(ref) = gnttab_free_head;
        gnttab_free_head = ref;
        gnttab_free_count++;
        check_free_callbacks();
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
}

/*
 * Public grant-issuing interface functions
 */

int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
                                int flags)
{
        int ref;

        if (unlikely((ref = get_free_entry()) < 0))
                return -ENOSPC;

        shared[ref].frame = frame;
        shared[ref].domid = domid;
        wmb();
        BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
        shared[ref].flags = GTF_permit_access | flags;

        return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
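
/*
 * Typical caller pattern (illustrative sketch only; backend_domid and
 * page are hypothetical driver state, not part of this file):
 *
 *      int ref = gnttab_grant_foreign_access(backend_domid,
 *                      virt_to_mfn(page_address(page)), GTF_readonly);
 *      if (ref < 0)
 *              return ref;
 *      ... advertise ref to the peer, e.g. via xenstore ...
 *      gnttab_end_foreign_access(ref, 0);
 */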

void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int flags)
{
        shared[ref].frame = frame;
        shared[ref].domid = domid;
        wmb();
        BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
        shared[ref].flags = GTF_permit_access | flags;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);

int gnttab_query_foreign_access(grant_ref_t ref)
{
        u16 nflags;

        nflags = shared[ref].flags;

        return (nflags & (GTF_reading|GTF_writing));
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);

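/*
 * Atomically clear the grant's flags, but only while the remote domain
 * holds no mapping (neither GTF_reading nor GTF_writing is set).  The
 * cmpxchg loop retries if the flags change under us; returns 0 if the
 * grant is still in use.
 */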
static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
        u16 flags, nflags;

        nflags = shared[ref].flags;
        do {
                if ((flags = nflags) & (GTF_reading|GTF_writing))
                        return 0;
        } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) !=
                 flags);

        return 1;
}

int gnttab_end_foreign_access_ref(grant_ref_t ref)
{
        if (_gnttab_end_foreign_access_ref(ref))
                return 1;
        printk(KERN_DEBUG "WARNING: g.e. %#x still in use!\n", ref);
        return 0;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);

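/*
 * Grants still in use when gnttab_end_foreign_access() runs are queued
 * on deferred_list and retried once a second from deferred_timer;
 * warn_delay counts down the seconds before a still-pending entry is
 * reported.
 */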
struct deferred_entry {
        struct list_head list;
        grant_ref_t ref;
        uint16_t warn_delay;
        struct page *page;
};
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(unsigned long);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);

static void gnttab_handle_deferred(unsigned long unused)
{
        unsigned int nr = 10;
        struct deferred_entry *first = NULL;
        unsigned long flags;

        spin_lock_irqsave(&gnttab_list_lock, flags);
        while (nr--) {
                struct deferred_entry *entry
                        = list_first_entry(&deferred_list,
                                           struct deferred_entry, list);

                if (entry == first)
                        break;
                list_del(&entry->list);
                spin_unlock_irqrestore(&gnttab_list_lock, flags);
                if (_gnttab_end_foreign_access_ref(entry->ref)) {
                        put_free_entry(entry->ref);
                        if (entry->page) {
                                printk(KERN_DEBUG
                                       "freeing g.e. %#x (pfn %#lx)\n",
                                       entry->ref, page_to_pfn(entry->page));
                                __free_page(entry->page);
                        } else
                                printk(KERN_DEBUG "freeing g.e. %#x\n",
                                       entry->ref);
                        kfree(entry);
                        entry = NULL;
                } else {
                        if (!--entry->warn_delay)
                                pr_info("g.e. %#x still pending\n",
                                        entry->ref);
                        if (!first)
                                first = entry;
                }
                spin_lock_irqsave(&gnttab_list_lock, flags);
                if (entry)
                        list_add_tail(&entry->list, &deferred_list);
                else if (list_empty(&deferred_list))
                        break;
        }
        if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
                deferred_timer.expires = jiffies + HZ;
                add_timer(&deferred_timer);
        }
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
}

static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
        struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        const char *what = KERN_WARNING "leaking";

        if (entry) {
                unsigned long flags;

                entry->ref = ref;
                entry->page = page;
                entry->warn_delay = 60;
                spin_lock_irqsave(&gnttab_list_lock, flags);
                list_add_tail(&entry->list, &deferred_list);
                if (!timer_pending(&deferred_timer)) {
                        deferred_timer.expires = jiffies + HZ;
                        add_timer(&deferred_timer);
                }
                spin_unlock_irqrestore(&gnttab_list_lock, flags);
                what = KERN_DEBUG "deferring";
        }
        printk("%s g.e. %#x (pfn %#lx)\n", what,
               ref, page ? page_to_pfn(page) : (unsigned long)-1);
}

void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page)
{
        if (gnttab_end_foreign_access_ref(ref)) {
                put_free_entry(ref);
                if (page != 0)
                        free_page(page);
        } else
                gnttab_add_deferred(ref, page ? virt_to_page(page) : NULL);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);

int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
        int ref;

        if (unlikely((ref = get_free_entry()) < 0))
                return -ENOSPC;
        gnttab_grant_foreign_transfer_ref(ref, domid, pfn);

        return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);

void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
                                       unsigned long pfn)
{
        shared[ref].frame = pfn;
        shared[ref].domid = domid;
        wmb();
        shared[ref].flags = GTF_accept_transfer;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);

unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
{
        unsigned long frame;
        u16           flags;

        /*
         * If a transfer is not even yet started, try to reclaim the grant
         * reference and return failure (== 0).
         */
        while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
                if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
                        return 0;
                cpu_relax();
        }

        /* If a transfer is in progress then wait until it is completed. */
        while (!(flags & GTF_transfer_completed)) {
                flags = shared[ref].flags;
                cpu_relax();
        }

        /* Read the frame number /after/ reading completion status. */
        rmb();
        frame = shared[ref].frame;
        BUG_ON(frame == 0);

        return frame;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);

unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
{
        unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
        put_free_entry(ref);
        return frame;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);

void gnttab_free_grant_reference(grant_ref_t ref)
{
        put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);

void gnttab_free_grant_references(grant_ref_t head)
{
        grant_ref_t ref;
        unsigned long flags;
        int count = 1;
        if (head == GNTTAB_LIST_END)
                return;
        spin_lock_irqsave(&gnttab_list_lock, flags);
        ref = head;
        while (gnttab_entry(ref) != GNTTAB_LIST_END) {
                ref = gnttab_entry(ref);
                count++;
        }
        gnttab_entry(ref) = gnttab_free_head;
        gnttab_free_head = head;
        gnttab_free_count += count;
        check_free_callbacks();
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);

int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
        int h = get_free_entries(count);

        if (h < 0)
                return -ENOSPC;

        *head = h;

        return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);

int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
        return (*private_head == GNTTAB_LIST_END);
}
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);

int gnttab_claim_grant_reference(grant_ref_t *private_head)
{
        grant_ref_t g = *private_head;
        if (unlikely(g == GNTTAB_LIST_END))
                return -ENOSPC;
        *private_head = gnttab_entry(g);
        return g;
}
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);

void gnttab_release_grant_reference(grant_ref_t *private_head,
                                    grant_ref_t release)
{
        gnttab_entry(release) = *private_head;
        *private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
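
/*
 * Batched reservation sketch (hypothetical driver code; otherend_id and
 * mfn are placeholders): reserve a block of references up front, then
 * claim them one at a time without touching the global free list.
 *
 *      grant_ref_t head;
 *      int ref;
 *
 *      if (gnttab_alloc_grant_references(16, &head))
 *              return -ENOSPC;
 *      ref = gnttab_claim_grant_reference(&head);
 *      if (ref >= 0)
 *              gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, 0);
 *      ...
 *      gnttab_free_grant_references(head);
 */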

void gnttab_request_free_callback(struct gnttab_free_callback *callback,
                                  void (*fn)(void *), void *arg, u16 count)
{
        unsigned long flags;
        spin_lock_irqsave(&gnttab_list_lock, flags);
        if (callback->queued)
                goto out;
        callback->fn = fn;
        callback->arg = arg;
        callback->count = count;
        callback->queued = 1;
        callback->next = gnttab_free_callback_list;
        gnttab_free_callback_list = callback;
        check_free_callbacks();
out:
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);

void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
{
        struct gnttab_free_callback **pcb;
        unsigned long flags;

        spin_lock_irqsave(&gnttab_list_lock, flags);
        for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
                if (*pcb == callback) {
                        *pcb = callback->next;
                        callback->queued = 0;
                        break;
                }
        }
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);

static int grow_gnttab_list(unsigned int more_frames)
{
        unsigned int new_nr_grant_frames, extra_entries, i;
        unsigned int nr_glist_frames, new_nr_glist_frames;

        new_nr_grant_frames = nr_grant_frames + more_frames;
        extra_entries       = more_frames * ENTRIES_PER_GRANT_FRAME;

        nr_glist_frames = nr_freelist_frames(nr_grant_frames);
        new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames);
        for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
                gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
                if (!gnttab_list[i])
                        goto grow_nomem;
        }

        for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
             i < ENTRIES_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
                gnttab_entry(i) = i + 1;

        gnttab_entry(i) = gnttab_free_head;
        gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
        gnttab_free_count += extra_entries;

        nr_grant_frames = new_nr_grant_frames;

        check_free_callbacks();

        return 0;

grow_nomem:
        for ( ; i >= nr_glist_frames; i--)
                free_page((unsigned long) gnttab_list[i]);
        return -ENOMEM;
}

static unsigned int __max_nr_grant_frames(void)
{
        struct gnttab_query_size query;
        int rc;

        query.dom = DOMID_SELF;

        rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
        if ((rc < 0) || (query.status != GNTST_okay))
                return 4; /* Legacy max supported number of frames */

        return query.max_nr_frames;
}

static inline unsigned int max_nr_grant_frames(void)
{
        unsigned int xen_max = __max_nr_grant_frames();

        if (xen_max > boot_max_nr_grant_frames)
                return boot_max_nr_grant_frames;
        return xen_max;
}

#ifdef CONFIG_XEN

#ifdef CONFIG_X86
static int map_pte_fn(pte_t *pte, struct page *pmd_page,
                      unsigned long addr, void *data)
{
        unsigned long **frames = (unsigned long **)data;

        set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
        (*frames)++;
        return 0;
}

#ifdef CONFIG_PM_SLEEP
static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
                        unsigned long addr, void *data)
{
        set_pte_at(&init_mm, addr, pte, __pte(0));
        return 0;
}
#endif

void *arch_gnttab_alloc_shared(unsigned long *frames)
{
        struct vm_struct *area;
        area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames(), NULL);
        BUG_ON(area == NULL);
        return area->addr;
}
#endif /* CONFIG_X86 */

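/*
 * Ask the hypervisor for the machine frames backing the grant table
 * (GNTTABOP_setup_table) and, on x86, map them into the kernel address
 * range reserved by arch_gnttab_alloc_shared().
 */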
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
        struct gnttab_setup_table setup;
        unsigned long *frames;
        unsigned int nr_gframes = end_idx + 1;
        int rc;

        frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
        if (!frames)
                return -ENOMEM;

        setup.dom        = DOMID_SELF;
        setup.nr_frames  = nr_gframes;
        set_xen_guest_handle(setup.frame_list, frames);

        rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
        if (rc == -ENOSYS) {
                kfree(frames);
                return -ENOSYS;
        }

        BUG_ON(rc || setup.status != GNTST_okay);

        if (shared == NULL)
                shared = arch_gnttab_alloc_shared(frames);

#ifdef CONFIG_X86
        rc = apply_to_page_range(&init_mm, (unsigned long)shared,
                                 PAGE_SIZE * nr_gframes,
                                 map_pte_fn, &frames);
        BUG_ON(rc);
        frames -= nr_gframes; /* adjust after map_pte_fn() */
#endif /* CONFIG_X86 */

        kfree(frames);

        return 0;
}

#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE)

static DEFINE_SEQLOCK(gnttab_dma_lock);
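
/*
 * gnttab_dma_lock orders gnttab_copy_grant_page() (writer) against
 * __gnttab_dma_map_page() (reader): a reader that raced with a page
 * replacement sees the sequence count change and retries.
 */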

static void gnttab_page_free(struct page *page, unsigned int order)
{
        BUG_ON(order);
        ClearPageForeign(page);
        gnttab_reset_grant_page(page);
        ClearPageReserved(page);
        put_page(page);
}

/*
 * Must not be called with IRQs off.  This should only be used on the
 * slow path.
 *
 * Copy a foreign granted page to local memory.
 */
int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
{
        struct gnttab_unmap_and_replace unmap;
        mmu_update_t mmu;
        struct page *page;
        struct page *new_page;
        void *new_addr;
        void *addr;
        paddr_t pfn;
        maddr_t mfn;
        maddr_t new_mfn;
        int err;

        page = *pagep;
        if (!get_page_unless_zero(page))
                return -ENOENT;

        err = -ENOMEM;
        new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
        if (!new_page)
                goto out;

        new_addr = page_address(new_page);
        addr = page_address(page);
        copy_page(new_addr, addr);

        pfn = page_to_pfn(page);
        mfn = pfn_to_mfn(pfn);
        new_mfn = virt_to_mfn(new_addr);

        write_seqlock_bh(&gnttab_dma_lock);

        /* Make seq visible before checking page_mapped. */
        smp_mb();

        /* Has the page been DMA-mapped? */
        if (unlikely(page_mapped(page))) {
                write_sequnlock_bh(&gnttab_dma_lock);
                put_page(new_page);
                err = -EBUSY;
                goto out;
        }

        if (!xen_feature(XENFEAT_auto_translated_physmap))
                set_phys_to_machine(pfn, new_mfn);

        gnttab_set_replace_op(&unmap, (unsigned long)addr,
                              (unsigned long)new_addr, ref);

        err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
                                        &unmap, 1);
        BUG_ON(err);
        BUG_ON(unmap.status != GNTST_okay);

        write_sequnlock_bh(&gnttab_dma_lock);

        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);

                mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
                mmu.val = pfn;
                err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
                BUG_ON(err);
        }

        new_page->mapping = page->mapping;
        new_page->index = page->index;
        set_bit(PG_foreign, &new_page->flags);
        if (PageReserved(page))
                SetPageReserved(new_page);
        *pagep = new_page;

        SetPageForeign(page, gnttab_page_free);
        page->mapping = NULL;

out:
        put_page(page);
        return err;
}
EXPORT_SYMBOL_GPL(gnttab_copy_grant_page);

void gnttab_reset_grant_page(struct page *page)
{
        init_page_count(page);
        reset_page_mapcount(page);
}
EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);

/*
 * Keep track of foreign pages marked as PageForeign so that we don't
 * return them to the remote domain prematurely.
 *
 * PageForeign pages are pinned down by increasing their mapcount.
 *
 * All other pages are simply returned as is.
 */
void __gnttab_dma_map_page(struct page *page)
{
        unsigned int seq;

        if (!is_running_on_xen() || !PageForeign(page))
                return;

        do {
                seq = read_seqbegin(&gnttab_dma_lock);

                if (gnttab_dma_local_pfn(page))
                        break;

                atomic_set(&page->_mapcount, 0);

                /* Make _mapcount visible before read_seqretry. */
                smp_mb();
        } while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
}

#endif /* CONFIG_XEN_BACKEND */

#ifdef __HAVE_ARCH_PTE_SPECIAL

static unsigned int GNTMAP_pte_special;

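/*
 * Grant mappings that will also be mapped into user space must be
 * marked pte_special so the VM never tries to refcount the foreign
 * page.  When the hypervisor supports XENFEAT_gnttab_map_avail_bits,
 * GNTMAP_pte_special asks it to set the bit for us; otherwise
 * gnttab_pre_map_adjust() returns true and the caller is expected to
 * fix up the PTEs afterwards via gnttab_post_map_adjust().
 */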
bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *map,
                           unsigned int count)
{
        unsigned int i;

        if (unlikely(cmd != GNTTABOP_map_grant_ref))
                count = 0;

        for (i = 0; i < count; ++i, ++map) {
                if (!(map->flags & GNTMAP_host_map)
                    || !(map->flags & GNTMAP_application_map))
                        continue;
                if (GNTMAP_pte_special)
                        map->flags |= GNTMAP_pte_special;
                else {
                        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
                        return true;
                }
        }

        return false;
}
EXPORT_SYMBOL(gnttab_pre_map_adjust);

#if CONFIG_XEN_COMPAT < 0x030400
int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map,
                           unsigned int count)
{
        unsigned int i;
        int rc = 0;

        for (i = 0; i < count && rc == 0; ++i, ++map) {
                pte_t pte;

                if (!(map->flags & GNTMAP_host_map)
                    || !(map->flags & GNTMAP_application_map))
                        continue;

#ifdef CONFIG_X86
                pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT | _PAGE_USER
                                | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX
                                | _PAGE_SPECIAL)
                               & __supported_pte_mask);
#else
#error Architecture not yet supported.
#endif
                if (!(map->flags & GNTMAP_readonly))
                        pte = pte_mkwrite(pte);

                if (map->flags & GNTMAP_contains_pte) {
                        mmu_update_t u;

                        u.ptr = map->host_addr;
                        u.val = __pte_val(pte);
                        rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
                } else
                        rc = HYPERVISOR_update_va_mapping(map->host_addr, pte, 0);
        }

        return rc;
}
EXPORT_SYMBOL(gnttab_post_map_adjust);
#endif

#endif /* __HAVE_ARCH_PTE_SPECIAL */

int gnttab_resume(void)
{
        if (max_nr_grant_frames() < nr_grant_frames)
                return -ENOSYS;
        return gnttab_map(0, nr_grant_frames - 1);
}

#ifdef CONFIG_PM_SLEEP
#include <linux/syscore_ops.h>

#ifdef CONFIG_X86
static int gnttab_suspend(void)
{
        apply_to_page_range(&init_mm, (unsigned long)shared,
                            PAGE_SIZE * nr_grant_frames,
                            unmap_pte_fn, NULL);
        return 0;
}
#else
#define gnttab_suspend NULL
#endif

static void _gnttab_resume(void)
{
        if (gnttab_resume())
                BUG();
}

static struct syscore_ops gnttab_syscore_ops = {
        .resume         = _gnttab_resume,
        .suspend        = gnttab_suspend,
};
#endif

#else /* !CONFIG_XEN */

#include <platform-pci.h>

static unsigned long resume_frames;
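
/*
 * In an HVM (platform-pci) kernel the grant table lives in a chunk of
 * the device's MMIO hole: resume_frames is the machine address handed
 * back by alloc_xen_mmio(), and gnttab_map() asks Xen to place the
 * grant frames there via XENMEM_add_to_physmap.
 */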

static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
        struct xen_add_to_physmap xatp;
        unsigned int i = end_idx;

        /* Loop backwards, so that the first hypercall has the largest index,
         * ensuring that the table will grow only once.
         */
        do {
                xatp.domid = DOMID_SELF;
                xatp.idx = i;
                xatp.space = XENMAPSPACE_grant_table;
                xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
                if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
                        BUG();
        } while (i-- > start_idx);

        return 0;
}

int gnttab_resume(void)
{
        unsigned int max_nr_gframes, nr_gframes;

        nr_gframes = nr_grant_frames;
        max_nr_gframes = max_nr_grant_frames();
        if (max_nr_gframes < nr_gframes)
                return -ENOSYS;

        if (!resume_frames) {
                resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
                shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
                if (shared == NULL) {
                        pr_warning("failed to ioremap grant table frames\n");
                        return -ENOMEM;
                }
        }

        gnttab_map(0, nr_gframes - 1);

        return 0;
}

#endif /* !CONFIG_XEN */

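/*
 * Grow the grant table by enough frames to cover req_entries more
 * entries: tell the hypervisor to extend the table (gnttab_map), then
 * extend the in-kernel free list (grow_gnttab_list).  Called with
 * gnttab_list_lock held, from get_free_entries().
 */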
static int gnttab_expand(unsigned int req_entries)
{
        int rc;
        unsigned int cur, extra;

        cur = nr_grant_frames;
        extra = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) /
                 ENTRIES_PER_GRANT_FRAME);
        if (cur + extra > max_nr_grant_frames())
                return -ENOSPC;

        if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
                rc = grow_gnttab_list(extra);

        return rc;
}

#ifdef CONFIG_XEN
static int __init
#else
int __devinit
#endif
gnttab_init(void)
{
        int i;
        unsigned int max_nr_glist_frames, nr_glist_frames;
        unsigned int nr_init_grefs;

        if (!is_running_on_xen())
                return -ENODEV;

        nr_grant_frames = 1;
        boot_max_nr_grant_frames = __max_nr_grant_frames();

        /* Determine the maximum number of frames required for the
         * grant reference free list on the current hypervisor.
         */
        max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);

        gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
                              GFP_KERNEL);
        if (gnttab_list == NULL)
                return -ENOMEM;

        nr_glist_frames = nr_freelist_frames(nr_grant_frames);
        for (i = 0; i < nr_glist_frames; i++) {
                gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
                if (gnttab_list[i] == NULL)
                        goto ini_nomem;
        }

        if (gnttab_resume() < 0) {
                /* Don't leak the free-list pages on failure. */
                while (--i >= 0)
                        free_page((unsigned long)gnttab_list[i]);
                kfree(gnttab_list);
                return -ENODEV;
        }

        nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;

        for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
                gnttab_entry(i) = i + 1;

        gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
        gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
        gnttab_free_head  = NR_RESERVED_ENTRIES;

#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL)
        if (!xen_feature(XENFEAT_auto_translated_physmap)
            && xen_feature(XENFEAT_gnttab_map_avail_bits)) {
#ifdef CONFIG_X86
                GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0)))
                                      >> _PAGE_BIT_UNUSED1) << _GNTMAP_guest_avail0;
#else
#error Architecture not yet supported.
#endif
        }
#endif

#if defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)
        if (!is_initial_xendomain())
                register_syscore_ops(&gnttab_syscore_ops);
#endif

        return 0;

 ini_nomem:
        for (i--; i >= 0; i--)
                free_page((unsigned long)gnttab_list[i]);
        kfree(gnttab_list);
        return -ENOMEM;
}

#ifdef CONFIG_XEN
core_initcall(gnttab_init);
#endif