/******************************************************************************
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
        mmu_update_t u;
#ifdef CONFIG_HIGHPTE
        u.ptr = ((unsigned long)ptr >= (unsigned long)high_memory) ?
                arbitrary_virt_to_machine(ptr) : virt_to_machine(ptr);
#else
        u.ptr = virt_to_machine(ptr);
#endif
        u.val = __pte_val(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL_GPL(xen_l1_entry_update);
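/*
 * Note on the hypercall interface used throughout this file: each
 * mmu_update_t names the page-table entry to write by machine address in
 * .ptr (the two low bits, zero for an aligned PTE, select the update
 * type; the plain write used here is MMU_NORMAL_PT_UPDATE) and carries
 * the new entry contents in .val. Xen validates every update, so a
 * failing hypercall indicates a kernel bug, hence the BUG_ON().
 */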
void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = __pmd_val(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = __pud_val(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_PAE || CONFIG_X86_64 */

#ifdef CONFIG_X86_64
void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = __pgd_val(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */
void xen_pt_switch(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_USER_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_LOCAL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);
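/*
 * All of these helpers issue a single struct mmuext_op per hypercall.
 * HYPERVISOR_mmuext_op() also accepts an array, so independent
 * operations can share one guest/hypervisor transition. A minimal sketch
 * (hypothetical, not used by this file):
 *
 *	struct mmuext_op ops[2];
 *
 *	ops[0].cmd = MMUEXT_INVLPG_LOCAL;
 *	ops[0].arg1.linear_addr = addr1 & PAGE_MASK;
 *	ops[1].cmd = MMUEXT_INVLPG_LOCAL;
 *	ops[1].arg1.linear_addr = addr2 & PAGE_MASK;
 *	BUG_ON(HYPERVISOR_mmuext_op(ops, 2, NULL, DOMID_SELF) < 0);
 */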
#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_ALL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
        struct mmuext_op op;
        if (cpus_empty(*mask))
                return;
        op.cmd = MMUEXT_TLB_FLUSH_MULTI;
        set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
        struct mmuext_op op;
        if (cpus_empty(*mask))
                return;
        op.cmd = MMUEXT_INVLPG_MULTI;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */
void xen_pgd_pin(unsigned long ptr)
{
        struct mmuext_op op;
#ifdef CONFIG_X86_64
        op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
        op.cmd = MMUEXT_PIN_L3_TABLE;
#else
        op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(const void *ptr, unsigned int ents)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = (unsigned long)ptr;
        op.arg2.nr_ents = ents;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
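/*
 * Background for the pin/unpin helpers above: pinning asks Xen to
 * validate a frame as a page table of the given level and to hold it at
 * that type, so switching to the pinned base later does not re-validate
 * the whole tree. The root level differs per build, which is what the
 * #ifdef ladder in xen_pgd_pin() selects: L4 on x86-64, L3 with PAE,
 * L2 otherwise.
 */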
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;
static void contiguous_bitmap_set(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while (++curr_idx < end_idx)
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}
static void contiguous_bitmap_clear(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while (++curr_idx != end_idx)
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
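/*
 * Worked example of the mask arithmetic used above, assuming
 * BITS_PER_LONG == 64: for first_page == 3 and nr_pages == 5 the range
 * stays within word 0, with start_off == 3 and end_off == 8, so the set
 * mask is ((1UL<<8)-1) & -(1UL<<3) == 0xff & ~0x7 == 0xf8, i.e. exactly
 * bits 3..7. In general -(1UL<<n) is the "all bits from n upward" mask
 * and (1UL<<n)-1 the "all bits below n" mask.
 */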
/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];
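/*
 * Scratch space shared by the contiguous-region routines below: with
 * 4KB pages, order 9 covers 1<<9 pages = 2MB, so each array holds one
 * frame (or one multicall slot) per page of the largest supported
 * extent. The balloon_lock serialises all users, which is what makes
 * the static allocation safe.
 */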
/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
        unsigned long vstart, unsigned int order, unsigned int address_bits)
{
        unsigned long *in_frames = discontig_frames, out_frame;
        unsigned long frame, flags;
        unsigned int i;
        int rc, success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = 1UL << order,
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .address_bits = address_bits,
                        .domid        = DOMID_SELF
                }
        };

        /*
         * Currently an auto-translated guest will not perform I/O, nor will
         * it require PAE page directories below 4GB. Therefore any calls to
         * this function are redundant and can be ignored.
         */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        set_xen_guest_handle(exchange.in.extent_start, in_frames);
        set_xen_guest_handle(exchange.out.extent_start, &out_frame);

        scrub_pages((void *)vstart, 1 << order);

        balloon_lock(flags);

        /* 1. Zap current PTEs, remembering MFNs. */
        for (i = 0; i < (1U<<order); i++) {
                in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        __pte_ma(0), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                                    INVALID_P2M_ENTRY);
        }
        if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
                BUG();

        /* 2. Get a new contiguous memory extent. */
        out_frame = __pa(vstart) >> PAGE_SHIFT;
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == (1UL << order));
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
        if (unlikely(rc == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                         &exchange.in) != (1UL << order))
                        BUG();
                success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                &exchange.out) == 1);
                if (!success) {
                        /* Couldn't get special memory: fall back to normal. */
                        for (i = 0; i < (1U<<order); i++)
                                in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
                        if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                 &exchange.in) != (1UL<<order))
                                BUG();
                }
        }
#endif

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1U<<order); i++) {
                frame = success ? (out_frame + i) : in_frames[i];
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        pfn_pte_ma(frame, PAGE_KERNEL), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }
        cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
                                                   ? UVMF_TLB_FLUSH|UVMF_ALL
                                                   : UVMF_INVLPG|UVMF_ALL;
        if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
                BUG();

        if (success)
                contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
                                      1UL << order);

        balloon_unlock(flags);

        return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
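/*
 * Usage sketch (hypothetical caller, not part of this file): a driver
 * that needs a machine-contiguous DMA buffer addressable below 4GB
 * might do something like
 *
 *	unsigned long vstart = __get_free_pages(GFP_KERNEL, order);
 *
 *	if (vstart && xen_create_contiguous_region(vstart, order, 32) == 0)
 *		... hand virt_to_machine(vstart) to the device ...
 *
 * and later undo the exchange with
 *
 *	xen_destroy_contiguous_region(vstart, order);
 *	free_pages(vstart, order);
 */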
void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
        unsigned long *out_frames = discontig_frames, in_frame;
        unsigned long frame, flags;
        unsigned int i;
        int rc, success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = 1UL << order,
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                }
        };

        if (xen_feature(XENFEAT_auto_translated_physmap) ||
            !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
                return;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return;

        set_xen_guest_handle(exchange.in.extent_start, &in_frame);
        set_xen_guest_handle(exchange.out.extent_start, out_frames);

        scrub_pages((void *)vstart, 1 << order);

        balloon_lock(flags);

        contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

        /* 1. Find start MFN of contiguous extent. */
        in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

        /* 2. Zap current PTEs. */
        for (i = 0; i < (1U<<order); i++) {
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        __pte_ma(0), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                                    INVALID_P2M_ENTRY);
                out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
        }
        if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
                BUG();

        /* 3. Do the exchange for non-contiguous MFNs. */
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == 1);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
        if (unlikely(rc == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                         &exchange.in) != 1)
                        BUG();
                if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                         &exchange.out) != (1UL << order))
                        BUG();
                success = 1;
        }
#endif

        /* 4. Map new pages in place of old pages. */
        for (i = 0; i < (1U<<order); i++) {
                frame = success ? out_frames[i] : (in_frame + i);
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        pfn_pte_ma(frame, PAGE_KERNEL), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }
        cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
                                                   ? UVMF_TLB_FLUSH|UVMF_ALL
                                                   : UVMF_INVLPG|UVMF_ALL;
        if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
                BUG();

        balloon_unlock(flags);

        if (unlikely(!success)) {
                /* Try hard to get the special memory back to Xen. */
                exchange.in.extent_order = 0;
                set_xen_guest_handle(exchange.in.extent_start, &in_frame);

                for (i = 0; i < (1U<<order); i++) {
                        struct page *page = alloc_page(__GFP_HIGHMEM|__GFP_COLD);
                        unsigned long pfn;
                        mmu_update_t mmu;
                        unsigned int j = 0;

                        if (!page) {
                                printk(KERN_WARNING "Xen and kernel out of memory "
                                       "while trying to release an order %u "
                                       "contiguous region\n", order);
                                break;
                        }
                        pfn = page_to_pfn(page);

                        balloon_lock(flags);

                        if (!PageHighMem(page)) {
                                void *v = __va(pfn << PAGE_SHIFT);

                                scrub_pages(v, 1);
                                MULTI_update_va_mapping(cr_mcl + j, (unsigned long)v,
                                                        __pte_ma(0), UVMF_INVLPG|UVMF_ALL);
                                ++j;
                        }
#ifdef CONFIG_XEN_SCRUB_PAGES
                        else {
                                scrub_pages(kmap(page), 1);
                                kunmap(page);
                                kmap_flush_unused();
                        }
#endif

                        frame = pfn_to_mfn(pfn);
                        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);

                        MULTI_update_va_mapping(cr_mcl + j, vstart,
                                                pfn_pte_ma(frame, PAGE_KERNEL),
                                                UVMF_INVLPG|UVMF_ALL);
                        ++j;

                        pfn = __pa(vstart) >> PAGE_SHIFT;
                        set_phys_to_machine(pfn, frame);
                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                                mmu.ptr = ((uint64_t)frame << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
                                mmu.val = pfn;
                                cr_mcl[j].op = __HYPERVISOR_mmu_update;
                                cr_mcl[j].args[0] = (unsigned long)&mmu;
                                cr_mcl[j].args[1] = 1;
                                cr_mcl[j].args[2] = 0;
                                cr_mcl[j].args[3] = DOMID_SELF;
                                ++j;
                        }

                        cr_mcl[j].op = __HYPERVISOR_memory_op;
                        cr_mcl[j].args[0] = XENMEM_decrease_reservation;
                        cr_mcl[j].args[1] = (unsigned long)&exchange.in;

                        if (HYPERVISOR_multicall(cr_mcl, j + 1))
                                BUG();
                        BUG_ON(cr_mcl[j].result != 1);
                        while (j--)
                                BUG_ON(cr_mcl[j].result != 0);

                        balloon_unlock(flags);

                        free_empty_pages(&page, 1);

                        in_frame++;
                        vstart += PAGE_SIZE;
                }
        }
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
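/*
 * A note on the failure path above: when the exchange cannot be undone
 * wholesale, the loop returns the special extent to Xen one frame at a
 * time. Each iteration balloons in a replacement page, remaps vstart
 * onto it, fixes up the P2M/M2P translations, and then hands the
 * original machine frame back via XENMEM_decrease_reservation, all
 * batched into a single multicall so the guest never observes a hole
 * at vstart.
 */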
int xen_limit_pages_to_max_mfn(
        struct page *pages, unsigned int order, unsigned int address_bits)
{
        unsigned long flags, frame;
        unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
        struct page *page;
        unsigned int i, n, nr_mcl;
        int rc, success;
        DECLARE_BITMAP(limit_map, 1 << MAX_CONTIG_ORDER);

        struct xen_memory_exchange exchange = {
                .in = {
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .extent_order = 0,
                        .address_bits = address_bits,
                        .domid        = DOMID_SELF
                }
        };

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        bitmap_zero(limit_map, 1U << order);
        set_xen_guest_handle(exchange.in.extent_start, in_frames);
        set_xen_guest_handle(exchange.out.extent_start, out_frames);

        /* 0. Scrub the pages. */
        for (i = 0, n = 0; i < 1U<<order ; i++) {
                page = &pages[i];
                if (!(pfn_to_mfn(page_to_pfn(page)) >> (address_bits - PAGE_SHIFT)))
                        continue;
                __set_bit(i, limit_map);

                if (!PageHighMem(page))
                        scrub_pages(page_address(page), 1);
#ifdef CONFIG_XEN_SCRUB_PAGES
                else {
                        scrub_pages(kmap(page), 1);
                        kunmap(page);
                        ++n;
                }
#endif
        }
        if (bitmap_empty(limit_map, 1U << order))
                return 0;

        if (n)
                kmap_flush_unused();

        balloon_lock(flags);

        /* 1. Zap current PTEs (if any), remembering MFNs. */
        for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
                if (!test_bit(i, limit_map))
                        continue;
                page = &pages[i];

                out_frames[n] = page_to_pfn(page);
                in_frames[n] = pfn_to_mfn(out_frames[n]);

                if (!PageHighMem(page))
                        MULTI_update_va_mapping(cr_mcl + nr_mcl++,
                                                (unsigned long)page_address(page),
                                                __pte_ma(0), 0);

                set_phys_to_machine(out_frames[n], INVALID_P2M_ENTRY);
                ++n;
        }
        if (nr_mcl && HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
                BUG();

        /* 2. Get new memory below the required limit. */
        exchange.in.nr_extents = n;
        exchange.out.nr_extents = n;
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == n);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
        if (unlikely(rc == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                         &exchange.in) != n)
                        BUG();
                if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                         &exchange.out) != n)
                        BUG();
                success = 1;
        }
#endif

        /* 3. Map the new pages in place of old pages. */
        for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
                if (!test_bit(i, limit_map))
                        continue;
                page = &pages[i];

                frame = success ? out_frames[n] : in_frames[n];

                if (!PageHighMem(page))
                        MULTI_update_va_mapping(cr_mcl + nr_mcl++,
                                                (unsigned long)page_address(page),
                                                pfn_pte_ma(frame, PAGE_KERNEL), 0);

                set_phys_to_machine(page_to_pfn(page), frame);
                ++n;
        }
        if (nr_mcl) {
                cr_mcl[nr_mcl - 1].args[MULTI_UVMFLAGS_INDEX] = order
                                                                ? UVMF_TLB_FLUSH|UVMF_ALL
                                                                : UVMF_INVLPG|UVMF_ALL;
                if (HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
                        BUG();
        }

        balloon_unlock(flags);

        return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
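/*
 * The filter in step 0 above is plain shift arithmetic: an MFN shifted
 * right by (address_bits - PAGE_SHIFT) is nonzero exactly when the
 * frame's machine address is at or above 1UL << address_bits. For
 * example, with address_bits == 32 and 4KB pages, MFNs >= 1<<20 sit at
 * or above 4GB and therefore get exchanged.
 */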
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
        __u32 *lp = (__u32 *)((char *)ldt + entry * 8);
        maddr_t mach_lp = arbitrary_virt_to_machine(lp);
        return HYPERVISOR_update_descriptor(
                mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
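/*
 * Descriptor-table pages are mapped read-only once Xen has validated
 * them, so write_ldt_entry() cannot simply store into the LDT. Instead
 * it hands HYPERVISOR_update_descriptor() the machine address of the
 * 8-byte slot and the new descriptor packed into one u64 (entry_a in
 * the low word, entry_b in the high word) for a checked in-place
 * update.
 */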
#define MAX_BATCHED_FULL_PTES 32

int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
                         unsigned long addr, unsigned long end, pgprot_t newprot,
                         int dirty_accountable)
{
        int rc = 0, i = 0;
        mmu_update_t u[MAX_BATCHED_FULL_PTES];
        pte_t *pte;
        spinlock_t *ptl;

        if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))
                return 0;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        do {
                if (pte_present(*pte)) {
                        pte_t ptent = pte_modify(*pte, newprot);

                        if (dirty_accountable && pte_dirty(ptent))
                                ptent = pte_mkwrite(ptent);
                        u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
                                   | ((unsigned long)pte & ~PAGE_MASK)
                                   | MMU_PT_UPDATE_PRESERVE_AD;
                        u[i].val = __pte_val(ptent);
                        if (++i == MAX_BATCHED_FULL_PTES) {
                                if ((rc = HYPERVISOR_mmu_update(
                                        &u[0], i, NULL, DOMID_SELF)) != 0)
                                        break;
                                i = 0;
                        }
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
        if (i)
                rc = HYPERVISOR_mmu_update(&u[0], i, NULL, DOMID_SELF);
        pte_unmap_unlock(pte - 1, ptl);
        BUG_ON(rc && rc != -ENOSYS);
        return !rc;
}
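/*
 * Why MMU_PT_UPDATE_PRESERVE_AD above: a plain PTE write would race with
 * the hardware setting the Accessed/Dirty bits between our read and our
 * write; this update type has Xen merge the live A/D bits into the new
 * value atomically. The return value is 1 when the whole range was
 * handled via the hypercall and 0 when the caller must fall back to the
 * generic per-PTE protection-change loop.
 */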