- Update Xen patches to 3.3-rc5 and c/s 1157.
[linux-flexiantxendom0-3.2.10.git] / arch / x86 / mm / dump_pagetables-xen.c
1 /*
2  * Debug helper to dump the current kernel pagetables of the system
3  * so that we can see what the various memory ranges are set to.
4  *
5  * (C) Copyright 2008 Intel Corporation
6  *
7  * Author: Arjan van de Ven <arjan@linux.intel.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; version 2
12  * of the License.
13  */
14
15 #include <linux/debugfs.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/module.h>
19 #include <linux/seq_file.h>
20
21 #include <xen/interface/xen.h>
22
23 #include <asm/pgtable.h>
24
25 /*
26  * The dumper groups pagetable entries of the same type into one, and for
27  * that it needs to keep some state when walking, and flush this state
28  * when a "break" in the continuity is found.
29  */
30 struct pg_state {
31         int level;
32         pgprot_t current_prot;
33         unsigned long start_address;
34         unsigned long current_address;
35         const struct addr_marker *marker;
36 };
37
38 struct addr_marker {
39         unsigned long start_address;
40         const char *name;
41 };
42
/*
 * Indices into address_markers[].  The order here has to mirror the order
 * of the rows in that table, so update both together.
 */
enum address_markers_idx {
        USER_SPACE_NR,                  /* first enumerator, i.e. 0 */
#if defined(CONFIG_X86_64)
        XEN_SPACE_NR,
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
#else /* !CONFIG_X86_64 */
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
# if defined(CONFIG_HIGHMEM)
        PKMAP_BASE_NR,
# endif
        FIXADDR_START_NR,
        XEN_SPACE_NR,
#endif /* CONFIG_X86_64 */
};
65
66 /* Address space markers hints */
67 static struct addr_marker address_markers[] = {
68         { 0, "User Space" },
69 #ifdef CONFIG_X86_64
70         { HYPERVISOR_VIRT_START,      "Hypervisor Space" },
71         { PAGE_OFFSET,                "Low Kernel Mapping" },
72         { VMALLOC_START,              "vmalloc() Area" },
73         { VMEMMAP_START,              "Vmemmap" },
74         { __START_KERNEL_map,         "High Kernel Mapping" },
75         { MODULES_VADDR,              "Modules" },
76         { MODULES_END,                "End Modules" },
77 #else
78         { PAGE_OFFSET,                "Kernel Mapping" },
79         { 0/* VMALLOC_START */,       "vmalloc() Area" },
80         { 0/*VMALLOC_END*/,           "vmalloc() End" },
81 # ifdef CONFIG_HIGHMEM
82         { 0/*PKMAP_BASE*/,            "Persisent kmap() Area" },
83 # endif
84         { 0/*FIXADDR_START*/,         "Fixmap Area" },
85         { 0/*HYPERVISOR_VIRT_START*/, "Hypervisor Space" },
86 #endif
87         { -1, NULL }                  /* End of list */
88 };
89
90 static inline bool hypervisor_space(unsigned long addr) {
91 #ifdef CONFIG_X86_64
92         return addr >= HYPERVISOR_VIRT_START && addr < HYPERVISOR_VIRT_END;
93 #else
94         return addr >= hypervisor_virt_start;
95 #endif
96 }
97
/*
 * Multipliers for offsets within the PTEs.
 *
 * Each *_LEVEL_MULT is the number of bytes of virtual address space that
 * one entry at that level spans; the walkers multiply it by the entry
 * index to obtain the address an entry maps.  Every level is derived from
 * the one below it.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
103
104 /*
105  * Print a readable form of a pgprot_t to the seq_file
106  */
107 static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
108 {
109         pgprotval_t pr = pgprot_val(prot);
110         static const char * const level_name[] =
111                 { "cr3", "pgd", "pud", "pmd", "pte" };
112
113         if (!pgprot_val(prot)) {
114                 /* Not present */
115                 seq_printf(m, "                          ");
116         } else {
117                 if (pr & _PAGE_USER)
118                         seq_printf(m, "USR ");
119                 else
120                         seq_printf(m, "    ");
121                 if (pr & _PAGE_RW)
122                         seq_printf(m, "RW ");
123                 else
124                         seq_printf(m, "ro ");
125                 if (pr & _PAGE_PWT)
126                         seq_printf(m, "PWT ");
127                 else
128                         seq_printf(m, "    ");
129                 if (pr & _PAGE_PCD)
130                         seq_printf(m, "PCD ");
131                 else
132                         seq_printf(m, "    ");
133
134                 /* Bit 9 has a different meaning on level 3 vs 4 */
135                 if (level <= 3) {
136                         if (pr & _PAGE_PSE)
137                                 seq_printf(m, "PSE ");
138                         else
139                                 seq_printf(m, "    ");
140                 } else {
141                         if (pr & _PAGE_PAT)
142                                 seq_printf(m, "pat ");
143                         else
144                                 seq_printf(m, "    ");
145                 }
146                 if (pr & _PAGE_GLOBAL)
147                         seq_printf(m, "GLB ");
148                 else
149                         seq_printf(m, "    ");
150                 if (pr & _PAGE_NX)
151                         seq_printf(m, "NX ");
152                 else
153                         seq_printf(m, "x  ");
154         }
155         seq_printf(m, "%s\n", level_name[level]);
156 }
157
/*
 * Turn a raw walk offset into a canonical virtual address.
 *
 * On 64 bits the value is treated as a 48 bit address and sign-extended
 * to 64 bit (shift the top 16 bits out, then shift back as a signed
 * quantity).  On 32 bits the value is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
        signed long shifted = (signed long)(u << 16);

        return shifted >> 16;
#else
        return u;
#endif
}
169
170 /*
171  * This function gets called on a break in a continuous series
172  * of PTE entries; the next one is different so we need to
173  * print what we collected so far.
174  */
175 static void note_page(struct seq_file *m, struct pg_state *st,
176                       pgprot_t new_prot, int level)
177 {
178         pgprotval_t prot, cur;
179         static const char units[] = "KMGTPE";
180
181         /*
182          * If we have a "break" in the series, we need to flush the state that
183          * we have now. "break" is either changing perms, levels or
184          * address space marker.
185          */
186         prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
187         cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
188
189         if (!st->level) {
190                 /* First entry */
191                 st->current_prot = new_prot;
192                 st->level = level;
193                 st->marker = address_markers;
194                 seq_printf(m, "---[ %s ]---\n", st->marker->name);
195         } else if (prot != cur || level != st->level ||
196                    st->current_address >= st->marker[1].start_address) {
197                 const char *unit = units;
198                 unsigned long delta;
199                 int width = sizeof(unsigned long) * 2;
200
201                 /*
202                  * Now print the actual finished series
203                  */
204                 seq_printf(m, "0x%0*lx-0x%0*lx   ",
205                            width, st->start_address,
206                            width, st->current_address);
207
208                 delta = (st->current_address - st->start_address) >> 10;
209                 while (!(delta & 1023) && unit[1]) {
210                         delta >>= 10;
211                         unit++;
212                 }
213                 seq_printf(m, "%9lu%c ", delta, *unit);
214                 printk_prot(m, st->current_prot, st->level);
215
216                 /*
217                  * We print markers for special areas of address space,
218                  * such as the start of vmalloc space etc.
219                  * This helps in the interpretation.
220                  */
221                 if (st->current_address >= st->marker[1].start_address) {
222                         st->marker++;
223                         seq_printf(m, "---[ %s ]---\n", st->marker->name);
224                 }
225
226                 st->start_address = st->current_address;
227                 st->current_prot = new_prot;
228                 st->level = level;
229         }
230 }
231
232 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
233                                                         unsigned long P)
234 {
235         int i;
236         pte_t *start;
237
238         start = (pte_t *) pmd_page_vaddr(addr);
239         for (i = 0; i < PTRS_PER_PTE; i++) {
240                 pgprot_t prot = pte_pgprot(*start);
241
242                 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
243                 note_page(m, st, prot, 4);
244                 start++;
245         }
246 }
247
#if PTRS_PER_PMD > 1

/*
 * Visit every pmd entry of the table that the pud entry @addr points to.
 *
 * @P is the virtual address mapped by the first pmd entry.  Entries that
 * are empty or lie in hypervisor space are reported as "nothing mapped";
 * large or non-present entries are reported as they are (level 3); all
 * others are descended into.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
                                                        unsigned long P)
{
        pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
        int idx;

        for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
                pgprotval_t flags;

                st->current_address = normalize_addr(P + idx * PMD_LEVEL_MULT);

                if (hypervisor_space(st->current_address) || pmd_none(*pmd)) {
                        note_page(m, st, __pgprot(0), 3);
                        continue;
                }

                flags = __pmd_val(*pmd) & PTE_FLAGS_MASK;
                if (!pmd_large(*pmd) && pmd_present(*pmd))
                        walk_pte_level(m, st, *pmd, P + idx * PMD_LEVEL_MULT);
                else
                        note_page(m, st, __pgprot(flags), 3);
        }
}

#else
/*
 * Only one pmd entry per pud: reinterpret the pud entry as a pmd entry
 * and go straight to the pte level.
 */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
279
#if PTRS_PER_PUD > 1

/*
 * Visit every pud entry of the table that the pgd entry @addr points to.
 *
 * @P is the virtual address mapped by the first pud entry.  Entries that
 * are empty or lie in hypervisor space are reported as "nothing mapped";
 * large or non-present entries are reported as they are (level 2); all
 * others are descended into.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
                                                        unsigned long P)
{
        pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
        int idx;

        for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
                pgprotval_t flags;

                st->current_address = normalize_addr(P + idx * PUD_LEVEL_MULT);

                if (hypervisor_space(st->current_address) || pud_none(*pud)) {
                        note_page(m, st, __pgprot(0), 2);
                        continue;
                }

                flags = __pud_val(*pud) & PTE_FLAGS_MASK;
                if (!pud_large(*pud) && pud_present(*pud))
                        walk_pmd_level(m, st, *pud, P + idx * PUD_LEVEL_MULT);
                else
                        note_page(m, st, __pgprot(flags), 2);
        }
}

#else
/*
 * Only one pud entry per pgd: reinterpret the pgd entry as a pud entry
 * and go straight to the pmd level.
 */
#define __pud_ma(x) ((pud_t){ __pgd_ma(x) })
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud_ma(__pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud_ma(__pgd_val(a)))
#define pgd_none(a)  pud_none(__pud_ma(__pgd_val(a)))
#endif
314
315 static void walk_pgd_level(struct seq_file *m)
316 {
317 #ifdef CONFIG_X86_64
318         pgd_t *start = (pgd_t *) &init_level4_pgt;
319 #else
320         pgd_t *start = swapper_pg_dir;
321 #endif
322         int i;
323         struct pg_state st;
324
325         memset(&st, 0, sizeof(st));
326
327         for (i = 0; i < PTRS_PER_PGD; i++) {
328                 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
329                 if (!pgd_none(*start)) {
330                         pgprotval_t prot = __pgd_val(*start) & PTE_FLAGS_MASK;
331
332                         if (pgd_large(*start) || !pgd_present(*start))
333                                 note_page(m, &st, __pgprot(prot), 1);
334                         else
335                                 walk_pud_level(m, &st, *start,
336                                                i * PGD_LEVEL_MULT);
337                 } else
338                         note_page(m, &st, __pgprot(0), 1);
339
340                 start++;
341         }
342
343         /* Flush out the last page */
344         st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
345         note_page(m, &st, __pgprot(0), 0);
346 }
347
/*
 * seq_file show callback: writes the whole pagetable dump to @m by
 * walking from the top-level table.  @v is not used.  Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
        walk_pgd_level(m);
        return 0;
}
353
/*
 * open() handler: sets the file up as a single-record seq_file whose
 * content is produced by ptdump_show(), with no private data attached.
 * Returns whatever single_open() returns.
 */
static int ptdump_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, ptdump_show, NULL);
}
358
/*
 * File operations for the dump file: our own open handler plus the
 * generic seq_file helpers for reading, seeking and releasing.
 */
static const struct file_operations ptdump_fops = {
        .open           = ptdump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
365
366 static int __init pt_dump_init(void)
367 {
368         struct dentry *pe;
369
370 #ifdef CONFIG_X86_32
371         /* Not a compile-time constant on x86-32 */
372         address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
373         address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
374 # ifdef CONFIG_HIGHMEM
375         address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
376 # endif
377         address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
378         address_markers[XEN_SPACE_NR].start_address = hypervisor_virt_start;
379 #endif
380
381         pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
382                                  &ptdump_fops);
383         if (!pe)
384                 return -ENOMEM;
385
386         return 0;
387 }
388
/* Register pt_dump_init() as an init call and declare the module metadata. */
__initcall(pt_dump_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");