/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/hypervisor.h>
#include <asm/swiotlb.h>

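/*
 * Maintained by the Xen hypervisor glue code; set bits mark pfns that
 * belong to machine-contiguous regions set up for DMA (see
 * arch/i386/mm/hypervisor.c in this tree).
 */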
extern unsigned long *contiguous_bitmap;

unsigned int __VMALLOC_RESERVE = 128 << 20;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;

static int noinline do_test_wp_bit(void);

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the gd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
        pud_t *pud;
        pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
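        /*
         * Xen validates page-table pages before they are hooked into
         * the paging hierarchy, so map the new pmd read-only first
         * (a no-op when XENFEAT_writable_page_tables is advertised).
         */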
        make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        if (pmd_table != pmd_offset(pud, 0))
                BUG();
#else
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
#endif

        return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry.
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
        if (pmd_none(*pmd)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                make_lowmem_page_readonly(page_table,
                                          XENFEAT_writable_page_tables);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                if (page_table != pte_offset_kernel(pmd, 0))
                        BUG();

                return page_table;
        }

        return pte_offset_kernel(pmd, 0);
}

/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 */

/*
 * NOTE: The pagetables are allocated contiguously in physical space,
 * so we can cache the address of the first one and advance through
 * them without re-checking the pgd every time.
 */
static void __init page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;

        for (; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
                if (pgd_none(*pgd))
                        one_md_table_init(pgd);
                pud = pud_offset(pgd, vaddr);
                pmd = pmd_offset(pud, vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
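                        /* XEN: never touch the hypervisor-reserved virtual range. */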
                        if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
                                one_page_table_init(pmd);

                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
}

static inline int is_kernel_text(unsigned long addr)
{
        if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
        unsigned long pfn;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;

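        /*
         * XEN: nr_pages is the domain's initial RAM allocation; clamp it
         * so the pte loop below only maps pages we actually own.
         */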
        unsigned long max_ram_pfn = xen_start_info->nr_pages;
        if (max_ram_pfn > max_low_pfn)
                max_ram_pfn = max_low_pfn;

        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;
        pmd_idx = pmd_index(PAGE_OFFSET);
        pte_ofs = pte_index(PAGE_OFFSET);

        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
#ifdef CONFIG_XEN
                /*
                 * Native linux has not enabled PAE paging at this
                 * point.  When running as a xen domain we are already
                 * in PAE mode, so we cannot simply hook in an empty
                 * pmd - that would kill the mappings we are currently
                 * using ...
                 */
                pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
#else
                pmd = one_md_table_init(pgd);
#endif
                if (pfn >= max_low_pfn)
                        continue;
                pmd += pmd_idx;
                for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
                        if (address >= HYPERVISOR_VIRT_START)
                                continue;

                        /* Map with big pages if possible, otherwise create normal page tables. */
                        if (cpu_has_pse) {
                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;

                                if (is_kernel_text(address) || is_kernel_text(address2))
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
                                pfn += PTRS_PER_PTE;
                        } else {
                                pte = one_page_table_init(pmd);

                                pte += pte_ofs;
                                for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
                                        /* XEN: Only map initial RAM allocation. */
                                        if ((pfn >= max_ram_pfn) || pte_present(*pte))
                                                continue;
                                        if (is_kernel_text(address))
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                        else
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
                                pte_ofs = 0;
                        }
                }
                pmd_idx = 0;
        }
}

#ifndef CONFIG_XEN

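/*
 * Pentium Pro erratum #50: pages in this physical range are unsafe on
 * affected steppings, so keep them reserved (see ppro_with_ram_bug()).
 */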
static inline int page_kills_ppro(unsigned long pagenr)
{
        if (pagenr >= 0x70000 && pagenr <= 0x7003F)
                return 1;
        return 0;
}

extern int is_available_memory(efi_memory_desc_t *);

int page_is_ram(unsigned long pagenr)
{
        int i;
        unsigned long addr, end;

        if (efi_enabled) {
                efi_memory_desc_t *md;
                void *p;

                for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                        md = p;
                        if (!is_available_memory(md))
                                continue;
                        addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                        end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;

                        if ((pagenr >= addr) && (pagenr < end))
                                return 1;
                }
                return 0;
        }

        for (i = 0; i < e820.nr_map; i++) {

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
                 *      are not. Notably the 640->1Mb area. We need a sanity
                 *      check here.
                 */
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}

#else /* CONFIG_XEN */

#define page_kills_ppro(p)      0
#define page_is_ram(p)          1

#endif

#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

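/*
 * Walk pgd -> pud -> pmd (the upper levels are folded on i386) down to
 * the kernel pte that maps a given fixmap virtual address.
 */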
#define kmap_get_fixmap_pte(vaddr)                                      \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))

static void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /* cache the first kmap pte */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

        kmap_prot = PAGE_KERNEL;
}

static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long vaddr;

        vaddr = PKMAP_BASE;
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + pgd_index(vaddr);
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
}

static void __meminit free_new_highpage(struct page *page, int pfn)
{
        set_page_count(page, 1);
        if (pfn < xen_start_info->nr_pages)
                __free_page(page);
        totalhigh_pages++;
}

void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
        if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                ClearPageReserved(page);
                free_new_highpage(page, pfn);
        } else
                SetPageReserved(page);
}

static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
        free_new_highpage(page, pfn);
        totalram_pages++;
#ifdef CONFIG_FLATMEM
        max_mapnr = max(pfn, max_mapnr);
#endif
        num_physpages++;
        return 0;
}

/*
 * Not currently handling the NUMA case.
 * Assuming a single node; all memory that has been
 * added dynamically and is onlined here is assumed
 * to be in HIGHMEM.
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        add_one_highpage_hotplug(page, page_to_pfn(page));
}


#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
static void __init set_highmem_pages_init(int bad_ppro)
{
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
                add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
        totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_NUMA */

#else
#define kmap_init() do { } while (0)
#define permanent_kmaps_init(pgd_base) do { } while (0)
#define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */

unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
EXPORT_SYMBOL(__PAGE_KERNEL);
unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

#ifdef CONFIG_NUMA
extern void __init remap_numa_kva(void);
#else
#define remap_numa_kva() do {} while (0)
#endif

pgd_t *swapper_pg_dir;

static void __init pagetable_init(void)
{
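        /*
         * XEN: the domain is started on page tables the hypervisor has
         * already built; pt_base is their virtual address, so adopt them
         * as swapper_pg_dir instead of constructing a fresh pgd.
         */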
        unsigned long vaddr;
        pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;

        swapper_pg_dir = pgd_base;
        init_mm.pgd    = pgd_base;

        /* Enable PSE if available */
        if (cpu_has_pse) {
                set_in_cr4(X86_CR4_PSE);
        }

        /* Enable PGE if available */
        if (cpu_has_pge) {
                set_in_cr4(X86_CR4_PGE);
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }

        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        page_table_range_init(vaddr, 0, pgd_base);

        permanent_kmaps_init(pgd_base);
}

#ifdef CONFIG_SOFTWARE_SUSPEND
/*
 * Swap suspend & friends need this for resume because things like the intel-agp
 * driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
        __attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
        memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

void zap_low_mappings(void)
{
        int i;

        save_pg_dir();

        /*
         * Zap initial low-memory mappings.
         *
         * Note that "pgd_clear()" doesn't do it for
         * us, because pgd_clear() is a no-op on i386.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
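                /*
                 * With PAE the kernel assumes pgd entries are always
                 * present (pgd_none() is hardwired to 0), so point the
                 * entry at the all-zero page instead of clearing it;
                 * "1 +" sets _PAGE_PRESENT.
                 */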
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
                set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
        flush_tlb_all();
}

static int disable_nx __initdata = 0;
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
EXPORT_SYMBOL(__supported_pte_mask);

/*
 * noexec = on|off
 *
 * Control non executable mappings.
 *
 * on      Enable
 * off     Disable
 */
void __init noexec_setup(const char *str)
{
        if (!strncmp(str, "on", 2) && cpu_has_nx) {
                __supported_pte_mask |= _PAGE_NX;
                disable_nx = 0;
        } else if (!strncmp(str, "off", 3)) {
                disable_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
        }
}

int nx_enabled = 0;
#ifdef CONFIG_X86_PAE

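/*
 * CPUID leaf 0x80000001, EDX bit 20 advertises NX support.  If it is
 * present and not disabled via "noexec=off", enable it in the EFER MSR.
 */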
static void __init set_nx(void)
{
        unsigned int v[4], l, h;

        if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
                cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
                if ((v[3] & (1 << 20)) && !disable_nx) {
                        rdmsr(MSR_EFER, l, h);
                        l |= EFER_NX;
                        wrmsr(MSR_EFER, l, h);
                        nx_enabled = 1;
                        __supported_pte_mask |= _PAGE_NX;
                }
        }
}

/*
 * Enables/disables executability of a given kernel page and
 * returns the previous setting.
 */
int __init set_kernel_exec(unsigned long vaddr, int enable)
{
        pte_t *pte;
        int ret = 1;

        if (!nx_enabled)
                goto out;

        pte = lookup_address(vaddr);
        BUG_ON(!pte);

        if (!pte_exec_kernel(*pte))
                ret = 0;

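        /*
         * Under PAE the NX bit is bit 63 of the pte, i.e. bit
         * (_PAGE_BIT_NX - 32) of pte_high.
         */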
        if (enable)
                pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        else
                pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
        __flush_tlb_all();
out:
        return ret;
}

#endif

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
        int i;

#ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
                printk("NX (Execute Disable) protection: active\n");
#endif

        pagetable_init();

#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
        /*
         * We will bail out later - printk doesn't work right now, so
         * the user would just see a hanging kernel.
         * When running as a xen domain we are already in PAE mode at
         * this point.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
#endif
        __flush_tlb_all();

        kmap_init();

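        /*
         * XEN: until now HYPERVISOR_shared_info has pointed at a
         * boot-time dummy page; map the real shared_info frame through
         * the fixmap and scrub the dummy (the memset below).
         */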
        if (!xen_feature(XENFEAT_auto_translated_physmap) ||
            xen_start_info->shared_info >= xen_start_info->nr_pages) {
                /* Switch to the real shared_info page, and clear the
                 * dummy page. */
                set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
                HYPERVISOR_shared_info =
                        (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
                memset(empty_zero_page, 0, sizeof(empty_zero_page));
        }

        /* Set up the mapping of the low first MB (ISA space). */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
                if (xen_start_info->flags & SIF_PRIVILEGED)
                        set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                else
                        __set_fixmap(FIX_ISAMAP_BEGIN - i,
                                     virt_to_machine(empty_zero_page),
                                     PAGE_KERNEL_RO);
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
 * used to involve black magic jumps to work around some nasty CPU bugs,
 * but fortunately the switch to using exceptions got rid of all that.
 */

static void __init test_wp_bit(void)
{
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");

        /* Any page-aligned address will do, the test is non-destructive */
        __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
        boot_cpu_data.wp_works_ok = do_test_wp_bit();
        clear_fixmap(FIX_WP_TEST);

        if (!boot_cpu_data.wp_works_ok) {
                printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
                panic("This kernel doesn't support CPUs with broken WP. Recompile it for a 386!");
#endif
        } else {
                printk("Ok.\n");
        }
}

static void __init set_max_mapnr_init(void)
{
#ifdef CONFIG_HIGHMEM
        num_physpages = highend_pfn;
#else
        num_physpages = max_low_pfn;
#endif
#ifdef CONFIG_FLATMEM
        max_mapnr = num_physpages;
#endif
}

static struct kcore_list kcore_mem, kcore_vmalloc;

void __init mem_init(void)
{
        extern int ppro_with_ram_bug(void);
        int codesize, reservedpages, datasize, initsize;
        int tmp;
        int bad_ppro;
        unsigned long pfn;

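        /*
         * One bit per low-memory pfn, rounded up to whole words with a
         * little slack; ">> 3" converts the bit count to bytes.
         */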
        contiguous_bitmap = alloc_bootmem_low_pages(
                (max_low_pfn + 2*BITS_PER_LONG) >> 3);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);

#if defined(CONFIG_SWIOTLB)
        swiotlb_init();
#endif

#ifdef CONFIG_FLATMEM
        if (!mem_map)
                BUG();
#endif

        bad_ppro = ppro_with_ram_bug();

#ifdef CONFIG_HIGHMEM
        /* check that fixmap and pkmap do not overlap */
        if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
                printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
                printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
                                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
                BUG();
        }
#endif

        set_max_mapnr_init();

#ifdef CONFIG_HIGHMEM
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
        printk("vmalloc area: %lx-%lx, maxmem %lx\n",
               VMALLOC_START, VMALLOC_END, MAXMEM);
        BUG_ON(VMALLOC_START > VMALLOC_END);

        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
        for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
                set_page_count(&mem_map[pfn], 1);
                totalram_pages++;
        }

        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                        reservedpages++;

        set_highmem_pages_init(bad_ppro);

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);

        printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                num_physpages << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10,
                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );

#ifdef CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif

        set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
}

/*
 * This is for the non-NUMA, single-node SMP system case.
 * Specifically, in the case of x86, we will always add
 * memory to highmem for now.
 */
#ifndef CONFIG_NEED_MULTIPLE_NODES
int add_memory(u64 start, u64 size)
{
        struct pglist_data *pgdata = &contig_page_data;
        struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        return __add_pages(zone, start_pfn, nr_pages);
}

int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
#endif

kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;

void __init pgtable_cache_init(void)
{
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        0,
                                        pmd_ctor,
                                        NULL);
                if (!pmd_cache)
                        panic("pgtable_cache_init(): cannot create pmd cache");
        }
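        /*
         * XEN: the hypervisor validates and pins page-table pages as
         * whole pages, so each pgd gets its own page-aligned page
         * instead of being packed PTRS_PER_PGD entries at a time.
         */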
        pgd_cache = kmem_cache_create("pgd",
#ifndef CONFIG_XEN
                                PTRS_PER_PGD*sizeof(pgd_t),
                                PTRS_PER_PGD*sizeof(pgd_t),
#else
                                PAGE_SIZE,
                                PAGE_SIZE,
#endif
                                0,
                                pgd_ctor,
                                pgd_dtor);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
}

/*
 * This function cannot be __init, since exceptions don't work in that
 * section.  Put this after the callers, so that it cannot be inlined.
 */
static int noinline do_test_wp_bit(void)
{
        char tmp_reg;
        int flag;

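        /*
         * "flag" starts out as 1.  The write at label 1 targets the
         * read-only FIX_WP_TEST page: if WP is honoured it faults and the
         * exception-table fixup resumes at label 2, leaving flag == 1;
         * otherwise the write succeeds and the xorl clears flag to 0.
         */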
        __asm__ __volatile__(
                "       movb %0,%1      \n"
                "1:     movb %1,%0      \n"
                "       xorl %2,%2      \n"
                "2:                     \n"
                ".section __ex_table,\"a\"\n"
                "       .align 4        \n"
                "       .long 1b,2b     \n"
                ".previous              \n"
                :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
                 "=q" (tmp_reg),
                 "=r" (flag)
                :"2" (1)
                :"memory");

        return flag;
}

void free_initmem(void)
{
        unsigned long addr;

        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                memset((void *)addr, 0xcc, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
        printk(KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
}

#ifdef CONFIG_DEBUG_RODATA

extern char __start_rodata, __end_rodata;
void mark_rodata_ro(void)
{
        unsigned long addr = (unsigned long)&__start_rodata;

        for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
                change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);

        printk("Write protecting the kernel read-only data: %luk\n",
                        (unsigned long)(&__end_rodata - &__start_rodata) >> 10);

        /*
         * change_page_attr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif


#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < end)
                printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif