30ad7c48b0c30507a35d9aa20705c1406f917902
[linux-flexiantxendom0-3.2.10.git] / arch / i386 / kernel / setup.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <video/edid.h>
42 #include <asm/e820.h>
43 #include <asm/mpspec.h>
44 #include <asm/edd.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
49 #include <asm/ist.h>
50 #include "setup_arch_pre.h"
51 #include "mach_resources.h"
52
53 int disable_pse __initdata = 0;
54
55 static inline char * __init machine_specific_memory_setup(void);
56
57 /*
58  * Machine setup..
59  */
60
61 #ifdef CONFIG_EFI
62 int efi_enabled = 0;
63 #endif
64
65 /* cpu data as detected by the assembly code in head.S */
66 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
67 /* common cpu data for all cpus */
68 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
69
70 unsigned long mmu_cr4_features;
71 EXPORT_SYMBOL_GPL(mmu_cr4_features);
72
73 #ifdef  CONFIG_ACPI_INTERPRETER
74         int acpi_disabled = 0;
75 #else
76         int acpi_disabled = 1;
77 #endif
78 EXPORT_SYMBOL(acpi_disabled);
79
80 #ifdef  CONFIG_ACPI_BOOT
81 extern int __initdata acpi_ht;
82 int __initdata acpi_force = 0;
83 #endif
84
85 int MCA_bus;
86 /* for MCA, but anyone else can use it if they want */
87 unsigned int machine_id;
88 unsigned int machine_submodel_id;
89 unsigned int BIOS_revision;
90 unsigned int mca_pentium_flag;
91
92 /* For PCI or other memory-mapped resources */
93 unsigned long pci_mem_start = 0x10000000;
94
95 /* user-defined highmem size */
96 static unsigned int highmem_pages = -1;
97
98 /*
99  * Setup options
100  */
101 struct drive_info_struct { char dummy[32]; } drive_info;
102 struct screen_info screen_info;
103 struct apm_info apm_info;
104 struct sys_desc_table_struct {
105         unsigned short length;
106         unsigned char table[0];
107 };
108 struct edid_info edid_info;
109 struct ist_info ist_info;
110 struct e820map e820;
111
112 unsigned char aux_device_present;
113
114 extern void early_cpu_init(void);
115 extern void dmi_scan_machine(void);
116 extern void generic_apic_probe(char *);
117 extern int root_mountflags;
118 extern char _end[];
119
120 unsigned long saved_videomode;
121
122 #define RAMDISK_IMAGE_START_MASK        0x07FF
123 #define RAMDISK_PROMPT_FLAG             0x8000
124 #define RAMDISK_LOAD_FLAG               0x4000  
125
126 static char command_line[COMMAND_LINE_SIZE];
127        char saved_command_line[COMMAND_LINE_SIZE];
128
129 static struct resource code_resource = { "Kernel code", 0x100000, 0 };
130 static struct resource data_resource = { "Kernel data", 0, 0 };
131
132 static void __init probe_roms(void)
133 {
134         int roms = 1;
135
136         request_resource(&iomem_resource, rom_resources+0);
137
138         /* Video ROM is standard at C000:0000 - C7FF:0000, check signature */
139         probe_video_rom(roms);
140
141         /* Extension roms */
142         probe_extension_roms(roms);
143 }
144
145 static void __init limit_regions(unsigned long long size)
146 {
147         unsigned long long current_addr = 0;
148         int i;
149
150         if (efi_enabled) {
151                 for (i = 0; i < memmap.nr_map; i++) {
152                         current_addr = memmap.map[i].phys_addr +
153                                        (memmap.map[i].num_pages << 12);
154                         if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
155                                 if (current_addr >= size) {
156                                         memmap.map[i].num_pages -=
157                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
158                                         memmap.nr_map = i + 1;
159                                         return;
160                                 }
161                         }
162                 }
163         }
164         for (i = 0; i < e820.nr_map; i++) {
165                 if (e820.map[i].type == E820_RAM) {
166                         current_addr = e820.map[i].addr + e820.map[i].size;
167                         if (current_addr >= size) {
168                                 e820.map[i].size -= current_addr-size;
169                                 e820.nr_map = i + 1;
170                                 return;
171                         }
172                 }
173         }
174 }
175
176 static void __init add_memory_region(unsigned long long start,
177                                   unsigned long long size, int type)
178 {
179         int x;
180
181         if (!efi_enabled) {
182                 x = e820.nr_map;
183
184                 if (x == E820MAX) {
185                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
186                     return;
187                 }
188
189                 e820.map[x].addr = start;
190                 e820.map[x].size = size;
191                 e820.map[x].type = type;
192                 e820.nr_map++;
193         }
194 } /* add_memory_region */
195
196 #define E820_DEBUG      1
197
198 static void __init print_memory_map(char *who)
199 {
200         int i;
201
202         for (i = 0; i < e820.nr_map; i++) {
203                 printk(" %s: %016Lx - %016Lx ", who,
204                         e820.map[i].addr,
205                         e820.map[i].addr + e820.map[i].size);
206                 switch (e820.map[i].type) {
207                 case E820_RAM:  printk("(usable)\n");
208                                 break;
209                 case E820_RESERVED:
210                                 printk("(reserved)\n");
211                                 break;
212                 case E820_ACPI:
213                                 printk("(ACPI data)\n");
214                                 break;
215                 case E820_NVS:
216                                 printk("(ACPI NVS)\n");
217                                 break;
218                 default:        printk("type %lu\n", e820.map[i].type);
219                                 break;
220                 }
221         }
222 }
223
224 /*
225  * Sanitize the BIOS e820 map.
226  *
227  * Some e820 responses include overlapping entries.  The following 
228  * replaces the original e820 map with a new one, removing overlaps.
229  *
230  */
231 struct change_member {
232         struct e820entry *pbios; /* pointer to original bios entry */
233         unsigned long long addr; /* address for this change point */
234 };
235 struct change_member change_point_list[2*E820MAX] __initdata;
236 struct change_member *change_point[2*E820MAX] __initdata;
237 struct e820entry *overlap_list[E820MAX] __initdata;
238 struct e820entry new_bios[E820MAX] __initdata;
239
240 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
241 {
242         struct change_member *change_tmp;
243         unsigned long current_type, last_type;
244         unsigned long long last_addr;
245         int chgidx, still_changing;
246         int overlap_entries;
247         int new_bios_entry;
248         int old_nr, new_nr, chg_nr;
249         int i;
250
251         /*
252                 Visually we're performing the following (1,2,3,4 = memory types)...
253
254                 Sample memory map (w/overlaps):
255                    ____22__________________
256                    ______________________4_
257                    ____1111________________
258                    _44_____________________
259                    11111111________________
260                    ____________________33__
261                    ___________44___________
262                    __________33333_________
263                    ______________22________
264                    ___________________2222_
265                    _________111111111______
266                    _____________________11_
267                    _________________4______
268
269                 Sanitized equivalent (no overlap):
270                    1_______________________
271                    _44_____________________
272                    ___1____________________
273                    ____22__________________
274                    ______11________________
275                    _________1______________
276                    __________3_____________
277                    ___________44___________
278                    _____________33_________
279                    _______________2________
280                    ________________1_______
281                    _________________4______
282                    ___________________2____
283                    ____________________33__
284                    ______________________4_
285         */
286
287         /* if there's only one memory region, don't bother */
288         if (*pnr_map < 2)
289                 return -1;
290
291         old_nr = *pnr_map;
292
293         /* bail out if we find any unreasonable addresses in bios map */
294         for (i=0; i<old_nr; i++)
295                 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
296                         return -1;
297
298         /* create pointers for initial change-point information (for sorting) */
299         for (i=0; i < 2*old_nr; i++)
300                 change_point[i] = &change_point_list[i];
301
302         /* record all known change-points (starting and ending addresses),
303            omitting those that are for empty memory regions */
304         chgidx = 0;
305         for (i=0; i < old_nr; i++)      {
306                 if (biosmap[i].size != 0) {
307                         change_point[chgidx]->addr = biosmap[i].addr;
308                         change_point[chgidx++]->pbios = &biosmap[i];
309                         change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
310                         change_point[chgidx++]->pbios = &biosmap[i];
311                 }
312         }
313         chg_nr = chgidx;        /* true number of change-points */
314
315         /* sort change-point list by memory addresses (low -> high) */
316         still_changing = 1;
317         while (still_changing)  {
318                 still_changing = 0;
319                 for (i=1; i < chg_nr; i++)  {
320                         /* if <current_addr> > <last_addr>, swap */
321                         /* or, if current=<start_addr> & last=<end_addr>, swap */
322                         if ((change_point[i]->addr < change_point[i-1]->addr) ||
323                                 ((change_point[i]->addr == change_point[i-1]->addr) &&
324                                  (change_point[i]->addr == change_point[i]->pbios->addr) &&
325                                  (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
326                            )
327                         {
328                                 change_tmp = change_point[i];
329                                 change_point[i] = change_point[i-1];
330                                 change_point[i-1] = change_tmp;
331                                 still_changing=1;
332                         }
333                 }
334         }
335
336         /* create a new bios memory map, removing overlaps */
337         overlap_entries=0;       /* number of entries in the overlap table */
338         new_bios_entry=0;        /* index for creating new bios map entries */
339         last_type = 0;           /* start with undefined memory type */
340         last_addr = 0;           /* start with 0 as last starting address */
341         /* loop through change-points, determining affect on the new bios map */
342         for (chgidx=0; chgidx < chg_nr; chgidx++)
343         {
344                 /* keep track of all overlapping bios entries */
345                 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
346                 {
347                         /* add map entry to overlap list (> 1 entry implies an overlap) */
348                         overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
349                 }
350                 else
351                 {
352                         /* remove entry from list (order independent, so swap with last) */
353                         for (i=0; i<overlap_entries; i++)
354                         {
355                                 if (overlap_list[i] == change_point[chgidx]->pbios)
356                                         overlap_list[i] = overlap_list[overlap_entries-1];
357                         }
358                         overlap_entries--;
359                 }
360                 /* if there are overlapping entries, decide which "type" to use */
361                 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
362                 current_type = 0;
363                 for (i=0; i<overlap_entries; i++)
364                         if (overlap_list[i]->type > current_type)
365                                 current_type = overlap_list[i]->type;
366                 /* continue building up new bios map based on this information */
367                 if (current_type != last_type)  {
368                         if (last_type != 0)      {
369                                 new_bios[new_bios_entry].size =
370                                         change_point[chgidx]->addr - last_addr;
371                                 /* move forward only if the new size was non-zero */
372                                 if (new_bios[new_bios_entry].size != 0)
373                                         if (++new_bios_entry >= E820MAX)
374                                                 break;  /* no more space left for new bios entries */
375                         }
376                         if (current_type != 0)  {
377                                 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
378                                 new_bios[new_bios_entry].type = current_type;
379                                 last_addr=change_point[chgidx]->addr;
380                         }
381                         last_type = current_type;
382                 }
383         }
384         new_nr = new_bios_entry;   /* retain count for new bios entries */
385
386         /* copy new bios mapping into original location */
387         memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
388         *pnr_map = new_nr;
389
390         return 0;
391 }
392
393 /*
394  * Copy the BIOS e820 map into a safe place.
395  *
396  * Sanity-check it while we're at it..
397  *
398  * If we're lucky and live on a modern system, the setup code
399  * will have given us a memory map that we can use to properly
400  * set up memory.  If we aren't, we'll fake a memory map.
401  *
402  * We check to see that the memory map contains at least 2 elements
403  * before we'll use it, because the detection code in setup.S may
404  * not be perfect and most every PC known to man has two memory
405  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
406  * thinkpad 560x, for example, does not cooperate with the memory
407  * detection code.)
408  */
409 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
410 {
411         /* Only one memory region (or negative)? Ignore it */
412         if (nr_map < 2)
413                 return -1;
414
415         do {
416                 unsigned long long start = biosmap->addr;
417                 unsigned long long size = biosmap->size;
418                 unsigned long long end = start + size;
419                 unsigned long type = biosmap->type;
420
421                 /* Overflow in 64 bits? Ignore the memory map. */
422                 if (start > end)
423                         return -1;
424
425                 /*
426                  * Some BIOSes claim RAM in the 640k - 1M region.
427                  * Not right. Fix it up.
428                  */
429                 if (type == E820_RAM) {
430                         if (start < 0x100000ULL && end > 0xA0000ULL) {
431                                 if (start < 0xA0000ULL)
432                                         add_memory_region(start, 0xA0000ULL-start, type);
433                                 if (end <= 0x100000ULL)
434                                         continue;
435                                 start = 0x100000ULL;
436                                 size = end - start;
437                         }
438                 }
439                 add_memory_region(start, size, type);
440         } while (biosmap++,--nr_map);
441         return 0;
442 }
443
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/* Kernel-owned copies of the BIOS EDD data, filled in by copy_edd(). */
unsigned char eddnr;
struct edd_info edd[EDDMAXNR];
unsigned int edd_disk80_sig;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(eddnr);
EXPORT_SYMBOL(edd);
EXPORT_SYMBOL(edd_disk80_sig);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information into a safe place.
 *
 * Takes the values behind the EDD_NR, EDD_BUF and DISK80_SIGNATURE macros
 * (the original comment places them in empty_zero_page) and stores them in
 * eddnr, edd[] and edd_disk80_sig.
 */
static inline void copy_edd(void)
{
	edd_disk80_sig = DISK80_SIGNATURE;
	memcpy(edd, EDD_BUF, sizeof(edd));
	eddnr = EDD_NR;
}
#else
/* No EDD support configured: copy_edd() compiles to nothing. */
#define copy_edd() do {} while (0)
#endif
467
468 /*
469  * Do NOT EVER look at the BIOS memory size location.
470  * It does not work on many machines.
471  */
472 #define LOWMEMSIZE()    (0x9f000)
473
474 static void __init setup_memory_region(void)
475 {
476         char *who = machine_specific_memory_setup();
477         printk(KERN_INFO "BIOS-provided physical RAM map:\n");
478         print_memory_map(who);
479 } /* setup_memory_region */
480
481
482 static void __init parse_cmdline_early (char ** cmdline_p)
483 {
484         char c = ' ', *to = command_line, *from = COMMAND_LINE;
485         int len = 0;
486         int userdef = 0;
487
488         /* Save unparsed command line copy for /proc/cmdline */
489         memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
490         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
491
492         for (;;) {
493                 /*
494                  * "mem=nopentium" disables the 4MB page tables.
495                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
496                  * to <mem>, overriding the bios size.
497                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
498                  * <start> to <start>+<mem>, overriding the bios size.
499                  *
500                  * HPA tells me bootloaders need to parse mem=, so no new
501                  * option should be mem=  [also see Documentation/i386/boot.txt]
502                  */
503                 if (c == ' ' && !memcmp(from, "mem=", 4)) {
504                         if (to != command_line)
505                                 to--;
506                         if (!memcmp(from+4, "nopentium", 9)) {
507                                 from += 9+4;
508                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
509                                 disable_pse = 1;
510                         } else {
511                                 /* If the user specifies memory size, we
512                                  * limit the BIOS-provided memory map to
513                                  * that size. exactmap can be used to specify
514                                  * the exact map. mem=number can be used to
515                                  * trim the existing memory map.
516                                  */
517                                 unsigned long long mem_size;
518  
519                                 mem_size = memparse(from+4, &from);
520                                 limit_regions(mem_size);
521                                 userdef=1;
522                         }
523                 }
524
525                 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
526                         if (to != command_line)
527                                 to--;
528                         if (!memcmp(from+7, "exactmap", 8)) {
529                                 from += 8+7;
530                                 e820.nr_map = 0;
531                                 userdef = 1;
532                         } else {
533                                 /* If the user specifies memory size, we
534                                  * limit the BIOS-provided memory map to
535                                  * that size. exactmap can be used to specify
536                                  * the exact map. mem=number can be used to
537                                  * trim the existing memory map.
538                                  */
539                                 unsigned long long start_at, mem_size;
540  
541                                 mem_size = memparse(from+7, &from);
542                                 if (*from == '@') {
543                                         start_at = memparse(from+1, &from);
544                                         add_memory_region(start_at, mem_size, E820_RAM);
545                                 } else if (*from == '#') {
546                                         start_at = memparse(from+1, &from);
547                                         add_memory_region(start_at, mem_size, E820_ACPI);
548                                 } else if (*from == '$') {
549                                         start_at = memparse(from+1, &from);
550                                         add_memory_region(start_at, mem_size, E820_RESERVED);
551                                 } else {
552                                         limit_regions(mem_size);
553                                         userdef=1;
554                                 }
555                         }
556                 }
557
558 #ifdef CONFIG_ACPI_BOOT
559                 /* "acpi=off" disables both ACPI table parsing and interpreter */
560                 else if (!memcmp(from, "acpi=off", 8)) {
561                         acpi_ht = 0;
562                         acpi_disabled = 1;
563                 }
564
565                 /* acpi=force to over-ride black-list */
566                 else if (!memcmp(from, "acpi=force", 10)) {
567                         acpi_force = 1;
568                         acpi_ht=1;
569                         acpi_disabled = 0;
570                 }
571
572                 /* Limit ACPI just to boot-time to enable HT */
573                 else if (!memcmp(from, "acpi=ht", 7)) {
574                         acpi_ht = 1;
575                         if (!acpi_force) acpi_disabled = 1;
576                 }
577
578                 /* "pci=noacpi" disables ACPI interrupt routing */
579                 else if (!memcmp(from, "pci=noacpi", 10)) {
580                         acpi_noirq_set();
581                 }
582
583 #ifdef CONFIG_X86_LOCAL_APIC
584                 /* disable IO-APIC */
585                 else if (!memcmp(from, "noapic", 6))
586                         disable_ioapic_setup();
587 #endif /* CONFIG_X86_LOCAL_APIC */
588 #endif /* CONFIG_ACPI_BOOT */
589
590                 /*
591                  * highmem=size forces highmem to be exactly 'size' bytes.
592                  * This works even on boxes that have no highmem otherwise.
593                  * This also works to reduce highmem size on bigger boxes.
594                  */
595                 if (c == ' ' && !memcmp(from, "highmem=", 8))
596                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
597         
598                 c = *(from++);
599                 if (!c)
600                         break;
601                 if (COMMAND_LINE_SIZE <= ++len)
602                         break;
603                 *(to++) = c;
604         }
605         *to = '\0';
606         *cmdline_p = command_line;
607         if (userdef) {
608                 printk(KERN_INFO "user-defined physical RAM map:\n");
609                 print_memory_map("user");
610         }
611 }
612
613 /*
614  * Callback for efi_memory_walk.
615  */
616 static int __init
617 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
618 {
619         unsigned long *max_pfn = arg, pfn;
620
621         if (start < end) {
622                 pfn = PFN_UP(end -1);
623                 if (pfn > *max_pfn)
624                         *max_pfn = pfn;
625         }
626         return 0;
627 }
628
629
630 /*
631  * Find the highest page frame number we have available
632  */
633 void __init find_max_pfn(void)
634 {
635         int i;
636
637         max_pfn = 0;
638         if (efi_enabled) {
639                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
640                 return;
641         }
642
643         for (i = 0; i < e820.nr_map; i++) {
644                 unsigned long start, end;
645                 /* RAM? */
646                 if (e820.map[i].type != E820_RAM)
647                         continue;
648                 start = PFN_UP(e820.map[i].addr);
649                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
650                 if (start >= end)
651                         continue;
652                 if (end > max_pfn)
653                         max_pfn = end;
654         }
655 }
656
657 /*
658  * Determine low and high memory ranges:
659  */
660 unsigned long __init find_max_low_pfn(void)
661 {
662         unsigned long max_low_pfn;
663
664         max_low_pfn = max_pfn;
665         if (max_low_pfn > MAXMEM_PFN) {
666                 if (highmem_pages == -1)
667                         highmem_pages = max_pfn - MAXMEM_PFN;
668                 if (highmem_pages + MAXMEM_PFN < max_pfn)
669                         max_pfn = MAXMEM_PFN + highmem_pages;
670                 if (highmem_pages + MAXMEM_PFN > max_pfn) {
671                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
672                         highmem_pages = 0;
673                 }
674                 max_low_pfn = MAXMEM_PFN;
675 #ifndef CONFIG_HIGHMEM
676                 /* Maximum memory usable is what is directly addressable */
677                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
678                                         MAXMEM>>20);
679                 if (max_pfn > MAX_NONPAE_PFN)
680                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
681                 else
682                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
683                 max_pfn = MAXMEM_PFN;
684 #else /* !CONFIG_HIGHMEM */
685 #ifndef CONFIG_X86_PAE
686                 if (max_pfn > MAX_NONPAE_PFN) {
687                         max_pfn = MAX_NONPAE_PFN;
688                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
689                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
690                 }
691 #endif /* !CONFIG_X86_PAE */
692 #endif /* !CONFIG_HIGHMEM */
693         } else {
694                 if (highmem_pages == -1)
695                         highmem_pages = 0;
696 #ifdef CONFIG_HIGHMEM
697                 if (highmem_pages >= max_pfn) {
698                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
699                         highmem_pages = 0;
700                 }
701                 if (highmem_pages) {
702                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
703                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
704                                 highmem_pages = 0;
705                         }
706                         max_low_pfn -= highmem_pages;
707                 }
708 #else
709                 if (highmem_pages)
710                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
711 #endif
712         }
713         return max_low_pfn;
714 }
715
716 #ifndef CONFIG_DISCONTIGMEM
717
718 /*
719  * Free all available memory for boot time allocation.  Used
720  * as a callback function by efi_memory_walk()
721  */
722
723 static int __init
724 free_available_memory(unsigned long start, unsigned long end, void *arg)
725 {
726         /* check max_low_pfn */
727         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
728                 return 0;
729         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
730                 end = (max_low_pfn + 1) << PAGE_SHIFT;
731         if (start < end)
732                 free_bootmem(start, end - start);
733
734         return 0;
735 }
736 /*
737  * Register fully available low RAM pages with the bootmem allocator.
738  */
739 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
740 {
741         int i;
742
743         if (efi_enabled) {
744                 efi_memmap_walk(free_available_memory, NULL);
745                 return;
746         }
747         for (i = 0; i < e820.nr_map; i++) {
748                 unsigned long curr_pfn, last_pfn, size;
749                 /*
750                  * Reserve usable low memory
751                  */
752                 if (e820.map[i].type != E820_RAM)
753                         continue;
754                 /*
755                  * We are rounding up the start address of usable memory:
756                  */
757                 curr_pfn = PFN_UP(e820.map[i].addr);
758                 if (curr_pfn >= max_low_pfn)
759                         continue;
760                 /*
761                  * ... and at the end of the usable range downwards:
762                  */
763                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
764
765                 if (last_pfn > max_low_pfn)
766                         last_pfn = max_low_pfn;
767
768                 /*
769                  * .. finally, did all the rounding and playing
770                  * around just make the area go away?
771                  */
772                 if (last_pfn <= curr_pfn)
773                         continue;
774
775                 size = last_pfn - curr_pfn;
776                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
777         }
778 }
779
780 static unsigned long __init setup_memory(void)
781 {
782         unsigned long bootmap_size, start_pfn, max_low_pfn;
783
784         /*
785          * partially used pages are not usable - thus
786          * we are rounding upwards:
787          */
788         start_pfn = PFN_UP(__pa(_end));
789
790         find_max_pfn();
791
792         max_low_pfn = find_max_low_pfn();
793
794 #ifdef CONFIG_HIGHMEM
795         highstart_pfn = highend_pfn = max_pfn;
796         if (max_pfn > max_low_pfn) {
797                 highstart_pfn = max_low_pfn;
798         }
799         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
800                 pages_to_mb(highend_pfn - highstart_pfn));
801 #endif
802         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
803                         pages_to_mb(max_low_pfn));
804         /*
805          * Initialize the boot-time allocator (with low memory only):
806          */
807         bootmap_size = init_bootmem(start_pfn, max_low_pfn);
808
809         register_bootmem_low_pages(max_low_pfn);
810
811         /*
812          * Reserve the bootmem bitmap itself as well. We do this in two
813          * steps (first step was init_bootmem()) because this catches
814          * the (very unlikely) case of us accidentally initializing the
815          * bootmem allocator with an invalid RAM area.
816          */
817         reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
818                          bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
819
820         /*
821          * reserve physical page 0 - it's a special BIOS page on many boxes,
822          * enabling clean reboots, SMP operation, laptop functions.
823          */
824         reserve_bootmem(0, PAGE_SIZE);
825
826 #ifdef CONFIG_SMP
827         /*
828          * But first pinch a few for the stack/trampoline stuff
829          * FIXME: Don't need the extra page at 4K, but need to fix
830          * trampoline before removing it. (see the GDT stuff)
831          */
832         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
833 #endif
834 #ifdef CONFIG_ACPI_SLEEP
835         /*
836          * Reserve low memory region for sleep support.
837          */
838         acpi_reserve_bootmem();
839 #endif
840 #ifdef CONFIG_X86_FIND_SMP_CONFIG
841         /*
842          * Find and reserve possible boot-time SMP configuration:
843          */
844         find_smp_config();
845 #endif
846
847 #ifdef CONFIG_BLK_DEV_INITRD
848         if (LOADER_TYPE && INITRD_START) {
849                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
850                         reserve_bootmem(INITRD_START, INITRD_SIZE);
851                         initrd_start =
852                                 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
853                         initrd_end = initrd_start+INITRD_SIZE;
854                 }
855                 else {
856                         printk(KERN_ERR "initrd extends beyond end of memory "
857                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
858                             INITRD_START + INITRD_SIZE,
859                             max_low_pfn << PAGE_SHIFT);
860                         initrd_start = 0;
861                 }
862         }
863 #endif
864         return max_low_pfn;
865 }
866 #else
867 extern unsigned long setup_memory(void);
868 #endif /* !CONFIG_DISCONTIGMEM */
869
870 /*
871  * Request address space for all standard RAM and ROM resources
872  * and also for regions reported as reserved by the e820.
873  */
874 static void __init
875 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
876 {
877         int i;
878
879         probe_roms();
880         for (i = 0; i < e820.nr_map; i++) {
881                 struct resource *res;
882                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
883                         continue;
884                 res = alloc_bootmem_low(sizeof(struct resource));
885                 switch (e820.map[i].type) {
886                 case E820_RAM:  res->name = "System RAM"; break;
887                 case E820_ACPI: res->name = "ACPI Tables"; break;
888                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
889                 default:        res->name = "reserved";
890                 }
891                 res->start = e820.map[i].addr;
892                 res->end = res->start + e820.map[i].size - 1;
893                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
894                 request_resource(&iomem_resource, res);
895                 if (e820.map[i].type == E820_RAM) {
896                         /*
897                          *  We don't know which RAM region contains kernel data,
898                          *  so we try it repeatedly and let the resource manager
899                          *  test it.
900                          */
901                         request_resource(res, code_resource);
902                         request_resource(res, data_resource);
903                 }
904         }
905 }
906
907 /*
908  * Request address space for all standard resources
909  */
910 static void __init register_memory(unsigned long max_low_pfn)
911 {
912         unsigned long low_mem_size;
913         int i;
914
915         if (efi_enabled)
916                 efi_initialize_iomem_resources(&code_resource, &data_resource);
917         else
918                 legacy_init_iomem_resources(&code_resource, &data_resource);
919
920          /* EFI systems may still have VGA */
921         request_graphics_resource();
922
923         /* request I/O space for devices used on all i[345]86 PCs */
924         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
925                 request_resource(&ioport_resource, standard_io_resources+i);
926
927         /* Tell the PCI layer not to allocate too close to the RAM area.. */
928         low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
929         if (low_mem_size > pci_mem_start)
930                 pci_mem_start = low_mem_size;
931 }
932
933 /* Use inline assembly to define this because the nops are defined 
934    as inline assembly strings in the include files and we cannot 
935    get them easily into strings. */
936 asm("\t.data\nintelnops: " 
937     GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
938     GENERIC_NOP7 GENERIC_NOP8); 
939 asm("\t.data\nk8nops: " 
940     K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
941     K8_NOP7 K8_NOP8); 
942 asm("\t.data\nk7nops: " 
943     K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
944     K7_NOP7 K7_NOP8); 
945     
946 extern unsigned char intelnops[], k8nops[], k7nops[];
947 static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
948      NULL,
949      intelnops,
950      intelnops + 1,
951      intelnops + 1 + 2,
952      intelnops + 1 + 2 + 3,
953      intelnops + 1 + 2 + 3 + 4,
954      intelnops + 1 + 2 + 3 + 4 + 5,
955      intelnops + 1 + 2 + 3 + 4 + 5 + 6,
956      intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
957 }; 
958 static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
959      NULL,
960      k8nops,
961      k8nops + 1,
962      k8nops + 1 + 2,
963      k8nops + 1 + 2 + 3,
964      k8nops + 1 + 2 + 3 + 4,
965      k8nops + 1 + 2 + 3 + 4 + 5,
966      k8nops + 1 + 2 + 3 + 4 + 5 + 6,
967      k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
968 }; 
969 static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
970      NULL,
971      k7nops,
972      k7nops + 1,
973      k7nops + 1 + 2,
974      k7nops + 1 + 2 + 3,
975      k7nops + 1 + 2 + 3 + 4,
976      k7nops + 1 + 2 + 3 + 4 + 5,
977      k7nops + 1 + 2 + 3 + 4 + 5 + 6,
978      k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
979 }; 
980 static struct nop { 
981      int cpuid; 
982      unsigned char **noptable; 
983 } noptypes[] = { 
984      { X86_FEATURE_K8, k8_nops }, 
985      { X86_FEATURE_K7, k7_nops }, 
986      { -1, 0 }
987 }; 
988
989 /* Replace instructions with better alternatives for this CPU type.
990
991    This runs before SMP is initialized to avoid SMP problems with
992    self modifying code. This implies that assymetric systems where
993    APs have less capabilities than the boot processor are not handled. 
994    In this case boot with "noreplacement". */ 
995 void apply_alternatives(void *start, void *end) 
996
997         struct alt_instr *a; 
998         int diff, i, k;
999         unsigned char **noptable = intel_nops; 
1000         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1001                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1002                         noptable = noptypes[i].noptable;
1003                         break;
1004                 }
1005         } 
1006         for (a = start; (void *)a < end; a++) { 
1007                 if (!boot_cpu_has(a->cpuid))
1008                         continue;
1009                 BUG_ON(a->replacementlen > a->instrlen); 
1010                 memcpy(a->instr, a->replacement, a->replacementlen); 
1011                 diff = a->instrlen - a->replacementlen; 
1012                 /* Pad the rest with nops */
1013                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1014                         k = diff;
1015                         if (k > ASM_NOP_MAX)
1016                                 k = ASM_NOP_MAX;
1017                         memcpy(a->instr + i, noptable[k], k); 
1018                 } 
1019         }
1020
1021
1022 static int no_replacement __initdata = 0; 
1023  
1024 void __init alternative_instructions(void)
1025 {
1026         extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1027         if (no_replacement) 
1028                 return;
1029         apply_alternatives(__alt_instructions, __alt_instructions_end);
1030 }
1031
1032 static int __init noreplacement_setup(char *s)
1033
1034      no_replacement = 1; 
1035      return 0; 
1036
1037
1038 __setup("noreplacement", noreplacement_setup); 
1039
1040 /*
1041  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1042  * passed the efi memmap, systab, etc., so we should use these data structures
1043  * for initialization.  Note, the efi init code path is determined by the
1044  * global efi_enabled. This allows the same kernel image to be used on existing
1045  * systems (with a traditional BIOS) as well as on EFI systems.
1046  */
1047 void __init setup_arch(char **cmdline_p)
1048 {
1049         unsigned long max_low_pfn;
1050
1051         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1052         pre_setup_arch_hook();
1053         early_cpu_init();
1054
1055         /*
1056          * FIXME: This isn't an official loader_type right
1057          * now but does currently work with elilo.
1058          * If we were configured as an EFI kernel, check to make
1059          * sure that we were loaded correctly from elilo and that
1060          * the system table is valid.  If not, then initialize normally.
1061          */
1062 #ifdef CONFIG_EFI
1063         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1064                 efi_enabled = 1;
1065 #endif
1066
1067         ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1068         drive_info = DRIVE_INFO;
1069         screen_info = SCREEN_INFO;
1070         edid_info = EDID_INFO;
1071         apm_info.bios = APM_BIOS_INFO;
1072         ist_info = IST_INFO;
1073         saved_videomode = VIDEO_MODE;
1074         if( SYS_DESC_TABLE.length != 0 ) {
1075                 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1076                 machine_id = SYS_DESC_TABLE.table[0];
1077                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1078                 BIOS_revision = SYS_DESC_TABLE.table[2];
1079         }
1080         aux_device_present = AUX_DEVICE_INFO;
1081
1082 #ifdef CONFIG_BLK_DEV_RAM
1083         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1084         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1085         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1086 #endif
1087         ARCH_SETUP
1088         if (efi_enabled)
1089                 efi_init();
1090         else
1091                 setup_memory_region();
1092
1093         copy_edd();
1094
1095         if (!MOUNT_ROOT_RDONLY)
1096                 root_mountflags &= ~MS_RDONLY;
1097         init_mm.start_code = (unsigned long) _text;
1098         init_mm.end_code = (unsigned long) _etext;
1099         init_mm.end_data = (unsigned long) _edata;
1100         init_mm.brk = (unsigned long) _end;
1101
1102         code_resource.start = virt_to_phys(_text);
1103         code_resource.end = virt_to_phys(_etext)-1;
1104         data_resource.start = virt_to_phys(_etext);
1105         data_resource.end = virt_to_phys(_edata)-1;
1106
1107         parse_cmdline_early(cmdline_p);
1108
1109         max_low_pfn = setup_memory();
1110
1111         /*
1112          * NOTE: before this point _nobody_ is allowed to allocate
1113          * any memory using the bootmem allocator.
1114          */
1115
1116 #ifdef CONFIG_SMP
1117         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1118 #endif
1119         paging_init();
1120
1121 #ifdef CONFIG_EARLY_PRINTK
1122         {
1123         char *s = strstr(*cmdline_p, "earlyprintk="); 
1124         if (s) { 
1125              extern void setup_early_printk(char *);
1126              setup_early_printk(s+12); 
1127              printk("early console should work ....\n");
1128         }
1129         }
1130 #endif
1131
1132
1133         dmi_scan_machine();
1134
1135 #ifdef CONFIG_X86_GENERICARCH
1136         generic_apic_probe(*cmdline_p);
1137 #endif  
1138         if (efi_enabled)
1139                 efi_map_memmap();
1140
1141         /*
1142          * Parse the ACPI tables for possible boot-time SMP configuration.
1143          */
1144         acpi_boot_init();
1145
1146 #ifdef CONFIG_X86_LOCAL_APIC
1147         if (smp_found_config)
1148                 get_smp_config();
1149 #endif
1150
1151         register_memory(max_low_pfn);
1152
1153 #ifdef CONFIG_VT
1154 #if defined(CONFIG_VGA_CONSOLE)
1155         if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1156                 conswitchp = &vga_con;
1157 #elif defined(CONFIG_DUMMY_CONSOLE)
1158         conswitchp = &dummy_con;
1159 #endif
1160 #endif
1161 }
1162
1163 #include "setup_arch_post.h"
1164 /*
1165  * Local Variables:
1166  * mode:c
1167  * c-file-style:"k&r"
1168  * c-basic-offset:8
1169  * End:
1170  */