blob: a0b4075856793224d0e98755f0506d01257c0c99 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
21
22/*
23 * This file handles the architecture-dependent parts of initialization
24 */
25
Alexey Dobriyan129f6942005-06-23 00:08:33 -070026#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <linux/sched.h>
28#include <linux/mm.h>
Andy Whitcroft05b79bd2005-06-23 00:07:57 -070029#include <linux/mmzone.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/tty.h>
31#include <linux/ioport.h>
32#include <linux/acpi.h>
33#include <linux/apm_bios.h>
34#include <linux/initrd.h>
35#include <linux/bootmem.h>
36#include <linux/seq_file.h>
Michael Neulinge5c6c8e2006-03-14 00:11:50 -050037#include <linux/platform_device.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/console.h>
39#include <linux/mca.h>
40#include <linux/root_dev.h>
41#include <linux/highmem.h>
42#include <linux/module.h>
43#include <linux/efi.h>
44#include <linux/init.h>
45#include <linux/edd.h>
46#include <linux/nodemask.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070047#include <linux/kexec.h>
Vivek Goyal2030eae2005-06-25 14:58:20 -070048#include <linux/crash_dump.h>
Andi Kleene9928672006-01-11 22:43:33 +010049#include <linux/dmi.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <video/edid.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070052
Eric W. Biederman9635b472005-06-25 14:57:41 -070053#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <asm/e820.h>
55#include <asm/mpspec.h>
56#include <asm/setup.h>
57#include <asm/arch_hooks.h>
58#include <asm/sections.h>
59#include <asm/io_apic.h>
60#include <asm/ist.h>
61#include <asm/io.h>
62#include "setup_arch_pre.h"
63#include <bios_ebda.h>
64
Vivek Goyal92aa63a2005-06-25 14:58:18 -070065/* Forward Declaration. */
66void __init find_max_pfn(void);
67
Linus Torvalds1da177e2005-04-16 15:20:36 -070068/* This value is set up by the early boot code to point to the value
69 immediately after the boot time page tables. It contains a *physical*
70 address, and must not be in the .bss segment! */
71unsigned long init_pg_tables_end __initdata = ~0UL;
72
Li Shaohua0bb31842005-06-25 14:54:55 -070073int disable_pse __devinitdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070074
75/*
76 * Machine setup..
77 */
78
79#ifdef CONFIG_EFI
80int efi_enabled = 0;
81EXPORT_SYMBOL(efi_enabled);
82#endif
83
84/* cpu data as detected by the assembly code in head.S */
85struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86/* common cpu data for all cpus */
Christoph Lameterc3d8c142005-09-06 15:16:33 -070087struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
Alexey Dobriyan129f6942005-06-23 00:08:33 -070088EXPORT_SYMBOL(boot_cpu_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
90unsigned long mmu_cr4_features;
91
Len Brown84663612005-08-24 12:09:07 -040092#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 int acpi_disabled = 0;
94#else
95 int acpi_disabled = 1;
96#endif
97EXPORT_SYMBOL(acpi_disabled);
98
Len Brown888ba6c2005-08-24 12:07:20 -040099#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100int __initdata acpi_force = 0;
101extern acpi_interrupt_flags acpi_sci_flags;
102#endif
103
104/* for MCA, but anyone else can use it if they want */
105unsigned int machine_id;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700106#ifdef CONFIG_MCA
107EXPORT_SYMBOL(machine_id);
108#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109unsigned int machine_submodel_id;
110unsigned int BIOS_revision;
111unsigned int mca_pentium_flag;
112
113/* For PCI or other memory-mapped resources */
114unsigned long pci_mem_start = 0x10000000;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700115#ifdef CONFIG_PCI
116EXPORT_SYMBOL(pci_mem_start);
117#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118
119/* Boot loader ID as an integer, for the benefit of proc_dointvec */
120int bootloader_type;
121
122/* user-defined highmem size */
123static unsigned int highmem_pages = -1;
124
125/*
126 * Setup options
127 */
128struct drive_info_struct { char dummy[32]; } drive_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700129#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
130 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
131EXPORT_SYMBOL(drive_info);
132#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133struct screen_info screen_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700134EXPORT_SYMBOL(screen_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135struct apm_info apm_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700136EXPORT_SYMBOL(apm_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137struct sys_desc_table_struct {
138 unsigned short length;
139 unsigned char table[0];
140};
141struct edid_info edid_info;
Antonino A. Daplas5e518d72005-09-09 13:04:34 -0700142EXPORT_SYMBOL_GPL(edid_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143struct ist_info ist_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700144#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
145 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
146EXPORT_SYMBOL(ist_info);
147#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148struct e820map e820;
149
150extern void early_cpu_init(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151extern void generic_apic_probe(char *);
152extern int root_mountflags;
153
154unsigned long saved_videomode;
155
156#define RAMDISK_IMAGE_START_MASK 0x07FF
157#define RAMDISK_PROMPT_FLAG 0x8000
158#define RAMDISK_LOAD_FLAG 0x4000
159
160static char command_line[COMMAND_LINE_SIZE];
161
162unsigned char __initdata boot_params[PARAM_SIZE];
163
164static struct resource data_resource = {
165 .name = "Kernel data",
166 .start = 0,
167 .end = 0,
168 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
169};
170
171static struct resource code_resource = {
172 .name = "Kernel code",
173 .start = 0,
174 .end = 0,
175 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
176};
177
178static struct resource system_rom_resource = {
179 .name = "System ROM",
180 .start = 0xf0000,
181 .end = 0xfffff,
182 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183};
184
185static struct resource extension_rom_resource = {
186 .name = "Extension ROM",
187 .start = 0xe0000,
188 .end = 0xeffff,
189 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190};
191
192static struct resource adapter_rom_resources[] = { {
193 .name = "Adapter ROM",
194 .start = 0xc8000,
195 .end = 0,
196 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
197}, {
198 .name = "Adapter ROM",
199 .start = 0,
200 .end = 0,
201 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
202}, {
203 .name = "Adapter ROM",
204 .start = 0,
205 .end = 0,
206 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
207}, {
208 .name = "Adapter ROM",
209 .start = 0,
210 .end = 0,
211 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212}, {
213 .name = "Adapter ROM",
214 .start = 0,
215 .end = 0,
216 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
217}, {
218 .name = "Adapter ROM",
219 .start = 0,
220 .end = 0,
221 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
222} };
223
224#define ADAPTER_ROM_RESOURCES \
225 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
226
227static struct resource video_rom_resource = {
228 .name = "Video ROM",
229 .start = 0xc0000,
230 .end = 0xc7fff,
231 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
232};
233
234static struct resource video_ram_resource = {
235 .name = "Video RAM area",
236 .start = 0xa0000,
237 .end = 0xbffff,
238 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
239};
240
241static struct resource standard_io_resources[] = { {
242 .name = "dma1",
243 .start = 0x0000,
244 .end = 0x001f,
245 .flags = IORESOURCE_BUSY | IORESOURCE_IO
246}, {
247 .name = "pic1",
248 .start = 0x0020,
249 .end = 0x0021,
250 .flags = IORESOURCE_BUSY | IORESOURCE_IO
251}, {
252 .name = "timer0",
253 .start = 0x0040,
254 .end = 0x0043,
255 .flags = IORESOURCE_BUSY | IORESOURCE_IO
256}, {
257 .name = "timer1",
258 .start = 0x0050,
259 .end = 0x0053,
260 .flags = IORESOURCE_BUSY | IORESOURCE_IO
261}, {
262 .name = "keyboard",
263 .start = 0x0060,
264 .end = 0x006f,
265 .flags = IORESOURCE_BUSY | IORESOURCE_IO
266}, {
267 .name = "dma page reg",
268 .start = 0x0080,
269 .end = 0x008f,
270 .flags = IORESOURCE_BUSY | IORESOURCE_IO
271}, {
272 .name = "pic2",
273 .start = 0x00a0,
274 .end = 0x00a1,
275 .flags = IORESOURCE_BUSY | IORESOURCE_IO
276}, {
277 .name = "dma2",
278 .start = 0x00c0,
279 .end = 0x00df,
280 .flags = IORESOURCE_BUSY | IORESOURCE_IO
281}, {
282 .name = "fpu",
283 .start = 0x00f0,
284 .end = 0x00ff,
285 .flags = IORESOURCE_BUSY | IORESOURCE_IO
286} };
287
288#define STANDARD_IO_RESOURCES \
289 (sizeof standard_io_resources / sizeof standard_io_resources[0])
290
291#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
292
293static int __init romchecksum(unsigned char *rom, unsigned long length)
294{
295 unsigned char *p, sum = 0;
296
297 for (p = rom; p < rom + length; p++)
298 sum += *p;
299 return sum == 0;
300}
301
302static void __init probe_roms(void)
303{
304 unsigned long start, length, upper;
305 unsigned char *rom;
306 int i;
307
308 /* video rom */
309 upper = adapter_rom_resources[0].start;
310 for (start = video_rom_resource.start; start < upper; start += 2048) {
311 rom = isa_bus_to_virt(start);
312 if (!romsignature(rom))
313 continue;
314
315 video_rom_resource.start = start;
316
317 /* 0 < length <= 0x7f * 512, historically */
318 length = rom[2] * 512;
319
320 /* if checksum okay, trust length byte */
321 if (length && romchecksum(rom, length))
322 video_rom_resource.end = start + length - 1;
323
324 request_resource(&iomem_resource, &video_rom_resource);
325 break;
326 }
327
328 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
329 if (start < upper)
330 start = upper;
331
332 /* system rom */
333 request_resource(&iomem_resource, &system_rom_resource);
334 upper = system_rom_resource.start;
335
336 /* check for extension rom (ignore length byte!) */
337 rom = isa_bus_to_virt(extension_rom_resource.start);
338 if (romsignature(rom)) {
339 length = extension_rom_resource.end - extension_rom_resource.start + 1;
340 if (romchecksum(rom, length)) {
341 request_resource(&iomem_resource, &extension_rom_resource);
342 upper = extension_rom_resource.start;
343 }
344 }
345
346 /* check for adapter roms on 2k boundaries */
347 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
348 rom = isa_bus_to_virt(start);
349 if (!romsignature(rom))
350 continue;
351
352 /* 0 < length <= 0x7f * 512, historically */
353 length = rom[2] * 512;
354
355 /* but accept any length that fits if checksum okay */
356 if (!length || start + length > upper || !romchecksum(rom, length))
357 continue;
358
359 adapter_rom_resources[i].start = start;
360 adapter_rom_resources[i].end = start + length - 1;
361 request_resource(&iomem_resource, &adapter_rom_resources[i]);
362
363 start = adapter_rom_resources[i++].end & ~2047UL;
364 }
365}
366
367static void __init limit_regions(unsigned long long size)
368{
369 unsigned long long current_addr = 0;
370 int i;
371
372 if (efi_enabled) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700373 efi_memory_desc_t *md;
374 void *p;
375
376 for (p = memmap.map, i = 0; p < memmap.map_end;
377 p += memmap.desc_size, i++) {
378 md = p;
379 current_addr = md->phys_addr + (md->num_pages << 12);
380 if (md->type == EFI_CONVENTIONAL_MEMORY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 if (current_addr >= size) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700382 md->num_pages -=
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
384 memmap.nr_map = i + 1;
385 return;
386 }
387 }
388 }
389 }
390 for (i = 0; i < e820.nr_map; i++) {
Dave Hansenf014a552005-10-30 14:59:37 -0800391 current_addr = e820.map[i].addr + e820.map[i].size;
392 if (current_addr < size)
393 continue;
394
395 if (e820.map[i].type != E820_RAM)
396 continue;
397
398 if (e820.map[i].addr >= size) {
399 /*
400 * This region starts past the end of the
401 * requested size, skip it completely.
402 */
403 e820.nr_map = i;
404 } else {
405 e820.nr_map = i + 1;
406 e820.map[i].size -= current_addr - size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 }
Dave Hansenf014a552005-10-30 14:59:37 -0800408 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 }
410}
411
412static void __init add_memory_region(unsigned long long start,
413 unsigned long long size, int type)
414{
415 int x;
416
417 if (!efi_enabled) {
418 x = e820.nr_map;
419
420 if (x == E820MAX) {
421 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
422 return;
423 }
424
425 e820.map[x].addr = start;
426 e820.map[x].size = size;
427 e820.map[x].type = type;
428 e820.nr_map++;
429 }
430} /* add_memory_region */
431
432#define E820_DEBUG 1
433
434static void __init print_memory_map(char *who)
435{
436 int i;
437
438 for (i = 0; i < e820.nr_map; i++) {
439 printk(" %s: %016Lx - %016Lx ", who,
440 e820.map[i].addr,
441 e820.map[i].addr + e820.map[i].size);
442 switch (e820.map[i].type) {
443 case E820_RAM: printk("(usable)\n");
444 break;
445 case E820_RESERVED:
446 printk("(reserved)\n");
447 break;
448 case E820_ACPI:
449 printk("(ACPI data)\n");
450 break;
451 case E820_NVS:
452 printk("(ACPI NVS)\n");
453 break;
454 default: printk("type %lu\n", e820.map[i].type);
455 break;
456 }
457 }
458}
459
460/*
461 * Sanitize the BIOS e820 map.
462 *
463 * Some e820 responses include overlapping entries. The following
464 * replaces the original e820 map with a new one, removing overlaps.
465 *
466 */
467struct change_member {
468 struct e820entry *pbios; /* pointer to original bios entry */
469 unsigned long long addr; /* address for this change point */
470};
471static struct change_member change_point_list[2*E820MAX] __initdata;
472static struct change_member *change_point[2*E820MAX] __initdata;
473static struct e820entry *overlap_list[E820MAX] __initdata;
474static struct e820entry new_bios[E820MAX] __initdata;
475
476static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
477{
478 struct change_member *change_tmp;
479 unsigned long current_type, last_type;
480 unsigned long long last_addr;
481 int chgidx, still_changing;
482 int overlap_entries;
483 int new_bios_entry;
484 int old_nr, new_nr, chg_nr;
485 int i;
486
487 /*
488 Visually we're performing the following (1,2,3,4 = memory types)...
489
490 Sample memory map (w/overlaps):
491 ____22__________________
492 ______________________4_
493 ____1111________________
494 _44_____________________
495 11111111________________
496 ____________________33__
497 ___________44___________
498 __________33333_________
499 ______________22________
500 ___________________2222_
501 _________111111111______
502 _____________________11_
503 _________________4______
504
505 Sanitized equivalent (no overlap):
506 1_______________________
507 _44_____________________
508 ___1____________________
509 ____22__________________
510 ______11________________
511 _________1______________
512 __________3_____________
513 ___________44___________
514 _____________33_________
515 _______________2________
516 ________________1_______
517 _________________4______
518 ___________________2____
519 ____________________33__
520 ______________________4_
521 */
522
523 /* if there's only one memory region, don't bother */
524 if (*pnr_map < 2)
525 return -1;
526
527 old_nr = *pnr_map;
528
529 /* bail out if we find any unreasonable addresses in bios map */
530 for (i=0; i<old_nr; i++)
531 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
532 return -1;
533
534 /* create pointers for initial change-point information (for sorting) */
535 for (i=0; i < 2*old_nr; i++)
536 change_point[i] = &change_point_list[i];
537
538 /* record all known change-points (starting and ending addresses),
539 omitting those that are for empty memory regions */
540 chgidx = 0;
541 for (i=0; i < old_nr; i++) {
542 if (biosmap[i].size != 0) {
543 change_point[chgidx]->addr = biosmap[i].addr;
544 change_point[chgidx++]->pbios = &biosmap[i];
545 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
546 change_point[chgidx++]->pbios = &biosmap[i];
547 }
548 }
549 chg_nr = chgidx; /* true number of change-points */
550
551 /* sort change-point list by memory addresses (low -> high) */
552 still_changing = 1;
553 while (still_changing) {
554 still_changing = 0;
555 for (i=1; i < chg_nr; i++) {
556 /* if <current_addr> > <last_addr>, swap */
557 /* or, if current=<start_addr> & last=<end_addr>, swap */
558 if ((change_point[i]->addr < change_point[i-1]->addr) ||
559 ((change_point[i]->addr == change_point[i-1]->addr) &&
560 (change_point[i]->addr == change_point[i]->pbios->addr) &&
561 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
562 )
563 {
564 change_tmp = change_point[i];
565 change_point[i] = change_point[i-1];
566 change_point[i-1] = change_tmp;
567 still_changing=1;
568 }
569 }
570 }
571
572 /* create a new bios memory map, removing overlaps */
573 overlap_entries=0; /* number of entries in the overlap table */
574 new_bios_entry=0; /* index for creating new bios map entries */
575 last_type = 0; /* start with undefined memory type */
576 last_addr = 0; /* start with 0 as last starting address */
577 /* loop through change-points, determining affect on the new bios map */
578 for (chgidx=0; chgidx < chg_nr; chgidx++)
579 {
580 /* keep track of all overlapping bios entries */
581 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
582 {
583 /* add map entry to overlap list (> 1 entry implies an overlap) */
584 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
585 }
586 else
587 {
588 /* remove entry from list (order independent, so swap with last) */
589 for (i=0; i<overlap_entries; i++)
590 {
591 if (overlap_list[i] == change_point[chgidx]->pbios)
592 overlap_list[i] = overlap_list[overlap_entries-1];
593 }
594 overlap_entries--;
595 }
596 /* if there are overlapping entries, decide which "type" to use */
597 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
598 current_type = 0;
599 for (i=0; i<overlap_entries; i++)
600 if (overlap_list[i]->type > current_type)
601 current_type = overlap_list[i]->type;
602 /* continue building up new bios map based on this information */
603 if (current_type != last_type) {
604 if (last_type != 0) {
605 new_bios[new_bios_entry].size =
606 change_point[chgidx]->addr - last_addr;
607 /* move forward only if the new size was non-zero */
608 if (new_bios[new_bios_entry].size != 0)
609 if (++new_bios_entry >= E820MAX)
610 break; /* no more space left for new bios entries */
611 }
612 if (current_type != 0) {
613 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
614 new_bios[new_bios_entry].type = current_type;
615 last_addr=change_point[chgidx]->addr;
616 }
617 last_type = current_type;
618 }
619 }
620 new_nr = new_bios_entry; /* retain count for new bios entries */
621
622 /* copy new bios mapping into original location */
623 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
624 *pnr_map = new_nr;
625
626 return 0;
627}
628
629/*
630 * Copy the BIOS e820 map into a safe place.
631 *
632 * Sanity-check it while we're at it..
633 *
634 * If we're lucky and live on a modern system, the setup code
635 * will have given us a memory map that we can use to properly
636 * set up memory. If we aren't, we'll fake a memory map.
637 *
638 * We check to see that the memory map contains at least 2 elements
639 * before we'll use it, because the detection code in setup.S may
640 * not be perfect and most every PC known to man has two memory
641 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
642 * thinkpad 560x, for example, does not cooperate with the memory
643 * detection code.)
644 */
645static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
646{
647 /* Only one memory region (or negative)? Ignore it */
648 if (nr_map < 2)
649 return -1;
650
651 do {
652 unsigned long long start = biosmap->addr;
653 unsigned long long size = biosmap->size;
654 unsigned long long end = start + size;
655 unsigned long type = biosmap->type;
656
657 /* Overflow in 64 bits? Ignore the memory map. */
658 if (start > end)
659 return -1;
660
661 /*
662 * Some BIOSes claim RAM in the 640k - 1M region.
663 * Not right. Fix it up.
664 */
665 if (type == E820_RAM) {
666 if (start < 0x100000ULL && end > 0xA0000ULL) {
667 if (start < 0xA0000ULL)
668 add_memory_region(start, 0xA0000ULL-start, type);
669 if (end <= 0x100000ULL)
670 continue;
671 start = 0x100000ULL;
672 size = end - start;
673 }
674 }
675 add_memory_region(start, size, type);
676 } while (biosmap++,--nr_map);
677 return 0;
678}
679
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Fills the global `edd` with the two entry counts, the MBR signature
 * array and the EDD info array.
 */
static inline void copy_edd(void)
{
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
}
#else
/* EDD support not configured: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
702
703/*
704 * Do NOT EVER look at the BIOS memory size location.
705 * It does not work on many machines.
706 */
707#define LOWMEMSIZE() (0x9f000)
708
709static void __init parse_cmdline_early (char ** cmdline_p)
710{
711 char c = ' ', *to = command_line, *from = saved_command_line;
712 int len = 0;
713 int userdef = 0;
714
715 /* Save unparsed command line copy for /proc/cmdline */
716 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
717
718 for (;;) {
719 if (c != ' ')
720 goto next_char;
721 /*
722 * "mem=nopentium" disables the 4MB page tables.
723 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
724 * to <mem>, overriding the bios size.
725 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
726 * <start> to <start>+<mem>, overriding the bios size.
727 *
728 * HPA tells me bootloaders need to parse mem=, so no new
729 * option should be mem= [also see Documentation/i386/boot.txt]
730 */
731 if (!memcmp(from, "mem=", 4)) {
732 if (to != command_line)
733 to--;
734 if (!memcmp(from+4, "nopentium", 9)) {
735 from += 9+4;
736 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
737 disable_pse = 1;
738 } else {
739 /* If the user specifies memory size, we
740 * limit the BIOS-provided memory map to
741 * that size. exactmap can be used to specify
742 * the exact map. mem=number can be used to
743 * trim the existing memory map.
744 */
745 unsigned long long mem_size;
746
747 mem_size = memparse(from+4, &from);
748 limit_regions(mem_size);
749 userdef=1;
750 }
751 }
752
753 else if (!memcmp(from, "memmap=", 7)) {
754 if (to != command_line)
755 to--;
756 if (!memcmp(from+7, "exactmap", 8)) {
Vivek Goyal92aa63a2005-06-25 14:58:18 -0700757#ifdef CONFIG_CRASH_DUMP
758 /* If we are doing a crash dump, we
759 * still need to know the real mem
760 * size before original memory map is
761 * reset.
762 */
763 find_max_pfn();
764 saved_max_pfn = max_pfn;
765#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 from += 8+7;
767 e820.nr_map = 0;
768 userdef = 1;
769 } else {
770 /* If the user specifies memory size, we
771 * limit the BIOS-provided memory map to
772 * that size. exactmap can be used to specify
773 * the exact map. mem=number can be used to
774 * trim the existing memory map.
775 */
776 unsigned long long start_at, mem_size;
777
778 mem_size = memparse(from+7, &from);
779 if (*from == '@') {
780 start_at = memparse(from+1, &from);
781 add_memory_region(start_at, mem_size, E820_RAM);
782 } else if (*from == '#') {
783 start_at = memparse(from+1, &from);
784 add_memory_region(start_at, mem_size, E820_ACPI);
785 } else if (*from == '$') {
786 start_at = memparse(from+1, &from);
787 add_memory_region(start_at, mem_size, E820_RESERVED);
788 } else {
789 limit_regions(mem_size);
790 userdef=1;
791 }
792 }
793 }
794
795 else if (!memcmp(from, "noexec=", 7))
796 noexec_setup(from + 7);
797
798
799#ifdef CONFIG_X86_SMP
800 /*
801 * If the BIOS enumerates physical processors before logical,
802 * maxcpus=N at enumeration-time can be used to disable HT.
803 */
804 else if (!memcmp(from, "maxcpus=", 8)) {
805 extern unsigned int maxcpus;
806
807 maxcpus = simple_strtoul(from + 8, NULL, 0);
808 }
809#endif
810
Len Brown888ba6c2005-08-24 12:07:20 -0400811#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 /* "acpi=off" disables both ACPI table parsing and interpreter */
813 else if (!memcmp(from, "acpi=off", 8)) {
814 disable_acpi();
815 }
816
817 /* acpi=force to over-ride black-list */
818 else if (!memcmp(from, "acpi=force", 10)) {
819 acpi_force = 1;
820 acpi_ht = 1;
821 acpi_disabled = 0;
822 }
823
824 /* acpi=strict disables out-of-spec workarounds */
825 else if (!memcmp(from, "acpi=strict", 11)) {
826 acpi_strict = 1;
827 }
828
829 /* Limit ACPI just to boot-time to enable HT */
830 else if (!memcmp(from, "acpi=ht", 7)) {
831 if (!acpi_force)
832 disable_acpi();
833 acpi_ht = 1;
834 }
835
836 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
837 else if (!memcmp(from, "pci=noacpi", 10)) {
838 acpi_disable_pci();
839 }
840 /* "acpi=noirq" disables ACPI interrupt routing */
841 else if (!memcmp(from, "acpi=noirq", 10)) {
842 acpi_noirq_set();
843 }
844
845 else if (!memcmp(from, "acpi_sci=edge", 13))
846 acpi_sci_flags.trigger = 1;
847
848 else if (!memcmp(from, "acpi_sci=level", 14))
849 acpi_sci_flags.trigger = 3;
850
851 else if (!memcmp(from, "acpi_sci=high", 13))
852 acpi_sci_flags.polarity = 1;
853
854 else if (!memcmp(from, "acpi_sci=low", 12))
855 acpi_sci_flags.polarity = 3;
856
857#ifdef CONFIG_X86_IO_APIC
858 else if (!memcmp(from, "acpi_skip_timer_override", 24))
859 acpi_skip_timer_override = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
Chuck Ebbert66759a02005-09-12 18:49:25 +0200861 if (!memcmp(from, "disable_timer_pin_1", 19))
862 disable_timer_pin_1 = 1;
863 if (!memcmp(from, "enable_timer_pin_1", 18))
864 disable_timer_pin_1 = -1;
865
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 /* disable IO-APIC */
867 else if (!memcmp(from, "noapic", 6))
868 disable_ioapic_setup();
Cal Peake0a305d22005-09-13 02:28:07 -0400869#endif /* CONFIG_X86_IO_APIC */
Len Brown888ba6c2005-08-24 12:07:20 -0400870#endif /* CONFIG_ACPI */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
Eric W. Biederman9635b472005-06-25 14:57:41 -0700872#ifdef CONFIG_X86_LOCAL_APIC
873 /* enable local APIC */
874 else if (!memcmp(from, "lapic", 5))
875 lapic_enable();
876
877 /* disable local APIC */
878 else if (!memcmp(from, "nolapic", 6))
879 lapic_disable();
880#endif /* CONFIG_X86_LOCAL_APIC */
881
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700882#ifdef CONFIG_KEXEC
883 /* crashkernel=size@addr specifies the location to reserve for
884 * a crash kernel. By reserving this memory we guarantee
885 * that linux never set's it up as a DMA target.
886 * Useful for holding code to do something appropriate
887 * after a kernel panic.
888 */
889 else if (!memcmp(from, "crashkernel=", 12)) {
890 unsigned long size, base;
891 size = memparse(from+12, &from);
892 if (*from == '@') {
893 base = memparse(from+1, &from);
894 /* FIXME: Do I want a sanity check
895 * to validate the memory range?
896 */
897 crashk_res.start = base;
898 crashk_res.end = base + size - 1;
899 }
900 }
901#endif
Vivek Goyalaac04b32006-01-09 20:51:47 -0800902#ifdef CONFIG_PROC_VMCORE
Vivek Goyal2030eae2005-06-25 14:58:20 -0700903 /* elfcorehdr= specifies the location of elf core header
904 * stored by the crashed kernel.
905 */
906 else if (!memcmp(from, "elfcorehdr=", 11))
907 elfcorehdr_addr = memparse(from+11, &from);
908#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700909
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 /*
911 * highmem=size forces highmem to be exactly 'size' bytes.
912 * This works even on boxes that have no highmem otherwise.
913 * This also works to reduce highmem size on bigger boxes.
914 */
915 else if (!memcmp(from, "highmem=", 8))
916 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
917
918 /*
919 * vmalloc=size forces the vmalloc area to be exactly 'size'
920 * bytes. This can be used to increase (or decrease) the
921 * vmalloc area - the default is 128m.
922 */
923 else if (!memcmp(from, "vmalloc=", 8))
924 __VMALLOC_RESERVE = memparse(from+8, &from);
925
926 next_char:
927 c = *(from++);
928 if (!c)
929 break;
930 if (COMMAND_LINE_SIZE <= ++len)
931 break;
932 *(to++) = c;
933 }
934 *to = '\0';
935 *cmdline_p = command_line;
936 if (userdef) {
937 printk(KERN_INFO "user-defined physical RAM map:\n");
938 print_memory_map("user");
939 }
940}
941
942/*
943 * Callback for efi_memory_walk.
944 */
945static int __init
946efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
947{
948 unsigned long *max_pfn = arg, pfn;
949
950 if (start < end) {
951 pfn = PFN_UP(end -1);
952 if (pfn > *max_pfn)
953 *max_pfn = pfn;
954 }
955 return 0;
956}
957
Andy Whitcroft215c3402006-01-06 00:12:06 -0800958static int __init
959efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
960{
961 memory_present(0, start, end);
962 return 0;
963}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
965/*
966 * Find the highest page frame number we have available
967 */
968void __init find_max_pfn(void)
969{
970 int i;
971
972 max_pfn = 0;
973 if (efi_enabled) {
974 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
Andy Whitcroft215c3402006-01-06 00:12:06 -0800975 efi_memmap_walk(efi_memory_present_wrapper, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 return;
977 }
978
979 for (i = 0; i < e820.nr_map; i++) {
980 unsigned long start, end;
981 /* RAM? */
982 if (e820.map[i].type != E820_RAM)
983 continue;
984 start = PFN_UP(e820.map[i].addr);
985 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
986 if (start >= end)
987 continue;
988 if (end > max_pfn)
989 max_pfn = end;
Andy Whitcroft215c3402006-01-06 00:12:06 -0800990 memory_present(0, start, end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 }
992}
993
994/*
995 * Determine low and high memory ranges:
996 */
997unsigned long __init find_max_low_pfn(void)
998{
999 unsigned long max_low_pfn;
1000
1001 max_low_pfn = max_pfn;
1002 if (max_low_pfn > MAXMEM_PFN) {
1003 if (highmem_pages == -1)
1004 highmem_pages = max_pfn - MAXMEM_PFN;
1005 if (highmem_pages + MAXMEM_PFN < max_pfn)
1006 max_pfn = MAXMEM_PFN + highmem_pages;
1007 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1008 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1009 highmem_pages = 0;
1010 }
1011 max_low_pfn = MAXMEM_PFN;
1012#ifndef CONFIG_HIGHMEM
1013 /* Maximum memory usable is what is directly addressable */
1014 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1015 MAXMEM>>20);
1016 if (max_pfn > MAX_NONPAE_PFN)
1017 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1018 else
1019 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1020 max_pfn = MAXMEM_PFN;
1021#else /* !CONFIG_HIGHMEM */
1022#ifndef CONFIG_X86_PAE
1023 if (max_pfn > MAX_NONPAE_PFN) {
1024 max_pfn = MAX_NONPAE_PFN;
1025 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1026 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1027 }
1028#endif /* !CONFIG_X86_PAE */
1029#endif /* !CONFIG_HIGHMEM */
1030 } else {
1031 if (highmem_pages == -1)
1032 highmem_pages = 0;
1033#ifdef CONFIG_HIGHMEM
1034 if (highmem_pages >= max_pfn) {
1035 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1036 highmem_pages = 0;
1037 }
1038 if (highmem_pages) {
1039 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1040 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1041 highmem_pages = 0;
1042 }
1043 max_low_pfn -= highmem_pages;
1044 }
1045#else
1046 if (highmem_pages)
1047 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1048#endif
1049 }
1050 return max_low_pfn;
1051}
1052
1053/*
1054 * Free all available memory for boot time allocation. Used
1055 * as a callback function by efi_memory_walk()
1056 */
1057
1058static int __init
1059free_available_memory(unsigned long start, unsigned long end, void *arg)
1060{
1061 /* check max_low_pfn */
1062 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1063 return 0;
1064 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1065 end = (max_low_pfn + 1) << PAGE_SHIFT;
1066 if (start < end)
1067 free_bootmem(start, end - start);
1068
1069 return 0;
1070}
1071/*
1072 * Register fully available low RAM pages with the bootmem allocator.
1073 */
1074static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1075{
1076 int i;
1077
1078 if (efi_enabled) {
1079 efi_memmap_walk(free_available_memory, NULL);
1080 return;
1081 }
1082 for (i = 0; i < e820.nr_map; i++) {
1083 unsigned long curr_pfn, last_pfn, size;
1084 /*
1085 * Reserve usable low memory
1086 */
1087 if (e820.map[i].type != E820_RAM)
1088 continue;
1089 /*
1090 * We are rounding up the start address of usable memory:
1091 */
1092 curr_pfn = PFN_UP(e820.map[i].addr);
1093 if (curr_pfn >= max_low_pfn)
1094 continue;
1095 /*
1096 * ... and at the end of the usable range downwards:
1097 */
1098 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1099
1100 if (last_pfn > max_low_pfn)
1101 last_pfn = max_low_pfn;
1102
1103 /*
1104 * .. finally, did all the rounding and playing
1105 * around just make the area go away?
1106 */
1107 if (last_pfn <= curr_pfn)
1108 continue;
1109
1110 size = last_pfn - curr_pfn;
1111 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1112 }
1113}
1114
1115/*
1116 * workaround for Dell systems that neglect to reserve EBDA
1117 */
1118static void __init reserve_ebda_region(void)
1119{
1120 unsigned int addr;
1121 addr = get_bios_ebda();
1122 if (addr)
1123 reserve_bootmem(addr, PAGE_SIZE);
1124}
1125
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001126#ifndef CONFIG_NEED_MULTIPLE_NODES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127void __init setup_bootmem_allocator(void);
1128static unsigned long __init setup_memory(void)
1129{
1130 /*
1131 * partially used pages are not usable - thus
1132 * we are rounding upwards:
1133 */
1134 min_low_pfn = PFN_UP(init_pg_tables_end);
1135
1136 find_max_pfn();
1137
1138 max_low_pfn = find_max_low_pfn();
1139
1140#ifdef CONFIG_HIGHMEM
1141 highstart_pfn = highend_pfn = max_pfn;
1142 if (max_pfn > max_low_pfn) {
1143 highstart_pfn = max_low_pfn;
1144 }
1145 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1146 pages_to_mb(highend_pfn - highstart_pfn));
1147#endif
1148 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1149 pages_to_mb(max_low_pfn));
1150
1151 setup_bootmem_allocator();
1152
1153 return max_low_pfn;
1154}
1155
1156void __init zone_sizes_init(void)
1157{
1158 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1159 unsigned int max_dma, low;
1160
1161 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1162 low = max_low_pfn;
1163
1164 if (low < max_dma)
1165 zones_size[ZONE_DMA] = low;
1166 else {
1167 zones_size[ZONE_DMA] = max_dma;
1168 zones_size[ZONE_NORMAL] = low - max_dma;
1169#ifdef CONFIG_HIGHMEM
1170 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1171#endif
1172 }
1173 free_area_init(zones_size);
1174}
1175#else
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001176extern unsigned long __init setup_memory(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177extern void zone_sizes_init(void);
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001178#endif /* !CONFIG_NEED_MULTIPLE_NODES */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179
1180void __init setup_bootmem_allocator(void)
1181{
1182 unsigned long bootmap_size;
1183 /*
1184 * Initialize the boot-time allocator (with low memory only):
1185 */
1186 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1187
1188 register_bootmem_low_pages(max_low_pfn);
1189
1190 /*
1191 * Reserve the bootmem bitmap itself as well. We do this in two
1192 * steps (first step was init_bootmem()) because this catches
1193 * the (very unlikely) case of us accidentally initializing the
1194 * bootmem allocator with an invalid RAM area.
1195 */
Vivek Goyal8a919082005-06-25 14:57:51 -07001196 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1197 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
1199 /*
1200 * reserve physical page 0 - it's a special BIOS page on many boxes,
1201 * enabling clean reboots, SMP operation, laptop functions.
1202 */
1203 reserve_bootmem(0, PAGE_SIZE);
1204
1205 /* reserve EBDA region, it's a 4K region */
1206 reserve_ebda_region();
1207
1208 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1209 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1210 unless you have no PS/2 mouse plugged in. */
1211 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1212 boot_cpu_data.x86 == 6)
1213 reserve_bootmem(0xa0000 - 4096, 4096);
1214
1215#ifdef CONFIG_SMP
1216 /*
1217 * But first pinch a few for the stack/trampoline stuff
1218 * FIXME: Don't need the extra page at 4K, but need to fix
1219 * trampoline before removing it. (see the GDT stuff)
1220 */
1221 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1222#endif
1223#ifdef CONFIG_ACPI_SLEEP
1224 /*
1225 * Reserve low memory region for sleep support.
1226 */
1227 acpi_reserve_bootmem();
1228#endif
1229#ifdef CONFIG_X86_FIND_SMP_CONFIG
1230 /*
1231 * Find and reserve possible boot-time SMP configuration:
1232 */
1233 find_smp_config();
1234#endif
1235
1236#ifdef CONFIG_BLK_DEV_INITRD
1237 if (LOADER_TYPE && INITRD_START) {
1238 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1239 reserve_bootmem(INITRD_START, INITRD_SIZE);
1240 initrd_start =
1241 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1242 initrd_end = initrd_start+INITRD_SIZE;
1243 }
1244 else {
1245 printk(KERN_ERR "initrd extends beyond end of memory "
1246 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1247 INITRD_START + INITRD_SIZE,
1248 max_low_pfn << PAGE_SHIFT);
1249 initrd_start = 0;
1250 }
1251 }
1252#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001253#ifdef CONFIG_KEXEC
1254 if (crashk_res.start != crashk_res.end)
1255 reserve_bootmem(crashk_res.start,
1256 crashk_res.end - crashk_res.start + 1);
1257#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258}
1259
1260/*
1261 * The node 0 pgdat is initialized before all of these because
1262 * it's needed for bootmem. node>0 pgdats have their virtual
1263 * space allocated before the pagetables are in place to access
1264 * them, so they can't be cleared then.
1265 *
1266 * This should all compile down to nothing when NUMA is off.
1267 */
1268void __init remapped_pgdat_init(void)
1269{
1270 int nid;
1271
1272 for_each_online_node(nid) {
1273 if (nid != 0)
1274 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1275 }
1276}
1277
1278/*
1279 * Request address space for all standard RAM and ROM resources
1280 * and also for regions reported as reserved by the e820.
1281 */
1282static void __init
1283legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1284{
1285 int i;
1286
1287 probe_roms();
1288 for (i = 0; i < e820.nr_map; i++) {
1289 struct resource *res;
1290 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1291 continue;
1292 res = alloc_bootmem_low(sizeof(struct resource));
1293 switch (e820.map[i].type) {
1294 case E820_RAM: res->name = "System RAM"; break;
1295 case E820_ACPI: res->name = "ACPI Tables"; break;
1296 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1297 default: res->name = "reserved";
1298 }
1299 res->start = e820.map[i].addr;
1300 res->end = res->start + e820.map[i].size - 1;
1301 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1302 request_resource(&iomem_resource, res);
1303 if (e820.map[i].type == E820_RAM) {
1304 /*
1305 * We don't know which RAM region contains kernel data,
1306 * so we try it repeatedly and let the resource manager
1307 * test it.
1308 */
1309 request_resource(res, code_resource);
1310 request_resource(res, data_resource);
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001311#ifdef CONFIG_KEXEC
1312 request_resource(res, &crashk_res);
1313#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 }
1315 }
1316}
1317
1318/*
1319 * Request address space for all standard resources
1320 */
1321static void __init register_memory(void)
1322{
Daniel Ritzf0eca962005-09-09 00:57:14 +02001323 unsigned long gapstart, gapsize, round;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 unsigned long long last;
1325 int i;
1326
1327 if (efi_enabled)
1328 efi_initialize_iomem_resources(&code_resource, &data_resource);
1329 else
1330 legacy_init_iomem_resources(&code_resource, &data_resource);
1331
1332 /* EFI systems may still have VGA */
1333 request_resource(&iomem_resource, &video_ram_resource);
1334
1335 /* request I/O space for devices used on all i[345]86 PCs */
1336 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1337 request_resource(&ioport_resource, &standard_io_resources[i]);
1338
1339 /*
1340 * Search for the bigest gap in the low 32 bits of the e820
1341 * memory space.
1342 */
1343 last = 0x100000000ull;
1344 gapstart = 0x10000000;
1345 gapsize = 0x400000;
1346 i = e820.nr_map;
1347 while (--i >= 0) {
1348 unsigned long long start = e820.map[i].addr;
1349 unsigned long long end = start + e820.map[i].size;
1350
1351 /*
1352 * Since "last" is at most 4GB, we know we'll
1353 * fit in 32 bits if this condition is true
1354 */
1355 if (last > end) {
1356 unsigned long gap = last - end;
1357
1358 if (gap > gapsize) {
1359 gapsize = gap;
1360 gapstart = end;
1361 }
1362 }
1363 if (start < last)
1364 last = start;
1365 }
1366
1367 /*
Daniel Ritzf0eca962005-09-09 00:57:14 +02001368 * See how much we want to round up: start off with
1369 * rounding to the next 1MB area.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370 */
Daniel Ritzf0eca962005-09-09 00:57:14 +02001371 round = 0x100000;
1372 while ((gapsize >> 4) > round)
1373 round += round;
1374 /* Fun with two's complement */
1375 pci_mem_start = (gapstart + round) & -round;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376
1377 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1378 pci_mem_start, gapstart, gapsize);
1379}
1380
1381/* Use inline assembly to define this because the nops are defined
1382 as inline assembly strings in the include files and we cannot
1383 get them easily into strings. */
1384asm("\t.data\nintelnops: "
1385 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1386 GENERIC_NOP7 GENERIC_NOP8);
1387asm("\t.data\nk8nops: "
1388 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1389 K8_NOP7 K8_NOP8);
1390asm("\t.data\nk7nops: "
1391 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1392 K7_NOP7 K7_NOP8);
1393
1394extern unsigned char intelnops[], k8nops[], k7nops[];
1395static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1396 NULL,
1397 intelnops,
1398 intelnops + 1,
1399 intelnops + 1 + 2,
1400 intelnops + 1 + 2 + 3,
1401 intelnops + 1 + 2 + 3 + 4,
1402 intelnops + 1 + 2 + 3 + 4 + 5,
1403 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1404 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1405};
1406static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1407 NULL,
1408 k8nops,
1409 k8nops + 1,
1410 k8nops + 1 + 2,
1411 k8nops + 1 + 2 + 3,
1412 k8nops + 1 + 2 + 3 + 4,
1413 k8nops + 1 + 2 + 3 + 4 + 5,
1414 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1415 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1416};
1417static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1418 NULL,
1419 k7nops,
1420 k7nops + 1,
1421 k7nops + 1 + 2,
1422 k7nops + 1 + 2 + 3,
1423 k7nops + 1 + 2 + 3 + 4,
1424 k7nops + 1 + 2 + 3 + 4 + 5,
1425 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1426 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1427};
1428static struct nop {
1429 int cpuid;
1430 unsigned char **noptable;
1431} noptypes[] = {
1432 { X86_FEATURE_K8, k8_nops },
1433 { X86_FEATURE_K7, k7_nops },
1434 { -1, NULL }
1435};
1436
1437/* Replace instructions with better alternatives for this CPU type.
1438
1439 This runs before SMP is initialized to avoid SMP problems with
1440 self modifying code. This implies that assymetric systems where
1441 APs have less capabilities than the boot processor are not handled.
Linus Torvalds72538d82005-07-22 18:29:40 -04001442 Tough. Make sure you disable such features by hand. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443void apply_alternatives(void *start, void *end)
1444{
1445 struct alt_instr *a;
1446 int diff, i, k;
1447 unsigned char **noptable = intel_nops;
1448 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1449 if (boot_cpu_has(noptypes[i].cpuid)) {
1450 noptable = noptypes[i].noptable;
1451 break;
1452 }
1453 }
1454 for (a = start; (void *)a < end; a++) {
1455 if (!boot_cpu_has(a->cpuid))
1456 continue;
1457 BUG_ON(a->replacementlen > a->instrlen);
1458 memcpy(a->instr, a->replacement, a->replacementlen);
1459 diff = a->instrlen - a->replacementlen;
1460 /* Pad the rest with nops */
1461 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1462 k = diff;
1463 if (k > ASM_NOP_MAX)
1464 k = ASM_NOP_MAX;
1465 memcpy(a->instr + i, noptable[k], k);
1466 }
1467 }
1468}
1469
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470void __init alternative_instructions(void)
1471{
1472 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 apply_alternatives(__alt_instructions, __alt_instructions_end);
1474}
1475
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476static char * __init machine_specific_memory_setup(void);
1477
/* Record the MCA bus flag; does nothing when CONFIG_MCA is not set. */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
1486
1487/*
1488 * Determine if we were loaded by an EFI loader. If so, then we have also been
1489 * passed the efi memmap, systab, etc., so we should use these data structures
1490 * for initialization. Note, the efi init code path is determined by the
1491 * global efi_enabled. This allows the same kernel image to be used on existing
1492 * systems (with a traditional BIOS) as well as on EFI systems.
1493 */
1494void __init setup_arch(char **cmdline_p)
1495{
1496 unsigned long max_low_pfn;
1497
1498 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1499 pre_setup_arch_hook();
1500 early_cpu_init();
1501
1502 /*
1503 * FIXME: This isn't an official loader_type right
1504 * now but does currently work with elilo.
1505 * If we were configured as an EFI kernel, check to make
1506 * sure that we were loaded correctly from elilo and that
1507 * the system table is valid. If not, then initialize normally.
1508 */
1509#ifdef CONFIG_EFI
1510 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1511 efi_enabled = 1;
1512#endif
1513
1514 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1515 drive_info = DRIVE_INFO;
1516 screen_info = SCREEN_INFO;
1517 edid_info = EDID_INFO;
1518 apm_info.bios = APM_BIOS_INFO;
1519 ist_info = IST_INFO;
1520 saved_videomode = VIDEO_MODE;
1521 if( SYS_DESC_TABLE.length != 0 ) {
1522 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1523 machine_id = SYS_DESC_TABLE.table[0];
1524 machine_submodel_id = SYS_DESC_TABLE.table[1];
1525 BIOS_revision = SYS_DESC_TABLE.table[2];
1526 }
1527 bootloader_type = LOADER_TYPE;
1528
1529#ifdef CONFIG_BLK_DEV_RAM
1530 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1531 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1532 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1533#endif
1534 ARCH_SETUP
1535 if (efi_enabled)
1536 efi_init();
1537 else {
1538 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1539 print_memory_map(machine_specific_memory_setup());
1540 }
1541
1542 copy_edd();
1543
1544 if (!MOUNT_ROOT_RDONLY)
1545 root_mountflags &= ~MS_RDONLY;
1546 init_mm.start_code = (unsigned long) _text;
1547 init_mm.end_code = (unsigned long) _etext;
1548 init_mm.end_data = (unsigned long) _edata;
1549 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1550
1551 code_resource.start = virt_to_phys(_text);
1552 code_resource.end = virt_to_phys(_etext)-1;
1553 data_resource.start = virt_to_phys(_etext);
1554 data_resource.end = virt_to_phys(_edata)-1;
1555
1556 parse_cmdline_early(cmdline_p);
1557
1558 max_low_pfn = setup_memory();
1559
1560 /*
1561 * NOTE: before this point _nobody_ is allowed to allocate
1562 * any memory using the bootmem allocator. Although the
1563 * alloctor is now initialised only the first 8Mb of the kernel
1564 * virtual address space has been mapped. All allocations before
1565 * paging_init() has completed must use the alloc_bootmem_low_pages()
1566 * variant (which allocates DMA'able memory) and care must be taken
1567 * not to exceed the 8Mb limit.
1568 */
1569
1570#ifdef CONFIG_SMP
1571 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1572#endif
1573 paging_init();
1574 remapped_pgdat_init();
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001575 sparse_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 zone_sizes_init();
1577
1578 /*
1579 * NOTE: at this point the bootmem allocator is fully available.
1580 */
1581
1582#ifdef CONFIG_EARLY_PRINTK
1583 {
1584 char *s = strstr(*cmdline_p, "earlyprintk=");
1585 if (s) {
1586 extern void setup_early_printk(char *);
1587
Jan Beulich2a2d5922006-01-11 22:47:03 +01001588 setup_early_printk(strchr(s, '=') + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 printk("early console enabled\n");
1590 }
1591 }
1592#endif
1593
1594
1595 dmi_scan_machine();
1596
1597#ifdef CONFIG_X86_GENERICARCH
1598 generic_apic_probe(*cmdline_p);
1599#endif
1600 if (efi_enabled)
1601 efi_map_memmap();
1602
Andi Kleenf9262c12006-03-08 17:57:25 -08001603#ifdef CONFIG_X86_IO_APIC
1604 check_acpi_pci(); /* Checks more than just ACPI actually */
1605#endif
1606
Len Brown888ba6c2005-08-24 12:07:20 -04001607#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 /*
1609 * Parse the ACPI tables for possible boot-time SMP configuration.
1610 */
1611 acpi_boot_table_init();
1612 acpi_boot_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613
Venkatesh Pallipadi911a62d2005-09-03 15:56:31 -07001614#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1615 if (def_to_bigsmp)
1616 printk(KERN_WARNING "More than 8 CPUs detected and "
1617 "CONFIG_X86_PC cannot handle it.\nUse "
1618 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1619#endif
1620#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621#ifdef CONFIG_X86_LOCAL_APIC
1622 if (smp_found_config)
1623 get_smp_config();
1624#endif
1625
1626 register_memory();
1627
1628#ifdef CONFIG_VT
1629#if defined(CONFIG_VGA_CONSOLE)
1630 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1631 conswitchp = &vga_con;
1632#elif defined(CONFIG_DUMMY_CONSOLE)
1633 conswitchp = &dummy_con;
1634#endif
1635#endif
1636}
1637
Michael Neulinge5c6c8e2006-03-14 00:11:50 -05001638static __init int add_pcspkr(void)
1639{
1640 struct platform_device *pd;
1641 int ret;
1642
1643 pd = platform_device_alloc("pcspkr", -1);
1644 if (!pd)
1645 return -ENOMEM;
1646
1647 ret = platform_device_add(pd);
1648 if (ret)
1649 platform_device_put(pd);
1650
1651 return ret;
1652}
1653device_initcall(add_pcspkr);
1654
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655#include "setup_arch_post.h"
1656/*
1657 * Local Variables:
1658 * mode:c
1659 * c-file-style:"k&r"
1660 * c-basic-offset:8
1661 * End:
1662 */