blob: d88ebdfa6ccd980c233cd206c7440b21af0ea476 [file] [log] [blame]
/*
 *  linux/arch/i386/kernel/setup.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *
 *  Memory region support
 *	David Parsons <orc@pell.chi.il.us>, July-August 1999
 *
 *  Added E820 sanitization routine (removes overlapping memory regions);
 *  Brian Moyle <bmoyle@mvista.com>, February 2001
 *
 *  Moved CPU detection code to cpu/${cpu}.c
 *	Patrick Mochel <mochel@osdl.org>, March 2002
 *
 *  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *  Alex Achenbach <xela@slit.de>, December 2002.
 *
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */
25
Alexey Dobriyan129f6942005-06-23 00:08:33 -070026#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <linux/sched.h>
28#include <linux/mm.h>
Andy Whitcroft05b79bd2005-06-23 00:07:57 -070029#include <linux/mmzone.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/tty.h>
31#include <linux/ioport.h>
32#include <linux/acpi.h>
33#include <linux/apm_bios.h>
34#include <linux/initrd.h>
35#include <linux/bootmem.h>
36#include <linux/seq_file.h>
37#include <linux/console.h>
38#include <linux/mca.h>
39#include <linux/root_dev.h>
40#include <linux/highmem.h>
41#include <linux/module.h>
42#include <linux/efi.h>
43#include <linux/init.h>
44#include <linux/edd.h>
45#include <linux/nodemask.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070046#include <linux/kexec.h>
47
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <video/edid.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070049
Eric W. Biederman9635b472005-06-25 14:57:41 -070050#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <asm/e820.h>
52#include <asm/mpspec.h>
53#include <asm/setup.h>
54#include <asm/arch_hooks.h>
55#include <asm/sections.h>
56#include <asm/io_apic.h>
57#include <asm/ist.h>
58#include <asm/io.h>
59#include "setup_arch_pre.h"
60#include <bios_ebda.h>
61
/* This value is set up by the early boot code to point to the value
   immediately after the boot time page tables.  It contains a *physical*
   address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;

/* Set to 1 by parse_cmdline_early() when "mem=nopentium" is given. */
int disable_pse __devinitdata = 0;

/*
 * Machine setup..
 */

#ifdef CONFIG_EFI
int efi_enabled = 0;
EXPORT_SYMBOL(efi_enabled);
#endif

/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);

unsigned long mmu_cr4_features;

/* ACPI starts out enabled only when the interpreter is built in. */
#ifdef CONFIG_ACPI_INTERPRETER
	int acpi_disabled = 0;
#else
	int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

#ifdef CONFIG_ACPI_BOOT
/* Set to 1 by "acpi=force" in parse_cmdline_early(). */
int __initdata acpi_force = 0;
extern acpi_interrupt_flags	acpi_sci_flags;
#endif

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
#ifdef CONFIG_MCA
EXPORT_SYMBOL(machine_id);
#endif
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

/*
 * user-defined highmem size, in pages.  The initial -1 means "highmem="
 * was not given; parse_cmdline_early() stores the requested page count
 * and find_max_low_pfn() replaces -1 with a computed value.
 */
static unsigned int highmem_pages = -1;

/*
 * Setup options
 */
struct drive_info_struct { char dummy[32]; } drive_info;
#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
    defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
EXPORT_SYMBOL(drive_info);
#endif
struct screen_info screen_info;
#ifdef CONFIG_VT
EXPORT_SYMBOL(screen_info);
#endif
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);
/* A 16-bit length followed by a variable amount of table data. */
struct sys_desc_table_struct {
	unsigned short length;
	unsigned char table[0];
};
struct edid_info edid_info;
struct ist_info ist_info;
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
	defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
EXPORT_SYMBOL(ist_info);
#endif
/* The memory map edited by add_memory_region() and limit_regions(). */
struct e820map e820;

extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
extern void generic_apic_probe(char *);
extern int root_mountflags;

unsigned long saved_videomode;

#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000

/*
 * Filtered copy of saved_command_line built by parse_cmdline_early();
 * the mem= and memmap= options are left out of it.
 */
static char command_line[COMMAND_LINE_SIZE];

unsigned char __initdata boot_params[PARAM_SIZE];
159
/*
 * Busy memory resource for the kernel data.  start/end are zero here;
 * nothing in this part of the file fills them in.
 */
static struct resource data_resource = {
	.name	= "Kernel data",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Busy memory resource for the kernel code; start/end are zero here too. */
static struct resource code_resource = {
	.name	= "Kernel code",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Fixed 64k range 0xf0000-0xfffff; requested unconditionally by probe_roms(). */
static struct resource system_rom_resource = {
	.name	= "System ROM",
	.start	= 0xf0000,
	.end	= 0xfffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/*
 * Fixed 64k range 0xe0000-0xeffff; probe_roms() requests it only when a
 * ROM signature is present and the whole range checksums correctly.
 */
static struct resource extension_rom_resource = {
	.name	= "Extension ROM",
	.start	= 0xe0000,
	.end	= 0xeffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
187
/*
 * Slots for adapter ROMs found by probe_roms(), which fills in start/end
 * for each ROM it accepts.  The initial .start of the first slot (0xc8000)
 * doubles as the upper bound of the video ROM scan in probe_roms().
 */
static struct resource adapter_rom_resources[] = { {
	.name 	= "Adapter ROM",
	.start	= 0xc8000,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name 	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name 	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name 	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name 	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name 	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

/* Number of slots in adapter_rom_resources[]. */
#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
222
/*
 * Default video ROM window.  probe_roms() moves .start to where it finds
 * a ROM signature and, if the ROM checksums correctly, sets .end from the
 * ROM's own length byte.
 */
static struct resource video_rom_resource = {
	.name 	= "Video ROM",
	.start	= 0xc0000,
	.end	= 0xc7fff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/* Fixed range 0xa0000-0xbffff; not marked read-only, unlike the ROM ranges. */
static struct resource video_ram_resource = {
	.name	= "Video RAM area",
	.start	= 0xa0000,
	.end	= 0xbffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};
236
/*
 * Table of fixed I/O port ranges, each flagged busy and named after the
 * device it belongs to.  Nothing in this part of the file consumes the
 * table.
 */
static struct resource standard_io_resources[] = { {
	.name	= "dma1",
	.start	= 0x0000,
	.end	= 0x001f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic1",
	.start	= 0x0020,
	.end	= 0x0021,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name   = "timer0",
	.start	= 0x0040,
	.end    = 0x0043,
	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name   = "timer1",
	.start  = 0x0050,
	.end    = 0x0053,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "keyboard",
	.start	= 0x0060,
	.end	= 0x006f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma page reg",
	.start	= 0x0080,
	.end	= 0x008f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic2",
	.start	= 0x00a0,
	.end	= 0x00a1,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma2",
	.start	= 0x00c0,
	.end	= 0x00df,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "fpu",
	.start	= 0x00f0,
	.end	= 0x00ff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
} };

/* Number of entries in standard_io_resources[]. */
#define STANDARD_IO_RESOURCES \
	(sizeof standard_io_resources / sizeof standard_io_resources[0])
286
/* True when the 16-bit word at x holds the ROM signature 0xaa55. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)

/*
 * Add up the 'length' bytes starting at 'rom' in an 8-bit accumulator.
 * Returns 1 when the (wrapped) sum is zero, 0 otherwise.  A zero length
 * therefore yields 1.
 */
static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char total = 0;
	unsigned long off;

	for (off = 0; off < length; off++)
		total += rom[off];

	return !total;
}
297
298static void __init probe_roms(void)
299{
300 unsigned long start, length, upper;
301 unsigned char *rom;
302 int i;
303
304 /* video rom */
305 upper = adapter_rom_resources[0].start;
306 for (start = video_rom_resource.start; start < upper; start += 2048) {
307 rom = isa_bus_to_virt(start);
308 if (!romsignature(rom))
309 continue;
310
311 video_rom_resource.start = start;
312
313 /* 0 < length <= 0x7f * 512, historically */
314 length = rom[2] * 512;
315
316 /* if checksum okay, trust length byte */
317 if (length && romchecksum(rom, length))
318 video_rom_resource.end = start + length - 1;
319
320 request_resource(&iomem_resource, &video_rom_resource);
321 break;
322 }
323
324 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
325 if (start < upper)
326 start = upper;
327
328 /* system rom */
329 request_resource(&iomem_resource, &system_rom_resource);
330 upper = system_rom_resource.start;
331
332 /* check for extension rom (ignore length byte!) */
333 rom = isa_bus_to_virt(extension_rom_resource.start);
334 if (romsignature(rom)) {
335 length = extension_rom_resource.end - extension_rom_resource.start + 1;
336 if (romchecksum(rom, length)) {
337 request_resource(&iomem_resource, &extension_rom_resource);
338 upper = extension_rom_resource.start;
339 }
340 }
341
342 /* check for adapter roms on 2k boundaries */
343 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
344 rom = isa_bus_to_virt(start);
345 if (!romsignature(rom))
346 continue;
347
348 /* 0 < length <= 0x7f * 512, historically */
349 length = rom[2] * 512;
350
351 /* but accept any length that fits if checksum okay */
352 if (!length || start + length > upper || !romchecksum(rom, length))
353 continue;
354
355 adapter_rom_resources[i].start = start;
356 adapter_rom_resources[i].end = start + length - 1;
357 request_resource(&iomem_resource, &adapter_rom_resources[i]);
358
359 start = adapter_rom_resources[i++].end & ~2047UL;
360 }
361}
362
363static void __init limit_regions(unsigned long long size)
364{
365 unsigned long long current_addr = 0;
366 int i;
367
368 if (efi_enabled) {
369 for (i = 0; i < memmap.nr_map; i++) {
370 current_addr = memmap.map[i].phys_addr +
371 (memmap.map[i].num_pages << 12);
372 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
373 if (current_addr >= size) {
374 memmap.map[i].num_pages -=
375 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
376 memmap.nr_map = i + 1;
377 return;
378 }
379 }
380 }
381 }
382 for (i = 0; i < e820.nr_map; i++) {
383 if (e820.map[i].type == E820_RAM) {
384 current_addr = e820.map[i].addr + e820.map[i].size;
385 if (current_addr >= size) {
386 e820.map[i].size -= current_addr-size;
387 e820.nr_map = i + 1;
388 return;
389 }
390 }
391 }
392}
393
394static void __init add_memory_region(unsigned long long start,
395 unsigned long long size, int type)
396{
397 int x;
398
399 if (!efi_enabled) {
400 x = e820.nr_map;
401
402 if (x == E820MAX) {
403 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
404 return;
405 }
406
407 e820.map[x].addr = start;
408 e820.map[x].size = size;
409 e820.map[x].type = type;
410 e820.nr_map++;
411 }
412} /* add_memory_region */
413
414#define E820_DEBUG 1
415
416static void __init print_memory_map(char *who)
417{
418 int i;
419
420 for (i = 0; i < e820.nr_map; i++) {
421 printk(" %s: %016Lx - %016Lx ", who,
422 e820.map[i].addr,
423 e820.map[i].addr + e820.map[i].size);
424 switch (e820.map[i].type) {
425 case E820_RAM: printk("(usable)\n");
426 break;
427 case E820_RESERVED:
428 printk("(reserved)\n");
429 break;
430 case E820_ACPI:
431 printk("(ACPI data)\n");
432 break;
433 case E820_NVS:
434 printk("(ACPI NVS)\n");
435 break;
436 default: printk("type %lu\n", e820.map[i].type);
437 break;
438 }
439 }
440}
441
442/*
443 * Sanitize the BIOS e820 map.
444 *
445 * Some e820 responses include overlapping entries. The following
446 * replaces the original e820 map with a new one, removing overlaps.
447 *
448 */
449struct change_member {
450 struct e820entry *pbios; /* pointer to original bios entry */
451 unsigned long long addr; /* address for this change point */
452};
453static struct change_member change_point_list[2*E820MAX] __initdata;
454static struct change_member *change_point[2*E820MAX] __initdata;
455static struct e820entry *overlap_list[E820MAX] __initdata;
456static struct e820entry new_bios[E820MAX] __initdata;
457
458static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
459{
460 struct change_member *change_tmp;
461 unsigned long current_type, last_type;
462 unsigned long long last_addr;
463 int chgidx, still_changing;
464 int overlap_entries;
465 int new_bios_entry;
466 int old_nr, new_nr, chg_nr;
467 int i;
468
469 /*
470 Visually we're performing the following (1,2,3,4 = memory types)...
471
472 Sample memory map (w/overlaps):
473 ____22__________________
474 ______________________4_
475 ____1111________________
476 _44_____________________
477 11111111________________
478 ____________________33__
479 ___________44___________
480 __________33333_________
481 ______________22________
482 ___________________2222_
483 _________111111111______
484 _____________________11_
485 _________________4______
486
487 Sanitized equivalent (no overlap):
488 1_______________________
489 _44_____________________
490 ___1____________________
491 ____22__________________
492 ______11________________
493 _________1______________
494 __________3_____________
495 ___________44___________
496 _____________33_________
497 _______________2________
498 ________________1_______
499 _________________4______
500 ___________________2____
501 ____________________33__
502 ______________________4_
503 */
504
505 /* if there's only one memory region, don't bother */
506 if (*pnr_map < 2)
507 return -1;
508
509 old_nr = *pnr_map;
510
511 /* bail out if we find any unreasonable addresses in bios map */
512 for (i=0; i<old_nr; i++)
513 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
514 return -1;
515
516 /* create pointers for initial change-point information (for sorting) */
517 for (i=0; i < 2*old_nr; i++)
518 change_point[i] = &change_point_list[i];
519
520 /* record all known change-points (starting and ending addresses),
521 omitting those that are for empty memory regions */
522 chgidx = 0;
523 for (i=0; i < old_nr; i++) {
524 if (biosmap[i].size != 0) {
525 change_point[chgidx]->addr = biosmap[i].addr;
526 change_point[chgidx++]->pbios = &biosmap[i];
527 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
528 change_point[chgidx++]->pbios = &biosmap[i];
529 }
530 }
531 chg_nr = chgidx; /* true number of change-points */
532
533 /* sort change-point list by memory addresses (low -> high) */
534 still_changing = 1;
535 while (still_changing) {
536 still_changing = 0;
537 for (i=1; i < chg_nr; i++) {
538 /* if <current_addr> > <last_addr>, swap */
539 /* or, if current=<start_addr> & last=<end_addr>, swap */
540 if ((change_point[i]->addr < change_point[i-1]->addr) ||
541 ((change_point[i]->addr == change_point[i-1]->addr) &&
542 (change_point[i]->addr == change_point[i]->pbios->addr) &&
543 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
544 )
545 {
546 change_tmp = change_point[i];
547 change_point[i] = change_point[i-1];
548 change_point[i-1] = change_tmp;
549 still_changing=1;
550 }
551 }
552 }
553
554 /* create a new bios memory map, removing overlaps */
555 overlap_entries=0; /* number of entries in the overlap table */
556 new_bios_entry=0; /* index for creating new bios map entries */
557 last_type = 0; /* start with undefined memory type */
558 last_addr = 0; /* start with 0 as last starting address */
559 /* loop through change-points, determining affect on the new bios map */
560 for (chgidx=0; chgidx < chg_nr; chgidx++)
561 {
562 /* keep track of all overlapping bios entries */
563 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
564 {
565 /* add map entry to overlap list (> 1 entry implies an overlap) */
566 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
567 }
568 else
569 {
570 /* remove entry from list (order independent, so swap with last) */
571 for (i=0; i<overlap_entries; i++)
572 {
573 if (overlap_list[i] == change_point[chgidx]->pbios)
574 overlap_list[i] = overlap_list[overlap_entries-1];
575 }
576 overlap_entries--;
577 }
578 /* if there are overlapping entries, decide which "type" to use */
579 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
580 current_type = 0;
581 for (i=0; i<overlap_entries; i++)
582 if (overlap_list[i]->type > current_type)
583 current_type = overlap_list[i]->type;
584 /* continue building up new bios map based on this information */
585 if (current_type != last_type) {
586 if (last_type != 0) {
587 new_bios[new_bios_entry].size =
588 change_point[chgidx]->addr - last_addr;
589 /* move forward only if the new size was non-zero */
590 if (new_bios[new_bios_entry].size != 0)
591 if (++new_bios_entry >= E820MAX)
592 break; /* no more space left for new bios entries */
593 }
594 if (current_type != 0) {
595 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
596 new_bios[new_bios_entry].type = current_type;
597 last_addr=change_point[chgidx]->addr;
598 }
599 last_type = current_type;
600 }
601 }
602 new_nr = new_bios_entry; /* retain count for new bios entries */
603
604 /* copy new bios mapping into original location */
605 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
606 *pnr_map = new_nr;
607
608 return 0;
609}
610
611/*
612 * Copy the BIOS e820 map into a safe place.
613 *
614 * Sanity-check it while we're at it..
615 *
616 * If we're lucky and live on a modern system, the setup code
617 * will have given us a memory map that we can use to properly
618 * set up memory. If we aren't, we'll fake a memory map.
619 *
620 * We check to see that the memory map contains at least 2 elements
621 * before we'll use it, because the detection code in setup.S may
622 * not be perfect and most every PC known to man has two memory
623 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
624 * thinkpad 560x, for example, does not cooperate with the memory
625 * detection code.)
626 */
627static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
628{
629 /* Only one memory region (or negative)? Ignore it */
630 if (nr_map < 2)
631 return -1;
632
633 do {
634 unsigned long long start = biosmap->addr;
635 unsigned long long size = biosmap->size;
636 unsigned long long end = start + size;
637 unsigned long type = biosmap->type;
638
639 /* Overflow in 64 bits? Ignore the memory map. */
640 if (start > end)
641 return -1;
642
643 /*
644 * Some BIOSes claim RAM in the 640k - 1M region.
645 * Not right. Fix it up.
646 */
647 if (type == E820_RAM) {
648 if (start < 0x100000ULL && end > 0xA0000ULL) {
649 if (start < 0xA0000ULL)
650 add_memory_region(start, 0xA0000ULL-start, type);
651 if (end <= 0x100000ULL)
652 continue;
653 start = 0x100000ULL;
654 size = end - start;
655 }
656 }
657 add_memory_region(start, size, type);
658 } while (biosmap++,--nr_map);
659 return 0;
660}
661
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Fills the two entry counts and the two tables of the global 'edd'.
 */
static inline void copy_edd(void)
{
     edd.edd_info_nr = EDD_NR;
     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
}
#else
/* No EDD support configured: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
684
685/*
686 * Do NOT EVER look at the BIOS memory size location.
687 * It does not work on many machines.
688 */
689#define LOWMEMSIZE() (0x9f000)
690
691static void __init parse_cmdline_early (char ** cmdline_p)
692{
693 char c = ' ', *to = command_line, *from = saved_command_line;
694 int len = 0;
695 int userdef = 0;
696
697 /* Save unparsed command line copy for /proc/cmdline */
698 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
699
700 for (;;) {
701 if (c != ' ')
702 goto next_char;
703 /*
704 * "mem=nopentium" disables the 4MB page tables.
705 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
706 * to <mem>, overriding the bios size.
707 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
708 * <start> to <start>+<mem>, overriding the bios size.
709 *
710 * HPA tells me bootloaders need to parse mem=, so no new
711 * option should be mem= [also see Documentation/i386/boot.txt]
712 */
713 if (!memcmp(from, "mem=", 4)) {
714 if (to != command_line)
715 to--;
716 if (!memcmp(from+4, "nopentium", 9)) {
717 from += 9+4;
718 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
719 disable_pse = 1;
720 } else {
721 /* If the user specifies memory size, we
722 * limit the BIOS-provided memory map to
723 * that size. exactmap can be used to specify
724 * the exact map. mem=number can be used to
725 * trim the existing memory map.
726 */
727 unsigned long long mem_size;
728
729 mem_size = memparse(from+4, &from);
730 limit_regions(mem_size);
731 userdef=1;
732 }
733 }
734
735 else if (!memcmp(from, "memmap=", 7)) {
736 if (to != command_line)
737 to--;
738 if (!memcmp(from+7, "exactmap", 8)) {
739 from += 8+7;
740 e820.nr_map = 0;
741 userdef = 1;
742 } else {
743 /* If the user specifies memory size, we
744 * limit the BIOS-provided memory map to
745 * that size. exactmap can be used to specify
746 * the exact map. mem=number can be used to
747 * trim the existing memory map.
748 */
749 unsigned long long start_at, mem_size;
750
751 mem_size = memparse(from+7, &from);
752 if (*from == '@') {
753 start_at = memparse(from+1, &from);
754 add_memory_region(start_at, mem_size, E820_RAM);
755 } else if (*from == '#') {
756 start_at = memparse(from+1, &from);
757 add_memory_region(start_at, mem_size, E820_ACPI);
758 } else if (*from == '$') {
759 start_at = memparse(from+1, &from);
760 add_memory_region(start_at, mem_size, E820_RESERVED);
761 } else {
762 limit_regions(mem_size);
763 userdef=1;
764 }
765 }
766 }
767
768 else if (!memcmp(from, "noexec=", 7))
769 noexec_setup(from + 7);
770
771
772#ifdef CONFIG_X86_SMP
773 /*
774 * If the BIOS enumerates physical processors before logical,
775 * maxcpus=N at enumeration-time can be used to disable HT.
776 */
777 else if (!memcmp(from, "maxcpus=", 8)) {
778 extern unsigned int maxcpus;
779
780 maxcpus = simple_strtoul(from + 8, NULL, 0);
781 }
782#endif
783
784#ifdef CONFIG_ACPI_BOOT
785 /* "acpi=off" disables both ACPI table parsing and interpreter */
786 else if (!memcmp(from, "acpi=off", 8)) {
787 disable_acpi();
788 }
789
790 /* acpi=force to over-ride black-list */
791 else if (!memcmp(from, "acpi=force", 10)) {
792 acpi_force = 1;
793 acpi_ht = 1;
794 acpi_disabled = 0;
795 }
796
797 /* acpi=strict disables out-of-spec workarounds */
798 else if (!memcmp(from, "acpi=strict", 11)) {
799 acpi_strict = 1;
800 }
801
802 /* Limit ACPI just to boot-time to enable HT */
803 else if (!memcmp(from, "acpi=ht", 7)) {
804 if (!acpi_force)
805 disable_acpi();
806 acpi_ht = 1;
807 }
808
809 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
810 else if (!memcmp(from, "pci=noacpi", 10)) {
811 acpi_disable_pci();
812 }
813 /* "acpi=noirq" disables ACPI interrupt routing */
814 else if (!memcmp(from, "acpi=noirq", 10)) {
815 acpi_noirq_set();
816 }
817
818 else if (!memcmp(from, "acpi_sci=edge", 13))
819 acpi_sci_flags.trigger = 1;
820
821 else if (!memcmp(from, "acpi_sci=level", 14))
822 acpi_sci_flags.trigger = 3;
823
824 else if (!memcmp(from, "acpi_sci=high", 13))
825 acpi_sci_flags.polarity = 1;
826
827 else if (!memcmp(from, "acpi_sci=low", 12))
828 acpi_sci_flags.polarity = 3;
829
830#ifdef CONFIG_X86_IO_APIC
831 else if (!memcmp(from, "acpi_skip_timer_override", 24))
832 acpi_skip_timer_override = 1;
833#endif
834
835#ifdef CONFIG_X86_LOCAL_APIC
836 /* disable IO-APIC */
837 else if (!memcmp(from, "noapic", 6))
838 disable_ioapic_setup();
839#endif /* CONFIG_X86_LOCAL_APIC */
840#endif /* CONFIG_ACPI_BOOT */
841
Eric W. Biederman9635b472005-06-25 14:57:41 -0700842#ifdef CONFIG_X86_LOCAL_APIC
843 /* enable local APIC */
844 else if (!memcmp(from, "lapic", 5))
845 lapic_enable();
846
847 /* disable local APIC */
848 else if (!memcmp(from, "nolapic", 6))
849 lapic_disable();
850#endif /* CONFIG_X86_LOCAL_APIC */
851
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700852#ifdef CONFIG_KEXEC
853 /* crashkernel=size@addr specifies the location to reserve for
854 * a crash kernel. By reserving this memory we guarantee
855 * that linux never set's it up as a DMA target.
856 * Useful for holding code to do something appropriate
857 * after a kernel panic.
858 */
859 else if (!memcmp(from, "crashkernel=", 12)) {
860 unsigned long size, base;
861 size = memparse(from+12, &from);
862 if (*from == '@') {
863 base = memparse(from+1, &from);
864 /* FIXME: Do I want a sanity check
865 * to validate the memory range?
866 */
867 crashk_res.start = base;
868 crashk_res.end = base + size - 1;
869 }
870 }
871#endif
872
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 /*
874 * highmem=size forces highmem to be exactly 'size' bytes.
875 * This works even on boxes that have no highmem otherwise.
876 * This also works to reduce highmem size on bigger boxes.
877 */
878 else if (!memcmp(from, "highmem=", 8))
879 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
880
881 /*
882 * vmalloc=size forces the vmalloc area to be exactly 'size'
883 * bytes. This can be used to increase (or decrease) the
884 * vmalloc area - the default is 128m.
885 */
886 else if (!memcmp(from, "vmalloc=", 8))
887 __VMALLOC_RESERVE = memparse(from+8, &from);
888
889 next_char:
890 c = *(from++);
891 if (!c)
892 break;
893 if (COMMAND_LINE_SIZE <= ++len)
894 break;
895 *(to++) = c;
896 }
897 *to = '\0';
898 *cmdline_p = command_line;
899 if (userdef) {
900 printk(KERN_INFO "user-defined physical RAM map:\n");
901 print_memory_map("user");
902 }
903}
904
905/*
906 * Callback for efi_memory_walk.
907 */
908static int __init
909efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
910{
911 unsigned long *max_pfn = arg, pfn;
912
913 if (start < end) {
914 pfn = PFN_UP(end -1);
915 if (pfn > *max_pfn)
916 *max_pfn = pfn;
917 }
918 return 0;
919}
920
921
922/*
923 * Find the highest page frame number we have available
924 */
925void __init find_max_pfn(void)
926{
927 int i;
928
929 max_pfn = 0;
930 if (efi_enabled) {
931 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
932 return;
933 }
934
935 for (i = 0; i < e820.nr_map; i++) {
936 unsigned long start, end;
937 /* RAM? */
938 if (e820.map[i].type != E820_RAM)
939 continue;
940 start = PFN_UP(e820.map[i].addr);
941 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
942 if (start >= end)
943 continue;
944 if (end > max_pfn)
945 max_pfn = end;
946 }
947}
948
949/*
950 * Determine low and high memory ranges:
951 */
952unsigned long __init find_max_low_pfn(void)
953{
954 unsigned long max_low_pfn;
955
956 max_low_pfn = max_pfn;
957 if (max_low_pfn > MAXMEM_PFN) {
958 if (highmem_pages == -1)
959 highmem_pages = max_pfn - MAXMEM_PFN;
960 if (highmem_pages + MAXMEM_PFN < max_pfn)
961 max_pfn = MAXMEM_PFN + highmem_pages;
962 if (highmem_pages + MAXMEM_PFN > max_pfn) {
963 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
964 highmem_pages = 0;
965 }
966 max_low_pfn = MAXMEM_PFN;
967#ifndef CONFIG_HIGHMEM
968 /* Maximum memory usable is what is directly addressable */
969 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
970 MAXMEM>>20);
971 if (max_pfn > MAX_NONPAE_PFN)
972 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
973 else
974 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
975 max_pfn = MAXMEM_PFN;
976#else /* !CONFIG_HIGHMEM */
977#ifndef CONFIG_X86_PAE
978 if (max_pfn > MAX_NONPAE_PFN) {
979 max_pfn = MAX_NONPAE_PFN;
980 printk(KERN_WARNING "Warning only 4GB will be used.\n");
981 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
982 }
983#endif /* !CONFIG_X86_PAE */
984#endif /* !CONFIG_HIGHMEM */
985 } else {
986 if (highmem_pages == -1)
987 highmem_pages = 0;
988#ifdef CONFIG_HIGHMEM
989 if (highmem_pages >= max_pfn) {
990 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
991 highmem_pages = 0;
992 }
993 if (highmem_pages) {
994 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
995 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
996 highmem_pages = 0;
997 }
998 max_low_pfn -= highmem_pages;
999 }
1000#else
1001 if (highmem_pages)
1002 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1003#endif
1004 }
1005 return max_low_pfn;
1006}
1007
1008/*
1009 * Free all available memory for boot time allocation. Used
1010 * as a callback function by efi_memory_walk()
1011 */
1012
1013static int __init
1014free_available_memory(unsigned long start, unsigned long end, void *arg)
1015{
1016 /* check max_low_pfn */
1017 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1018 return 0;
1019 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1020 end = (max_low_pfn + 1) << PAGE_SHIFT;
1021 if (start < end)
1022 free_bootmem(start, end - start);
1023
1024 return 0;
1025}
1026/*
1027 * Register fully available low RAM pages with the bootmem allocator.
1028 */
1029static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1030{
1031 int i;
1032
1033 if (efi_enabled) {
1034 efi_memmap_walk(free_available_memory, NULL);
1035 return;
1036 }
1037 for (i = 0; i < e820.nr_map; i++) {
1038 unsigned long curr_pfn, last_pfn, size;
1039 /*
1040 * Reserve usable low memory
1041 */
1042 if (e820.map[i].type != E820_RAM)
1043 continue;
1044 /*
1045 * We are rounding up the start address of usable memory:
1046 */
1047 curr_pfn = PFN_UP(e820.map[i].addr);
1048 if (curr_pfn >= max_low_pfn)
1049 continue;
1050 /*
1051 * ... and at the end of the usable range downwards:
1052 */
1053 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1054
1055 if (last_pfn > max_low_pfn)
1056 last_pfn = max_low_pfn;
1057
1058 /*
1059 * .. finally, did all the rounding and playing
1060 * around just make the area go away?
1061 */
1062 if (last_pfn <= curr_pfn)
1063 continue;
1064
1065 size = last_pfn - curr_pfn;
1066 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1067 }
1068}
1069
1070/*
1071 * workaround for Dell systems that neglect to reserve EBDA
1072 */
1073static void __init reserve_ebda_region(void)
1074{
1075 unsigned int addr;
1076 addr = get_bios_ebda();
1077 if (addr)
1078 reserve_bootmem(addr, PAGE_SIZE);
1079}
1080
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001081#ifndef CONFIG_NEED_MULTIPLE_NODES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082void __init setup_bootmem_allocator(void);
1083static unsigned long __init setup_memory(void)
1084{
1085 /*
1086 * partially used pages are not usable - thus
1087 * we are rounding upwards:
1088 */
1089 min_low_pfn = PFN_UP(init_pg_tables_end);
1090
1091 find_max_pfn();
1092
1093 max_low_pfn = find_max_low_pfn();
1094
1095#ifdef CONFIG_HIGHMEM
1096 highstart_pfn = highend_pfn = max_pfn;
1097 if (max_pfn > max_low_pfn) {
1098 highstart_pfn = max_low_pfn;
1099 }
1100 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1101 pages_to_mb(highend_pfn - highstart_pfn));
1102#endif
1103 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1104 pages_to_mb(max_low_pfn));
1105
1106 setup_bootmem_allocator();
1107
1108 return max_low_pfn;
1109}
1110
1111void __init zone_sizes_init(void)
1112{
1113 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1114 unsigned int max_dma, low;
1115
1116 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1117 low = max_low_pfn;
1118
1119 if (low < max_dma)
1120 zones_size[ZONE_DMA] = low;
1121 else {
1122 zones_size[ZONE_DMA] = max_dma;
1123 zones_size[ZONE_NORMAL] = low - max_dma;
1124#ifdef CONFIG_HIGHMEM
1125 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1126#endif
1127 }
1128 free_area_init(zones_size);
1129}
1130#else
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001131extern unsigned long __init setup_memory(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132extern void zone_sizes_init(void);
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001133#endif /* !CONFIG_NEED_MULTIPLE_NODES */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134
1135void __init setup_bootmem_allocator(void)
1136{
1137 unsigned long bootmap_size;
1138 /*
1139 * Initialize the boot-time allocator (with low memory only):
1140 */
1141 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1142
1143 register_bootmem_low_pages(max_low_pfn);
1144
1145 /*
1146 * Reserve the bootmem bitmap itself as well. We do this in two
1147 * steps (first step was init_bootmem()) because this catches
1148 * the (very unlikely) case of us accidentally initializing the
1149 * bootmem allocator with an invalid RAM area.
1150 */
Vivek Goyal8a919082005-06-25 14:57:51 -07001151 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1152 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153
1154 /*
1155 * reserve physical page 0 - it's a special BIOS page on many boxes,
1156 * enabling clean reboots, SMP operation, laptop functions.
1157 */
1158 reserve_bootmem(0, PAGE_SIZE);
1159
1160 /* reserve EBDA region, it's a 4K region */
1161 reserve_ebda_region();
1162
1163 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1164 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1165 unless you have no PS/2 mouse plugged in. */
1166 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1167 boot_cpu_data.x86 == 6)
1168 reserve_bootmem(0xa0000 - 4096, 4096);
1169
1170#ifdef CONFIG_SMP
1171 /*
1172 * But first pinch a few for the stack/trampoline stuff
1173 * FIXME: Don't need the extra page at 4K, but need to fix
1174 * trampoline before removing it. (see the GDT stuff)
1175 */
1176 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1177#endif
1178#ifdef CONFIG_ACPI_SLEEP
1179 /*
1180 * Reserve low memory region for sleep support.
1181 */
1182 acpi_reserve_bootmem();
1183#endif
1184#ifdef CONFIG_X86_FIND_SMP_CONFIG
1185 /*
1186 * Find and reserve possible boot-time SMP configuration:
1187 */
1188 find_smp_config();
1189#endif
1190
1191#ifdef CONFIG_BLK_DEV_INITRD
1192 if (LOADER_TYPE && INITRD_START) {
1193 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1194 reserve_bootmem(INITRD_START, INITRD_SIZE);
1195 initrd_start =
1196 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1197 initrd_end = initrd_start+INITRD_SIZE;
1198 }
1199 else {
1200 printk(KERN_ERR "initrd extends beyond end of memory "
1201 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1202 INITRD_START + INITRD_SIZE,
1203 max_low_pfn << PAGE_SHIFT);
1204 initrd_start = 0;
1205 }
1206 }
1207#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001208#ifdef CONFIG_KEXEC
1209 if (crashk_res.start != crashk_res.end)
1210 reserve_bootmem(crashk_res.start,
1211 crashk_res.end - crashk_res.start + 1);
1212#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213}
1214
1215/*
1216 * The node 0 pgdat is initialized before all of these because
1217 * it's needed for bootmem. node>0 pgdats have their virtual
1218 * space allocated before the pagetables are in place to access
1219 * them, so they can't be cleared then.
1220 *
1221 * This should all compile down to nothing when NUMA is off.
1222 */
1223void __init remapped_pgdat_init(void)
1224{
1225 int nid;
1226
1227 for_each_online_node(nid) {
1228 if (nid != 0)
1229 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1230 }
1231}
1232
1233/*
1234 * Request address space for all standard RAM and ROM resources
1235 * and also for regions reported as reserved by the e820.
1236 */
1237static void __init
1238legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1239{
1240 int i;
1241
1242 probe_roms();
1243 for (i = 0; i < e820.nr_map; i++) {
1244 struct resource *res;
1245 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1246 continue;
1247 res = alloc_bootmem_low(sizeof(struct resource));
1248 switch (e820.map[i].type) {
1249 case E820_RAM: res->name = "System RAM"; break;
1250 case E820_ACPI: res->name = "ACPI Tables"; break;
1251 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1252 default: res->name = "reserved";
1253 }
1254 res->start = e820.map[i].addr;
1255 res->end = res->start + e820.map[i].size - 1;
1256 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1257 request_resource(&iomem_resource, res);
1258 if (e820.map[i].type == E820_RAM) {
1259 /*
1260 * We don't know which RAM region contains kernel data,
1261 * so we try it repeatedly and let the resource manager
1262 * test it.
1263 */
1264 request_resource(res, code_resource);
1265 request_resource(res, data_resource);
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001266#ifdef CONFIG_KEXEC
1267 request_resource(res, &crashk_res);
1268#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 }
1270 }
1271}
1272
1273/*
1274 * Request address space for all standard resources
1275 */
1276static void __init register_memory(void)
1277{
1278 unsigned long gapstart, gapsize;
1279 unsigned long long last;
1280 int i;
1281
1282 if (efi_enabled)
1283 efi_initialize_iomem_resources(&code_resource, &data_resource);
1284 else
1285 legacy_init_iomem_resources(&code_resource, &data_resource);
1286
1287 /* EFI systems may still have VGA */
1288 request_resource(&iomem_resource, &video_ram_resource);
1289
1290 /* request I/O space for devices used on all i[345]86 PCs */
1291 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1292 request_resource(&ioport_resource, &standard_io_resources[i]);
1293
1294 /*
1295 * Search for the bigest gap in the low 32 bits of the e820
1296 * memory space.
1297 */
1298 last = 0x100000000ull;
1299 gapstart = 0x10000000;
1300 gapsize = 0x400000;
1301 i = e820.nr_map;
1302 while (--i >= 0) {
1303 unsigned long long start = e820.map[i].addr;
1304 unsigned long long end = start + e820.map[i].size;
1305
1306 /*
1307 * Since "last" is at most 4GB, we know we'll
1308 * fit in 32 bits if this condition is true
1309 */
1310 if (last > end) {
1311 unsigned long gap = last - end;
1312
1313 if (gap > gapsize) {
1314 gapsize = gap;
1315 gapstart = end;
1316 }
1317 }
1318 if (start < last)
1319 last = start;
1320 }
1321
1322 /*
1323 * Start allocating dynamic PCI memory a bit into the gap,
1324 * aligned up to the nearest megabyte.
1325 *
1326 * Question: should we try to pad it up a bit (do something
1327 * like " + (gapsize >> 3)" in there too?). We now have the
1328 * technology.
1329 */
1330 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1331
1332 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1333 pci_mem_start, gapstart, gapsize);
1334}
1335
1336/* Use inline assembly to define this because the nops are defined
1337 as inline assembly strings in the include files and we cannot
1338 get them easily into strings. */
1339asm("\t.data\nintelnops: "
1340 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1341 GENERIC_NOP7 GENERIC_NOP8);
1342asm("\t.data\nk8nops: "
1343 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1344 K8_NOP7 K8_NOP8);
1345asm("\t.data\nk7nops: "
1346 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1347 K7_NOP7 K7_NOP8);
1348
1349extern unsigned char intelnops[], k8nops[], k7nops[];
1350static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1351 NULL,
1352 intelnops,
1353 intelnops + 1,
1354 intelnops + 1 + 2,
1355 intelnops + 1 + 2 + 3,
1356 intelnops + 1 + 2 + 3 + 4,
1357 intelnops + 1 + 2 + 3 + 4 + 5,
1358 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1359 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1360};
1361static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1362 NULL,
1363 k8nops,
1364 k8nops + 1,
1365 k8nops + 1 + 2,
1366 k8nops + 1 + 2 + 3,
1367 k8nops + 1 + 2 + 3 + 4,
1368 k8nops + 1 + 2 + 3 + 4 + 5,
1369 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1370 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1371};
1372static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1373 NULL,
1374 k7nops,
1375 k7nops + 1,
1376 k7nops + 1 + 2,
1377 k7nops + 1 + 2 + 3,
1378 k7nops + 1 + 2 + 3 + 4,
1379 k7nops + 1 + 2 + 3 + 4 + 5,
1380 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1381 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1382};
1383static struct nop {
1384 int cpuid;
1385 unsigned char **noptable;
1386} noptypes[] = {
1387 { X86_FEATURE_K8, k8_nops },
1388 { X86_FEATURE_K7, k7_nops },
1389 { -1, NULL }
1390};
1391
1392/* Replace instructions with better alternatives for this CPU type.
1393
1394 This runs before SMP is initialized to avoid SMP problems with
1395 self modifying code. This implies that assymetric systems where
1396 APs have less capabilities than the boot processor are not handled.
1397 In this case boot with "noreplacement". */
1398void apply_alternatives(void *start, void *end)
1399{
1400 struct alt_instr *a;
1401 int diff, i, k;
1402 unsigned char **noptable = intel_nops;
1403 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1404 if (boot_cpu_has(noptypes[i].cpuid)) {
1405 noptable = noptypes[i].noptable;
1406 break;
1407 }
1408 }
1409 for (a = start; (void *)a < end; a++) {
1410 if (!boot_cpu_has(a->cpuid))
1411 continue;
1412 BUG_ON(a->replacementlen > a->instrlen);
1413 memcpy(a->instr, a->replacement, a->replacementlen);
1414 diff = a->instrlen - a->replacementlen;
1415 /* Pad the rest with nops */
1416 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1417 k = diff;
1418 if (k > ASM_NOP_MAX)
1419 k = ASM_NOP_MAX;
1420 memcpy(a->instr + i, noptable[k], k);
1421 }
1422 }
1423}
1424
1425static int no_replacement __initdata = 0;
1426
1427void __init alternative_instructions(void)
1428{
1429 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1430 if (no_replacement)
1431 return;
1432 apply_alternatives(__alt_instructions, __alt_instructions_end);
1433}
1434
1435static int __init noreplacement_setup(char *s)
1436{
1437 no_replacement = 1;
1438 return 0;
1439}
1440
1441__setup("noreplacement", noreplacement_setup);
1442
1443static char * __init machine_specific_memory_setup(void);
1444
/* Store x in MCA_bus; does nothing on kernels built without CONFIG_MCA. */
#ifndef CONFIG_MCA
static void set_mca_bus(int x) { }
#else
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#endif
1453
1454/*
1455 * Determine if we were loaded by an EFI loader. If so, then we have also been
1456 * passed the efi memmap, systab, etc., so we should use these data structures
1457 * for initialization. Note, the efi init code path is determined by the
1458 * global efi_enabled. This allows the same kernel image to be used on existing
1459 * systems (with a traditional BIOS) as well as on EFI systems.
1460 */
1461void __init setup_arch(char **cmdline_p)
1462{
1463 unsigned long max_low_pfn;
1464
1465 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1466 pre_setup_arch_hook();
1467 early_cpu_init();
1468
1469 /*
1470 * FIXME: This isn't an official loader_type right
1471 * now but does currently work with elilo.
1472 * If we were configured as an EFI kernel, check to make
1473 * sure that we were loaded correctly from elilo and that
1474 * the system table is valid. If not, then initialize normally.
1475 */
1476#ifdef CONFIG_EFI
1477 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1478 efi_enabled = 1;
1479#endif
1480
1481 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1482 drive_info = DRIVE_INFO;
1483 screen_info = SCREEN_INFO;
1484 edid_info = EDID_INFO;
1485 apm_info.bios = APM_BIOS_INFO;
1486 ist_info = IST_INFO;
1487 saved_videomode = VIDEO_MODE;
1488 if( SYS_DESC_TABLE.length != 0 ) {
1489 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1490 machine_id = SYS_DESC_TABLE.table[0];
1491 machine_submodel_id = SYS_DESC_TABLE.table[1];
1492 BIOS_revision = SYS_DESC_TABLE.table[2];
1493 }
1494 bootloader_type = LOADER_TYPE;
1495
1496#ifdef CONFIG_BLK_DEV_RAM
1497 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1498 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1499 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1500#endif
1501 ARCH_SETUP
1502 if (efi_enabled)
1503 efi_init();
1504 else {
1505 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1506 print_memory_map(machine_specific_memory_setup());
1507 }
1508
1509 copy_edd();
1510
1511 if (!MOUNT_ROOT_RDONLY)
1512 root_mountflags &= ~MS_RDONLY;
1513 init_mm.start_code = (unsigned long) _text;
1514 init_mm.end_code = (unsigned long) _etext;
1515 init_mm.end_data = (unsigned long) _edata;
1516 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1517
1518 code_resource.start = virt_to_phys(_text);
1519 code_resource.end = virt_to_phys(_etext)-1;
1520 data_resource.start = virt_to_phys(_etext);
1521 data_resource.end = virt_to_phys(_edata)-1;
1522
1523 parse_cmdline_early(cmdline_p);
1524
1525 max_low_pfn = setup_memory();
1526
1527 /*
1528 * NOTE: before this point _nobody_ is allowed to allocate
1529 * any memory using the bootmem allocator. Although the
1530 * alloctor is now initialised only the first 8Mb of the kernel
1531 * virtual address space has been mapped. All allocations before
1532 * paging_init() has completed must use the alloc_bootmem_low_pages()
1533 * variant (which allocates DMA'able memory) and care must be taken
1534 * not to exceed the 8Mb limit.
1535 */
1536
1537#ifdef CONFIG_SMP
1538 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1539#endif
1540 paging_init();
1541 remapped_pgdat_init();
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001542 sparse_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 zone_sizes_init();
1544
1545 /*
1546 * NOTE: at this point the bootmem allocator is fully available.
1547 */
1548
1549#ifdef CONFIG_EARLY_PRINTK
1550 {
1551 char *s = strstr(*cmdline_p, "earlyprintk=");
1552 if (s) {
1553 extern void setup_early_printk(char *);
1554
1555 setup_early_printk(s);
1556 printk("early console enabled\n");
1557 }
1558 }
1559#endif
1560
1561
1562 dmi_scan_machine();
1563
1564#ifdef CONFIG_X86_GENERICARCH
1565 generic_apic_probe(*cmdline_p);
1566#endif
1567 if (efi_enabled)
1568 efi_map_memmap();
1569
Alexander Nybergadaa7652005-05-31 14:39:27 -07001570#ifdef CONFIG_ACPI_BOOT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 /*
1572 * Parse the ACPI tables for possible boot-time SMP configuration.
1573 */
1574 acpi_boot_table_init();
1575 acpi_boot_init();
Alexander Nybergadaa7652005-05-31 14:39:27 -07001576#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577
1578#ifdef CONFIG_X86_LOCAL_APIC
1579 if (smp_found_config)
1580 get_smp_config();
1581#endif
1582
1583 register_memory();
1584
1585#ifdef CONFIG_VT
1586#if defined(CONFIG_VGA_CONSOLE)
1587 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1588 conswitchp = &vga_con;
1589#elif defined(CONFIG_DUMMY_CONSOLE)
1590 conswitchp = &dummy_con;
1591#endif
1592#endif
1593}
1594
1595#include "setup_arch_post.h"
1596/*
1597 * Local Variables:
1598 * mode:c
1599 * c-file-style:"k&r"
1600 * c-basic-offset:8
1601 * End:
1602 */