blob: 35da8cdbe5e666dae6961ccc952b313fc17e5c93 [file] [log] [blame]
Yinghai Lub79cd8f2008-05-11 00:30:15 -07001/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
7 * Alex Achenbach <xela@slit.de>, December 2002.
8 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h>
21
22#include <asm/pgtable.h>
23#include <asm/page.h>
24#include <asm/e820.h>
Yinghai Lua4c81cf2008-05-18 01:18:57 -070025#include <asm/proto.h>
Yinghai Lub79cd8f2008-05-11 00:30:15 -070026#include <asm/setup.h>
Yinghai Lua4c81cf2008-05-18 01:18:57 -070027#include <asm/trampoline.h>
Yinghai Lub79cd8f2008-05-11 00:30:15 -070028
/* The kernel's live copy of the (sanitized) BIOS memory map. */
struct e820map e820;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
36
37/*
38 * This function checks if any part of the range <start,end> is mapped
39 * with type.
40 */
41int
42e820_any_mapped(u64 start, u64 end, unsigned type)
43{
44 int i;
45
46 for (i = 0; i < e820.nr_map; i++) {
47 struct e820entry *ei = &e820.map[i];
48
49 if (type && ei->type != type)
50 continue;
51 if (ei->addr >= end || ei->addr + ei->size <= start)
52 continue;
53 return 1;
54 }
55 return 0;
56}
57EXPORT_SYMBOL_GPL(e820_any_mapped);
58
59/*
60 * This function checks if the entire range <start,end> is mapped with type.
61 *
62 * Note: this function only works correct if the e820 table is sorted and
63 * not-overlapping, which is the case
64 */
65int __init e820_all_mapped(u64 start, u64 end, unsigned type)
66{
67 int i;
68
69 for (i = 0; i < e820.nr_map; i++) {
70 struct e820entry *ei = &e820.map[i];
71
72 if (type && ei->type != type)
73 continue;
74 /* is the region (part) in overlap with the current region ?*/
75 if (ei->addr >= end || ei->addr + ei->size <= start)
76 continue;
77
78 /* if the region is at the beginning of <start,end> we move
79 * start to the end of the region since it's ok until there
80 */
81 if (ei->addr <= start)
82 start = ei->addr + ei->size;
83 /*
84 * if start is now at or beyond end, we're done, full
85 * coverage
86 */
87 if (start >= end)
88 return 1;
89 }
90 return 0;
91}
92
93/*
94 * Add a memory region to the kernel e820 map.
95 */
96void __init add_memory_region(u64 start, u64 size, int type)
97{
98 int x = e820.nr_map;
99
Paul Jacksonc3965bd2008-05-14 08:15:34 -0700100 if (x == ARRAY_SIZE(e820.map)) {
Yinghai Lub79cd8f2008-05-11 00:30:15 -0700101 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
102 return;
103 }
104
105 e820.map[x].addr = start;
106 e820.map[x].size = size;
107 e820.map[x].type = type;
108 e820.nr_map++;
109}
110
111void __init e820_print_map(char *who)
112{
113 int i;
114
115 for (i = 0; i < e820.nr_map; i++) {
116 printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
117 (unsigned long long) e820.map[i].addr,
118 (unsigned long long)
119 (e820.map[i].addr + e820.map[i].size));
120 switch (e820.map[i].type) {
121 case E820_RAM:
122 printk(KERN_CONT "(usable)\n");
123 break;
124 case E820_RESERVED:
125 printk(KERN_CONT "(reserved)\n");
126 break;
127 case E820_ACPI:
128 printk(KERN_CONT "(ACPI data)\n");
129 break;
130 case E820_NVS:
131 printk(KERN_CONT "(ACPI NVS)\n");
132 break;
133 default:
134 printk(KERN_CONT "type %u\n", e820.map[i].type);
135 break;
136 }
137 }
138}
139
140/*
141 * Sanitize the BIOS e820 map.
142 *
143 * Some e820 responses include overlapping entries. The following
Paul Jackson5b7eb2e2008-05-14 08:15:52 -0700144 * replaces the original e820 map with a new one, removing overlaps,
145 * and resolving conflicting memory types in favor of highest
146 * numbered type.
Yinghai Lub79cd8f2008-05-11 00:30:15 -0700147 *
Paul Jackson5b7eb2e2008-05-14 08:15:52 -0700148 * The input parameter biosmap points to an array of 'struct
149 * e820entry' which on entry has elements in the range [0, *pnr_map)
150 * valid, and which has space for up to max_nr_map entries.
151 * On return, the resulting sanitized e820 map entries will be in
152 * overwritten in the same location, starting at biosmap.
153 *
154 * The integer pointed to by pnr_map must be valid on entry (the
155 * current number of valid entries located at biosmap) and will
156 * be updated on return, with the new number of valid entries
157 * (something no more than max_nr_map.)
158 *
159 * The return value from sanitize_e820_map() is zero if it
160 * successfully 'sanitized' the map entries passed in, and is -1
161 * if it did nothing, which can happen if either of (1) it was
162 * only passed one map entry, or (2) any of the input map entries
163 * were invalid (start + size < start, meaning that the size was
164 * so big the described memory range wrapped around through zero.)
165 *
166 * Visually we're performing the following
167 * (1,2,3,4 = memory types)...
168 *
169 * Sample memory map (w/overlaps):
170 * ____22__________________
171 * ______________________4_
172 * ____1111________________
173 * _44_____________________
174 * 11111111________________
175 * ____________________33__
176 * ___________44___________
177 * __________33333_________
178 * ______________22________
179 * ___________________2222_
180 * _________111111111______
181 * _____________________11_
182 * _________________4______
183 *
184 * Sanitized equivalent (no overlap):
185 * 1_______________________
186 * _44_____________________
187 * ___1____________________
188 * ____22__________________
189 * ______11________________
190 * _________1______________
191 * __________3_____________
192 * ___________44___________
193 * _____________33_________
194 * _______________2________
195 * ________________1_______
196 * _________________4______
197 * ___________________2____
198 * ____________________33__
199 * ______________________4_
Yinghai Lub79cd8f2008-05-11 00:30:15 -0700200 */
Paul Jackson5b7eb2e2008-05-14 08:15:52 -0700201
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
				int *pnr_map)
{
	/* a change-point marks where some entry begins or ends */
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	/* __initdata statics: too large for the stack, discarded after boot */
static struct change_member change_point_list[2*E820_X_MAX] __initdata;
static struct change_member *change_point[2*E820_X_MAX] __initdata;
static struct e820entry *overlap_list[E820_X_MAX] __initdata;
static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/* sort change-point list by memory addresses (low -> high);
	   simple bubble sort -- the list is small */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i = 1; i < chg_nr; i++) {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 *
			 * (the tie-break orders a region's start point
			 * before another region's end point at the same
			 * address, so the overlap list below stays correct)
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	/* number of entries in the overlap table */
	new_bios_entry = 0;	/* index for creating new bios map entries */
	last_type = 0;		/* start with undefined memory type */
	last_addr = 0;		/* start with 0 as last starting address */

	/* loop through change-points, determining affect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information; a type transition closes the region that
		 * just ended and (if non-zero) opens a new one
		 */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
360
361/*
362 * Copy the BIOS e820 map into a safe place.
363 *
364 * Sanity-check it while we're at it..
365 *
366 * If we're lucky and live on a modern system, the setup code
367 * will have given us a memory map that we can use to properly
368 * set up memory. If we aren't, we'll fake a memory map.
369 */
370int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
371{
372 /* Only one memory region (or negative)? Ignore it */
373 if (nr_map < 2)
374 return -1;
375
376 do {
377 u64 start = biosmap->addr;
378 u64 size = biosmap->size;
379 u64 end = start + size;
380 u32 type = biosmap->type;
381
382 /* Overflow in 64 bits? Ignore the memory map. */
383 if (start > end)
384 return -1;
385
386 add_memory_region(start, size, type);
387 } while (biosmap++, --nr_map);
388 return 0;
389}
390
391u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
392 unsigned new_type)
393{
394 int i;
395 u64 real_updated_size = 0;
396
397 BUG_ON(old_type == new_type);
398
399 for (i = 0; i < e820.nr_map; i++) {
400 struct e820entry *ei = &e820.map[i];
401 u64 final_start, final_end;
402 if (ei->type != old_type)
403 continue;
404 /* totally covered? */
405 if (ei->addr >= start &&
406 (ei->addr + ei->size) <= (start + size)) {
407 ei->type = new_type;
408 real_updated_size += ei->size;
409 continue;
410 }
411 /* partially covered */
412 final_start = max(start, ei->addr);
413 final_end = min(start + size, ei->addr + ei->size);
414 if (final_start >= final_end)
415 continue;
416 add_memory_region(final_start, final_end - final_start,
417 new_type);
418 real_updated_size += final_end - final_start;
419 }
420 return real_updated_size;
421}
422
423void __init update_e820(void)
424{
Paul Jackson6e9bcc72008-05-14 08:15:46 -0700425 int nr_map;
Yinghai Lub79cd8f2008-05-11 00:30:15 -0700426
427 nr_map = e820.nr_map;
Paul Jacksonc3965bd2008-05-14 08:15:34 -0700428 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
Yinghai Lub79cd8f2008-05-11 00:30:15 -0700429 return;
430 e820.nr_map = nr_map;
431 printk(KERN_INFO "modified physical RAM map:\n");
432 e820_print_map("modified");
433}
434
/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space.  We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS left enough space.
 */
__init void e820_setup_gap(void)
{
	unsigned long gapstart, gapsize, round;
	unsigned long long last;
	int i;
	int found = 0;

	/* defaults if no gap is found: start at 256MB, 4MB minimum size */
	last = 0x100000000ull;
	gapstart = 0x10000000;
	gapsize = 0x400000;
	i = e820.nr_map;
	/* walk the (sorted) map from highest entry down, tracking the
	 * hole between each entry's end and the next entry's start */
	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap > gapsize) {
				gapsize = gap;
				gapstart = end;
				found = 1;
			}
		}
		if (start < last)
			last = start;
	}

#ifdef CONFIG_X86_64
	/* 64-bit fallback: place the window just above max RAM
	 * (end_pfn is only available on x86_64) */
	if (!found) {
		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
		       "address range\n"
		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
		       "registers may break!\n");
	}
#endif

	/*
	 * See how much we want to round up: start off with
	 * rounding to the next 1MB area.
	 */
	round = 0x100000;
	/* double the rounding until it exceeds gapsize/16 */
	while ((gapsize >> 4) > round)
		round += round;
	/* Fun with two's complement: align gapstart up to 'round' */
	pci_mem_start = (gapstart + round) & -round;

	printk(KERN_INFO
	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
	       pci_mem_start, gapstart, gapsize);
}
497
Yinghai Lua4c81cf2008-05-18 01:18:57 -0700498
/*
 * Early reserved memory areas: ranges that must not be handed out by
 * the early allocator before bootmem takes over.
 */
#define MAX_EARLY_RES 20

struct early_res {
	u64 start, end;		/* reserved range [start, end) */
	char name[16];		/* tag shown in printk/panic messages */
};
/* table is terminated by the first slot whose end == 0 */
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}
};
528
529void __init reserve_early(u64 start, u64 end, char *name)
530{
531 int i;
532 struct early_res *r;
533 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
534 r = &early_res[i];
535 if (end > r->start && start < r->end)
536 panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n",
537 start, end - 1, name?name:"", r->start,
538 r->end - 1, r->name);
539 }
540 if (i >= MAX_EARLY_RES)
541 panic("Too many early reservations");
542 r = &early_res[i];
543 r->start = start;
544 r->end = end;
545 if (name)
546 strncpy(r->name, name, sizeof(r->name) - 1);
547}
548
549void __init free_early(u64 start, u64 end)
550{
551 struct early_res *r;
552 int i, j;
553
554 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
555 r = &early_res[i];
556 if (start == r->start && end == r->end)
557 break;
558 }
559 if (i >= MAX_EARLY_RES || !early_res[i].end)
560 panic("free_early on not reserved area: %llx-%llx!",
561 start, end);
562
563 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
564 ;
565
566 memmove(&early_res[i], &early_res[i + 1],
567 (j - 1 - i) * sizeof(struct early_res));
568
569 early_res[j - 1].end = 0;
570}
571
/*
 * Hand every early reservation that intersects [start, end) over to
 * the bootmem allocator, clipping each reservation to that window.
 */
void __init early_res_to_bootmem(u64 start, u64 end)
{
	int i;
	u64 final_start, final_end;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		/* clip the reservation to the caller's window */
		final_start = max(start, r->start);
		final_end = min(end, r->end);
		if (final_start >= final_end)
			continue;
		printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i,
			final_start, final_end - 1, r->name);
#ifdef CONFIG_X86_64
		reserve_bootmem_generic(final_start, final_end - final_start);
#else
		reserve_bootmem(final_start, final_end - final_start,
				BOOTMEM_DEFAULT);
#endif
	}
}
592
593/* Check for already reserved areas */
594static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
595{
596 int i;
597 u64 addr = *addrp, last;
598 int changed = 0;
599again:
600 last = addr + size;
601 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
602 struct early_res *r = &early_res[i];
603 if (last >= r->start && addr < r->end) {
604 *addrp = addr = round_up(r->end, align);
605 changed = 1;
606 goto again;
607 }
608 }
609 return changed;
610}
611
/*
 * Like bad_addr(), but may also shrink the candidate range: moves or
 * trims [*addrp, *addrp + *sizep) around any early reservation it
 * intersects.  Returns non-zero iff addr/size were changed (written
 * back to *addrp / *sizep).
 */
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
{
	int i;
	u64 addr = *addrp, last;
	u64 size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		/* reservation begins inside the range: trim off the tail */
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		/* reservation covers the head: advance past its end */
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		/*
		 * Range lies entirely inside this reservation -- nothing
		 * usable here.  NOTE(review): the (*sizep)++ looks odd;
		 * presumably it nudges the caller's retry loop forward --
		 * confirm against find_e820_area_size() before relying
		 * on this behavior.
		 */
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}
645
646/*
647 * Find a free area with specified alignment in a specific range.
648 */
649u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
650{
651 int i;
652
653 for (i = 0; i < e820.nr_map; i++) {
654 struct e820entry *ei = &e820.map[i];
655 u64 addr, last;
656 u64 ei_last;
657
658 if (ei->type != E820_RAM)
659 continue;
660 addr = round_up(ei->addr, align);
661 ei_last = ei->addr + ei->size;
662 if (addr < start)
663 addr = round_up(start, align);
664 if (addr >= ei_last)
665 continue;
666 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
667 ;
668 last = addr + size;
669 if (last > ei_last)
670 continue;
671 if (last > end)
672 continue;
673 return addr;
674 }
675 return -1ULL;
676}
677
678/*
679 * Find next free range after *start
680 */
681u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
682{
683 int i;
684
685 for (i = 0; i < e820.nr_map; i++) {
686 struct e820entry *ei = &e820.map[i];
687 u64 addr, last;
688 u64 ei_last;
689
690 if (ei->type != E820_RAM)
691 continue;
692 addr = round_up(ei->addr, align);
693 ei_last = ei->addr + ei->size;
694 if (addr < start)
695 addr = round_up(start, align);
696 if (addr >= ei_last)
697 continue;
698 *sizep = ei_last - addr;
699 while (bad_addr_size(&addr, sizep, align) &&
700 addr + *sizep <= ei_last)
701 ;
702 last = addr + *sizep;
703 if (last > ei_last)
704 continue;
705 return addr;
706 }
707 return -1UL;
708
709}