blob: cbd42e51cb082d82b8f287eaa1c244913268d64c [file] [log] [blame]
/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070011#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
Eric W. Biederman5f5609d2005-06-25 14:58:04 -070017#include <linux/kexec.h>
Andrew Mortonb9491ac2005-09-16 19:27:54 -070018#include <linux/module.h>
Rafael J. Wysockie8eff5a2006-09-25 23:32:46 -070019#include <linux/mm.h>
Rafael J. Wysocki74dfd662007-05-06 14:50:43 -070020#include <linux/suspend.h>
21#include <linux/pfn.h>
Andrew Mortonb9491ac2005-09-16 19:27:54 -070022
Andrew Morton1a91023a2006-07-10 04:43:49 -070023#include <asm/pgtable.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <asm/page.h>
25#include <asm/e820.h>
26#include <asm/proto.h>
H. Peter Anvin30c82642007-10-15 17:13:22 -070027#include <asm/setup.h>
Andi Kleen2bc04142005-11-05 17:25:53 +010028#include <asm/sections.h>
Thomas Gleixner718fc132008-01-30 13:30:17 +010029#include <asm/kdebug.h>
Pavel Macheke44b7b72008-04-10 23:28:10 +020030#include <asm/trampoline.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Jan Beulichb92e9fa2007-05-02 19:27:11 +020032struct e820map e820;
Andi Kleen3bd4d182006-09-26 10:52:33 +020033
Thomas Gleixner2f36fa12008-01-30 13:30:12 +010034/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070035 * PFN of last memory page.
36 */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +010037unsigned long end_pfn;
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
Thomas Gleixner2f36fa12008-01-30 13:30:12 +010039/*
Thomas Gleixner67794292008-03-21 21:27:10 +010040 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
41 * The direct mapping extends to max_pfn_mapped, so that we can directly access
Linus Torvalds1da177e2005-04-16 15:20:36 -070042 * apertures, ACPI and other tables without having to play with fixmaps.
Thomas Gleixner2f36fa12008-01-30 13:30:12 +010043 */
Thomas Gleixner67794292008-03-21 21:27:10 +010044unsigned long max_pfn_mapped;
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
Thomas Gleixner2f36fa12008-01-30 13:30:12 +010046/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070047 * Last pfn which the user wants to use.
48 */
Jan Beulichcaff0712006-09-26 10:52:31 +020049static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Andi Kleen75175272008-01-30 13:33:17 +010051/*
52 * Early reserved memory areas.
53 */
54#define MAX_EARLY_RES 20
55
56struct early_res {
57 unsigned long start, end;
Yinghai Lu25eff8d2008-02-01 17:49:41 +010058 char name[16];
Andi Kleen75175272008-01-30 13:33:17 +010059};
60static struct early_res early_res[MAX_EARLY_RES] __initdata = {
Yinghai Lu25eff8d2008-02-01 17:49:41 +010061 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
Pavel Macheke44b7b72008-04-10 23:28:10 +020062#ifdef CONFIG_X86_TRAMPOLINE
63 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
Andi Kleen75175272008-01-30 13:33:17 +010064#endif
65 {}
66};
67
Yinghai Lu25eff8d2008-02-01 17:49:41 +010068void __init reserve_early(unsigned long start, unsigned long end, char *name)
Andi Kleen75175272008-01-30 13:33:17 +010069{
70 int i;
71 struct early_res *r;
72 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
73 r = &early_res[i];
74 if (end > r->start && start < r->end)
Yinghai Lu25eff8d2008-02-01 17:49:41 +010075 panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
76 start, end - 1, name?name:"", r->start, r->end - 1, r->name);
Andi Kleen75175272008-01-30 13:33:17 +010077 }
78 if (i >= MAX_EARLY_RES)
79 panic("Too many early reservations");
80 r = &early_res[i];
81 r->start = start;
82 r->end = end;
Yinghai Lu25eff8d2008-02-01 17:49:41 +010083 if (name)
84 strncpy(r->name, name, sizeof(r->name) - 1);
Andi Kleen75175272008-01-30 13:33:17 +010085}
86
87void __init early_res_to_bootmem(void)
88{
89 int i;
90 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
91 struct early_res *r = &early_res[i];
Yinghai Lu25eff8d2008-02-01 17:49:41 +010092 printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
93 r->start, r->end - 1, r->name);
Andi Kleen75175272008-01-30 13:33:17 +010094 reserve_bootmem_generic(r->start, r->end - r->start);
95 }
96}
97
98/* Check for already reserved areas */
Jacek Luczak1a7a34a2008-04-10 13:40:57 +020099static inline int __init
Yinghai Lu48c508b2008-04-17 17:40:45 +0200100bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100101{
Andi Kleen75175272008-01-30 13:33:17 +0100102 int i;
103 unsigned long addr = *addrp, last;
104 int changed = 0;
105again:
106 last = addr + size;
107 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
108 struct early_res *r = &early_res[i];
109 if (last >= r->start && addr < r->end) {
Yinghai Lu48c508b2008-04-17 17:40:45 +0200110 *addrp = addr = round_up(r->end, align);
Andi Kleen75175272008-01-30 13:33:17 +0100111 changed = 1;
112 goto again;
H. Peter Anvin30c82642007-10-15 17:13:22 -0700113 }
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100114 }
Andi Kleen75175272008-01-30 13:33:17 +0100115 return changed;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100116}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700118/* Check for already reserved areas */
Jacek Luczak1a7a34a2008-04-10 13:40:57 +0200119static inline int __init
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700120bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
121{
122 int i;
123 unsigned long addr = *addrp, last;
124 unsigned long size = *sizep;
125 int changed = 0;
126again:
127 last = addr + size;
128 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
129 struct early_res *r = &early_res[i];
130 if (last > r->start && addr < r->start) {
131 size = r->start - addr;
132 changed = 1;
133 goto again;
134 }
135 if (last > r->end && addr < r->end) {
136 addr = round_up(r->end, align);
137 size = last - addr;
138 changed = 1;
139 goto again;
140 }
141 if (last <= r->end && addr >= r->start) {
142 (*sizep)++;
143 return 0;
144 }
145 }
146 if (changed) {
147 *addrp = addr;
148 *sizep = size;
149 }
150 return changed;
151}
Arjan van de Ven95222362006-04-07 19:49:27 +0200152/*
153 * This function checks if any part of the range <start,end> is mapped
154 * with type.
155 */
Jan Beulichb92e9fa2007-05-02 19:27:11 +0200156int
Arjan van de Veneee5a9f2006-04-07 19:49:24 +0200157e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100158{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 int i;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100160
161 for (i = 0; i < e820.nr_map; i++) {
162 struct e820entry *ei = &e820.map[i];
163
164 if (type && ei->type != type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 continue;
Eric W. Biederman48c8b112005-09-06 15:16:20 -0700166 if (ei->addr >= end || ei->addr + ei->size <= start)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100167 continue;
168 return 1;
169 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 return 0;
171}
Jan Beulichb92e9fa2007-05-02 19:27:11 +0200172EXPORT_SYMBOL_GPL(e820_any_mapped);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
Linus Torvalds79e453d2006-09-19 08:15:22 -0700174/*
175 * This function checks if the entire range <start,end> is mapped with type.
176 *
177 * Note: this function only works correct if the e820 table is sorted and
178 * not-overlapping, which is the case
179 */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100180int __init e820_all_mapped(unsigned long start, unsigned long end,
181 unsigned type)
Linus Torvalds79e453d2006-09-19 08:15:22 -0700182{
183 int i;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100184
Linus Torvalds79e453d2006-09-19 08:15:22 -0700185 for (i = 0; i < e820.nr_map; i++) {
186 struct e820entry *ei = &e820.map[i];
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100187
Linus Torvalds79e453d2006-09-19 08:15:22 -0700188 if (type && ei->type != type)
189 continue;
190 /* is the region (part) in overlap with the current region ?*/
191 if (ei->addr >= end || ei->addr + ei->size <= start)
192 continue;
193
194 /* if the region is at the beginning of <start,end> we move
195 * start to the end of the region since it's ok until there
196 */
197 if (ei->addr <= start)
198 start = ei->addr + ei->size;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100199 /*
200 * if start is now at or beyond end, we're done, full
201 * coverage
202 */
Linus Torvalds79e453d2006-09-19 08:15:22 -0700203 if (start >= end)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100204 return 1;
Linus Torvalds79e453d2006-09-19 08:15:22 -0700205 }
206 return 0;
207}
208
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100209/*
Yinghai Lu24a5da72008-02-01 17:49:41 +0100210 * Find a free area with specified alignment in a specific range.
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100211 */
212unsigned long __init find_e820_area(unsigned long start, unsigned long end,
Yinghai Lu48c508b2008-04-17 17:40:45 +0200213 unsigned long size, unsigned long align)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100214{
215 int i;
216
217 for (i = 0; i < e820.nr_map; i++) {
218 struct e820entry *ei = &e820.map[i];
Yinghai Lu48c508b2008-04-17 17:40:45 +0200219 unsigned long addr, last;
220 unsigned long ei_last;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100221
222 if (ei->type != E820_RAM)
223 continue;
Yinghai Lu48c508b2008-04-17 17:40:45 +0200224 addr = round_up(ei->addr, align);
225 ei_last = ei->addr + ei->size;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100226 if (addr < start)
Yinghai Lu48c508b2008-04-17 17:40:45 +0200227 addr = round_up(start, align);
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700228 if (addr >= ei_last)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100229 continue;
Yinghai Lu48c508b2008-04-17 17:40:45 +0200230 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 ;
Yinghai Lu24a5da72008-02-01 17:49:41 +0100232 last = addr + size;
Yinghai Lu48c508b2008-04-17 17:40:45 +0200233 if (last > ei_last)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 continue;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100235 if (last > end)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 continue;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100237 return addr;
238 }
239 return -1UL;
240}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242/*
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700243 * Find next free range after *start
244 */
Yinghai Luc64df702008-03-21 18:56:19 -0700245unsigned long __init find_e820_area_size(unsigned long start,
246 unsigned long *sizep,
247 unsigned long align)
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700248{
249 int i;
250
251 for (i = 0; i < e820.nr_map; i++) {
252 struct e820entry *ei = &e820.map[i];
253 unsigned long addr, last;
254 unsigned long ei_last;
255
256 if (ei->type != E820_RAM)
257 continue;
258 addr = round_up(ei->addr, align);
259 ei_last = ei->addr + ei->size;
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700260 if (addr < start)
261 addr = round_up(start, align);
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700262 if (addr >= ei_last)
263 continue;
264 *sizep = ei_last - addr;
Yinghai Luc64df702008-03-21 18:56:19 -0700265 while (bad_addr_size(&addr, sizep, align) &&
266 addr + *sizep <= ei_last)
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700267 ;
268 last = addr + *sizep;
Yinghai Lu272b9ca2008-03-20 23:58:33 -0700269 if (last > ei_last)
270 continue;
271 return addr;
272 }
273 return -1UL;
274
275}
276/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 * Find the highest page frame number we have available
278 */
279unsigned long __init e820_end_of_ram(void)
280{
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100281 unsigned long end_pfn;
282
Mel Gorman5cb248a2006-09-27 01:49:52 -0700283 end_pfn = find_max_pfn_with_active_regions();
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100284
Thomas Gleixner67794292008-03-21 21:27:10 +0100285 if (end_pfn > max_pfn_mapped)
286 max_pfn_mapped = end_pfn;
287 if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
288 max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 if (end_pfn > end_user_pfn)
290 end_pfn = end_user_pfn;
Thomas Gleixner67794292008-03-21 21:27:10 +0100291 if (end_pfn > max_pfn_mapped)
292 end_pfn = max_pfn_mapped;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293
Thomas Gleixner67794292008-03-21 21:27:10 +0100294 printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100295 return end_pfn;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296}
297
Andi Kleen485761b2005-08-26 18:34:10 -0700298/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 * Mark e820 reserved areas as busy for the resource manager.
300 */
Yinghai Lu3def3d62008-02-22 17:07:16 -0800301void __init e820_reserve_resources(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302{
303 int i;
Yinghai Lu01561262008-03-20 23:57:21 -0700304 struct resource *res;
305
306 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 for (i = 0; i < e820.nr_map; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 switch (e820.map[i].type) {
309 case E820_RAM: res->name = "System RAM"; break;
310 case E820_ACPI: res->name = "ACPI Tables"; break;
311 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
312 default: res->name = "reserved";
313 }
314 res->start = e820.map[i].addr;
315 res->end = res->start + e820.map[i].size - 1;
316 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
Yinghai Lu3def3d62008-02-22 17:07:16 -0800317 insert_resource(&iomem_resource, res);
Yinghai Lu01561262008-03-20 23:57:21 -0700318 res++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319 }
320}
321
Rafael J. Wysockie8eff5a2006-09-25 23:32:46 -0700322/*
323 * Find the ranges of physical addresses that do not correspond to
324 * e820 RAM areas and mark the corresponding pages as nosave for software
325 * suspend and suspend to RAM.
326 *
327 * This function requires the e820 map to be sorted and without any
328 * overlapping entries and assumes the first e820 area to be RAM.
329 */
330void __init e820_mark_nosave_regions(void)
331{
332 int i;
333 unsigned long paddr;
334
335 paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
336 for (i = 1; i < e820.nr_map; i++) {
337 struct e820entry *ei = &e820.map[i];
338
339 if (paddr < ei->addr)
Rafael J. Wysocki74dfd662007-05-06 14:50:43 -0700340 register_nosave_region(PFN_DOWN(paddr),
341 PFN_UP(ei->addr));
Rafael J. Wysockie8eff5a2006-09-25 23:32:46 -0700342
343 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
344 if (ei->type != E820_RAM)
Rafael J. Wysocki74dfd662007-05-06 14:50:43 -0700345 register_nosave_region(PFN_UP(ei->addr),
346 PFN_DOWN(paddr));
Rafael J. Wysockie8eff5a2006-09-25 23:32:46 -0700347
348 if (paddr >= (end_pfn << PAGE_SHIFT))
349 break;
350 }
351}
352
David Rientjes3af044e2007-07-21 17:10:31 +0200353/*
354 * Finds an active region in the address range from start_pfn to end_pfn and
355 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
356 */
357static int __init e820_find_active_region(const struct e820entry *ei,
358 unsigned long start_pfn,
359 unsigned long end_pfn,
360 unsigned long *ei_startpfn,
361 unsigned long *ei_endpfn)
362{
363 *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
364 *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
365
366 /* Skip map entries smaller than a page */
367 if (*ei_startpfn >= *ei_endpfn)
368 return 0;
369
Thomas Gleixner67794292008-03-21 21:27:10 +0100370 /* Check if max_pfn_mapped should be updated */
371 if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
372 max_pfn_mapped = *ei_endpfn;
David Rientjes3af044e2007-07-21 17:10:31 +0200373
374 /* Skip if map is outside the node */
375 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
376 *ei_startpfn >= end_pfn)
377 return 0;
378
379 /* Check for overlaps */
380 if (*ei_startpfn < start_pfn)
381 *ei_startpfn = start_pfn;
382 if (*ei_endpfn > end_pfn)
383 *ei_endpfn = end_pfn;
384
385 /* Obey end_user_pfn to save on memmap */
386 if (*ei_startpfn >= end_user_pfn)
387 return 0;
388 if (*ei_endpfn > end_user_pfn)
389 *ei_endpfn = end_user_pfn;
390
391 return 1;
392}
393
Mel Gorman5cb248a2006-09-27 01:49:52 -0700394/* Walk the e820 map and register active regions within a node */
395void __init
396e820_register_active_regions(int nid, unsigned long start_pfn,
397 unsigned long end_pfn)
398{
David Rientjes3af044e2007-07-21 17:10:31 +0200399 unsigned long ei_startpfn;
400 unsigned long ei_endpfn;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700401 int i;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700402
David Rientjes3af044e2007-07-21 17:10:31 +0200403 for (i = 0; i < e820.nr_map; i++)
404 if (e820_find_active_region(&e820.map[i],
405 start_pfn, end_pfn,
406 &ei_startpfn, &ei_endpfn))
407 add_active_range(nid, ei_startpfn, ei_endpfn);
Mel Gorman5cb248a2006-09-27 01:49:52 -0700408}
409
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100410/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 * Add a memory region to the kernel e820 map.
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100412 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413void __init add_memory_region(unsigned long start, unsigned long size, int type)
414{
415 int x = e820.nr_map;
416
417 if (x == E820MAX) {
418 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
419 return;
420 }
421
422 e820.map[x].addr = start;
423 e820.map[x].size = size;
424 e820.map[x].type = type;
425 e820.nr_map++;
426}
427
David Rientjesa7e96622007-07-21 17:11:29 +0200428/*
429 * Find the hole size (in bytes) in the memory range.
430 * @start: starting address of the memory range to scan
431 * @end: ending address of the memory range to scan
432 */
433unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
434{
435 unsigned long start_pfn = start >> PAGE_SHIFT;
436 unsigned long end_pfn = end >> PAGE_SHIFT;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100437 unsigned long ei_startpfn, ei_endpfn, ram = 0;
David Rientjesa7e96622007-07-21 17:11:29 +0200438 int i;
439
440 for (i = 0; i < e820.nr_map; i++) {
441 if (e820_find_active_region(&e820.map[i],
442 start_pfn, end_pfn,
443 &ei_startpfn, &ei_endpfn))
444 ram += ei_endpfn - ei_startpfn;
445 }
446 return end - start - (ram << PAGE_SHIFT);
447}
448
Adrian Bunk013d23e2008-01-30 13:30:30 +0100449static void __init e820_print_map(char *who)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450{
451 int i;
452
453 for (i = 0; i < e820.nr_map; i++) {
Dan Aloni5a3ece72007-07-21 17:11:37 +0200454 printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100455 (unsigned long long) e820.map[i].addr,
456 (unsigned long long)
457 (e820.map[i].addr + e820.map[i].size));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 switch (e820.map[i].type) {
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100459 case E820_RAM:
460 printk(KERN_CONT "(usable)\n");
461 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 case E820_RESERVED:
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100463 printk(KERN_CONT "(reserved)\n");
464 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 case E820_ACPI:
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100466 printk(KERN_CONT "(ACPI data)\n");
467 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 case E820_NVS:
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100469 printk(KERN_CONT "(ACPI NVS)\n");
470 break;
471 default:
472 printk(KERN_CONT "type %u\n", e820.map[i].type);
473 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 }
475 }
476}
477
478/*
479 * Sanitize the BIOS e820 map.
480 *
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100481 * Some e820 responses include overlapping entries. The following
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 * replaces the original e820 map with a new one, removing overlaps.
483 *
484 */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100485static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486{
487 struct change_member {
488 struct e820entry *pbios; /* pointer to original bios entry */
489 unsigned long long addr; /* address for this change point */
490 };
491 static struct change_member change_point_list[2*E820MAX] __initdata;
492 static struct change_member *change_point[2*E820MAX] __initdata;
493 static struct e820entry *overlap_list[E820MAX] __initdata;
494 static struct e820entry new_bios[E820MAX] __initdata;
495 struct change_member *change_tmp;
496 unsigned long current_type, last_type;
497 unsigned long long last_addr;
498 int chgidx, still_changing;
499 int overlap_entries;
500 int new_bios_entry;
Venkatesh Pallipadi8059b2a2005-05-01 08:58:52 -0700501 int old_nr, new_nr, chg_nr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 int i;
503
504 /*
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100505 Visually we're performing the following
506 (1,2,3,4 = memory types)...
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507
508 Sample memory map (w/overlaps):
509 ____22__________________
510 ______________________4_
511 ____1111________________
512 _44_____________________
513 11111111________________
514 ____________________33__
515 ___________44___________
516 __________33333_________
517 ______________22________
518 ___________________2222_
519 _________111111111______
520 _____________________11_
521 _________________4______
522
523 Sanitized equivalent (no overlap):
524 1_______________________
525 _44_____________________
526 ___1____________________
527 ____22__________________
528 ______11________________
529 _________1______________
530 __________3_____________
531 ___________44___________
532 _____________33_________
533 _______________2________
534 ________________1_______
535 _________________4______
536 ___________________2____
537 ____________________33__
538 ______________________4_
539 */
540
541 /* if there's only one memory region, don't bother */
542 if (*pnr_map < 2)
543 return -1;
544
545 old_nr = *pnr_map;
546
547 /* bail out if we find any unreasonable addresses in bios map */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100548 for (i = 0; i < old_nr; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
550 return -1;
551
552 /* create pointers for initial change-point information (for sorting) */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100553 for (i = 0; i < 2 * old_nr; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 change_point[i] = &change_point_list[i];
555
Venkatesh Pallipadi8059b2a2005-05-01 08:58:52 -0700556 /* record all known change-points (starting and ending addresses),
557 omitting those that are for empty memory regions */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 chgidx = 0;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100559 for (i = 0; i < old_nr; i++) {
Venkatesh Pallipadi8059b2a2005-05-01 08:58:52 -0700560 if (biosmap[i].size != 0) {
561 change_point[chgidx]->addr = biosmap[i].addr;
562 change_point[chgidx++]->pbios = &biosmap[i];
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100563 change_point[chgidx]->addr = biosmap[i].addr +
564 biosmap[i].size;
Venkatesh Pallipadi8059b2a2005-05-01 08:58:52 -0700565 change_point[chgidx++]->pbios = &biosmap[i];
566 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 }
Venkatesh Pallipadi8059b2a2005-05-01 08:58:52 -0700568 chg_nr = chgidx;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569
570 /* sort change-point list by memory addresses (low -> high) */
571 still_changing = 1;
572 while (still_changing) {
573 still_changing = 0;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100574 for (i = 1; i < chg_nr; i++) {
575 unsigned long long curaddr, lastaddr;
576 unsigned long long curpbaddr, lastpbaddr;
577
578 curaddr = change_point[i]->addr;
579 lastaddr = change_point[i - 1]->addr;
580 curpbaddr = change_point[i]->pbios->addr;
581 lastpbaddr = change_point[i - 1]->pbios->addr;
582
583 /*
584 * swap entries, when:
585 *
586 * curaddr > lastaddr or
587 * curaddr == lastaddr and curaddr == curpbaddr and
588 * lastaddr != lastpbaddr
589 */
590 if (curaddr < lastaddr ||
591 (curaddr == lastaddr && curaddr == curpbaddr &&
592 lastaddr != lastpbaddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 change_tmp = change_point[i];
594 change_point[i] = change_point[i-1];
595 change_point[i-1] = change_tmp;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100596 still_changing = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 }
598 }
599 }
600
601 /* create a new bios memory map, removing overlaps */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100602 overlap_entries = 0; /* number of entries in the overlap table */
603 new_bios_entry = 0; /* index for creating new bios map entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 last_type = 0; /* start with undefined memory type */
605 last_addr = 0; /* start with 0 as last starting address */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100606
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 /* loop through change-points, determining affect on the new bios map */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100608 for (chgidx = 0; chgidx < chg_nr; chgidx++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 /* keep track of all overlapping bios entries */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100610 if (change_point[chgidx]->addr ==
611 change_point[chgidx]->pbios->addr) {
612 /*
613 * add map entry to overlap list (> 1 entry
614 * implies an overlap)
615 */
616 overlap_list[overlap_entries++] =
617 change_point[chgidx]->pbios;
618 } else {
619 /*
620 * remove entry from list (order independent,
621 * so swap with last)
622 */
623 for (i = 0; i < overlap_entries; i++) {
624 if (overlap_list[i] ==
625 change_point[chgidx]->pbios)
626 overlap_list[i] =
627 overlap_list[overlap_entries-1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 }
629 overlap_entries--;
630 }
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100631 /*
632 * if there are overlapping entries, decide which
633 * "type" to use (larger value takes precedence --
634 * 1=usable, 2,3,4,4+=unusable)
635 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 current_type = 0;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100637 for (i = 0; i < overlap_entries; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 if (overlap_list[i]->type > current_type)
639 current_type = overlap_list[i]->type;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100640 /*
641 * continue building up new bios map based on this
642 * information
643 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 if (current_type != last_type) {
645 if (last_type != 0) {
646 new_bios[new_bios_entry].size =
647 change_point[chgidx]->addr - last_addr;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100648 /*
649 * move forward only if the new size
650 * was non-zero
651 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 if (new_bios[new_bios_entry].size != 0)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100653 /*
654 * no more space left for new
655 * bios entries ?
656 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 if (++new_bios_entry >= E820MAX)
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100658 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 }
660 if (current_type != 0) {
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100661 new_bios[new_bios_entry].addr =
662 change_point[chgidx]->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 new_bios[new_bios_entry].type = current_type;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100664 last_addr = change_point[chgidx]->addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 }
666 last_type = current_type;
667 }
668 }
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100669 /* retain count for new bios entries */
670 new_nr = new_bios_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
672 /* copy new bios mapping into original location */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100673 memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 *pnr_map = new_nr;
675
676 return 0;
677}
678
679/*
680 * Copy the BIOS e820 map into a safe place.
681 *
682 * Sanity-check it while we're at it..
683 *
684 * If we're lucky and live on a modern system, the setup code
685 * will have given us a memory map that we can use to properly
686 * set up memory. If we aren't, we'll fake a memory map.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 */
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100688static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689{
690 /* Only one memory region (or negative)? Ignore it */
691 if (nr_map < 2)
692 return -1;
693
694 do {
Alexander van Heukelum320a6b22008-03-01 17:12:43 +0100695 u64 start = biosmap->addr;
696 u64 size = biosmap->size;
697 u64 end = start + size;
698 u32 type = biosmap->type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 /* Overflow in 64 bits? Ignore the memory map. */
701 if (start > end)
702 return -1;
703
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 add_memory_region(start, size, type);
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100705 } while (biosmap++, --nr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 return 0;
707}
708
/*
 * Report a fatal error through early_printk() and then panic().
 *
 * @msg is passed as an argument to a "%s" format, never as the format
 * string itself, so any '%' it contains is printed literally instead
 * of being interpreted as a conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
714
Glauber de Oliveira Costa746ef0c2008-01-30 13:31:11 +0100715/* We're not void only for x86 32-bit compat */
716char * __init machine_specific_memory_setup(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717{
Glauber de Oliveira Costa746ef0c2008-01-30 13:31:11 +0100718 char *who = "BIOS-e820";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 /*
720 * Try to copy the BIOS-supplied E820-map.
721 *
722 * Otherwise fake a memory map; one section from 0k->640k,
723 * the next section from 1mb->appropriate_mem_k
724 */
H. Peter Anvin30c82642007-10-15 17:13:22 -0700725 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
726 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
Andi Kleen8380aab2006-09-26 10:52:37 +0200727 early_panic("Cannot find a valid memory map");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
Glauber de Oliveira Costa746ef0c2008-01-30 13:31:11 +0100729 e820_print_map(who);
730
731 /* In case someone cares... */
732 return who;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733}
734
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200735static int __init parse_memopt(char *p)
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800736{
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200737 if (!p)
738 return -EINVAL;
739 end_user_pfn = memparse(p, &p);
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100740 end_user_pfn >>= PAGE_SHIFT;
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200741 return 0;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100742}
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200743early_param("mem", parse_memopt);
744
745static int userdef __initdata;
746
747static int __init parse_memmap_opt(char *p)
748{
749 char *oldp;
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800750 unsigned long long start_at, mem_size;
751
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200752 if (!strcmp(p, "exactmap")) {
753#ifdef CONFIG_CRASH_DUMP
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100754 /*
755 * If we are doing a crash dump, we still need to know
756 * the real mem size before original memory map is
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200757 * reset.
758 */
Magnus Damm15803a42006-11-14 16:57:46 +0100759 e820_register_active_regions(0, 0, -1UL);
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200760 saved_max_pfn = e820_end_of_ram();
Magnus Damm15803a42006-11-14 16:57:46 +0100761 remove_all_active_ranges();
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200762#endif
Thomas Gleixner67794292008-03-21 21:27:10 +0100763 max_pfn_mapped = 0;
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200764 e820.nr_map = 0;
765 userdef = 1;
766 return 0;
767 }
768
769 oldp = p;
770 mem_size = memparse(p, &p);
771 if (p == oldp)
772 return -EINVAL;
Vladimir Bereznikerb3ca74a2008-01-30 13:30:46 +0100773
774 userdef = 1;
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800775 if (*p == '@') {
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200776 start_at = memparse(p+1, &p);
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800777 add_memory_region(start_at, mem_size, E820_RAM);
778 } else if (*p == '#') {
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200779 start_at = memparse(p+1, &p);
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800780 add_memory_region(start_at, mem_size, E820_ACPI);
781 } else if (*p == '$') {
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200782 start_at = memparse(p+1, &p);
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800783 add_memory_region(start_at, mem_size, E820_RESERVED);
784 } else {
785 end_user_pfn = (mem_size >> PAGE_SHIFT);
786 }
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200787 return *p == '\0' ? 0 : -EINVAL;
788}
789early_param("memmap", parse_memmap_opt);
790
Sam Ravnborg43999d92007-03-16 21:07:36 +0100791void __init finish_e820_parsing(void)
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200792{
793 if (userdef) {
Vladimir Bereznikerb3ca74a2008-01-30 13:30:46 +0100794 char nr = e820.nr_map;
795
796 if (sanitize_e820_map(e820.map, &nr) < 0)
797 early_panic("Invalid user supplied memory map");
798 e820.nr_map = nr;
799
Andi Kleen2c8c0e62006-09-26 10:52:32 +0200800 printk(KERN_INFO "user-defined physical RAM map:\n");
801 e820_print_map("user");
802 }
akpm@osdl.org69cda7b2006-01-09 20:51:46 -0800803}
804
Yinghai Lu5dca6a12008-03-18 16:44:19 -0700805void __init update_memory_range(u64 start, u64 size, unsigned old_type,
806 unsigned new_type)
807{
808 int i;
809
810 BUG_ON(old_type == new_type);
811
812 for (i = 0; i < e820.nr_map; i++) {
813 struct e820entry *ei = &e820.map[i];
814 u64 final_start, final_end;
815 if (ei->type != old_type)
816 continue;
817 /* totally covered? */
818 if (ei->addr >= start && ei->size <= size) {
819 ei->type = new_type;
820 continue;
821 }
822 /* partially covered */
823 final_start = max(start, ei->addr);
824 final_end = min(start + size, ei->addr + ei->size);
825 if (final_start >= final_end)
826 continue;
827 add_memory_region(final_start, final_end - final_start,
828 new_type);
829 }
830}
831
Yinghai Luaaf23042008-01-30 13:33:09 +0100832void __init update_e820(void)
833{
834 u8 nr_map;
835
836 nr_map = e820.nr_map;
837 if (sanitize_e820_map(e820.map, &nr_map))
838 return;
839 e820.nr_map = nr_map;
840 printk(KERN_INFO "modified physical RAM map:\n");
841 e820_print_map("modified");
842}
843
Andi Kleena1e97782005-04-16 15:25:12 -0700844unsigned long pci_mem_start = 0xaeedbabe;
Andi Kleen2ee60e172006-06-26 13:59:44 +0200845EXPORT_SYMBOL(pci_mem_start);
Andi Kleena1e97782005-04-16 15:25:12 -0700846
847/*
848 * Search for the biggest gap in the low 32 bits of the e820
849 * memory space. We pass this space to PCI to assign MMIO resources
850 * for hotplug or unconfigured devices in.
851 * Hopefully the BIOS let enough space left.
852 */
853__init void e820_setup_gap(void)
854{
Daniel Ritzf0eca962005-09-09 00:57:14 +0200855 unsigned long gapstart, gapsize, round;
Andi Kleena1e97782005-04-16 15:25:12 -0700856 unsigned long last;
857 int i;
858 int found = 0;
859
860 last = 0x100000000ull;
861 gapstart = 0x10000000;
862 gapsize = 0x400000;
863 i = e820.nr_map;
864 while (--i >= 0) {
865 unsigned long long start = e820.map[i].addr;
866 unsigned long long end = start + e820.map[i].size;
867
868 /*
869 * Since "last" is at most 4GB, we know we'll
870 * fit in 32 bits if this condition is true
871 */
872 if (last > end) {
873 unsigned long gap = last - end;
874
875 if (gap > gapsize) {
876 gapsize = gap;
877 gapstart = end;
878 found = 1;
879 }
880 }
881 if (start < last)
882 last = start;
883 }
884
885 if (!found) {
886 gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100887 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
888 "address range\n"
889 KERN_ERR "PCI: Unassigned devices with 32bit resource "
890 "registers may break!\n");
Andi Kleena1e97782005-04-16 15:25:12 -0700891 }
892
893 /*
Daniel Ritzf0eca962005-09-09 00:57:14 +0200894 * See how much we want to round up: start off with
895 * rounding to the next 1MB area.
Andi Kleena1e97782005-04-16 15:25:12 -0700896 */
Daniel Ritzf0eca962005-09-09 00:57:14 +0200897 round = 0x100000;
898 while ((gapsize >> 4) > round)
899 round += round;
900 /* Fun with two's complement */
901 pci_mem_start = (gapstart + round) & -round;
Andi Kleena1e97782005-04-16 15:25:12 -0700902
Thomas Gleixner2f36fa12008-01-30 13:30:12 +0100903 printk(KERN_INFO
904 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
905 pci_mem_start, gapstart, gapsize);
Andi Kleena1e97782005-04-16 15:25:12 -0700906}
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -0700907
908int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
909{
910 int i;
911
912 if (slot < 0 || slot >= e820.nr_map)
913 return -1;
914 for (i = slot; i < e820.nr_map; i++) {
915 if (e820.map[i].type != E820_RAM)
916 continue;
917 break;
918 }
919 if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
920 return -1;
921 *addr = e820.map[i].addr;
922 *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
923 max_pfn << PAGE_SHIFT) - *addr;
924 return i + 1;
925}