blob: 597e011cfb51cb1ba6aca65e3fbe61e2c10a831c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
/* >0: SRAT parsed successfully, <0: SRAT rejected (bad_srat), 0: no SRAT */
int acpi_numa __initdata;

/* Private copy of the firmware SLIT, saved by acpi_numa_slit_init() */
static struct acpi_table_slit *acpi_slit;

/* Nodes that have at least one (non-hotplug) SRAT memory range */
static nodemask_t nodes_parsed __initdata;
/* Nodes known only via CPU affinity entries or hot-add areas */
static nodemask_t cpu_nodes_parsed __initdata;
/* Merged memory range covering each node */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
/* One contiguous memory hot-add area per node (see update_nodes_add) */
static struct bootnode nodes_add[MAX_NUMNODES];

/* Raw SRAT memory affinity entries, kept for hashing and coverage checks */
static int num_node_memblks __initdata;
static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
/* Map an ACPI proximity domain id (PXM) to a Linux NUMA node id. */
static __init int setup_node(int pxm)
{
	return acpi_map_pxm_to_node(pxm);
}
44
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070045static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070046{
47 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070053 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070055 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
Andi Kleenabe059e2006-03-25 16:29:12 +010062 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020063
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void)
77{
Andi Kleen2bce2b52005-09-12 18:49:25 +020078 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1;
Andi Kleen429b2b32009-07-18 08:56:57 +020081 for (i = 0; i < MAX_NUMNODES; i++) {
82 nodes[i].start = nodes[i].end = 0;
83 nodes_add[i].start = nodes_add[i].end = 0;
84 }
Mel Gorman5cb248a2006-09-27 01:49:52 -070085 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070086}
87
/* True once SRAT parsing has been marked failed (acpi_numa < 0). */
static __init inline int srat_disabled(void)
{
	return acpi_numa < 0;
}
92
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	unsigned length;
	unsigned long phys;

	length = slit->header.length;
	/* Grab a page-aligned physical range below the already-mapped
	   memory to keep a private copy of the SLIT, since the firmware
	   table may not remain accessible. */
	phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
		 PAGE_SIZE);

	/* Without a saved SLIT, __node_distance() would be wrong; give up. */
	if (phys == MEMBLOCK_ERROR)
		panic(" Can not save slit!\n");

	acpi_slit = __va(phys);
	memcpy(acpi_slit, slit, length);
	memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
}
110
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
	int pxm, node;
	int apic_id;

	if (srat_disabled())
		return;
	/* A truncated entry means the whole SRAT is untrustworthy. */
	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
		bad_srat();
		return;
	}
	/* Ignore entries for disabled processors. */
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
		return;
	pxm = pa->proximity_domain;
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}

	apic_id = pa->apic_id;
	/* Skip (but do not reject the SRAT for) out-of-range apicids. */
	if (apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
		return;
	}
	set_apicid_to_node(apic_id, node);
	/* Remember the node even if it turns out to have no memory. */
	node_set(node, cpu_nodes_parsed);
	acpi_numa = 1;
	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
	       pxm, apic_id, node);
}
145
/* Callback for Proximity Domain -> LAPIC mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
	int pxm, node;
	int apic_id;

	if (srat_disabled())
		return;
	/* A malformed entry invalidates the whole SRAT. */
	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
		bad_srat();
		return;
	}
	/* Ignore entries for disabled processors. */
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
		return;
	pxm = pa->proximity_domain_lo;
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}

	/* On SGI UV the effective apicid includes the sapic EID low byte. */
	if (get_uv_system_type() >= UV_X2APIC)
		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
	else
		apic_id = pa->apic_id;

	/* Skip (but do not reject the SRAT for) out-of-range apicids. */
	if (apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
		return;
	}

	set_apicid_to_node(apic_id, node);
	/* Remember the node even if it turns out to have no memory. */
	node_set(node, cpu_nodes_parsed);
	acpi_numa = 1;
	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
	       pxm, apic_id, node);
}
185
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/* Memory hotplug built in: record hot-pluggable SRAT areas. */
static inline int save_add_info(void) {return 1;}
#else
/* No sparse memory hotplug: hot-pluggable SRAT areas are ignored. */
static inline int save_add_info(void) {return 0;}
#endif
/*
 * Update nodes_add[]
 * This code supports one contiguous hot add area per node
 */
static void __init
update_nodes_add(int node, unsigned long start, unsigned long end)
{
	unsigned long s_pfn = start >> PAGE_SHIFT;
	unsigned long e_pfn = end >> PAGE_SHIFT;
	int changed = 0;
	struct bootnode *nd = &nodes_add[node];

	/* I had some trouble with strange memory hotadd regions breaking
	   the boot. Be very strict here and reject anything unexpected.
	   If you want working memory hotadd write correct SRATs.

	   The node size check is a basic sanity check to guard against
	   mistakes */
	if ((signed long)(end - start) < NODE_MIN_SIZE) {
		printk(KERN_ERR "SRAT: Hotplug area too small\n");
		return;
	}

	/* This check might be a bit too strict, but I'm keeping it for now. */
	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
		printk(KERN_ERR
			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
			s_pfn, e_pfn);
		return;
	}

	/* Looks good */

	if (nd->start == nd->end) {
		/* Empty slot: this range becomes the node's hot-add area. */
		nd->start = start;
		nd->end = end;
		changed = 1;
	} else {
		/* Only grow the existing area at either edge; disjoint
		   ranges are reported and (partly) dropped. */
		if (nd->start == end) {
			nd->start = start;
			changed = 1;
		}
		if (nd->end == start) {
			nd->end = end;
			changed = 1;
		}
		if (!changed)
			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
	}

	if (changed) {
		/* The node may have no present memory; track it anyway. */
		node_set(node, cpu_nodes_parsed);
		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
				 nd->start, nd->end);
	}
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200247
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
249void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300250acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
Tejun Heo13081df2011-02-16 12:13:06 +0100252 struct bootnode *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 unsigned long start, end;
254 int node, pxm;
255 int i;
256
Andi Kleend22fe802006-02-03 21:51:26 +0100257 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300259 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100260 bad_srat();
261 return;
262 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300263 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100264 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300265
266 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200267 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300268 start = ma->base_address;
269 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270 pxm = ma->proximity_domain;
271 node = setup_node(pxm);
272 if (node < 0) {
273 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
274 bad_srat();
275 return;
276 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700277 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200278 if (i == node) {
279 printk(KERN_WARNING
280 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
281 pxm, start, end, nodes[i].start, nodes[i].end);
282 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200284 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
285 pxm, start, end, node_to_pxm(i),
286 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 bad_srat();
288 return;
289 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200290
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700291 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
292 start, end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200293
Tejun Heo13081df2011-02-16 12:13:06 +0100294 if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
295 nd = &nodes[node];
296 if (!node_test_and_set(node, nodes_parsed)) {
297 nd->start = start;
298 nd->end = end;
299 } else {
300 if (start < nd->start)
301 nd->start = start;
302 if (nd->end < end)
303 nd->end = end;
304 }
305 } else
Yinghai Lu888a5892009-05-15 13:59:37 -0700306 update_nodes_add(node, start, end);
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700307
308 node_memblk_range[num_node_memblks].start = start;
309 node_memblk_range[num_node_memblks].end = end;
310 memblk_nodeid[num_node_memblks] = node;
311 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312}
313
/* Sanity check to catch more bad SRATs (they are amazingly common).
   Make sure the PXMs cover all memory. */
static int __init nodes_cover_memory(const struct bootnode *nodes)
{
	int i;
	unsigned long pxmram, e820ram;

	/* Sum present pages covered by the parsed PXM ranges. */
	pxmram = 0;
	for_each_node_mask(i, nodes_parsed) {
		unsigned long s = nodes[i].start >> PAGE_SHIFT;
		unsigned long e = nodes[i].end >> PAGE_SHIFT;
		pxmram += e - s;
		pxmram -= __absent_pages_in_range(i, s, e);
		/* Clamp to zero if absent pages exceed the raw span. */
		if ((long)pxmram < 0)
			pxmram = 0;
	}

	/* Pages the e820 map says actually exist below max_pfn. */
	e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
	if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
		printk(KERN_ERR
		"SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
			(pxmram << PAGE_SHIFT) >> 20,
			(e820ram << PAGE_SHIFT) >> 20);
		return 0;
	}
	return 1;
}
342
/* Arch hook called after SRAT/SLIT parsing; nothing to do on x86-64. */
void __init acpi_numa_arch_fixup(void) {}
344
#ifdef CONFIG_NUMA_EMU
/*
 * Hand the physical node layout parsed from the SRAT to the NUMA
 * emulation code, clipped to the window [start, end).
 */
void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
				unsigned long end)
{
	int nid;

	for_each_node_mask(nid, nodes_parsed) {
		cutoff_node(nid, start, end);
		physnodes[nid].start = nodes[nid].start;
		physnodes[nid].end = nodes[nid].end;
	}
}
#endif /* CONFIG_NUMA_EMU */
David Rientjes87162732009-09-25 15:20:04 -0700358
Tejun Heoa9aec562011-02-16 12:13:06 +0100359int __init x86_acpi_numa_init(void)
360{
361 int ret;
362
363 ret = acpi_numa_init();
364 if (ret < 0)
365 return ret;
366 return srat_disabled() ? -EINVAL : 0;
367}
368
/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(void)
{
	int i;

	/* Nothing to do unless the SRAT was parsed successfully. */
	if (acpi_numa <= 0)
		return -1;

	/* First clean up the node list */
	for (i = 0; i < MAX_NUMNODES; i++)
		cutoff_node(i, 0, max_pfn << PAGE_SHIFT);

	/*
	 * Join together blocks on the same node, holes between
	 * which don't overlap with memory on other nodes.
	 */
	for (i = 0; i < num_node_memblks; ++i) {
		int j, k;

		for (j = i + 1; j < num_node_memblks; ++j) {
			unsigned long start, end;

			if (memblk_nodeid[i] != memblk_nodeid[j])
				continue;
			/* Candidate merged gap between blocks i and j. */
			start = min(node_memblk_range[i].end,
				    node_memblk_range[j].end);
			end = max(node_memblk_range[i].start,
				  node_memblk_range[j].start);
			/* The gap must not intersect any other node's block. */
			for (k = 0; k < num_node_memblks; ++k) {
				if (memblk_nodeid[i] == memblk_nodeid[k])
					continue;
				if (start < node_memblk_range[k].end &&
				    end > node_memblk_range[k].start)
					break;
			}
			if (k < num_node_memblks)
				continue;
			/* Merge j into i and drop j from the arrays. */
			start = min(node_memblk_range[i].start,
				    node_memblk_range[j].start);
			end = max(node_memblk_range[i].end,
				  node_memblk_range[j].end);
			printk(KERN_INFO "SRAT: Node %d "
			       "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
			       memblk_nodeid[i],
			       node_memblk_range[i].start,
			       node_memblk_range[i].end,
			       node_memblk_range[j].start,
			       node_memblk_range[j].end,
			       start, end);
			node_memblk_range[i].start = start;
			node_memblk_range[i].end = end;
			k = --num_node_memblks - j;
			memmove(memblk_nodeid + j, memblk_nodeid + j+1,
				k * sizeof(*memblk_nodeid));
			memmove(node_memblk_range + j, node_memblk_range + j+1,
				k * sizeof(*node_memblk_range));
			--j;	/* re-examine the entry shifted into slot j */
		}
	}

	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
					   memblk_nodeid);
	if (memnode_shift < 0) {
		printk(KERN_ERR
		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
		bad_srat();
		return -1;
	}

	for (i = 0; i < num_node_memblks; i++)
		memblock_x86_register_active_regions(memblk_nodeid[i],
				node_memblk_range[i].start >> PAGE_SHIFT,
				node_memblk_range[i].end >> PAGE_SHIFT);

	/* for out of order entries in SRAT */
	sort_node_map();
	if (!nodes_cover_memory(nodes)) {
		bad_srat();
		return -1;
	}

	init_memory_mapping_high();

	/* Account for nodes with cpus and no memory */
	nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);

	/* Finally register nodes */
	for_each_node_mask(i, node_possible_map)
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	/* Try again in case setup_node_bootmem missed one due
	   to missing bootmem */
	for_each_node_mask(i, node_possible_map)
		if (!node_online(i))
			setup_node_bootmem(i, nodes[i].start, nodes[i].end);

	/* Drop CPU->node bindings that point at nodes that never came up. */
	for (i = 0; i < nr_cpu_ids; i++) {
		int node = early_cpu_to_node(i);

		if (node == NUMA_NO_NODE)
			continue;
		if (!node_online(node))
			numa_clear_node(i);
	}
	numa_init_array();
	return 0;
}
475
#ifdef CONFIG_NUMA_EMU
/* PXM that each emulated (fake) node maps to; PXM_INVAL = unmapped. */
static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
	[0 ... MAX_NUMNODES-1] = PXM_INVAL
};
/* apicid -> emulated node table, filled in by acpi_fake_nodes(). */
static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
David Rientjes3484d792007-07-21 17:10:32 +0200483static int __init find_node_by_addr(unsigned long addr)
484{
485 int ret = NUMA_NO_NODE;
486 int i;
487
488 for_each_node_mask(i, nodes_parsed) {
489 /*
490 * Find the real node that this emulated node appears on. For
491 * the sake of simplicity, we only use a real node's starting
492 * address to determine which emulated node it appears on.
493 */
494 if (addr >= nodes[i].start && addr < nodes[i].end) {
495 ret = i;
496 break;
497 }
498 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100499 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200500}
501
/*
 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
 * mappings that respect the real ACPI topology but reflect our emulated
 * environment. For each emulated node, we find which real node it appears on
 * and create PXM to NID mappings for those fake nodes which mirror that
 * locality. SLIT will now represent the correct distances between emulated
 * nodes as a result of the real topology.
 */
void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
	int i, j;

	for (i = 0; i < num_nodes; i++) {
		int nid, pxm;

		/* Which real node does this fake node start on? */
		nid = find_node_by_addr(fake_nodes[i].start);
		if (nid == NUMA_NO_NODE)
			continue;
		pxm = node_to_pxm(nid);
		if (pxm == PXM_INVAL)
			continue;
		fake_node_to_pxm_map[i] = pxm;
		/*
		 * For each apicid_to_node mapping that exists for this real
		 * node, it must now point to the fake node ID.
		 */
		for (j = 0; j < MAX_LOCAL_APIC; j++)
			if (__apicid_to_node[j] == nid &&
			    fake_apicid_to_node[j] == NUMA_NO_NODE)
				fake_apicid_to_node[j] = i;
	}

	/*
	 * If there are apicid-to-node mappings for physical nodes that do not
	 * have a corresponding emulated node, it should default to a guaranteed
	 * value.
	 */
	for (i = 0; i < MAX_LOCAL_APIC; i++)
		if (__apicid_to_node[i] != NUMA_NO_NODE &&
		    fake_apicid_to_node[i] == NUMA_NO_NODE)
			fake_apicid_to_node[i] = 0;

	/* Install the fake PXM->node and apicid->node mappings. */
	for (i = 0; i < num_nodes; i++)
		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));

	/* Rebuild nodes_parsed to describe the non-empty emulated nodes. */
	nodes_clear(nodes_parsed);
	for (i = 0; i < num_nodes; i++)
		if (fake_nodes[i].start != fake_nodes[i].end)
			node_set(i, nodes_parsed);
}
553
/* Fallback "is local" test used by __node_distance() when no SLIT exists:
   with NUMA emulation, fake nodes sharing a PXM count as local. */
static int null_slit_node_compare(int a, int b)
{
	return node_to_pxm(a) == node_to_pxm(b);
}
#else
/* Fallback "is local" test used by __node_distance() when no SLIT exists:
   without emulation, a node is local only to itself. */
static int null_slit_node_compare(int a, int b)
{
	return a == b;
}
#endif /* CONFIG_NUMA_EMU */
564
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565int __node_distance(int a, int b)
566{
567 int index;
568
569 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200570 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
571 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300572 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 return acpi_slit->entry[index + node_to_pxm(b)];
574}
575
576EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700577
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
/*
 * Find the node whose hot-add area contains the given physical address.
 * Defaults to node 0 when no hot-add area matches.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int nid, ret = 0;

	for_each_node(nid)
		if (start >= nodes_add[nid].start && start < nodes_add[nid].end)
			ret = nid;

	return ret;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif