blob: b0dbbd48e58a33e2307b4efb3f92c66c459cc680 [file] [log] [blame]
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
19#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/proto.h>
21#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010022#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010023#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010024#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
Andi Kleenc31fbb12006-09-26 10:52:33 +020026int acpi_numa __initdata;
27
Linus Torvalds1da177e2005-04-16 15:20:36 -070028static struct acpi_table_slit *acpi_slit;
29
30static nodemask_t nodes_parsed __initdata;
Jack Steinerdc098552009-04-17 09:22:42 -050031static nodemask_t cpu_nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010032static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070033static struct bootnode nodes_add[MAX_NUMNODES];
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070035static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
/*
 * Smallest node (and hot-add area) size accepted, in bytes.  Too small
 * nodes confuse the VM badly; usually they result from BIOS bugs.
 */
#define NODE_MIN_SIZE (4 * 1024 * 1024)
42
Linus Torvalds1da177e2005-04-16 15:20:36 -070043static __init int setup_node(int pxm)
44{
Yasunori Goto762834e2006-06-23 02:03:19 -070045 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070046}
47
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070049{
50 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070051 for (i = 0; i < num_node_memblks; i++) {
52 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 if (nd->start == nd->end)
54 continue;
55 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070056 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070058 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 }
60 return -1;
61}
62
63static __init void cutoff_node(int i, unsigned long start, unsigned long end)
64{
Andi Kleenabe059e2006-03-25 16:29:12 +010065 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020066
Linus Torvalds1da177e2005-04-16 15:20:36 -070067 if (nd->start < start) {
68 nd->start = start;
69 if (nd->end < nd->start)
70 nd->start = nd->end;
71 }
72 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070073 nd->end = end;
74 if (nd->start > nd->end)
75 nd->start = nd->end;
76 }
77}
78
79static __init void bad_srat(void)
80{
Andi Kleen2bce2b52005-09-12 18:49:25 +020081 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 printk(KERN_ERR "SRAT: SRAT not used.\n");
83 acpi_numa = -1;
Andi Kleen2bce2b52005-09-12 18:49:25 +020084 for (i = 0; i < MAX_LOCAL_APIC; i++)
85 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen68a3a7f2006-04-07 19:49:18 +020086 for (i = 0; i < MAX_NUMNODES; i++)
87 nodes_add[i].start = nodes[i].end = 0;
Mel Gorman5cb248a2006-09-27 01:49:52 -070088 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070089}
90
91static __init inline int srat_disabled(void)
92{
93 return numa_off || acpi_numa < 0;
94}
95
96/* Callback for SLIT parsing */
97void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
98{
Yinghai Luf302a5bb2008-07-10 20:36:37 -070099 unsigned length;
100 unsigned long phys;
101
102 length = slit->header.length;
103 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
104 PAGE_SIZE);
105
106 if (phys == -1L)
107 panic(" Can not save slit!\n");
108
109 acpi_slit = __va(phys);
110 memcpy(acpi_slit, slit, length);
111 reserve_early(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112}
113
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800114/* Callback for Proximity Domain -> x2APIC mapping */
115void __init
116acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
117{
118 int pxm, node;
119 int apic_id;
120
121 if (srat_disabled())
122 return;
123 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
124 bad_srat();
125 return;
126 }
127 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
128 return;
129 pxm = pa->proximity_domain;
130 node = setup_node(pxm);
131 if (node < 0) {
132 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
133 bad_srat();
134 return;
135 }
136
137 apic_id = pa->apic_id;
138 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500139 node_set(node, cpu_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800140 acpi_numa = 1;
141 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
142 pxm, apic_id, node);
143}
144
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145/* Callback for Proximity Domain -> LAPIC mapping */
146void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300147acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148{
149 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100150 int apic_id;
151
Andi Kleend22fe802006-02-03 21:51:26 +0100152 if (srat_disabled())
153 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300154 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200155 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100156 return;
157 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300158 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300160 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 node = setup_node(pxm);
162 if (node < 0) {
163 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
164 bad_srat();
165 return;
166 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800167
Jack Steiner2e420602008-09-23 15:37:13 -0500168 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500169 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
170 else
171 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100172 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500173 node_set(node, cpu_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 acpi_numa = 1;
Andi Kleen0b07e982005-09-12 18:49:24 +0200175 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100176 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177}
178
/*
 * Returns 1 when built with CONFIG_MEMORY_HOTPLUG_SPARSE, 0 otherwise.
 * acpi_numa_memory_affinity_init() ignores hot-pluggable SRAT entries
 * when this returns 0.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200184/*
Yinghai Lu888a5892009-05-15 13:59:37 -0700185 * Update nodes_add[]
186 * This code supports one contiguous hot add area per node
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200187 */
Yinghai Lu888a5892009-05-15 13:59:37 -0700188static void __init
189update_nodes_add(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200190{
191 unsigned long s_pfn = start >> PAGE_SHIFT;
192 unsigned long e_pfn = end >> PAGE_SHIFT;
Yinghai Lu888a5892009-05-15 13:59:37 -0700193 int changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200194 struct bootnode *nd = &nodes_add[node];
195
196 /* I had some trouble with strange memory hotadd regions breaking
197 the boot. Be very strict here and reject anything unexpected.
198 If you want working memory hotadd write correct SRATs.
199
200 The node size check is a basic sanity check to guard against
201 mistakes */
202 if ((signed long)(end - start) < NODE_MIN_SIZE) {
203 printk(KERN_ERR "SRAT: Hotplug area too small\n");
Yinghai Lu888a5892009-05-15 13:59:37 -0700204 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200205 }
206
207 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700208 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700209 printk(KERN_ERR
210 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
211 s_pfn, e_pfn);
Yinghai Lu888a5892009-05-15 13:59:37 -0700212 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200213 }
214
215 /* Looks good */
216
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200217 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300218 nd->start = start;
219 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200220 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300221 } else {
222 if (nd->start == end) {
223 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200224 changed = 1;
225 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300226 if (nd->end == start) {
227 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200228 changed = 1;
229 }
230 if (!changed)
231 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300232 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200233
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200234 if (changed)
Yinghai Lu888a5892009-05-15 13:59:37 -0700235 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
236 nd->start, nd->end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200237}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200238
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
240void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300241acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200243 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 unsigned long start, end;
245 int node, pxm;
246 int i;
247
Andi Kleend22fe802006-02-03 21:51:26 +0100248 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300250 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100251 bad_srat();
252 return;
253 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300254 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100255 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300256
257 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200258 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300259 start = ma->base_address;
260 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 pxm = ma->proximity_domain;
262 node = setup_node(pxm);
263 if (node < 0) {
264 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
265 bad_srat();
266 return;
267 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700268 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200269 if (i == node) {
270 printk(KERN_WARNING
271 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
272 pxm, start, end, nodes[i].start, nodes[i].end);
273 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200275 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
276 pxm, start, end, node_to_pxm(i),
277 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 bad_srat();
279 return;
280 }
281 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200282 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 if (!node_test_and_set(node, nodes_parsed)) {
284 nd->start = start;
285 nd->end = end;
286 } else {
287 if (start < nd->start)
288 nd->start = start;
289 if (nd->end < end)
290 nd->end = end;
291 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200292
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700293 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
294 start, end);
295 e820_register_active_regions(node, start >> PAGE_SHIFT,
296 end >> PAGE_SHIFT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200297
Yinghai Lu888a5892009-05-15 13:59:37 -0700298 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
299 update_nodes_add(node, start, end);
300 /* restore nodes[node] */
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200301 *nd = oldnode;
302 if ((nd->start | nd->end) == 0)
303 node_clear(node, nodes_parsed);
304 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700305
306 node_memblk_range[num_node_memblks].start = start;
307 node_memblk_range[num_node_memblks].end = end;
308 memblk_nodeid[num_node_memblks] = node;
309 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310}
311
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100312/* Sanity check to catch more bad SRATs (they are amazingly common).
313 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200314static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100315{
316 int i;
317 unsigned long pxmram, e820ram;
318
319 pxmram = 0;
320 for_each_node_mask(i, nodes_parsed) {
321 unsigned long s = nodes[i].start >> PAGE_SHIFT;
322 unsigned long e = nodes[i].end >> PAGE_SHIFT;
323 pxmram += e - s;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700324 pxmram -= absent_pages_in_range(s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200325 if ((long)pxmram < 0)
326 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100327 }
328
Yinghai Lub37ab912009-05-08 00:36:44 -0700329 e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
Yinghai Lu0964b052009-05-08 00:37:34 -0700330 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
331 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100332 printk(KERN_ERR
333 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
334 (pxmram << PAGE_SHIFT) >> 20,
335 (e820ram << PAGE_SHIFT) >> 20);
336 return 0;
337 }
338 return 1;
339}
340
Sam Ravnborg1e296f52008-01-30 13:33:37 +0100341static void __init unparse_node(int node)
Andi Kleen9391a3f2006-02-03 21:51:17 +0100342{
343 int i;
344 node_clear(node, nodes_parsed);
David Rientjes7eccf7b2009-05-05 12:50:02 -0700345 node_clear(node, cpu_nodes_parsed);
Andi Kleen9391a3f2006-02-03 21:51:17 +0100346 for (i = 0; i < MAX_LOCAL_APIC; i++) {
347 if (apicid_to_node[i] == node)
348 apicid_to_node[i] = NUMA_NO_NODE;
349 }
350}
351
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352void __init acpi_numa_arch_fixup(void) {}
353
354/* Use the information discovered above to actually set up the nodes. */
355int __init acpi_scan_nodes(unsigned long start, unsigned long end)
356{
357 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100358
David Rientjesae2c6dc2007-07-21 17:09:56 +0200359 if (acpi_numa <= 0)
360 return -1;
361
Andi Kleen9391a3f2006-02-03 21:51:17 +0100362 /* First clean up the node list */
363 for (i = 0; i < MAX_NUMNODES; i++) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300364 cutoff_node(i, start, end);
Mike Travis693e3c52008-01-30 13:33:14 +0100365 /*
366 * don't confuse VM with a node that doesn't have the
367 * minimum memory.
368 */
369 if (nodes[i].end &&
370 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
Andi Kleen9391a3f2006-02-03 21:51:17 +0100371 unparse_node(i);
Daniel Yeisley0d015322006-05-30 22:47:57 +0200372 node_set_offline(i);
373 }
Andi Kleen9391a3f2006-02-03 21:51:17 +0100374 }
375
David Rientjes3484d792007-07-21 17:10:32 +0200376 if (!nodes_cover_memory(nodes)) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100377 bad_srat();
378 return -1;
379 }
380
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700381 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
382 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 if (memnode_shift < 0) {
384 printk(KERN_ERR
385 "SRAT: No NUMA node hash function found. Contact maintainer\n");
386 bad_srat();
387 return -1;
388 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200389
Jack Steinerdc098552009-04-17 09:22:42 -0500390 /* Account for nodes with cpus and no memory */
391 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200392
Andi Kleene58e0d02005-09-12 18:49:25 +0200393 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200394 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200396 /* Try again in case setup_node_bootmem missed one due
397 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200398 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200399 if (!node_online(i))
400 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
401
Mike Travis168ef542008-12-16 17:34:01 -0800402 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100403 int node = early_cpu_to_node(i);
404
travis@sgi.com834beda12008-01-30 13:33:21 +0100405 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 continue;
travis@sgi.com834beda12008-01-30 13:33:21 +0100407 if (!node_isset(node, node_possible_map))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200408 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 }
410 numa_init_array();
411 return 0;
412}
413
David Rientjes3484d792007-07-21 17:10:32 +0200414#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100415static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
416 [0 ... MAX_NUMNODES-1] = PXM_INVAL
417};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100418static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100419 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
420};
David Rientjes3484d792007-07-21 17:10:32 +0200421static int __init find_node_by_addr(unsigned long addr)
422{
423 int ret = NUMA_NO_NODE;
424 int i;
425
426 for_each_node_mask(i, nodes_parsed) {
427 /*
428 * Find the real node that this emulated node appears on. For
429 * the sake of simplicity, we only use a real node's starting
430 * address to determine which emulated node it appears on.
431 */
432 if (addr >= nodes[i].start && addr < nodes[i].end) {
433 ret = i;
434 break;
435 }
436 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100437 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200438}
439
440/*
441 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
442 * mappings that respect the real ACPI topology but reflect our emulated
443 * environment. For each emulated node, we find which real node it appears on
444 * and create PXM to NID mappings for those fake nodes which mirror that
445 * locality. SLIT will now represent the correct distances between emulated
446 * nodes as a result of the real topology.
447 */
448void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
449{
David Rientjes08705b82007-07-21 17:10:33 +0200450 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200451
452 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
453 "topology.\n");
454 for (i = 0; i < num_nodes; i++) {
455 int nid, pxm;
456
457 nid = find_node_by_addr(fake_nodes[i].start);
458 if (nid == NUMA_NO_NODE)
459 continue;
460 pxm = node_to_pxm(nid);
461 if (pxm == PXM_INVAL)
462 continue;
463 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200464 /*
465 * For each apicid_to_node mapping that exists for this real
466 * node, it must now point to the fake node ID.
467 */
468 for (j = 0; j < MAX_LOCAL_APIC; j++)
469 if (apicid_to_node[j] == nid)
470 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200471 }
472 for (i = 0; i < num_nodes; i++)
473 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200474 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200475
476 nodes_clear(nodes_parsed);
477 for (i = 0; i < num_nodes; i++)
478 if (fake_nodes[i].start != fake_nodes[i].end)
479 node_set(i, nodes_parsed);
480 WARN_ON(!nodes_cover_memory(fake_nodes));
481}
482
/*
 * Without a SLIT, two nodes count as the same locality when they map to
 * the same proximity domain.  Returns 1 if so, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
487#else
/*
 * Without a SLIT, a node is local only to itself.  Returns 1 when @a and
 * @b are the same node id, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	if (a != b)
		return 0;
	return 1;
}
492#endif /* CONFIG_NUMA_EMU */
493
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494int __node_distance(int a, int b)
495{
496 int index;
497
498 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200499 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
500 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300501 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 return acpi_slit->entry[index + node_to_pxm(b)];
503}
504
505EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700506
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200507#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
Keith Mannthey4942e992006-09-30 23:27:06 -0700508int memory_add_physaddr_to_nid(u64 start)
509{
510 int i, ret = 0;
511
512 for_each_node(i)
513 if (nodes_add[i].start <= start && nodes_add[i].end > start)
514 ret = i;
515
516 return ret;
517}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700518EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200519#endif