/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
Andi Kleenc31fbb12006-09-26 10:52:33 +020027int acpi_numa __initdata;
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
Jack Steinerdc098552009-04-17 09:22:42 -050032static nodemask_t cpu_nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010033static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070034static struct bootnode nodes_add[MAX_NUMNODES];
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070036static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040static __init int setup_node(int pxm)
41{
Yasunori Goto762834e2006-06-23 02:03:19 -070042 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070043}
44
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070045static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070046{
47 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070053 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070055 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
Andi Kleenabe059e2006-03-25 16:29:12 +010062 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020063
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void)
77{
Andi Kleen2bce2b52005-09-12 18:49:25 +020078 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1;
Andi Kleen2bce2b52005-09-12 18:49:25 +020081 for (i = 0; i < MAX_LOCAL_APIC; i++)
82 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen429b2b32009-07-18 08:56:57 +020083 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
Mel Gorman5cb248a2006-09-27 01:49:52 -070087 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070088}
89
90static __init inline int srat_disabled(void)
91{
92 return numa_off || acpi_numa < 0;
93}
94
95/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{
Yinghai Luf302a5bb2008-07-10 20:36:37 -070098 unsigned length;
99 unsigned long phys;
100
101 length = slit->header.length;
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700103 PAGE_SIZE);
104
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700105 if (phys == MEMBLOCK_ERROR)
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700106 panic(" Can not save slit!\n");
107
108 acpi_slit = __va(phys);
109 memcpy(acpi_slit, slit, length);
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111}
112
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800113/* Callback for Proximity Domain -> x2APIC mapping */
114void __init
115acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
116{
117 int pxm, node;
118 int apic_id;
119
120 if (srat_disabled())
121 return;
122 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
123 bad_srat();
124 return;
125 }
126 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
127 return;
128 pxm = pa->proximity_domain;
129 node = setup_node(pxm);
130 if (node < 0) {
131 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
132 bad_srat();
133 return;
134 }
135
136 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800141 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500142 node_set(node, cpu_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800143 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800144 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800145 pxm, apic_id, node);
146}
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148/* Callback for Proximity Domain -> LAPIC mapping */
149void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300150acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
152 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100153 int apic_id;
154
Andi Kleend22fe802006-02-03 21:51:26 +0100155 if (srat_disabled())
156 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300157 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200158 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100159 return;
160 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300161 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300163 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 node = setup_node(pxm);
165 if (node < 0) {
166 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
167 bad_srat();
168 return;
169 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800170
Jack Steiner2e420602008-09-23 15:37:13 -0500171 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
173 else
174 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
travis@sgi.comef970012008-01-30 13:33:10 +0100181 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500182 node_set(node, cpu_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800184 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100185 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186}
187
/* 1 when hot-add areas should be recorded (sparse memory hotplug), else 0. */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200193/*
Yinghai Lu888a5892009-05-15 13:59:37 -0700194 * Update nodes_add[]
195 * This code supports one contiguous hot add area per node
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200196 */
Yinghai Lu888a5892009-05-15 13:59:37 -0700197static void __init
198update_nodes_add(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200199{
200 unsigned long s_pfn = start >> PAGE_SHIFT;
201 unsigned long e_pfn = end >> PAGE_SHIFT;
Yinghai Lu888a5892009-05-15 13:59:37 -0700202 int changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200203 struct bootnode *nd = &nodes_add[node];
204
205 /* I had some trouble with strange memory hotadd regions breaking
206 the boot. Be very strict here and reject anything unexpected.
207 If you want working memory hotadd write correct SRATs.
208
209 The node size check is a basic sanity check to guard against
210 mistakes */
211 if ((signed long)(end - start) < NODE_MIN_SIZE) {
212 printk(KERN_ERR "SRAT: Hotplug area too small\n");
Yinghai Lu888a5892009-05-15 13:59:37 -0700213 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200214 }
215
216 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700217 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700218 printk(KERN_ERR
219 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
220 s_pfn, e_pfn);
Yinghai Lu888a5892009-05-15 13:59:37 -0700221 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200222 }
223
224 /* Looks good */
225
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200226 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300227 nd->start = start;
228 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200229 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300230 } else {
231 if (nd->start == end) {
232 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200233 changed = 1;
234 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300235 if (nd->end == start) {
236 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200237 changed = 1;
238 }
239 if (!changed)
240 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300241 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200242
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800243 if (changed) {
244 node_set(node, cpu_nodes_parsed);
Yinghai Lu888a5892009-05-15 13:59:37 -0700245 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
246 nd->start, nd->end);
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800247 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200248}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200249
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
251void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300252acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200254 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 unsigned long start, end;
256 int node, pxm;
257 int i;
258
Andi Kleend22fe802006-02-03 21:51:26 +0100259 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300261 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100262 bad_srat();
263 return;
264 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300265 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100266 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300267
268 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200269 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300270 start = ma->base_address;
271 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 pxm = ma->proximity_domain;
273 node = setup_node(pxm);
274 if (node < 0) {
275 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
276 bad_srat();
277 return;
278 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700279 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200280 if (i == node) {
281 printk(KERN_WARNING
282 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
283 pxm, start, end, nodes[i].start, nodes[i].end);
284 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200286 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
287 pxm, start, end, node_to_pxm(i),
288 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 bad_srat();
290 return;
291 }
292 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200293 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 if (!node_test_and_set(node, nodes_parsed)) {
295 nd->start = start;
296 nd->end = end;
297 } else {
298 if (start < nd->start)
299 nd->start = start;
300 if (nd->end < end)
301 nd->end = end;
302 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200303
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700304 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
305 start, end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200306
Yinghai Lu888a5892009-05-15 13:59:37 -0700307 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
308 update_nodes_add(node, start, end);
309 /* restore nodes[node] */
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200310 *nd = oldnode;
311 if ((nd->start | nd->end) == 0)
312 node_clear(node, nodes_parsed);
313 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700314
315 node_memblk_range[num_node_memblks].start = start;
316 node_memblk_range[num_node_memblks].end = end;
317 memblk_nodeid[num_node_memblks] = node;
318 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319}
320
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100321/* Sanity check to catch more bad SRATs (they are amazingly common).
322 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200323static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100324{
325 int i;
326 unsigned long pxmram, e820ram;
327
328 pxmram = 0;
329 for_each_node_mask(i, nodes_parsed) {
330 unsigned long s = nodes[i].start >> PAGE_SHIFT;
331 unsigned long e = nodes[i].end >> PAGE_SHIFT;
332 pxmram += e - s;
Yinghai Lu32996252009-12-15 17:59:02 -0800333 pxmram -= __absent_pages_in_range(i, s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200334 if ((long)pxmram < 0)
335 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100336 }
337
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700338 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
Yinghai Lu0964b052009-05-08 00:37:34 -0700339 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
340 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100341 printk(KERN_ERR
342 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
343 (pxmram << PAGE_SHIFT) >> 20,
344 (e820ram << PAGE_SHIFT) >> 20);
345 return 0;
346 }
347 return 1;
348}
349
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350void __init acpi_numa_arch_fixup(void) {}
351
David Rientjes87162732009-09-25 15:20:04 -0700352int __init acpi_get_nodes(struct bootnode *physnodes)
353{
354 int i;
355 int ret = 0;
356
357 for_each_node_mask(i, nodes_parsed) {
358 physnodes[ret].start = nodes[i].start;
359 physnodes[ret].end = nodes[i].end;
360 ret++;
361 }
362 return ret;
363}
364
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365/* Use the information discovered above to actually set up the nodes. */
366int __init acpi_scan_nodes(unsigned long start, unsigned long end)
367{
368 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100369
David Rientjesae2c6dc2007-07-21 17:09:56 +0200370 if (acpi_numa <= 0)
371 return -1;
372
Andi Kleen9391a3f2006-02-03 21:51:17 +0100373 /* First clean up the node list */
Yinghai Lu7c437692009-05-15 13:59:37 -0700374 for (i = 0; i < MAX_NUMNODES; i++)
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300375 cutoff_node(i, start, end);
Andi Kleen9391a3f2006-02-03 21:51:17 +0100376
Jan Beulich2e618782010-04-21 16:13:20 +0100377 /*
378 * Join together blocks on the same node, holes between
379 * which don't overlap with memory on other nodes.
380 */
381 for (i = 0; i < num_node_memblks; ++i) {
382 int j, k;
383
384 for (j = i + 1; j < num_node_memblks; ++j) {
385 unsigned long start, end;
386
387 if (memblk_nodeid[i] != memblk_nodeid[j])
388 continue;
389 start = min(node_memblk_range[i].end,
390 node_memblk_range[j].end);
391 end = max(node_memblk_range[i].start,
392 node_memblk_range[j].start);
393 for (k = 0; k < num_node_memblks; ++k) {
394 if (memblk_nodeid[i] == memblk_nodeid[k])
395 continue;
396 if (start < node_memblk_range[k].end &&
397 end > node_memblk_range[k].start)
398 break;
399 }
400 if (k < num_node_memblks)
401 continue;
402 start = min(node_memblk_range[i].start,
403 node_memblk_range[j].start);
404 end = max(node_memblk_range[i].end,
405 node_memblk_range[j].end);
406 printk(KERN_INFO "SRAT: Node %d "
407 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
408 memblk_nodeid[i],
409 node_memblk_range[i].start,
410 node_memblk_range[i].end,
411 node_memblk_range[j].start,
412 node_memblk_range[j].end,
413 start, end);
414 node_memblk_range[i].start = start;
415 node_memblk_range[i].end = end;
416 k = --num_node_memblks - j;
417 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
418 k * sizeof(*memblk_nodeid));
419 memmove(node_memblk_range + j, node_memblk_range + j+1,
420 k * sizeof(*node_memblk_range));
421 --j;
422 }
423 }
424
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700425 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
426 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 if (memnode_shift < 0) {
428 printk(KERN_ERR
429 "SRAT: No NUMA node hash function found. Contact maintainer\n");
430 bad_srat();
431 return -1;
432 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200433
Yinghai Lu73cf6242010-10-10 19:52:15 -0700434 for (i = 0; i < num_node_memblks; i++)
H. Peter Anvin8e4029e2010-10-11 17:05:11 -0700435 memblock_x86_register_active_regions(memblk_nodeid[i],
Yinghai Lu73cf6242010-10-10 19:52:15 -0700436 node_memblk_range[i].start >> PAGE_SHIFT,
437 node_memblk_range[i].end >> PAGE_SHIFT);
438
Yinghai Lu32996252009-12-15 17:59:02 -0800439 /* for out of order entries in SRAT */
440 sort_node_map();
David Rientjes87162732009-09-25 15:20:04 -0700441 if (!nodes_cover_memory(nodes)) {
442 bad_srat();
443 return -1;
444 }
445
Jack Steinerdc098552009-04-17 09:22:42 -0500446 /* Account for nodes with cpus and no memory */
447 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200448
Andi Kleene58e0d02005-09-12 18:49:25 +0200449 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200450 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200452 /* Try again in case setup_node_bootmem missed one due
453 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200454 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200455 if (!node_online(i))
456 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
457
Mike Travis168ef542008-12-16 17:34:01 -0800458 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100459 int node = early_cpu_to_node(i);
460
travis@sgi.com834beda12008-01-30 13:33:21 +0100461 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 continue;
Yinghai Lu7c437692009-05-15 13:59:37 -0700463 if (!node_online(node))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200464 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 }
466 numa_init_array();
467 return 0;
468}
469
David Rientjes3484d792007-07-21 17:10:32 +0200470#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100471static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
472 [0 ... MAX_NUMNODES-1] = PXM_INVAL
473};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100474static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100475 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
476};
David Rientjes3484d792007-07-21 17:10:32 +0200477static int __init find_node_by_addr(unsigned long addr)
478{
479 int ret = NUMA_NO_NODE;
480 int i;
481
482 for_each_node_mask(i, nodes_parsed) {
483 /*
484 * Find the real node that this emulated node appears on. For
485 * the sake of simplicity, we only use a real node's starting
486 * address to determine which emulated node it appears on.
487 */
488 if (addr >= nodes[i].start && addr < nodes[i].end) {
489 ret = i;
490 break;
491 }
492 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100493 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200494}
495
496/*
497 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
498 * mappings that respect the real ACPI topology but reflect our emulated
499 * environment. For each emulated node, we find which real node it appears on
500 * and create PXM to NID mappings for those fake nodes which mirror that
501 * locality. SLIT will now represent the correct distances between emulated
502 * nodes as a result of the real topology.
503 */
504void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
505{
David Rientjes08705b82007-07-21 17:10:33 +0200506 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200507
508 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
509 "topology.\n");
510 for (i = 0; i < num_nodes; i++) {
511 int nid, pxm;
512
513 nid = find_node_by_addr(fake_nodes[i].start);
514 if (nid == NUMA_NO_NODE)
515 continue;
516 pxm = node_to_pxm(nid);
517 if (pxm == PXM_INVAL)
518 continue;
519 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200520 /*
521 * For each apicid_to_node mapping that exists for this real
522 * node, it must now point to the fake node ID.
523 */
524 for (j = 0; j < MAX_LOCAL_APIC; j++)
David Rientjesb0c4d952010-05-06 02:24:34 -0700525 if (apicid_to_node[j] == nid &&
526 fake_apicid_to_node[j] == NUMA_NO_NODE)
David Rientjes08705b82007-07-21 17:10:33 +0200527 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200528 }
529 for (i = 0; i < num_nodes; i++)
530 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200531 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200532
533 nodes_clear(nodes_parsed);
534 for (i = 0; i < num_nodes; i++)
535 if (fake_nodes[i].start != fake_nodes[i].end)
536 node_set(i, nodes_parsed);
David Rientjes3484d792007-07-21 17:10:32 +0200537}
538
/* Without a SLIT, two nodes count as the same locality if their PXMs match. */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
543#else
/* Without a SLIT, a node is local only to itself. */
static int null_slit_node_compare(int a, int b)
{
	if (a != b)
		return 0;
	return 1;
}
548#endif /* CONFIG_NUMA_EMU */
549
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550int __node_distance(int a, int b)
551{
552 int index;
553
554 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200555 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
556 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300557 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 return acpi_slit->entry[index + node_to_pxm(b)];
559}
560
561EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700562
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200563#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
Keith Mannthey4942e992006-09-30 23:27:06 -0700564int memory_add_physaddr_to_nid(u64 start)
565{
566 int i, ret = 0;
567
568 for_each_node(i)
569 if (nodes_add[i].start <= start && nodes_add[i].end > start)
570 ret = i;
571
572 return ret;
573}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700574EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200575#endif