/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */

#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

int acpi_numa __initdata;

static struct acpi_table_slit *acpi_slit;

static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];

static int num_node_memblks __initdata;
static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;

static __init int setup_node(int pxm)
{
        return acpi_map_pxm_to_node(pxm);
}

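/*
 * Return the node of the first recorded memblk that intersects
 * [start, end), or -1 if there is none.  Zero-length entries are skipped;
 * two ranges intersect when each one starts before the other ends, and an
 * exact duplicate of an existing range is reported as a conflict as well.
 */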
static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
        int i;
        for (i = 0; i < num_node_memblks; i++) {
                struct bootnode *nd = &node_memblk_range[i];
                if (nd->start == nd->end)
                        continue;
                if (nd->end > start && nd->start < end)
                        return memblk_nodeid[i];
                if (nd->end == end && nd->start == start)
                        return memblk_nodeid[i];
        }
        return -1;
}

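/*
 * Clamp node i's range to the window [start, end).  A node lying entirely
 * outside the window collapses to an empty (start == end) range.
 */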
static __init void cutoff_node(int i, unsigned long start, unsigned long end)
{
        struct bootnode *nd = &nodes[i];

        if (nd->start < start) {
                nd->start = start;
                if (nd->end < nd->start)
                        nd->start = nd->end;
        }
        if (nd->end > end) {
                nd->end = end;
                if (nd->start > nd->end)
                        nd->start = nd->end;
        }
}

static __init void bad_srat(void)
{
        int i;
        printk(KERN_ERR "SRAT: SRAT not used.\n");
        acpi_numa = -1;
        for (i = 0; i < MAX_NUMNODES; i++) {
                nodes[i].start = nodes[i].end = 0;
                nodes_add[i].start = nodes_add[i].end = 0;
        }
        remove_all_active_ranges();
}

static __init inline int srat_disabled(void)
{
        return acpi_numa < 0;
}

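/*
 * The SLIT is a locality_count x locality_count byte matrix of relative
 * distances between proximity domains.  The firmware table is only mapped
 * while the SRAT/SLIT are being parsed, so keep a private copy in memory
 * reserved from memblock for __node_distance() to index later.
 */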
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
        unsigned length;
        unsigned long phys;

        length = slit->header.length;
        phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
                                      PAGE_SIZE);

        if (phys == MEMBLOCK_ERROR)
                panic(" Can not save slit!\n");

        acpi_slit = __va(phys);
        memcpy(acpi_slit, slit, length);
        memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
}

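/*
 * SRAT x2APIC affinity entries carry a full 32-bit APIC ID, so no SAPIC EID
 * packing is needed here; otherwise this mirrors the LAPIC callback below.
 */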
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
        int pxm, node;
        int apic_id;

        if (srat_disabled())
                return;
        if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
                bad_srat();
                return;
        }
        if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
                return;
        pxm = pa->proximity_domain;
        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
                return;
        }

        apic_id = pa->apic_id;
        if (apic_id >= MAX_LOCAL_APIC) {
                printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
                return;
        }
        set_apicid_to_node(apic_id, node);
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
               pxm, apic_id, node);
}

/* Callback for Proximity Domain -> LAPIC mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
        int pxm, node;
        int apic_id;

        if (srat_disabled())
                return;
        if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
                bad_srat();
                return;
        }
        if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
                return;
        pxm = pa->proximity_domain_lo;
        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
                return;
        }

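        /*
         * On SGI UV the SRAT apic_id and local_sapic_eid fields together
         * encode the full APIC ID, so recombine them before range checking.
         */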
        if (get_uv_system_type() >= UV_X2APIC)
                apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
        else
                apic_id = pa->apic_id;

        if (apic_id >= MAX_LOCAL_APIC) {
                printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
                return;
        }

        set_apicid_to_node(apic_id, node);
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
               pxm, apic_id, node);
}

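/*
 * Hot-pluggable SRAT ranges are only worth remembering when sparse memory
 * hotplug support is compiled in; otherwise they are ignored entirely.
 */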
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif
/*
 * Update nodes_add[]
 * This code supports one contiguous hot add area per node
 */
static void __init
update_nodes_add(int node, unsigned long start, unsigned long end)
{
        unsigned long s_pfn = start >> PAGE_SHIFT;
        unsigned long e_pfn = end >> PAGE_SHIFT;
        int changed = 0;
        struct bootnode *nd = &nodes_add[node];

        /* I had some trouble with strange memory hotadd regions breaking
           the boot. Be very strict here and reject anything unexpected.
           If you want working memory hotadd write correct SRATs.

           The node size check is a basic sanity check to guard against
           mistakes */
        if ((signed long)(end - start) < NODE_MIN_SIZE) {
                printk(KERN_ERR "SRAT: Hotplug area too small\n");
                return;
        }

        /* This check might be a bit too strict, but I'm keeping it for now. */
        if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
                printk(KERN_ERR
                        "SRAT: Hotplug area %lu -> %lu has existing memory\n",
                        s_pfn, e_pfn);
                return;
        }

        /* Looks good */

        if (nd->start == nd->end) {
                nd->start = start;
                nd->end = end;
                changed = 1;
        } else {
                if (nd->start == end) {
                        nd->start = start;
                        changed = 1;
                }
                if (nd->end == start) {
                        nd->end = end;
                        changed = 1;
                }
                if (!changed)
                        printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
        }

        if (changed) {
                node_set(node, cpu_nodes_parsed);
                printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
                                 nd->start, nd->end);
        }
}

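/*
 * Hot-pluggable ranges only update nodes_add[]; all other ranges grow the
 * node's span in nodes[].  Every accepted range is additionally recorded as
 * an individual memblk so acpi_scan_nodes() can build the memnode map.
 */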
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
        struct bootnode *nd;
        unsigned long start, end;
        int node, pxm;
        int i;

        if (srat_disabled())
                return;
        if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
                bad_srat();
                return;
        }
        if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                return;

        if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
                return;
        start = ma->base_address;
        end = start + ma->length;
        pxm = ma->proximity_domain;
        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains.\n");
                bad_srat();
                return;
        }
        i = conflicting_memblks(start, end);
        if (i == node) {
                printk(KERN_WARNING
                       "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
                       pxm, start, end, nodes[i].start, nodes[i].end);
        } else if (i >= 0) {
                printk(KERN_ERR
                       "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
                       pxm, start, end, node_to_pxm(i),
                       nodes[i].start, nodes[i].end);
                bad_srat();
                return;
        }

        printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
               start, end);

        if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
                nd = &nodes[node];
                if (!node_test_and_set(node, mem_nodes_parsed)) {
                        nd->start = start;
                        nd->end = end;
                } else {
                        if (start < nd->start)
                                nd->start = start;
                        if (nd->end < end)
                                nd->end = end;
                }
        } else
                update_nodes_add(node, start, end);

        node_memblk_range[num_node_memblks].start = start;
        node_memblk_range[num_node_memblks].end = end;
        memblk_nodeid[num_node_memblks] = node;
        num_node_memblks++;
}

/* Sanity check to catch more bad SRATs (they are amazingly common).
   Make sure the PXMs cover all memory. */
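/* pxmram counts present pages claimed by SRAT nodes, e820ram counts present
   pages according to the e820 map; if the SRAT leaves more than about 1MB of
   RAM unaccounted for, the whole table is rejected. */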
static int __init nodes_cover_memory(const struct bootnode *nodes)
{
        int i;
        unsigned long pxmram, e820ram;

        pxmram = 0;
        for_each_node_mask(i, mem_nodes_parsed) {
                unsigned long s = nodes[i].start >> PAGE_SHIFT;
                unsigned long e = nodes[i].end >> PAGE_SHIFT;
                pxmram += e - s;
                pxmram -= __absent_pages_in_range(i, s, e);
                if ((long)pxmram < 0)
                        pxmram = 0;
        }

        e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
        /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
        if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
                printk(KERN_ERR
                        "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
                        (pxmram << PAGE_SHIFT) >> 20,
                        (e820ram << PAGE_SHIFT) >> 20);
                return 0;
        }
        return 1;
}

void __init acpi_numa_arch_fixup(void) {}

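/*
 * NUMA emulation needs the physical node layout, clamped to the usable
 * address range, so it can carve its fake nodes out of the real ones.
 */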
#ifdef CONFIG_NUMA_EMU
void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
                           unsigned long end)
{
        int i;

        for_each_node_mask(i, mem_nodes_parsed) {
                cutoff_node(i, start, end);
                physnodes[i].start = nodes[i].start;
                physnodes[i].end = nodes[i].end;
        }
}
#endif /* CONFIG_NUMA_EMU */

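/*
 * Run the generic ACPI NUMA table parsing; return -EINVAL if the SRAT was
 * rejected (acpi_numa < 0) so the caller can fall back to another method.
 */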
int __init x86_acpi_numa_init(void)
{
        int ret;

        ret = acpi_numa_init();
        if (ret < 0)
                return ret;
        return srat_disabled() ? -EINVAL : 0;
}

/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(void)
{
        int i;

        if (acpi_numa <= 0)
                return -1;

        /* First clean up the node list */
        for (i = 0; i < MAX_NUMNODES; i++)
                cutoff_node(i, 0, max_pfn << PAGE_SHIFT);

        /*
         * Join together blocks on the same node, holes between
         * which don't overlap with memory on other nodes.
         */
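        /*
         * For a candidate pair i/j on the same node, the region from
         * min(end_i, end_j) to max(start_i, start_j) is the hole (or overlap)
         * between them; the inner k loop only permits the merge when no
         * memblk belonging to a different node touches that region.
         */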
        for (i = 0; i < num_node_memblks; ++i) {
                int j, k;

                for (j = i + 1; j < num_node_memblks; ++j) {
                        unsigned long start, end;

                        if (memblk_nodeid[i] != memblk_nodeid[j])
                                continue;
                        start = min(node_memblk_range[i].end,
                                    node_memblk_range[j].end);
                        end = max(node_memblk_range[i].start,
                                  node_memblk_range[j].start);
                        for (k = 0; k < num_node_memblks; ++k) {
                                if (memblk_nodeid[i] == memblk_nodeid[k])
                                        continue;
                                if (start < node_memblk_range[k].end &&
                                    end > node_memblk_range[k].start)
                                        break;
                        }
                        if (k < num_node_memblks)
                                continue;
                        start = min(node_memblk_range[i].start,
                                    node_memblk_range[j].start);
                        end = max(node_memblk_range[i].end,
                                  node_memblk_range[j].end);
                        printk(KERN_INFO "SRAT: Node %d "
                               "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
                               memblk_nodeid[i],
                               node_memblk_range[i].start,
                               node_memblk_range[i].end,
                               node_memblk_range[j].start,
                               node_memblk_range[j].end,
                               start, end);
                        node_memblk_range[i].start = start;
                        node_memblk_range[i].end = end;
                        k = --num_node_memblks - j;
                        memmove(memblk_nodeid + j, memblk_nodeid + j+1,
                                k * sizeof(*memblk_nodeid));
                        memmove(node_memblk_range + j, node_memblk_range + j+1,
                                k * sizeof(*node_memblk_range));
                        --j;
                }
        }

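        /*
         * compute_hash_shift() picks the power-of-two granularity for the
         * memnodemap[] physical-address -> node lookup table; a negative
         * shift means the memblk layout cannot be represented at all.
         */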
        memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
                                           memblk_nodeid);
        if (memnode_shift < 0) {
                printk(KERN_ERR
                     "SRAT: No NUMA node hash function found. Contact maintainer\n");
                bad_srat();
                return -1;
        }

        for (i = 0; i < num_node_memblks; i++)
                memblock_x86_register_active_regions(memblk_nodeid[i],
                                node_memblk_range[i].start >> PAGE_SHIFT,
                                node_memblk_range[i].end >> PAGE_SHIFT);

        /* for out of order entries in SRAT */
        sort_node_map();
        if (!nodes_cover_memory(nodes)) {
                bad_srat();
                return -1;
        }

        init_memory_mapping_high();

        /* Finally register nodes */
        for_each_node_mask(i, node_possible_map)
                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
        /* Try again in case setup_node_bootmem missed one due
           to missing bootmem */
        for_each_node_mask(i, node_possible_map)
                if (!node_online(i))
                        setup_node_bootmem(i, nodes[i].start, nodes[i].end);

        for (i = 0; i < nr_cpu_ids; i++) {
                int node = early_cpu_to_node(i);

                if (node == NUMA_NO_NODE)
                        continue;
                if (!node_online(node))
                        numa_clear_node(i);
        }
        numa_init_array();
        return 0;
}

#ifdef CONFIG_NUMA_EMU
static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
        [0 ... MAX_NUMNODES-1] = PXM_INVAL
};
static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
        [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
static int __init find_node_by_addr(unsigned long addr)
{
        int ret = NUMA_NO_NODE;
        int i;

        for_each_node_mask(i, mem_nodes_parsed) {
                /*
                 * Find the real node that this emulated node appears on. For
                 * the sake of simplicity, we only use a real node's starting
                 * address to determine which emulated node it appears on.
                 */
                if (addr >= nodes[i].start && addr < nodes[i].end) {
                        ret = i;
                        break;
                }
        }
        return ret;
}

/*
 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
 * mappings that respect the real ACPI topology but reflect our emulated
 * environment. For each emulated node, we find which real node it appears on
 * and create PXM to NID mappings for those fake nodes which mirror that
 * locality. SLIT will now represent the correct distances between emulated
 * nodes as a result of the real topology.
 */
void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
        int i, j;

        for (i = 0; i < num_nodes; i++) {
                int nid, pxm;

                nid = find_node_by_addr(fake_nodes[i].start);
                if (nid == NUMA_NO_NODE)
                        continue;
                pxm = node_to_pxm(nid);
                if (pxm == PXM_INVAL)
                        continue;
                fake_node_to_pxm_map[i] = pxm;
                /*
                 * For each apicid_to_node mapping that exists for this real
                 * node, it must now point to the fake node ID.
                 */
                for (j = 0; j < MAX_LOCAL_APIC; j++)
                        if (__apicid_to_node[j] == nid &&
                            fake_apicid_to_node[j] == NUMA_NO_NODE)
                                fake_apicid_to_node[j] = i;
        }

        /*
         * If there are apicid-to-node mappings for physical nodes that do not
         * have a corresponding emulated node, it should default to a guaranteed
         * value.
         */
        for (i = 0; i < MAX_LOCAL_APIC; i++)
                if (__apicid_to_node[i] != NUMA_NO_NODE &&
                    fake_apicid_to_node[i] == NUMA_NO_NODE)
                        fake_apicid_to_node[i] = 0;

        for (i = 0; i < num_nodes; i++)
                __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
        memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));

        nodes_clear(mem_nodes_parsed);
        for (i = 0; i < num_nodes; i++)
                if (fake_nodes[i].start != fake_nodes[i].end)
                        node_set(i, mem_nodes_parsed);
}

static int null_slit_node_compare(int a, int b)
{
        return node_to_pxm(a) == node_to_pxm(b);
}
#else
static int null_slit_node_compare(int a, int b)
{
        return a == b;
}
#endif /* CONFIG_NUMA_EMU */

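/*
 * With a saved SLIT, the distance between nodes a and b is the byte at row
 * node_to_pxm(a), column node_to_pxm(b) of the flattened locality matrix;
 * without one, fall back to LOCAL_DISTANCE for the same node (or same PXM
 * under emulation) and REMOTE_DISTANCE otherwise.
 */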
int __node_distance(int a, int b)
{
        int index;

        if (!acpi_slit)
                return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
                                                      REMOTE_DISTANCE;
        index = acpi_slit->locality_count * node_to_pxm(a);
        return acpi_slit->entry[index + node_to_pxm(b)];
}

EXPORT_SYMBOL(__node_distance);

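/*
 * Map a hot-added physical address to the node whose recorded hot-add window
 * contains it; defaults to node 0 when no window matches.
 */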
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
        int i, ret = 0;

        for_each_node(i)
                if (nodes_add[i].start <= start && nodes_add[i].end > start)
                        ret = i;

        return ret;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif