blob: 56b92635d87c6a305b400c758034a99dc12afca9 [file] [log] [blame]
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
Andi Kleenc31fbb12006-09-26 10:52:33 +020027int acpi_numa __initdata;
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
Jack Steinerdc098552009-04-17 09:22:42 -050032static nodemask_t cpu_nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010033static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070034static struct bootnode nodes_add[MAX_NUMNODES];
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070036static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040static __init int setup_node(int pxm)
41{
Yasunori Goto762834e2006-06-23 02:03:19 -070042 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070043}
44
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070045static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070046{
47 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070053 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070055 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
Andi Kleenabe059e2006-03-25 16:29:12 +010062 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020063
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void)
77{
Andi Kleen2bce2b52005-09-12 18:49:25 +020078 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1;
Andi Kleen2bce2b52005-09-12 18:49:25 +020081 for (i = 0; i < MAX_LOCAL_APIC; i++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +010082 set_apicid_to_node(i, NUMA_NO_NODE);
Andi Kleen429b2b32009-07-18 08:56:57 +020083 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
Mel Gorman5cb248a2006-09-27 01:49:52 -070087 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070088}
89
90static __init inline int srat_disabled(void)
91{
92 return numa_off || acpi_numa < 0;
93}
94
95/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{
Yinghai Luf302a5bb2008-07-10 20:36:37 -070098 unsigned length;
99 unsigned long phys;
100
101 length = slit->header.length;
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700103 PAGE_SIZE);
104
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700105 if (phys == MEMBLOCK_ERROR)
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700106 panic(" Can not save slit!\n");
107
108 acpi_slit = __va(phys);
109 memcpy(acpi_slit, slit, length);
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111}
112
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800113/* Callback for Proximity Domain -> x2APIC mapping */
114void __init
115acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
116{
117 int pxm, node;
118 int apic_id;
119
120 if (srat_disabled())
121 return;
122 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
123 bad_srat();
124 return;
125 }
126 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
127 return;
128 pxm = pa->proximity_domain;
129 node = setup_node(pxm);
130 if (node < 0) {
131 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
132 bad_srat();
133 return;
134 }
135
136 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100141 set_apicid_to_node(apic_id, node);
Jack Steinerdc098552009-04-17 09:22:42 -0500142 node_set(node, cpu_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800143 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800144 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800145 pxm, apic_id, node);
146}
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148/* Callback for Proximity Domain -> LAPIC mapping */
149void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300150acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
152 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100153 int apic_id;
154
Andi Kleend22fe802006-02-03 21:51:26 +0100155 if (srat_disabled())
156 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300157 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200158 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100159 return;
160 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300161 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300163 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 node = setup_node(pxm);
165 if (node < 0) {
166 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
167 bad_srat();
168 return;
169 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800170
Jack Steiner2e420602008-09-23 15:37:13 -0500171 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
173 else
174 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100181 set_apicid_to_node(apic_id, node);
Jack Steinerdc098552009-04-17 09:22:42 -0500182 node_set(node, cpu_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800184 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100185 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186}
187
/*
 * Tells the memory affinity callback whether hot-pluggable ranges are
 * recorded at all: 1 with CONFIG_MEMORY_HOTPLUG_SPARSE, 0 otherwise.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200193/*
Yinghai Lu888a5892009-05-15 13:59:37 -0700194 * Update nodes_add[]
195 * This code supports one contiguous hot add area per node
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200196 */
Yinghai Lu888a5892009-05-15 13:59:37 -0700197static void __init
198update_nodes_add(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200199{
200 unsigned long s_pfn = start >> PAGE_SHIFT;
201 unsigned long e_pfn = end >> PAGE_SHIFT;
Yinghai Lu888a5892009-05-15 13:59:37 -0700202 int changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200203 struct bootnode *nd = &nodes_add[node];
204
205 /* I had some trouble with strange memory hotadd regions breaking
206 the boot. Be very strict here and reject anything unexpected.
207 If you want working memory hotadd write correct SRATs.
208
209 The node size check is a basic sanity check to guard against
210 mistakes */
211 if ((signed long)(end - start) < NODE_MIN_SIZE) {
212 printk(KERN_ERR "SRAT: Hotplug area too small\n");
Yinghai Lu888a5892009-05-15 13:59:37 -0700213 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200214 }
215
216 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700217 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700218 printk(KERN_ERR
219 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
220 s_pfn, e_pfn);
Yinghai Lu888a5892009-05-15 13:59:37 -0700221 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200222 }
223
224 /* Looks good */
225
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200226 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300227 nd->start = start;
228 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200229 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300230 } else {
231 if (nd->start == end) {
232 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200233 changed = 1;
234 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300235 if (nd->end == start) {
236 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200237 changed = 1;
238 }
239 if (!changed)
240 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300241 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200242
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800243 if (changed) {
244 node_set(node, cpu_nodes_parsed);
Yinghai Lu888a5892009-05-15 13:59:37 -0700245 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
246 nd->start, nd->end);
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800247 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200248}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200249
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
251void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300252acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
Tejun Heo13081df2011-02-16 12:13:06 +0100254 struct bootnode *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 unsigned long start, end;
256 int node, pxm;
257 int i;
258
Andi Kleend22fe802006-02-03 21:51:26 +0100259 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300261 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100262 bad_srat();
263 return;
264 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300265 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100266 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300267
268 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200269 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300270 start = ma->base_address;
271 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 pxm = ma->proximity_domain;
273 node = setup_node(pxm);
274 if (node < 0) {
275 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
276 bad_srat();
277 return;
278 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700279 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200280 if (i == node) {
281 printk(KERN_WARNING
282 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
283 pxm, start, end, nodes[i].start, nodes[i].end);
284 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200286 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
287 pxm, start, end, node_to_pxm(i),
288 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 bad_srat();
290 return;
291 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200292
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700293 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
294 start, end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200295
Tejun Heo13081df2011-02-16 12:13:06 +0100296 if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
297 nd = &nodes[node];
298 if (!node_test_and_set(node, nodes_parsed)) {
299 nd->start = start;
300 nd->end = end;
301 } else {
302 if (start < nd->start)
303 nd->start = start;
304 if (nd->end < end)
305 nd->end = end;
306 }
307 } else
Yinghai Lu888a5892009-05-15 13:59:37 -0700308 update_nodes_add(node, start, end);
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700309
310 node_memblk_range[num_node_memblks].start = start;
311 node_memblk_range[num_node_memblks].end = end;
312 memblk_nodeid[num_node_memblks] = node;
313 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100316/* Sanity check to catch more bad SRATs (they are amazingly common).
317 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200318static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100319{
320 int i;
321 unsigned long pxmram, e820ram;
322
323 pxmram = 0;
324 for_each_node_mask(i, nodes_parsed) {
325 unsigned long s = nodes[i].start >> PAGE_SHIFT;
326 unsigned long e = nodes[i].end >> PAGE_SHIFT;
327 pxmram += e - s;
Yinghai Lu32996252009-12-15 17:59:02 -0800328 pxmram -= __absent_pages_in_range(i, s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200329 if ((long)pxmram < 0)
330 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100331 }
332
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700333 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
Yinghai Lu0964b052009-05-08 00:37:34 -0700334 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
335 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100336 printk(KERN_ERR
337 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
338 (pxmram << PAGE_SHIFT) >> 20,
339 (e820ram << PAGE_SHIFT) >> 20);
340 return 0;
341 }
342 return 1;
343}
344
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345void __init acpi_numa_arch_fixup(void) {}
346
#ifdef CONFIG_NUMA_EMU
/*
 * Clamp every parsed node to [start, end] and copy the resulting spans
 * into @physnodes, indexed by node id. Entries of nodes that were not
 * parsed are left untouched.
 */
void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
				unsigned long end)
{
	int nid;

	for_each_node_mask(nid, nodes_parsed) {
		const struct bootnode *src;

		cutoff_node(nid, start, end);
		src = &nodes[nid];
		physnodes[nid].start = src->start;
		physnodes[nid].end = src->end;
	}
}
#endif /* CONFIG_NUMA_EMU */
David Rientjes87162732009-09-25 15:20:04 -0700360
Tejun Heoa9aec562011-02-16 12:13:06 +0100361int __init x86_acpi_numa_init(void)
362{
363 int ret;
364
365 ret = acpi_numa_init();
366 if (ret < 0)
367 return ret;
368 return srat_disabled() ? -EINVAL : 0;
369}
370
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371/* Use the information discovered above to actually set up the nodes. */
Tejun Heo940fed22011-02-16 12:13:06 +0100372int __init acpi_scan_nodes(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373{
374 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100375
David Rientjesae2c6dc2007-07-21 17:09:56 +0200376 if (acpi_numa <= 0)
377 return -1;
378
Andi Kleen9391a3f2006-02-03 21:51:17 +0100379 /* First clean up the node list */
Yinghai Lu7c437692009-05-15 13:59:37 -0700380 for (i = 0; i < MAX_NUMNODES; i++)
Tejun Heo940fed22011-02-16 12:13:06 +0100381 cutoff_node(i, 0, max_pfn << PAGE_SHIFT);
Andi Kleen9391a3f2006-02-03 21:51:17 +0100382
Jan Beulich2e618782010-04-21 16:13:20 +0100383 /*
384 * Join together blocks on the same node, holes between
385 * which don't overlap with memory on other nodes.
386 */
387 for (i = 0; i < num_node_memblks; ++i) {
388 int j, k;
389
390 for (j = i + 1; j < num_node_memblks; ++j) {
391 unsigned long start, end;
392
393 if (memblk_nodeid[i] != memblk_nodeid[j])
394 continue;
395 start = min(node_memblk_range[i].end,
396 node_memblk_range[j].end);
397 end = max(node_memblk_range[i].start,
398 node_memblk_range[j].start);
399 for (k = 0; k < num_node_memblks; ++k) {
400 if (memblk_nodeid[i] == memblk_nodeid[k])
401 continue;
402 if (start < node_memblk_range[k].end &&
403 end > node_memblk_range[k].start)
404 break;
405 }
406 if (k < num_node_memblks)
407 continue;
408 start = min(node_memblk_range[i].start,
409 node_memblk_range[j].start);
410 end = max(node_memblk_range[i].end,
411 node_memblk_range[j].end);
412 printk(KERN_INFO "SRAT: Node %d "
413 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
414 memblk_nodeid[i],
415 node_memblk_range[i].start,
416 node_memblk_range[i].end,
417 node_memblk_range[j].start,
418 node_memblk_range[j].end,
419 start, end);
420 node_memblk_range[i].start = start;
421 node_memblk_range[i].end = end;
422 k = --num_node_memblks - j;
423 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
424 k * sizeof(*memblk_nodeid));
425 memmove(node_memblk_range + j, node_memblk_range + j+1,
426 k * sizeof(*node_memblk_range));
427 --j;
428 }
429 }
430
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700431 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
432 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 if (memnode_shift < 0) {
434 printk(KERN_ERR
435 "SRAT: No NUMA node hash function found. Contact maintainer\n");
436 bad_srat();
437 return -1;
438 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200439
Yinghai Lu73cf6242010-10-10 19:52:15 -0700440 for (i = 0; i < num_node_memblks; i++)
H. Peter Anvin8e4029e2010-10-11 17:05:11 -0700441 memblock_x86_register_active_regions(memblk_nodeid[i],
Yinghai Lu73cf6242010-10-10 19:52:15 -0700442 node_memblk_range[i].start >> PAGE_SHIFT,
443 node_memblk_range[i].end >> PAGE_SHIFT);
444
Yinghai Lu32996252009-12-15 17:59:02 -0800445 /* for out of order entries in SRAT */
446 sort_node_map();
David Rientjes87162732009-09-25 15:20:04 -0700447 if (!nodes_cover_memory(nodes)) {
448 bad_srat();
449 return -1;
450 }
451
Yinghai Lu1411e0e2010-12-27 16:48:17 -0800452 init_memory_mapping_high();
453
Jack Steinerdc098552009-04-17 09:22:42 -0500454 /* Account for nodes with cpus and no memory */
455 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200456
Andi Kleene58e0d02005-09-12 18:49:25 +0200457 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200458 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200460 /* Try again in case setup_node_bootmem missed one due
461 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200462 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200463 if (!node_online(i))
464 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
465
Mike Travis168ef542008-12-16 17:34:01 -0800466 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100467 int node = early_cpu_to_node(i);
468
travis@sgi.com834beda12008-01-30 13:33:21 +0100469 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 continue;
Yinghai Lu7c437692009-05-15 13:59:37 -0700471 if (!node_online(node))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200472 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 }
474 numa_init_array();
475 return 0;
476}
477
David Rientjes3484d792007-07-21 17:10:32 +0200478#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100479static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
480 [0 ... MAX_NUMNODES-1] = PXM_INVAL
481};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100482static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100483 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
484};
David Rientjes3484d792007-07-21 17:10:32 +0200485static int __init find_node_by_addr(unsigned long addr)
486{
487 int ret = NUMA_NO_NODE;
488 int i;
489
490 for_each_node_mask(i, nodes_parsed) {
491 /*
492 * Find the real node that this emulated node appears on. For
493 * the sake of simplicity, we only use a real node's starting
494 * address to determine which emulated node it appears on.
495 */
496 if (addr >= nodes[i].start && addr < nodes[i].end) {
497 ret = i;
498 break;
499 }
500 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100501 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200502}
503
504/*
505 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
506 * mappings that respect the real ACPI topology but reflect our emulated
507 * environment. For each emulated node, we find which real node it appears on
508 * and create PXM to NID mappings for those fake nodes which mirror that
509 * locality. SLIT will now represent the correct distances between emulated
510 * nodes as a result of the real topology.
511 */
512void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
513{
David Rientjes08705b82007-07-21 17:10:33 +0200514 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200515
David Rientjes3484d792007-07-21 17:10:32 +0200516 for (i = 0; i < num_nodes; i++) {
517 int nid, pxm;
518
519 nid = find_node_by_addr(fake_nodes[i].start);
520 if (nid == NUMA_NO_NODE)
521 continue;
522 pxm = node_to_pxm(nid);
523 if (pxm == PXM_INVAL)
524 continue;
525 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200526 /*
527 * For each apicid_to_node mapping that exists for this real
528 * node, it must now point to the fake node ID.
529 */
530 for (j = 0; j < MAX_LOCAL_APIC; j++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100531 if (__apicid_to_node[j] == nid &&
David Rientjesb0c4d952010-05-06 02:24:34 -0700532 fake_apicid_to_node[j] == NUMA_NO_NODE)
David Rientjes08705b82007-07-21 17:10:33 +0200533 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200534 }
David Rientjesa387e952010-12-22 17:23:56 -0800535
536 /*
537 * If there are apicid-to-node mappings for physical nodes that do not
538 * have a corresponding emulated node, it should default to a guaranteed
539 * value.
540 */
541 for (i = 0; i < MAX_LOCAL_APIC; i++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100542 if (__apicid_to_node[i] != NUMA_NO_NODE &&
David Rientjesa387e952010-12-22 17:23:56 -0800543 fake_apicid_to_node[i] == NUMA_NO_NODE)
544 fake_apicid_to_node[i] = 0;
545
David Rientjes3484d792007-07-21 17:10:32 +0200546 for (i = 0; i < num_nodes; i++)
547 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100548 memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200549
550 nodes_clear(nodes_parsed);
551 for (i = 0; i < num_nodes; i++)
552 if (fake_nodes[i].start != fake_nodes[i].end)
553 node_set(i, nodes_parsed);
David Rientjes3484d792007-07-21 17:10:32 +0200554}
555
556static int null_slit_node_compare(int a, int b)
557{
558 return node_to_pxm(a) == node_to_pxm(b);
559}
560#else
561static int null_slit_node_compare(int a, int b)
562{
563 return a == b;
564}
565#endif /* CONFIG_NUMA_EMU */
566
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567int __node_distance(int a, int b)
568{
569 int index;
570
571 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200572 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
573 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300574 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 return acpi_slit->entry[index + node_to_pxm(b)];
576}
577
578EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700579
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
/*
 * Map a hot-added physical address to a node id using the hot-add areas
 * in nodes_add[]. If several areas contain @start the highest matching
 * node id wins; if none does, node 0 is returned.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int nid, found = 0;

	for_each_node(nid) {
		if (start < nodes_add[nid].start)
			continue;
		if (start >= nodes_add[nid].end)
			continue;
		found = nid;
	}

	return found;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif