x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
The mapping between cpu/apicid and node is done via
apicid_to_node[] on 64bit and apicid_2_node[] +
apic->x86_32_numa_cpu_node() on 32bit. This difference makes it
difficult to further unify 32 and 64bit NUMA handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed
by two accessors - set_apicid_to_node() and numa_cpu_node(). On
64bit, numa_cpu_node() always consults __apicid_to_node[]
directly while 32bit goes through apic->numa_cpu_node() method
to allow apic implementations to override it.
srat_detect_node() for amd cpus contains workaround for broken
NUMA configuration which assumes relationship between APIC ID,
HT node ID and NUMA topology. Leave it to access
__apicid_to_node[] directly as mapping through CPU might result
in undesirable behavior change. The comment is reformatted and
updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: eric.dumazet@gmail.com
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-14-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: David Rientjes <rientjes@google.com>
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..c7fae38 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@
__acpi_map_pxm_to_node(nid, i);
#endif
}
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+ memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
}
#endif /* CONFIG_NUMA_EMU */
@@ -285,7 +285,7 @@
nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(i << bits) + j] = i;
+ set_apicid_to_node((i << bits) + j, i);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d78..480b357 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,8 +26,12 @@
early_param("numa", numa_setup);
/*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
*/
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+ return apic->x86_32_numa_cpu_node(cpu);
+}
+
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea155..1e1026f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@
struct memnode memnode;
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
@@ -716,12 +712,8 @@
BUG_ON(cpu_to_apicid == NULL);
for_each_possible_cpu(cpu) {
- int node;
- u16 apicid = cpu_to_apicid[cpu];
+ int node = numa_cpu_node(cpu);
- if (apicid == BAD_APICID)
- continue;
- node = apicid_to_node[apicid];
if (node == NUMA_NO_NODE)
continue;
if (!node_online(node))
@@ -731,6 +723,14 @@
}
#endif
+int __cpuinit numa_cpu_node(int cpu)
+{
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
+}
void __cpuinit numa_set_node(int cpu, int node)
{
@@ -776,13 +776,9 @@
void __cpuinit numa_add_cpu(int cpu)
{
unsigned long addr;
- u16 apicid;
- int physnid;
- int nid = NUMA_NO_NODE;
+ int physnid, nid;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
+ nid = numa_cpu_node(cpu);
if (nid == NUMA_NO_NODE)
nid = early_cpu_to_node(cpu);
BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6027a48..48651c6 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@
num_memory_chunks);
for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+ set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285..9a97261 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
+ set_apicid_to_node(i, NUMA_NO_NODE);
for (i = 0; i < MAX_NUMNODES; i++) {
nodes[i].start = nodes[i].end = 0;
nodes_add[i].start = nodes_add[i].end = 0;
@@ -138,7 +138,7 @@
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
- apicid_to_node[apic_id] = node;
+ set_apicid_to_node(apic_id, node);
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -178,7 +178,7 @@
return;
}
- apicid_to_node[apic_id] = node;
+ set_apicid_to_node(apic_id, node);
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -521,7 +521,7 @@
* node, it must now point to the fake node ID.
*/
for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid &&
+ if (__apicid_to_node[j] == nid &&
fake_apicid_to_node[j] == NUMA_NO_NODE)
fake_apicid_to_node[j] = i;
}
@@ -532,13 +532,13 @@
* value.
*/
for (i = 0; i < MAX_LOCAL_APIC; i++)
- if (apicid_to_node[i] != NUMA_NO_NODE &&
+ if (__apicid_to_node[i] != NUMA_NO_NODE &&
fake_apicid_to_node[i] == NUMA_NO_NODE)
fake_apicid_to_node[i] = 0;
for (i = 0; i < num_nodes; i++)
__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+ memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
nodes_clear(nodes_parsed);
for (i = 0; i < num_nodes; i++)