x86: Unify NUMA initialization between 32 and 64bit

Now that everything else is unified, NUMA initialization can be
unified too.

* numa_init_array() and init_cpu_to_node() are moved from
  numa_64 to numa.

* numa_32::initmem_init() is updated to call numa_init_array(), and
  setup_arch() to call init_cpu_to_node(), on 32bit too (see the
  call-tree sketch after this list).

* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit
  too. This is safe now because numa_init_array() initializes it
  early during boot.
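
On 32bit this yields the following boot ordering (call-tree sketch,
assuming the usual start_kernel() sequence from init/main.c):

  start_kernel()
    setup_arch()
      initmem_init()          /* numa_32 variant */
        numa_init_array()     /* round-robin CPUs that have no node */
      init_cpu_to_node()      /* per-CPU node from the apicid tables */
    ...
    setup_per_cpu_areas()     /* sees a fully initialized mapping */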

This makes the NUMA mapping fully initialized before
setup_per_cpu_areas() on 32bit too, so the first percpu chunk, which
contains all the static percpu variables and part of the dynamic
area, is allocated with NUMA affinity correctly taken into account.
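
Concretely, the ordering matters because the percpu first chunk
allocator asks early_cpu_to_node() for each CPU's node while carving
out that CPU's unit. A sketch of the idea, simplified from the
allocator in arch/x86/kernel/setup_percpu.c, with node_alloc() and
any_alloc() as hypothetical stand-ins for the node-aware bootmem
calls:

  static void * __init alloc_cpu_unit(unsigned int cpu,
                                      unsigned long size,
                                      unsigned long align)
  {
          /* works before per-cpu areas exist thanks to the early map */
          int node = early_cpu_to_node(cpu);

          if (node == NUMA_NO_NODE || !node_online(node))
                  return any_alloc(size, align);        /* hypothetical */

          return node_alloc(node, size, align);         /* hypothetical */
  }

With the old 32bit initializer the early map reported node 0 for
every CPU at this point, so every unit was allocated as if it were
node 0 local, regardless of the actual topology.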

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-17-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 75abecb..bf60715 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -38,11 +38,7 @@
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -99,6 +95,78 @@
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this, fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 # ifndef CONFIG_NUMA_EMU