MIPS: Netlogic: Support for multi-chip configuration

Up to 4 Netlogic XLP SoCs can be connected over ICI links to form a
coherent multi-node system.  Each SoC has its own set of on-chip
devices including PIC.  To support this, add a per SoC structure and
use it for the PIC and SYS block addresses instead of using global
variables.

Signed-off-by: Jayachandran C <jchandra@broadcom.com>
Patchwork: http://patchwork.linux-mips.org/patch/4469
Signed-off-by: John Crispin <blogic@openwrt.org>
diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c
index e52bfcb..4d6bd8f 100644
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c
@@ -70,33 +70,34 @@
  */
 
 /* Globals */
-static uint64_t nlm_irq_mask;
-static DEFINE_SPINLOCK(nlm_pic_lock);
-
 static void xlp_pic_enable(struct irq_data *d)
 {
 	unsigned long flags;
+	struct nlm_soc_info *nodep;
 	int irt;
 
+	nodep = nlm_current_node();
 	irt = nlm_irq_to_irt(d->irq);
 	if (irt == -1)
 		return;
-	spin_lock_irqsave(&nlm_pic_lock, flags);
-	nlm_pic_enable_irt(nlm_pic_base, irt);
-	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+	spin_lock_irqsave(&nodep->piclock, flags);
+	nlm_pic_enable_irt(nodep->picbase, irt);
+	spin_unlock_irqrestore(&nodep->piclock, flags);
 }
 
 static void xlp_pic_disable(struct irq_data *d)
 {
+	struct nlm_soc_info *nodep;
 	unsigned long flags;
 	int irt;
 
+	nodep = nlm_current_node();
 	irt = nlm_irq_to_irt(d->irq);
 	if (irt == -1)
 		return;
-	spin_lock_irqsave(&nlm_pic_lock, flags);
-	nlm_pic_disable_irt(nlm_pic_base, irt);
-	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+	spin_lock_irqsave(&nodep->piclock, flags);
+	nlm_pic_disable_irt(nodep->picbase, irt);
+	spin_unlock_irqrestore(&nodep->piclock, flags);
 }
 
 static void xlp_pic_mask_ack(struct irq_data *d)
@@ -109,8 +110,10 @@
 static void xlp_pic_unmask(struct irq_data *d)
 {
 	void *hd = irq_data_get_irq_handler_data(d);
+	struct nlm_soc_info *nodep;
 	int irt;
 
+	nodep = nlm_current_node();
 	irt = nlm_irq_to_irt(d->irq);
 	if (irt == -1)
 		return;
@@ -120,7 +123,7 @@
 		extra_ack(d);
 	}
 	/* Ack is a single write, no need to lock */
-	nlm_pic_ack(nlm_pic_base, irt);
+	nlm_pic_ack(nodep->picbase, irt);
 }
 
 static struct irq_chip xlp_pic = {
@@ -177,7 +180,11 @@
 void __init init_nlm_common_irqs(void)
 {
 	int i, irq, irt;
+	uint64_t irqmask;
+	struct nlm_soc_info *nodep;
 
+	nodep = nlm_current_node();
+	irqmask = (1ULL << IRQ_TIMER);
 	for (i = 0; i < PIC_IRT_FIRST_IRQ; i++)
 		irq_set_chip_and_handler(i, &nlm_cpu_intr, handle_percpu_irq);
 
@@ -189,7 +196,7 @@
 			 nlm_smp_function_ipi_handler);
 	irq_set_chip_and_handler(IRQ_IPI_SMP_RESCHEDULE, &nlm_cpu_intr,
 			 nlm_smp_resched_ipi_handler);
-	nlm_irq_mask |=
+	irqmask |=
 	    ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE));
 #endif
 
@@ -197,11 +204,11 @@
 		irt = nlm_irq_to_irt(irq);
 		if (irt == -1)
 			continue;
-		nlm_irq_mask |= (1ULL << irq);
-		nlm_pic_init_irt(nlm_pic_base, irt, irq, 0);
+		irqmask |= (1ULL << irq);
+		nlm_pic_init_irt(nodep->picbase, irt, irq, 0);
 	}
 
-	nlm_irq_mask |= (1ULL << IRQ_TIMER);
+	nodep->irqmask = irqmask;
 }
 
 void __init arch_init_irq(void)
@@ -209,29 +216,39 @@
 	/* Initialize the irq descriptors */
 	init_nlm_common_irqs();
 
-	write_c0_eimr(nlm_irq_mask);
+	write_c0_eimr(nlm_current_node()->irqmask);
 }
 
 void __cpuinit nlm_smp_irq_init(void)
 {
 	/* set interrupt mask for non-zero cpus */
-	write_c0_eimr(nlm_irq_mask);
+	write_c0_eimr(nlm_current_node()->irqmask);
 }
 
 asmlinkage void plat_irq_dispatch(void)
 {
 	uint64_t eirr;
-	int i;
+	int i, node;
 
+	node = nlm_nodeid();
 	eirr = read_c0_eirr() & read_c0_eimr();
 	if (eirr & (1 << IRQ_TIMER)) {
 		do_IRQ(IRQ_TIMER);
 		return;
 	}
-
+#ifdef CONFIG_SMP
+	if (eirr & (1ULL << IRQ_IPI_SMP_FUNCTION)) {
+		do_IRQ(IRQ_IPI_SMP_FUNCTION);
+		return;
+	}
+	if (eirr & (1ULL << IRQ_IPI_SMP_RESCHEDULE)) {
+		do_IRQ(IRQ_IPI_SMP_RESCHEDULE);
+		return;
+	}
+#endif
 	i = __ilog2_u64(eirr);
 	if (i == -1)
 		return;
 
-	do_IRQ(i);
+	do_IRQ(nlm_irq_to_xirq(node, i));
 }
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index 4fe8992..e40b467 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -59,12 +59,17 @@
 
 void nlm_send_ipi_single(int logical_cpu, unsigned int action)
 {
-	int cpu = cpu_logical_map(logical_cpu);
+	int cpu, node;
+	uint64_t picbase;
+
+	cpu = cpu_logical_map(logical_cpu);
+	node = cpu / NLM_CPUS_PER_NODE;
+	picbase = nlm_get_node(node)->picbase;
 
 	if (action & SMP_CALL_FUNCTION)
-		nlm_pic_send_ipi(nlm_pic_base, cpu, IRQ_IPI_SMP_FUNCTION, 0);
+		nlm_pic_send_ipi(picbase, cpu, IRQ_IPI_SMP_FUNCTION, 0);
 	if (action & SMP_RESCHEDULE_YOURSELF)
-		nlm_pic_send_ipi(nlm_pic_base, cpu, IRQ_IPI_SMP_RESCHEDULE, 0);
+		nlm_pic_send_ipi(picbase, cpu, IRQ_IPI_SMP_RESCHEDULE, 0);
 }
 
 void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
@@ -96,11 +101,12 @@
 void nlm_early_init_secondary(int cpu)
 {
 	change_c0_config(CONF_CM_CMASK, 0x3);
-	write_c0_ebase((uint32_t)nlm_common_ebase);
 #ifdef CONFIG_CPU_XLP
-	if (cpu % 4 == 0)
+	/* mmu init, once per core */
+	if (cpu % NLM_THREADS_PER_CORE == 0)
 		xlp_mmu_init();
 #endif
+	write_c0_ebase(nlm_current_node()->ebase);
 }
 
 /*
@@ -108,7 +114,7 @@
  */
 static void __cpuinit nlm_init_secondary(void)
 {
-	current_cpu_data.core = hard_smp_processor_id() / 4;
+	current_cpu_data.core = hard_smp_processor_id() / NLM_THREADS_PER_CORE;
 	nlm_smp_irq_init();
 }
 
@@ -142,22 +148,22 @@
 
 void nlm_boot_secondary(int logical_cpu, struct task_struct *idle)
 {
-	unsigned long gp = (unsigned long)task_thread_info(idle);
-	unsigned long sp = (unsigned long)__KSTK_TOS(idle);
-	int cpu = cpu_logical_map(logical_cpu);
+	int cpu, node;
 
-	nlm_next_sp = sp;
-	nlm_next_gp = gp;
+	cpu = cpu_logical_map(logical_cpu);
+	node = cpu / NLM_CPUS_PER_NODE;
+	nlm_next_sp = (unsigned long)__KSTK_TOS(idle);
+	nlm_next_gp = (unsigned long)task_thread_info(idle);
 
-	/* barrier */
+	/* barrier for sp/gp store above */
 	__sync();
-	nlm_pic_send_ipi(nlm_pic_base, cpu, 1, 1);
+	nlm_pic_send_ipi(nlm_get_node(node)->picbase, cpu, 1, 1);  /* NMI */
 }
 
 void __init nlm_smp_setup(void)
 {
 	unsigned int boot_cpu;
-	int num_cpus, i;
+	int num_cpus, i, ncore;
 
 	boot_cpu = hard_smp_processor_id();
 	cpumask_clear(&phys_cpu_present_map);
@@ -182,11 +188,16 @@
 		}
 	}
 
+	/* check with the cores we have woken up */
+	for (ncore = 0, i = 0; i < NLM_NR_NODES; i++)
+		ncore += hweight32(nlm_get_node(i)->coremask);
+
 	pr_info("Phys CPU present map: %lx, possible map %lx\n",
 		(unsigned long)cpumask_bits(&phys_cpu_present_map)[0],
 		(unsigned long)cpumask_bits(cpu_possible_mask)[0]);
 
-	pr_info("Detected %i Slave CPU(s)\n", num_cpus);
+	pr_info("Detected (%dc%dt) %d Slave CPU(s)\n", ncore,
+		nlm_threads_per_core, num_cpus);
 	nlm_set_nmi_handler(nlm_boot_secondary_cpus);
 }
 
@@ -196,7 +207,7 @@
 	int threadmode, i, j;
 
 	core0_thr_mask = 0;
-	for (i = 0; i < 4; i++)
+	for (i = 0; i < NLM_THREADS_PER_CORE; i++)
 		if (cpumask_test_cpu(i, wakeup_mask))
 			core0_thr_mask |= (1 << i);
 	switch (core0_thr_mask) {
@@ -217,9 +228,9 @@
 	}
 
 	/* Verify other cores CPU masks */
-	for (i = 0; i < NR_CPUS; i += 4) {
+	for (i = 0; i < NR_CPUS; i += NLM_THREADS_PER_CORE) {
 		core_thr_mask = 0;
-		for (j = 0; j < 4; j++)
+		for (j = 0; j < NLM_THREADS_PER_CORE; j++)
 			if (cpumask_test_cpu(i + j, wakeup_mask))
 				core_thr_mask |= (1 << j);
 		if (core_thr_mask != 0 && core_thr_mask != core0_thr_mask)