85xx: Added support for multicore boot mechanism

Added the cpu command that provides a generic mechanism to get status,
reset, and release secondary cores in multicore processors.

Added support for using the ePAPR-defined spin-table mechanism on 85xx.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
diff --git a/cpu/mpc85xx/Makefile b/cpu/mpc85xx/Makefile
index 2205dca..adbc585 100644
--- a/cpu/mpc85xx/Makefile
+++ b/cpu/mpc85xx/Makefile
@@ -29,6 +29,9 @@
 LIB	= $(obj)lib$(CPU).a
 
 START	= start.o resetvec.o
+SOBJS-$(CONFIG_MP) += release.o
+SOBJS	= $(SOBJS-y)
+COBJS-$(CONFIG_MP) += mp.o
 COBJS-$(CONFIG_OF_LIBFDT) += fdt.o
 COBJS	= traps.o cpu.o cpu_init.o speed.o interrupts.o tlb.o \
 	  pci.o serial_scc.o commproc.o ether_fcc.o spd_sdram.o qe_io.o \
diff --git a/cpu/mpc85xx/cpu_init.c b/cpu/mpc85xx/cpu_init.c
index 5f02e0e..fce0c48 100644
--- a/cpu/mpc85xx/cpu_init.c
+++ b/cpu/mpc85xx/cpu_init.c
@@ -33,6 +33,7 @@
 #include <asm/io.h>
 #include <asm/mmu.h>
 #include <asm/fsl_law.h>
+#include "mp.h"
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -328,5 +329,8 @@
 	qe_reset();
 #endif
 
+#if defined(CONFIG_MP)
+	setup_mp();
+#endif
 	return 0;
 }
diff --git a/cpu/mpc85xx/fdt.c b/cpu/mpc85xx/fdt.c
index a6b014c..43df1c7 100644
--- a/cpu/mpc85xx/fdt.c
+++ b/cpu/mpc85xx/fdt.c
@@ -28,6 +28,54 @@
 #include <fdt_support.h>
 
 extern void ft_qe_setup(void *blob);
+#ifdef CONFIG_MP
+#include "mp.h"
+DECLARE_GLOBAL_DATA_PTR;
+
+/* Mark the calling cpu's node "okay", give every other cpu node a spin-table
+ * release address, and reserve the boot page if it is below memory_limit. */
+void ft_fixup_cpu(void *blob, u64 memory_limit)
+{
+	int off;
+	ulong spin_tbl_addr = get_spin_addr();
+	u32 bootpg, id = get_my_id();
+
+	/* if we have 4G or more of memory, put the boot page at 4Gb-4k */
+	if ((u64)gd->ram_size > 0xfffff000)
+		bootpg = 0xfffff000;
+	else
+		bootpg = gd->ram_size - 4096;
+
+	off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4);
+	while (off >= 0) {	/* any libfdt error ends the walk */
+		u32 *reg = (u32 *)fdt_getprop(blob, off, "reg", 0);
+
+		if (!reg) {
+			printf("cpu NULL\n");
+		} else if (*reg == id) {
+			fdt_setprop_string(blob, off, "status", "okay");
+		} else {
+			/* each cpu owns one 24-byte spin table entry */
+			u32 val = cpu_to_fdt32(*reg * 24 + spin_tbl_addr);
+
+			fdt_setprop_string(blob, off, "status", "disabled");
+			fdt_setprop_string(blob, off, "enable-method",
+							"spin-table");
+			fdt_setprop(blob, off, "cpu-release-addr",
+					&val, sizeof(val));
+		}
+		off = fdt_node_offset_by_prop_value(blob, off,
+				"device_type", "cpu", 4);
+	}
+
+	/* Reserve the boot page so OSes don't use it */
+	if ((u64)bootpg < memory_limit) {
+		off = fdt_add_mem_rsv(blob, bootpg, (u64)4096);
+		if (off < 0)
+			printf("%s: %s\n", __FUNCTION__, fdt_strerror(off));
+	}
+}
+#endif
 
 void ft_cpu_setup(void *blob, bd_t *bd)
 {
@@ -62,4 +110,8 @@
 #endif
 
 	fdt_fixup_memory(blob, (u64)bd->bi_memstart, (u64)bd->bi_memsize);
+
+#ifdef CONFIG_MP
+	ft_fixup_cpu(blob, (u64)bd->bi_memstart + (u64)bd->bi_memsize);
+#endif
 }
diff --git a/cpu/mpc85xx/mp.c b/cpu/mpc85xx/mp.c
new file mode 100644
index 0000000..aa91cea
--- /dev/null
+++ b/cpu/mpc85xx/mp.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2008 Freescale Semiconductor.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <asm/processor.h>
+#include <ioports.h>
+#include <asm/io.h>
+#include "mp.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define BOOT_ENTRY_ADDR	0
+#define BOOT_ENTRY_PIR	1
+#define BOOT_ENTRY_R3	2
+#define BOOT_ENTRY_R4	3
+#define BOOT_ENTRY_R6	4
+#define BOOT_ENTRY_R7	5
+#define NUM_BOOT_ENTRY	6
+
+u32 get_my_id(void)
+{
+	return mfspr(SPRN_PIR);	/* PIR value of the core executing this */
+}
+
+int cpu_reset(int nr)
+{
+	volatile ccsr_pic_t *pic_regs = (void *)(CFG_MPC85xx_PIC_ADDR);
+
+	out_be32(&pic_regs->pir, 1 << nr);	/* set the bit for core nr */
+	(void)in_be32(&pic_regs->pir);		/* read the register back */
+	out_be32(&pic_regs->pir, 0);		/* then clear every bit again */
+	return 0;
+}
+
+/* Print the spin table base (nr == this core) or core nr's table entry. */
+int cpu_status(int nr)
+{
+	u32 *table, id = get_my_id();
+
+	if (nr == id) {
+		table = (u32 *)get_spin_addr();
+		printf("table base @ 0x%08lx\n", (ulong)table);
+	} else {
+		table = (u32 *)get_spin_addr() + nr * NUM_BOOT_ENTRY;
+		printf("Running on cpu %d\n\n", id);
+		printf("table @ 0x%08lx:\n", (ulong)table);
+		printf("   addr - 0x%08x\n", table[BOOT_ENTRY_ADDR]);
+		printf("   pir  - 0x%08x\n", table[BOOT_ENTRY_PIR]);
+		printf("   r3   - 0x%08x\n", table[BOOT_ENTRY_R3]);
+		printf("   r4   - 0x%08x\n", table[BOOT_ENTRY_R4]);
+		printf("   r6   - 0x%08x\n", table[BOOT_ENTRY_R6]);
+		printf("   r7   - 0x%08x\n", table[BOOT_ENTRY_R7]);
+	}
+
+	return 0;
+}
+
+/* Load core nr's spin table entry from argv[] (pir, r3, r4, r6, r7; an
+ * argument starting with '-' keeps the old value), then write boot_addr. */
+int cpu_release(int nr, unsigned long boot_addr, int argc, char *argv[])
+{
+	u32 *entry = (u32 *)get_spin_addr() + nr * NUM_BOOT_ENTRY;
+	u32 slot;
+
+	if (nr == get_my_id()) {
+		printf("Invalid to release the boot core.\n\n");
+		return 1;
+	}
+	if (argc != 5) {
+		printf("Invalid number of arguments to release.\n\n");
+		return 1;
+	}
+
+	for (slot = BOOT_ENTRY_PIR; slot <= BOOT_ENTRY_R7; slot++) {
+		char *arg = argv[slot - BOOT_ENTRY_PIR];
+
+		if (arg[0] != '-')
+			entry[slot] = simple_strtoul(arg, NULL, 16);
+	}
+	entry[BOOT_ENTRY_ADDR] = boot_addr;	/* addr word goes in last */
+
+	return 0;
+}
+
+/* Address of __spin_table at its offset within the page at 0xfffff000. */
+ulong get_spin_addr(void)
+{
+	extern ulong __secondary_start_page;
+	extern ulong __spin_table;
+	ulong page_start = (ulong)&__secondary_start_page;
+	ulong table_offset = (ulong)&__spin_table - page_start;
+
+	/* same arithmetic as the toreset() macro in release.S */
+	return table_offset + 0xfffff000;
+}
+
+static void pq3_mp_up(unsigned long bootpg)
+{	/* start the other cores from the boot page at bootpg, wait briefly */
+	u32 up, cpu_up_mask, whoami;
+	u32 *table = (u32 *)get_spin_addr();
+	volatile u32 bpcr;
+	volatile ccsr_local_ecm_t *ecm = (void *)(CFG_MPC85xx_ECM_ADDR);
+	volatile ccsr_gur_t *gur = (void *)(CFG_MPC85xx_GUTS_ADDR);
+	volatile ccsr_pic_t *pic = (void *)(CFG_MPC85xx_PIC_ADDR);
+	u32 devdisr;
+	int timeout = 10;	/* polling rounds, udelay(100) apart */
+
+	whoami = in_be32(&pic->whoami);	/* used below as this core's index */
+	out_be32(&ecm->bptr, 0x80000000 | (bootpg >> 12));	/* 4k page nr */
+
+	/* disable time base at the platform (presumably the other core's) */
+	devdisr = in_be32(&gur->devdisr);
+	if (whoami)
+		devdisr |= MPC85xx_DEVDISR_TB0;
+	else
+		devdisr |= MPC85xx_DEVDISR_TB1;
+	out_be32(&gur->devdisr, devdisr);
+
+	/* release the hounds: one eebpcr bit per cpu, shifted up by 24 */
+	up = ((1 << CONFIG_NR_CPUS) - 1);
+	bpcr = in_be32(&ecm->eebpcr);
+	bpcr |= (up << 24);
+	out_be32(&ecm->eebpcr, bpcr);
+	asm("sync; isync; msync");
+
+	cpu_up_mask = 1 << whoami;	/* this core counts as up already */
+	/* wait for everyone; nonzero entry word 0 means the core is up */
+	while (timeout) {
+		int i;
+		for (i = 1; i < CONFIG_NR_CPUS; i++) {
+			if (table[i * NUM_BOOT_ENTRY])
+				cpu_up_mask |= (1 << i);
+		};
+
+		if ((cpu_up_mask & up) == up)
+			break;
+
+		udelay(100);
+		timeout--;
+	}
+
+	/* set the remaining TB bit, zero our time base, then clear both */
+	if (whoami)
+		devdisr |= MPC85xx_DEVDISR_TB1;
+	else
+		devdisr |= MPC85xx_DEVDISR_TB0;
+	out_be32(&gur->devdisr, devdisr);
+	mtspr(SPRN_TBWU, 0);
+	mtspr(SPRN_TBWL, 0);
+
+	devdisr &= ~(MPC85xx_DEVDISR_TB0 | MPC85xx_DEVDISR_TB1);
+	out_be32(&gur->devdisr, devdisr);
+}
+
+/* Copy the secondary start page to the boot page (last 4k of RAM, or
+ * 0xfffff000 when RAM extends beyond that) and call pq3_mp_up() on it. */
+void setup_mp(void)
+{
+	extern ulong __secondary_start_page;
+	void *start_page = &__secondary_start_page;
+	u32 bootpg = 0xfffff000;	/* boot page when RAM ends above this */
+
+	/* otherwise the boot page is the last 4k of memory */
+	if ((u64)gd->ram_size <= 0xfffff000)
+		bootpg = gd->ram_size - 4096;
+
+	memcpy((void *)bootpg, start_page, 4096);
+	flush_cache(bootpg, 4096);
+
+	pq3_mp_up(bootpg);
+}
diff --git a/cpu/mpc85xx/mp.h b/cpu/mpc85xx/mp.h
new file mode 100644
index 0000000..d9fbb82
--- /dev/null
+++ b/cpu/mpc85xx/mp.h
@@ -0,0 +1,8 @@
+#ifndef __MPC85XX_MP_H_
+#define __MPC85XX_MP_H_
+
+ulong get_spin_addr(void);	/* address of the secondary-core spin table */
+void setup_mp(void);		/* set up the boot page, start other cores */
+u32 get_my_id(void);		/* PIR value of the calling core */
+
+#endif /* __MPC85XX_MP_H_ */
diff --git a/cpu/mpc85xx/release.S b/cpu/mpc85xx/release.S
new file mode 100644
index 0000000..fe1775c
--- /dev/null
+++ b/cpu/mpc85xx/release.S
@@ -0,0 +1,148 @@
+#include <config.h>
+#include <mpc85xx.h>
+#include <version.h>
+
+#define _LINUX_CONFIG_H 1	/* avoid reading Linux autoconf.h file	*/
+
+#include <ppc_asm.tmpl>
+#include <ppc_defs.h>
+
+#include <asm/cache.h>
+#include <asm/mmu.h>
+
+/* To boot secondary cpus, we need a place for them to start up.
+ * Normally, they start at 0xfffffffc, but that's usually the
+ * firmware, and we don't want to have to run the firmware again.
+ * Instead, the primary cpu will set the BPTR to point here to
+ * this page.  We then set up the core, and spin on its spin table
+ * entry until a release address shows up.  Note that this means
+ * that the code below must never exceed 1023 instructions (the
+ * branch at the end would then be the 1024th).
+ */
+	.globl	__secondary_start_page
+	.align	12
+__secondary_start_page:
+/* First do some preliminary setup */
+	lis	r3, HID0_EMCP@h		/* enable machine check */
+	ori	r3,r3,HID0_TBEN@l	/* enable Timebase */
+#ifdef CONFIG_PHYS_64BIT
+	ori	r3,r3,HID0_ENMAS7@l	/* enable MAS7 updates */
+#endif
+	mtspr	SPRN_HID0,r3
+
+	li	r3,(HID1_ASTME|HID1_ABE)@l	/* Addr streaming & broadcast */
+	mtspr	SPRN_HID1,r3
+
+	/* Enable branch prediction */
+	li	r3,0x201
+	mtspr	SPRN_BUCSR,r3
+
+	/* Enable/invalidate the I-Cache */
+	mfspr	r0,SPRN_L1CSR1
+	ori	r0,r0,(L1CSR1_ICFI|L1CSR1_ICE)
+	mtspr	SPRN_L1CSR1,r0
+	isync
+
+	/* Enable/invalidate the D-Cache */
+	mfspr	r0,SPRN_L1CSR0
+	ori	r0,r0,(L1CSR0_DCFI|L1CSR0_DCE)
+	msync
+	isync
+	mtspr	SPRN_L1CSR0,r0
+	isync
+
+#define toreset(x) (x - __secondary_start_page + 0xfffff000)
+
+	/* r3 = address of the spin table within the page at 0xfffff000 */
+	lis	r3,toreset(__spin_table)@h
+	ori	r3,r3,toreset(__spin_table)@l
+
+	/* r9 has the base address for the entry: table + PIR * 24 */
+	mfspr	r0,SPRN_PIR
+	mr	r4,r0
+	slwi	r8,r4,4		/* r8 = PIR * 16 */
+	slwi	r9,r4,3		/* r9 = PIR * 8 */
+	add	r8,r8,r9	/* r8 = PIR * 24, the size of one entry */
+	add	r9,r3,r8
+
+#define EPAPR_MAGIC	(0x65504150)
+#define ENTRY_ADDR	0
+#define ENTRY_PIR	4
+#define ENTRY_R3	8
+#define ENTRY_R4	12
+#define ENTRY_R6	16
+#define ENTRY_R7	20
+
+	/* setup the entry: addr = 1, pir = our PIR, r6 = magic, rest = 0 */
+	li	r4,0
+	li	r8,1
+	lis	r6,EPAPR_MAGIC@h
+	ori	r6,r6,EPAPR_MAGIC@l
+	stw	r0,ENTRY_PIR(r9)
+	stw	r8,ENTRY_ADDR(r9)
+	stw	r4,ENTRY_R3(r9)
+	stw	r4,ENTRY_R4(r9)
+	stw	r6,ENTRY_R6(r9)
+	stw	r4,ENTRY_R7(r9)
+
+	/* spin waiting for addr: loop for as long as its bit 0 is set */
+1:	lwz	r4,ENTRY_ADDR(r9)
+	andi.	r11,r4,1
+	bne	1b
+
+	/* setup branch addr */
+	mtctr	r4
+
+	/* mark the entry as released */
+	li	r8,3
+	stw	r8,ENTRY_ADDR(r9)
+
+	/* mask by ~64M to setup our tlb we will jump to */
+	rlwinm	r8,r4,0,0,5
+
+	/* setup r3, r4, r5, r6, r7 (r5 is always 0) */
+	lwz	r3,ENTRY_R3(r9)
+	lwz	r4,ENTRY_R4(r9)
+	li	r5,0
+	lwz	r6,ENTRY_R6(r9)
+	lwz	r7,ENTRY_R7(r9)
+
+	/* load up the pir, then store back what the register now holds */
+	lwz	r0,ENTRY_PIR(r9)
+	mtspr	SPRN_PIR,r0
+	mfspr	r0,SPRN_PIR
+	stw	r0,ENTRY_PIR(r9)
+
+/*
+ * Coming here, we know the cpu has one TLB mapping in TLB1[0]
+ * which maps 0xfffff000-0xffffffff one-to-one.  We set up a
+ * second mapping that maps addr 1:1 for 64M, and then we jump to
+ * addr
+ */
+	lis	r9,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h
+	mtspr	SPRN_MAS0,r9
+	lis	r9,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r9,r9,(MAS1_TSIZE(BOOKE_PAGESZ_64M))@l
+	mtspr	SPRN_MAS1,r9
+	/* WIMGE = 0b00000 for now */
+	mtspr	SPRN_MAS2,r8
+	ori	r8,r8,(MAS3_SX|MAS3_SW|MAS3_SR)
+	mtspr	SPRN_MAS3,r8
+	tlbwe
+
+/* Now we have another mapping for this page, so we jump to that
+ * mapping (ctr was loaded with the released addr above)
+ */
+	bctr
+
+	.align 3
+	.globl __spin_table
+__spin_table:
+	.space CONFIG_NR_CPUS*24	/* one 24-byte entry per cpu */
+
+	/* Fill in the empty space.  The actual reset vector is
+	 * the last word of the page */
+__secondary_start_code_end:
+	.space 4092 - (__secondary_start_code_end - __secondary_start_page)
+__secondary_reset_vector:
+	b	__secondary_start_page