powerpc/mpc85xx: Rewrite spin table to comply with ePAPR v1.1

Move the spin table into cached memory to comply with ePAPR v1.1.
Load r3 with a 64-bit value when CONFIG_SYS_PPC64 is defined.

The 'M' bit is set on the DDR TLB entries to maintain cache coherence.

See details in doc/README.mpc85xx-spin-table.
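
Each spin table entry is now padded to a 64-byte cache line
(NUM_BOOT_ENTRY is 16 u32 words, i.e. 16 * 4 = 64 bytes), so a single
entry can be flushed or invalidated without touching its neighbours.
A sketch of the resulting layout, following the comment added in
release.S (the padding field is illustrative, not a named field in the
code):

	struct {
		u64 entry_addr;	/* low bit set while the core is held */
		u64 r3;		/* value loaded into r3 on release */
		u32 rsvd1;
		u32 pir;
		u8  pad[40];	/* pad the entry to one cache line */
	};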

Signed-off-by: York Sun <yorksun@freescale.com>
Signed-off-by: Andy Fleming <afleming@freescale.com>
diff --git a/arch/powerpc/cpu/mpc85xx/fdt.c b/arch/powerpc/cpu/mpc85xx/fdt.c
index 8221481..a364ef2 100644
--- a/arch/powerpc/cpu/mpc85xx/fdt.c
+++ b/arch/powerpc/cpu/mpc85xx/fdt.c
@@ -47,7 +47,7 @@
 void ft_fixup_cpu(void *blob, u64 memory_limit)
 {
 	int off;
-	ulong spin_tbl_addr = get_spin_phys_addr();
+	phys_addr_t spin_tbl_addr = get_spin_phys_addr();
 	u32 bootpg = determine_mp_bootpg(NULL);
 	u32 id = get_my_id();
 	const char *enable_method;
@@ -97,7 +97,16 @@
 	if ((u64)bootpg < memory_limit) {
 		off = fdt_add_mem_rsv(blob, bootpg, (u64)4096);
 		if (off < 0)
-			printf("%s: %s\n", __FUNCTION__, fdt_strerror(off));
+			printf("Failed to reserve memory for bootpg: %s\n",
+				fdt_strerror(off));
+	}
+	/* Reserve spin table page */
+	if (spin_tbl_addr < memory_limit) {
+		off = fdt_add_mem_rsv(blob,
+			(spin_tbl_addr & ~0xffful), 4096);
+		if (off < 0)
+			printf("Failed to reserve memory for spin table: %s\n",
+				fdt_strerror(off));
 	}
 }
 #endif
diff --git a/arch/powerpc/cpu/mpc85xx/mp.c b/arch/powerpc/cpu/mpc85xx/mp.c
index 881b681..e1197ac 100644
--- a/arch/powerpc/cpu/mpc85xx/mp.c
+++ b/arch/powerpc/cpu/mpc85xx/mp.c
@@ -33,6 +33,8 @@
 DECLARE_GLOBAL_DATA_PTR;
 u32 fsl_ddr_get_intl3r(void);
 
+extern u32 __spin_table[];
+
 u32 get_my_id()
 {
 	return mfspr(SPRN_PIR);
@@ -78,10 +80,10 @@
 		return 0;
 
 	if (nr == id) {
-		table = (u32 *)get_spin_virt_addr();
+		table = (u32 *)&__spin_table;
 		printf("table base @ 0x%p\n", table);
 	} else {
-		table = (u32 *)get_spin_virt_addr() + nr * NUM_BOOT_ENTRY;
+		table = (u32 *)&__spin_table + nr * NUM_BOOT_ENTRY;
 		printf("Running on cpu %d\n", id);
 		printf("\n");
 		printf("table @ 0x%p\n", table);
@@ -154,7 +156,7 @@
 
 int cpu_release(int nr, int argc, char * const argv[])
 {
-	u32 i, val, *table = (u32 *)get_spin_virt_addr() + nr * NUM_BOOT_ENTRY;
+	u32 i, val, *table = (u32 *)&__spin_table + nr * NUM_BOOT_ENTRY;
 	u64 boot_addr;
 
 	if (hold_cores_in_reset(1))
@@ -200,11 +202,11 @@
 	struct law_entry e;
 #endif
 
-	/* if we have 4G or more of memory, put the boot page at 4Gb-4k */
-	if ((u64)gd->ram_size > 0xfffff000)
-		bootpg = 0xfffff000;
-	else
-		bootpg = gd->ram_size - 4096;
+	/* use last 4K of mapped memory */
+	bootpg = ((gd->ram_size > CONFIG_MAX_MEM_MAPPED) ?
+		CONFIG_MAX_MEM_MAPPED : gd->ram_size) +
+		CONFIG_SYS_SDRAM_BASE - 4096;
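+	/* e.g. 0x7ffff000 with 4 GiB of DDR, 2 GiB CONFIG_MAX_MEM_MAPPED, base 0 */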
 	if (pagesize)
 		*pagesize = 4096;
 
@@ -255,29 +257,16 @@
 	return bootpg;
 }
 
-ulong get_spin_phys_addr(void)
+phys_addr_t get_spin_phys_addr(void)
 {
-	extern ulong __secondary_start_page;
-	extern ulong __spin_table;
-
-	return (determine_mp_bootpg() +
-		(ulong)&__spin_table - (ulong)&__secondary_start_page);
-}
-
-ulong get_spin_virt_addr(void)
-{
-	extern ulong __secondary_start_page;
-	extern ulong __spin_table;
-
-	return (CONFIG_BPTR_VIRT_ADDR +
-		(ulong)&__spin_table - (ulong)&__secondary_start_page);
+	return virt_to_phys(&__spin_table);
 }
 
 #ifdef CONFIG_FSL_CORENET
 static void plat_mp_up(unsigned long bootpg, unsigned int pagesize)
 {
 	u32 cpu_up_mask, whoami, brsize = LAW_SIZE_4K;
-	u32 *table = (u32 *)get_spin_virt_addr();
+	u32 *table = (u32 *)&__spin_table;
 	volatile ccsr_gur_t *gur;
 	volatile ccsr_local_t *ccm;
 	volatile ccsr_rcpm_t *rcpm;
@@ -356,7 +345,7 @@
 static void plat_mp_up(unsigned long bootpg, unsigned int pagesize)
 {
 	u32 up, cpu_up_mask, whoami;
-	u32 *table = (u32 *)get_spin_virt_addr();
+	u32 *table = (u32 *)&__spin_table;
 	volatile u32 bpcr;
 	volatile ccsr_local_ecm_t *ecm = (void *)(CONFIG_SYS_MPC85xx_ECM_ADDR);
 	volatile ccsr_gur_t *gur = (void *)(CONFIG_SYS_MPC85xx_GUTS_ADDR);
@@ -440,10 +429,11 @@
 
 void setup_mp(void)
 {
-	extern ulong __secondary_start_page;
-	extern ulong __bootpg_addr;
+	extern u32 __secondary_start_page;
+	extern u32 __bootpg_addr, __spin_table_addr, __second_half_boot_page;
 
-	ulong fixup = (ulong)&__secondary_start_page;
+	int i;
+	ulong fixup = (u32)&__secondary_start_page;
 	u32 bootpg, bootpg_map, pagesize;
 
 	bootpg = determine_mp_bootpg(&pagesize);
@@ -464,11 +454,20 @@
 	if (hold_cores_in_reset(0))
 		return;
 
-	/* Store the bootpg's SDRAM address for use by secondary CPU cores */
-	__bootpg_addr = bootpg;
+	/*
+	 * Store the address of the cacheable half of the boot page, from
+	 * which the secondary CPU cores continue to boot
+	 */
+	__bootpg_addr = (u32)virt_to_phys(&__second_half_boot_page);
+
+	/* Store spin table's physical address for use by secondary cores */
+	__spin_table_addr = (u32)get_spin_phys_addr();
+
+	/* flush the bootpg before copying, to invalidate any stale cachelines */
+	flush_cache(bootpg, 4096);
 
 	/* look for the tlb covering the reset page, there better be one */
-	int i = find_tlb_idx((void *)CONFIG_BPTR_VIRT_ADDR, 1);
+	i = find_tlb_idx((void *)CONFIG_BPTR_VIRT_ADDR, 1);
 
 	/* we found a match */
 	if (i != -1) {
diff --git a/arch/powerpc/cpu/mpc85xx/mp.h b/arch/powerpc/cpu/mpc85xx/mp.h
index 87bac37..ad9950b 100644
--- a/arch/powerpc/cpu/mpc85xx/mp.h
+++ b/arch/powerpc/cpu/mpc85xx/mp.h
@@ -3,8 +3,7 @@
 
 #include <asm/mp.h>
 
-ulong get_spin_phys_addr(void);
-ulong get_spin_virt_addr(void);
+phys_addr_t get_spin_phys_addr(void);
 u32 get_my_id(void);
 int hold_cores_in_reset(int verbose);
 
@@ -16,7 +15,7 @@
 #define BOOT_ENTRY_PIR		5
 #define BOOT_ENTRY_R6_UPPER	6
 #define BOOT_ENTRY_R6_LOWER	7
-#define NUM_BOOT_ENTRY		8
+#define NUM_BOOT_ENTRY		16	/* pad to 64 bytes */
 #define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
 
 #endif
diff --git a/arch/powerpc/cpu/mpc85xx/release.S b/arch/powerpc/cpu/mpc85xx/release.S
index d08b1f3..4ba44a9 100644
--- a/arch/powerpc/cpu/mpc85xx/release.S
+++ b/arch/powerpc/cpu/mpc85xx/release.S
@@ -150,10 +150,14 @@
 #define toreset(x) (x - __secondary_start_page + 0xfffff000)
 
 	/* get our PIR to figure out our table entry */
-	lis	r3,toreset(__spin_table)@h
-	ori	r3,r3,toreset(__spin_table)@l
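+	/* __spin_table_addr holds the physical address of the spin table */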
+	lis	r3,toreset(__spin_table_addr)@h
+	ori	r3,r3,toreset(__spin_table_addr)@l
+	lwz	r3,0(r3)
 
-	/* r10 has the base address for the entry */
+	/*
+	 * r10 has the base address for the entry.
+	 * We cannot access it until a new TLB mapping is set up.
+	 */
 	mfspr	r0,SPRN_PIR
 #if	defined(CONFIG_E6500)
 /*
@@ -180,7 +184,7 @@
 #else
 	mr	r4,r0
 #endif
-	slwi	r8,r4,5
+	slwi	r8,r4,6	/* each spin table entry is padded to 64 bytes */
 	add	r10,r3,r8
 
 #ifdef CONFIG_E6500
@@ -277,73 +281,111 @@
 	beq	2b
 #endif
 3:
+	/* setup mapping for the spin table, WIMGE=0b00110 (M and G set) */
+	lis	r13,toreset(__spin_table_addr)@h
+	ori	r13,r13,toreset(__spin_table_addr)@l
+	lwz	r13,0(r13)
+	/* mask by 4K */
+	rlwinm	r13,r13,0,0,19
 
-#define EPAPR_MAGIC		(0x45504150)
+	lis	r11,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h
+	mtspr	SPRN_MAS0,r11
+	lis	r11,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r11,r11,(MAS1_TS|MAS1_TSIZE(BOOKE_PAGESZ_4K))@l
+	mtspr	SPRN_MAS1,r11
+	oris	r11,r13,(MAS2_M|MAS2_G)@h
+	ori	r11,r13,(MAS2_M|MAS2_G)@l
+	mtspr	SPRN_MAS2,r11
+	oris	r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@h
+	ori	r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@l
+	mtspr	SPRN_MAS3,r11
+	li	r11,0
+	mtspr	SPRN_MAS7,r11
+	tlbwe
+
+	/*
+	 * __bootpg_addr has the address of __second_half_boot_page.
+	 * Jump there in AS=1 space with the cache enabled.
+	 */
+	lis	r13,toreset(__bootpg_addr)@h
+	ori	r13,r13,toreset(__bootpg_addr)@l
+	lwz	r11,0(r13)
+	mtspr	SPRN_SRR0,r11
+	mfmsr	r13
+	ori	r12,r13,MSR_IS|MSR_DS@l
+	mtspr	SPRN_SRR1,r12
+	rfi
+
+	/*
+	 * Allocate some space for the SDRAM address of the cacheable half
+	 * of the boot page.  This variable has to be in the boot page so
+	 * that it can be accessed by secondary cores when they come out of
+	 * reset.
+	 */
+	.align L1_CACHE_SHIFT
+	.globl __bootpg_addr
+__bootpg_addr:
+	.long	0
+
+	.global __spin_table_addr
+__spin_table_addr:
+	.long	0
+
+	/*
+	 * This variable is set by cpu_init_r() after parsing hwconfig
+	 * to enable workaround for erratum NMG_CPU_A011.
+	 */
+	.align L1_CACHE_SHIFT
+	.global enable_cpu_a011_workaround
+enable_cpu_a011_workaround:
+	.long	1
+
+	/* Fill in the empty space.  The actual reset vector is
+	 * the last word of the page */
+__secondary_start_code_end:
+	.space 4092 - (__secondary_start_code_end - __secondary_start_page)
+__secondary_reset_vector:
+	b	__secondary_start_page
+
+
+/* this is a separate page for the spin table and cacheable boot code */
+	.align L1_CACHE_SHIFT
+	.global __second_half_boot_page
+__second_half_boot_page:
+#define EPAPR_MAGIC		0x45504150
 #define ENTRY_ADDR_UPPER	0
 #define ENTRY_ADDR_LOWER	4
 #define ENTRY_R3_UPPER		8
 #define ENTRY_R3_LOWER		12
 #define ENTRY_RESV		16
 #define ENTRY_PIR		20
-#define ENTRY_R6_UPPER		24
-#define ENTRY_R6_LOWER		28
-#define ENTRY_SIZE		32
-
-	/* setup the entry */
+#define ENTRY_SIZE		64
+	/*
+	 * Set up the entry.
+	 * r10 has the base address of this core's spin table entry.
+	 * A spin table entry is defined as
+	 * struct {
+	 *	uint64_t entry_addr;
+	 *	uint64_t r3;
+	 *	uint32_t rsvd1;
+	 *	uint32_t pir;
+	 * };
+	 * We pad this struct to 64 bytes so each entry sits in its own cacheline.
+	 */
 	li	r3,0
 	li	r8,1
-	stw	r4,ENTRY_PIR(r10)
+	mfspr	r4,SPRN_PIR
 	stw	r3,ENTRY_ADDR_UPPER(r10)
-	stw	r8,ENTRY_ADDR_LOWER(r10)
 	stw	r3,ENTRY_R3_UPPER(r10)
 	stw	r4,ENTRY_R3_LOWER(r10)
-	stw	r3,ENTRY_R6_UPPER(r10)
-	stw	r3,ENTRY_R6_LOWER(r10)
-
-	/* load r13 with the address of the 'bootpg' in SDRAM */
-	lis	r13,toreset(__bootpg_addr)@h
-	ori	r13,r13,toreset(__bootpg_addr)@l
-	lwz	r13,0(r13)
-
-	/* setup mapping for AS = 1, and jump there */
-	lis	r11,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h
-	mtspr	SPRN_MAS0,r11
-	lis	r11,(MAS1_VALID|MAS1_IPROT)@h
-	ori	r11,r11,(MAS1_TS|MAS1_TSIZE(BOOKE_PAGESZ_4K))@l
-	mtspr	SPRN_MAS1,r11
-	oris	r11,r13,(MAS2_I|MAS2_G)@h
-	ori	r11,r13,(MAS2_I|MAS2_G)@l
-	mtspr	SPRN_MAS2,r11
-	oris	r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@h
-	ori	r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@l
-	mtspr	SPRN_MAS3,r11
-	tlbwe
-
-	bl	1f
-1:	mflr	r11
-	/*
-	 * OR in 0xfff to create a mask of the bootpg SDRAM address.  We use
-	 * this mask to fixup the cpu spin table and the address that we want
-	 * to jump to, eg change them from 0xfffffxxx to 0x7ffffxxx if the
-	 * bootpg is at 0x7ffff000 in SDRAM.
-	 */
-	ori	r13,r13,0xfff
-	and	r11, r11, r13
-	and	r10, r10, r13
-
-	addi	r11,r11,(2f-1b)
-	mfmsr	r13
-	ori	r12,r13,MSR_IS|MSR_DS@l
-
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-	rfi
+	stw	r3,ENTRY_RESV(r10)
+	stw	r4,ENTRY_PIR(r10)
+	msync
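+	/* make the entry fields visible before publishing the hold value */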
+	stw	r8,ENTRY_ADDR_LOWER(r10)
 
 	/* spin waiting for addr */
-2:
-	lwz	r4,ENTRY_ADDR_LOWER(r10)
+3:	lwz	r4,ENTRY_ADDR_LOWER(r10)
 	andi.	r11,r4,1
-	bne	2b
+	bne	3b
 	isync
 
 	/* setup IVORs to match fixed offsets */
@@ -362,8 +404,17 @@
 	/* mask by ~64M to setup our tlb we will jump to */
 	rlwinm	r12,r4,0,0,5
 
-	/* setup r3, r4, r5, r6, r7, r8, r9 */
+	/*
+	 * Set up r3, r4, r5, r6, r7, r8, r9.
+	 * r3 holds the value to place in the r3 register at secondary cpu
+	 * entry.  The high 32 bits are ignored on 32-bit chip
+	 * implementations, but 64-bit implementations must load all 64 bits.
+	 */
+#ifdef CONFIG_SYS_PPC64
+	ld	r3,ENTRY_R3_UPPER(r10)
+#else
 	lwz	r3,ENTRY_R3_LOWER(r10)
+#endif
 	li	r4,0
 	li	r5,0
 	li	r6,0
@@ -404,32 +455,10 @@
 	mtspr	SPRN_SRR1,r13
 	rfi
 
-	/*
-	 * Allocate some space for the SDRAM address of the bootpg.
-	 * This variable has to be in the boot page so that it can
-	 * be accessed by secondary cores when they come out of reset.
-	 */
-	.globl __bootpg_addr
-__bootpg_addr:
-	.long	0
 
-	.align L1_CACHE_SHIFT
+	.align 6
 	.globl __spin_table
 __spin_table:
 	.space CONFIG_MAX_CPUS*ENTRY_SIZE
-
-	/*
-	 * This variable is set by cpu_init_r() after parsing hwconfig
-	 * to enable workaround for erratum NMG_CPU_A011.
-	 */
-	.align L1_CACHE_SHIFT
-	.global enable_cpu_a011_workaround
-enable_cpu_a011_workaround:
-	.long	1
-
-	/* Fill in the empty space.  The actual reset vector is
-	 * the last word of the page */
-__secondary_start_code_end:
-	.space 4092 - (__secondary_start_code_end - __secondary_start_page)
-__secondary_reset_vector:
-	b	__secondary_start_page
+__spin_table_end:
+	.space 4096 - (__spin_table_end - __spin_table)
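
For context (not part of this patch): a minimal sketch of how an OS
could release a core through this spin table under ePAPR v1.1. The
struct and function names are illustrative; the real consumer is the
kernel's own SMP bringup code.

	#include <linux/types.h>

	struct epapr_spin_entry {	/* one 64-byte cache line per core */
		u64 entry_addr;		/* low bit set: core is held */
		u64 r3;
		u32 rsvd1;
		u32 pir;
		u8  pad[40];
	};

	static void release_core(volatile struct epapr_spin_entry *e,
				 u64 addr, u64 r3_val)
	{
		e->r3 = r3_val;		/* argument the core picks up in r3 */
		__asm__ __volatile__("msync" : : : "memory");
		e->entry_addr = addr;	/* low bit clear releases the core */
	}

On a 32-bit core a u64 store is two word stores; this is why
cpu_release() in mp.c writes the upper word before the lower word that
carries the hold bit.
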
diff --git a/arch/powerpc/cpu/mpc85xx/tlb.c b/arch/powerpc/cpu/mpc85xx/tlb.c
index 929f6a6..a548dec 100644
--- a/arch/powerpc/cpu/mpc85xx/tlb.c
+++ b/arch/powerpc/cpu/mpc85xx/tlb.c
@@ -249,7 +249,7 @@
 {
 	int i;
 	unsigned int tlb_size;
-	unsigned int wimge = 0;
+	unsigned int wimge = MAS2_M;
 	unsigned int ram_tlb_address = (unsigned int)CONFIG_SYS_DDR_SDRAM_BASE;
 	unsigned int max_cam;
 	u64 size, memsize = (u64)memsize_in_meg << 20;
diff --git a/arch/powerpc/include/asm/config_mpc85xx.h b/arch/powerpc/include/asm/config_mpc85xx.h
index 92ca2ad..8a7c81b 100644
--- a/arch/powerpc/include/asm/config_mpc85xx.h
+++ b/arch/powerpc/include/asm/config_mpc85xx.h
@@ -416,6 +416,7 @@
 #define CONFIG_SYS_FSL_ERRATUM_SRIO_A004034
 
 #elif defined(CONFIG_PPC_P5020) /* also supports P5010 */
+#define CONFIG_SYS_PPC64		/* 64-bit core */
 #define CONFIG_SYS_FSL_QORIQ_CHASSIS1
 #define CONFIG_MAX_CPUS			2
 #define CONFIG_SYS_FSL_NUM_CC_PLLS	2
@@ -485,6 +486,7 @@
 #define CONFIG_SYS_FSL_ERRATUM_ESDHC111
 
 #elif defined(CONFIG_PPC_T4240)
+#define CONFIG_SYS_PPC64		/* 64-bit core */
 #define CONFIG_FSL_CORENET		/* Freescale CoreNet platform */
 #define CONFIG_SYS_FSL_QORIQ_CHASSIS2	/* Freescale Chassis generation 2 */
 #define CONFIG_SYS_FSL_QMAN_V3		/* QMAN version 3 */
@@ -516,6 +518,7 @@
 #define CONFIG_SYS_CCSRBAR_DEFAULT	0xfe000000
 
 #elif defined(CONFIG_PPC_B4860)
+#define CONFIG_SYS_PPC64		/* 64-bit core */
 #define CONFIG_FSL_CORENET		/* Freescale CoreNet platform */
 #define CONFIG_SYS_FSL_QORIQ_CHASSIS2	/* Freescale Chassis generation 2 */
 #define CONFIG_SYS_FSL_QMAN_V3		/* QMAN version 3 */