ARM: UniPhier: optimize kicking secondary CPUs code

Currently, the secondary CPU(s) are kicked three times:
Boot ROM ---(kick)--> SPL ---(kick)--> U-Boot ---(kick)--> Linux.
It makes the boot sequence very complicated.

This commit merges the second and the third kicks, so the secondary
CPU(s) can directly jump from SPL to Linux.
arch/arm/mach-uniphier/smp.S is no longer necessary.

Linux boot test passed.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
diff --git a/arch/arm/mach-uniphier/lowlevel_init.S b/arch/arm/mach-uniphier/lowlevel_init.S
index 4a23ea4..825b160 100644
--- a/arch/arm/mach-uniphier/lowlevel_init.S
+++ b/arch/arm/mach-uniphier/lowlevel_init.S
@@ -48,6 +48,25 @@
 	bl	enable_mmu
 
 #ifdef CONFIG_UNIPHIER_SMP
+secondary_startup:
+	/*
+	 * Entry point for secondary CPUs
+	 *
+	 * The Boot ROM has already enabled MMU for the secondary CPUs as well
+	 * as for the primary one.  The MMU table embedded in the Boot ROM
+	 * prohibits the DRAM access, so it is impossible to bring the
+	 * secondary CPUs into DRAM directly.  They must jump here into SPL,
+	 * which is run on L2 cache.
+	 *
+	 * Boot Sequence
+	 *  [primary CPU]                    [secondary CPUs]
+	 *  start from Boot ROM             start from Boot ROM
+	 *     jump to SPL                    sleep in Boot ROM
+	 *  kick secondaries   ---(sev)--->    jump to SPL
+	 *  jump to U-Boot main               sleep in SPL
+	 *  jump to Linux
+	 *  kick secondaries   ---(sev)--->    jump to Linux
+	 */
 	/*
 	 * ACTLR (Auxiliary Control Register) for Cortex-A9
 	 * bit[9]  Parity on
@@ -68,17 +87,28 @@
 	and  	r0, r0, #0x3
 	cmp	r0, #0x0
 	beq	primary_cpu
-	ldr	r1, =ROM_BOOT_ROMRSV2
+	/* only for secondary CPUs */
+	ldr	r1, =ROM_BOOT_ROMRSV2	@ The last data access to L2 cache
+	mrc	p15, 0, r0, c1, c0, 0	@ SCTLR (System Control Register)
+	orr	r0, r0, #CR_I		@ Enable ICache
+	bic	r0, r0, #(CR_C | CR_M)	@ MMU and Dcache must be disabled
+	mcr	p15, 0, r0, c1, c0, 0	@ before jumping to Linux
 	mov	r0, #0
 	str	r0, [r1]
-0:	wfe
-	ldr	r0, [r1]
+	b	1f
+	/*
+	 * L2 cache is shared among all the CPUs and it might be disabled by
+	 * the primary one.  Before that, the following 5 lines must be cached
+	 * on the Icaches of the secondary CPUs.
+	 */
+0:	wfe				@ kicked by Linux
+1:	ldr	r0, [r1]
 	cmp	r0, #0
-	beq	0b
-	bx	r0			@ r0: entry point of U-Boot main for the secondary CPU
+	bxne	r0			@ r0: Linux entry for secondary CPUs
+	b	0b
 primary_cpu:
 	ldr	r1, =ROM_BOOT_ROMRSV2
-	ldr	r0, =_start		@ entry for the secondary CPU
+	ldr	r0, =secondary_startup
 	str	r0, [r1]
 	ldr	r0, [r1]		@ make sure str is complete before sev
 	sev				@ kick the secondary CPU