mips: octeon: use mips_mach_early_init() to copy to L2 cache

This patch adds code to the Octeon platform that copies the U-Boot
image from its bootrom location to a different location (TEXT_BASE).
It is used in this case to copy the complete U-Boot image into L2
cache, which greatly improves the bootup time - especially with
regard to the very long and complex DDR4 init code.

The Kconfig symbol CONFIG_MIPS_MACH_EARLY_INIT is enabled with this
patch for Octeon.

Signed-off-by: Stefan Roese <sr@denx.de>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d2a0a35..997e145 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -116,6 +116,7 @@
 	select DM_GPIO
 	select DM_ETH
 	select MIPS_L2_CACHE
+	select MIPS_MACH_EARLY_INIT
 	select MIPS_TUNE_OCTEON3
 	select ROM_EXCEPTION_VECTORS
 	select SUPPORTS_BIG_ENDIAN
diff --git a/arch/mips/mach-octeon/lowlevel_init.S b/arch/mips/mach-octeon/lowlevel_init.S
index d9aab38..fa87cb4 100644
--- a/arch/mips/mach-octeon/lowlevel_init.S
+++ b/arch/mips/mach-octeon/lowlevel_init.S
@@ -17,3 +17,53 @@
 	jr	ra
 	 nop
 	END(lowlevel_init)
+
+LEAF(mips_mach_early_init)
+	/* Copy U-Boot from its run address to its link address (_start) */
+	move	s0, ra		/* Save return address, bal below overwrites ra */
+
+	bal	__dummy
+	 nop
+
+__dummy:
+	/* Get the actual address that we are running at */
+	PTR_LA	a7, __dummy
+	dsubu	t3, ra, a7	/* t3 now has reloc offset (run - link) */
+
+	PTR_LA	t1, _start
+	daddu	t0, t1, t3	/* t0 now has actual address of _start */
+
+	/* Calculate end address of copy loop */
+	PTR_LA	t2, _end
+	daddiu	t2, t2, 0x4000	/* Increase size to include appended DTB */
+	daddiu	t2, t2, 127
+	dins	t2, zero, 0, 7	/* Round up to 128-byte cache line (64-bit op) */
+
+	/* Copy ourselves to the L2 cache from flash, 32 bytes at a time */
+1:
+	ld	a0, 0(t0)
+	ld	a1, 8(t0)
+	ld	a2, 16(t0)
+	ld	a3, 24(t0)
+	sd	a0, 0(t1)
+	sd	a1, 8(t1)
+	sd	a2, 16(t1)
+	sd	a3, 24(t1)
+	daddiu	t0, t0, 32	/* 64-bit pointer increment, like daddu/dsubu */
+	daddiu	t1, t1, 32
+	bne	t1, t2, 1b	/* Exits on equality: needs _start 32-byte aligned */
+	 nop
+
+	sync
+
+	/*
+	 * Return to start.S now running from TEXT_BASE, which points
+	 * to DRAM address space, which effectively is L2 cache now.
+	 * This speeds up the init process considerably, especially the
+	 * DDR init code.
+	 */
+	dsubu	s0, s0, t3	/* Fixup return address with reloc offset */
+	jr.hb	s0		/* Jump back with hazard barrier */
+	 nop
+
+	END(mips_mach_early_init)