MIPS: Flush data cache upon relocation

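This patch adds a data cache flush upon relocation: flush_cache() is
now called on the relocated image (destination address and size) before
jumping to it. The current implementation is missing this flush; only a
comment states that it should be done. So let's actually do it now.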

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Shinya Kuribayashi <skuribay@ruby.dti.ne.jp>
diff --git a/cpu/mips/start.S b/cpu/mips/start.S
index f6069b3..57db589 100644
--- a/cpu/mips/start.S
+++ b/cpu/mips/start.S
@@ -311,6 +311,7 @@
 	la	t3, in_ram
 	lw	t2, -12(t3)	/* t2 <-- uboot_end_data	*/
 	move	t1, a2
+	move	s2, a2		/* s2 <-- destination address	*/
 
 	/*
 	 * Fix $gp:
@@ -320,13 +321,21 @@
 	move	t6, gp
 	sub	gp, CONFIG_SYS_MONITOR_BASE
 	add	gp, a2		/* gp now adjusted		*/
-	sub	t6, gp, t6	/* t6 <-- relocation offset	*/
+	sub	s1, gp, t6	/* s1 <-- relocation offset	*/
 
 	/*
 	 * t0 = source address
 	 * t1 = target address
 	 * t2 = source end address
 	 */
+
+	/*
+	 * Save destination address and size for later usage in flush_cache()
+	 */
+	move	s0, a1		/* save gd in s0		*/
+	move	a0, t1		/* a0 <-- destination addr	*/
+	sub	a1, t2, t0	/* a1 <-- size			*/
+
 	/* On the purple board we copy the code earlier in a special way
 	 * in order to solve flash problems
 	 */
@@ -342,9 +351,14 @@
 	/* If caches were enabled, we would have to flush them here.
 	 */
 
+	/* a0 & a1 are already set up for flush_cache(start, size) */
+	la	t9, flush_cache
+	jalr	t9
+	nop
+
 	/* Jump to where we've relocated ourselves.
 	 */
-	addi	t0, a2, in_ram - _start
+	addi	t0, s2, in_ram - _start
 	jr	t0
 	nop
 
@@ -371,7 +385,7 @@
 1:
 	lw	t1, 0(t4)
 	beqz	t1, 2f
-	add	t1, t6
+	add	t1, s1
 	sw	t1, 0(t4)
 2:
 	addi	t2, 1
@@ -382,8 +396,8 @@
 	 */
 	lw	t1, -12(t0)	/* t1 <-- uboot_end_data	*/
 	lw	t2, -8(t0)	/* t2 <-- uboot_end		*/
-	add	t1, t6		/* adjust pointers		*/
-	add	t2, t6
+	add	t1, s1		/* adjust pointers		*/
+	add	t2, s1
 
 	sub	t1, 4
 1:
@@ -391,10 +405,10 @@
 	bltl	t1, t2, 1b
 	sw	zero, 0(t1)	/* delay slot			*/
 
-	move	a0, a1
+	move	a0, s0		/* a0 <-- gd			*/
 	la	t9, board_init_r
 	jr	t9
-	move	a1, a2		/* delay slot			*/
+	move	a1, s2		/* delay slot			*/
 
 	.end	relocate_code