MIPS: start.S: optimize BSS initialization

Get the start and end addresses for clearing BSS from the newly
introduced symbols __bss_start and __bss_end. Once the GOT has been
relocated, those symbols already resolve to the correct (relocated)
addresses.

Also optimize the loop by moving the address increment into the
branch delay slot, which avoids the initial sub instruction.

Signed-off-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/cpu/mips32/start.S b/arch/mips/cpu/mips32/start.S
index 7373d4e..cd8b914 100644
--- a/arch/mips/cpu/mips32/start.S
+++ b/arch/mips/cpu/mips32/start.S
@@ -228,17 +228,19 @@
 	blt	t2, t3, 1b
 	 addi	t4, 4
 
-	/* Clear BSS */
-	lw	t1, -12(t0)		# t1 <-- uboot_end_data
-	lw	t2, -8(t0)		# t2 <-- uboot_end
-	add	t1, s1			# adjust pointers
-	add	t2, s1
+	/*
+	 * Clear BSS: zero each word in [__bss_start, __bss_end). t2 is the
+	 * last word, as blt tests t1 before the delay-slot increment.
+	 * GOT is now relocated. Thus __bss_start and __bss_end can be
+	 * accessed directly via $gp.
+	 */
+	la	t1, __bss_start		# t1 <-- first BSS word
+	la	t2, __bss_end - 4	# t2 <-- last BSS word
 
-	sub	t1, 4
 1:
-	addi	t1, 4
-	bltl	t1, t2, 1b
-	 sw	zero, 0(t1)
+	sw	zero, 0(t1)		# runs once even if BSS is empty
+	blt	t1, t2, 1b		# t1 tested before it is advanced
+	 addi	t1, 4			# delay slot: next word
 
 	move	a0, s0			# a0 <-- gd
 	la	t9, board_init_r