MIPS: start.S: optimize BSS initialization

Get the start and end addresses for clearing BSS from the newly
introduced symbols __bss_start and __bss_end. After the GOT is
relocated, those symbols already point to the correct
addresses.

Also optimize the loop by moving the address increment
to the delay slot to avoid the initial sub instruction.

Signed-off-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/cpu/mips32/start.S b/arch/mips/cpu/mips32/start.S
index 7373d4e..cd8b914 100644
--- a/arch/mips/cpu/mips32/start.S
+++ b/arch/mips/cpu/mips32/start.S
@@ -228,17 +228,26 @@
 	blt	t2, t3, 1b
 	 addi	t4, 4
 
-	/* Clear BSS */
-	lw	t1, -12(t0)		# t1 <-- uboot_end_data
-	lw	t2, -8(t0)		# t2 <-- uboot_end
-	add	t1, s1			# adjust pointers
-	add	t2, s1
+	/*
+	 * Clear BSS
+	 *
+	 * GOT is now relocated. Thus __bss_start and __bss_end can be
+	 * accessed directly via $gp.
+	 *
+	 * The loop branches on the address that was just cleared, so it
+	 * must be compared against the last BSS word (__bss_end - 4), not
+	 * against __bss_end; otherwise one word past the BSS is cleared.
+	 */
+	la	t1, __bss_start		# t1 <-- __bss_start
+	la	t2, __bss_end		# t2 <-- __bss_end
 
-	sub	t1, 4
+	bge	t1, t2, 2f		# nothing to clear if BSS is empty
+	 addi	t2, -4			# t2 <-- last word of BSS
 1:
-	addi	t1, 4
-	bltl	t1, t2, 1b
-	 sw	zero, 0(t1)
+	sw	zero, 0(t1)
+	blt	t1, t2, 1b
+	 addi	t1, 4			# delay slot: advance to next word
+2:
 
 	move	a0, s0			# a0 <-- gd
 	la	t9, board_init_r
diff --git a/arch/mips/cpu/mips64/start.S b/arch/mips/cpu/mips64/start.S
index c0ae41a..ba4ca4d 100644
--- a/arch/mips/cpu/mips64/start.S
+++ b/arch/mips/cpu/mips64/start.S
@@ -220,17 +220,26 @@
 	blt	t2, t3, 1b
 	 daddi	t8, 8
 
-	/* Clear BSS */
-	ld	t1, -24(t0)		# t1 <-- uboot_end_data
-	ld	t2, -16(t0)		# t2 <-- uboot_end
-	dadd	t1, s1			# adjust pointers
-	dadd	t2, s1
+	/*
+	 * Clear BSS
+	 *
+	 * GOT is now relocated. Thus __bss_start and __bss_end can be
+	 * accessed directly via $gp.
+	 *
+	 * The loop branches on the address that was just cleared, so it
+	 * must be compared against the last BSS doubleword (__bss_end - 8),
+	 * not against __bss_end; otherwise 8 bytes past the BSS are cleared.
+	 */
+	dla	t1, __bss_start		# t1 <-- __bss_start
+	dla	t2, __bss_end		# t2 <-- __bss_end
 
-	dsub	t1, 8
+	bge	t1, t2, 2f		# nothing to clear if BSS is empty
+	 daddi	t2, -8			# t2 <-- last doubleword of BSS
 1:
-	daddi	t1, 8
-	bltl	t1, t2, 1b
-	 sd	zero, 0(t1)
+	sd	zero, 0(t1)
+	blt	t1, t2, 1b
+	 daddi	t1, 8			# delay slot: advance to next doubleword
+2:
 
 	move	a0, s0			# a0 <-- gd
 	dla	t9, board_init_r
diff --git a/arch/mips/cpu/xburst/start.S b/arch/mips/cpu/xburst/start.S
index 50b7fb1..bd9390a 100644
--- a/arch/mips/cpu/xburst/start.S
+++ b/arch/mips/cpu/xburst/start.S
@@ -143,16 +143,26 @@
 	blt	t2, t3, 1b
 	 addi	t4, 4
 
-	/* Clear BSS */
-	lw	t1, -12(t0)		# t1 <-- uboot_end_data
-	lw	t2, -8(t0)		# t2 <-- uboot_end
-	add	t1, t6			# adjust pointers
-	add	t2, t6
+	/*
+	 * Clear BSS
+	 *
+	 * GOT is now relocated. Thus __bss_start and __bss_end can be
+	 * accessed directly via $gp.
+	 *
+	 * The loop branches on the address that was just cleared, so it
+	 * must be compared against the last BSS word (__bss_end - 4), not
+	 * against __bss_end; otherwise one word past the BSS is cleared.
+	 */
+	la	t1, __bss_start		# t1 <-- __bss_start
+	la	t2, __bss_end		# t2 <-- __bss_end
 
-	sub	t1, 4
-1:	addi	t1, 4
-	bltl	t1, t2, 1b
-	 sw	zero, 0(t1)
+	bge	t1, t2, 2f		# nothing to clear if BSS is empty
+	 addi	t2, -4			# t2 <-- last word of BSS
+1:
+	sw	zero, 0(t1)
+	blt	t1, t2, 1b
+	 addi	t1, 4			# delay slot: advance to next word
+2:
 
 	move	a0, a1			# a0 <-- gd
 	la	t9, board_init_r