[MIPS] Fix $gp usage

Now we load $gp with _GLOBAL_OFFSET_TABLE_, but this is incorrect use.
As a general principle, we should use _gp for $gp.

Thanks to linker script's help we fortunately have _gp which equals to
_GLOBAL_OFFSET_TABLE_. But once _gp gets out of alignment, we will not
be able to access to GOT entires, global variables and procedure entry
points. The right thing to do is to use _gp.

This patch also introduce a new symbol `.gpword _GLOBAL_OFFSET_TABLE_'
which holds the offset from _gp. When updating GOT entries, we use this
offset and _gp to calculate the final _GLOBAL_OFFSET_TABLE_.

This patch is originally submitted by Vlad Lungu <vlad@comsys.ro>, then
I made some change to leave over num_got_entries.

Signed-off-by: Shinya Kuribayashi <shinya.kuribayashi@necel.com>
Cc: Vlad Lungu <vlad@comsys.ro>
diff --git a/cpu/mips/start.S b/cpu/mips/start.S
index e91e213..074d01d 100644
--- a/cpu/mips/start.S
+++ b/cpu/mips/start.S
@@ -234,11 +234,11 @@
 	li	t0, CONF_CM_UNCACHED
 	mtc0	t0, CP0_CONFIG
 
-	/* Initialize GOT pointer.
+	/* Initialize $gp.
 	*/
 	bal     1f
 	nop
-	.word   _GLOBAL_OFFSET_TABLE_
+	.word	_gp
 	1:
 	move    gp, ra
 	lw      t1, 0(ra)
@@ -306,9 +306,9 @@
 	move	t1, a2
 
 	/*
-	 * Fix GOT pointer:
+	 * Fix $gp:
 	 *
-	 * New GOT-PTR = (old GOT-PTR - CFG_MONITOR_BASE) + Destination Address
+	 * New $gp = (Old $gp - CFG_MONITOR_BASE) + Destination Address
 	 */
 	move	t6, gp
 	sub	gp, CFG_MONITOR_BASE
@@ -341,15 +341,22 @@
 	j	t0
 	nop
 
+	.gpword	_GLOBAL_OFFSET_TABLE_	/* _GLOBAL_OFFSET_TABLE_ - _gp	*/
 	.word	uboot_end_data
 	.word	uboot_end
 	.word	num_got_entries
 
 in_ram:
-	/* Now we want to update GOT.
+	/*
+	 * Now we want to update GOT.
+	 *
+	 * GOT[0] is reserved. GOT[1] is also reserved for the dynamic object
+	 * generated by GNU ld. Skip these reserved entries from relocation.
 	 */
 	lw	t3, -4(t0)	/* t3 <-- num_got_entries	*/
-	addi	t4, gp, 8	/* Skipping first two entries.	*/
+	lw	t4, -16(t0)	/* t4 <-- (_GLOBAL_OFFSET_TABLE_ - _gp)	*/
+	add	t4, t4, gp	/* t4 now holds _GLOBAL_OFFSET_TABLE_	*/
+	addi	t4, t4, 8	/* Skipping first two entries.	*/
 	li	t2, 2
 1:
 	lw	t1, 0(t4)