arm: arm64 32bit address relocation

The current relocation code is limited to 21-bit PC-relative addressing,
which might not be enough for bigger code sizes. This patch extends
the addressing to 32-bit PC-relative. This is especially useful when
U-Boot is built without optimization (-O0), as the text section grows
significantly.

Signed-off-by: Ibai Erkiaga <ibai.erkiaga-elorza@xilinx.com>
diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
index 7603f52..26d29c5 100644
--- a/arch/arm/lib/relocate_64.S
+++ b/arch/arm/lib/relocate_64.S
@@ -26,9 +26,10 @@
 	/*
 	 * Copy u-boot from flash to RAM
 	 */
-	adr	x1, __image_copy_start	/* x1 <- Run &__image_copy_start */
-	subs	x9, x0, x1		/* x8 <- Run to copy offset */
-	b.eq	relocate_done		/* skip relocation */
+	adrp	x1, __image_copy_start		/* x1 <- address bits [31:12] */
+	add	x1, x1, :lo12:__image_copy_start/* x1 <- address bits [11:00] */
+	subs	x9, x0, x1			/* x9 <- Run to copy offset */
+	b.eq	relocate_done			/* skip relocation */
 	/*
 	 * Don't ldr x1, __image_copy_start here, since if the code is already
 	 * running at an address other than it was linked to, that instruction
@@ -42,8 +43,10 @@
 	ldr	x1, _TEXT_BASE		/* x1 <- Linked &__image_copy_start */
 	subs	x9, x0, x1		/* x9 <- Link to copy offset */
 
-	adr	x1, __image_copy_start	/* x1 <- Run &__image_copy_start */
-	adr	x2, __image_copy_end	/* x2 <- Run &__image_copy_end */
+	adrp	x1, __image_copy_start		/* x1 <- address bits [31:12] */
+	add	x1, x1, :lo12:__image_copy_start/* x1 <- address bits [11:00] */
+	adrp	x2, __image_copy_end		/* x2 <- address bits [31:12] */
+	add	x2, x2, :lo12:__image_copy_end	/* x2 <- address bits [11:00] */
 copy_loop:
 	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
 	stp	x10, x11, [x0], #16	/* copy to   target address [x0] */
@@ -54,8 +57,10 @@
 	/*
 	 * Fix .rela.dyn relocations
 	 */
-	adr	x2, __rel_dyn_start	/* x2 <- Run &__rel_dyn_start */
-	adr	x3, __rel_dyn_end	/* x3 <- Run &__rel_dyn_end */
+	adrp	x2, __rel_dyn_start		/* x2 <- address bits [31:12] */
+	add	x2, x2, :lo12:__rel_dyn_start	/* x2 <- address bits [11:00] */
+	adrp	x3, __rel_dyn_end		/* x3 <- address bits [31:12] */
+	add	x3, x3, :lo12:__rel_dyn_end	/* x3 <- address bits [11:00] */
 fixloop:
 	ldp	x0, x1, [x2], #16	/* (x0,x1) <- (SRC location, fixup) */
 	ldr	x4, [x2], #8		/* x4 <- addend */