microblaze: Add support for run time relocation

Microblaze is using NEEDS_MANUAL_RELOC from the beginnging. This is causing
issues with function pointer arrays which need to be updated manually after
relocation. Building code with -fPIC and linking with -pic will remove this
limitation and there is no longer need to run manual update.

By default still old option is enabled but by disabling NEEDS_MANUAL_RELOC
code will be compiled for full relocation.

The patch does couple of things which are connected to each other.
- Define STATIC_RELA dependency to call relocate-rela to fill sections.
- REMAKE_ELF was already enabled but u-boot file can't be used because
  sections are empty. relocate-rela will fill them and output file is
  u-boot.elf which should be used.
- Add support for full relocation (u-boot.elf)
- Add support for early relocation when u-boot.bin is loaded to different
  address then CONFIG_SYS_TEXT_BASE
- Add rela.dyn and dynsym sections

Disabling NEEDS_MANUAL_RELOC U-Boot size increased by 10% of it's original
size (550kB to 608kB).

Signed-off-by: Michal Simek <michal.simek@amd.com>
Link: https://lore.kernel.org/r/a845670b34925859b2e321875f7588a29f6655f9.1655299267.git.michal.simek@amd.com
diff --git a/arch/Kconfig b/arch/Kconfig
index 02de32f..d91475d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -8,9 +8,6 @@
 config HAVE_ARCH_IOREMAP
 	bool
 
-config NEEDS_MANUAL_RELOC
-	bool
-
 config SYS_CACHE_SHIFT_4
 	bool
 
@@ -76,7 +73,6 @@
 
 config MICROBLAZE
 	bool "MicroBlaze architecture"
-	select NEEDS_MANUAL_RELOC
 	select SUPPORT_OF_CONTROL
 	imply CMD_IRQ
 
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 7f6e431..d501c4c 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,6 +4,9 @@
 config SYS_ARCH
 	default "m68k"
 
+config NEEDS_MANUAL_RELOC
+	def_bool y
+
 # processor family
 config MCF520x
 	select OF_CONTROL
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index d7d1b21..6f45d19 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -4,6 +4,20 @@
 config SYS_ARCH
 	default "microblaze"
 
+config NEEDS_MANUAL_RELOC
+	bool "Disable position-independent pre-relocation code"
+	default y
+	help
+	  U-Boot expects to be linked to a specific hard-coded address, and to
+	  be loaded to and run from that address. This option lifts that
+	  restriction, thus allowing the code to be loaded to and executed from
+	  almost any 4K aligned address. This logic relies on the relocation
+	  information that is embedded in the binary to support U-Boot
+	  relocating itself to the top-of-RAM later during execution.
+
+config STATIC_RELA
+	def_bool y if !NEEDS_MANUAL_RELOC
+
 choice
 	prompt "Target select"
 	optional
diff --git a/arch/microblaze/config.mk b/arch/microblaze/config.mk
index 3e84a83..d35b4f6 100644
--- a/arch/microblaze/config.mk
+++ b/arch/microblaze/config.mk
@@ -17,6 +17,11 @@
 PLATFORM_CPPFLAGS += -fPIC
 endif
 
+ifeq ($(CONFIG_STATIC_RELA),y)
+PLATFORM_CPPFLAGS += -fPIC
+LDFLAGS_u-boot += -pic
+endif
+
 ifeq ($(CONFIG_SYS_LITTLE_ENDIAN),y)
 PLATFORM_ELFFLAGS += -B microblaze $(OBJCOPYFLAGS) -O elf32-microblazeel
 else
diff --git a/arch/microblaze/cpu/Makefile b/arch/microblaze/cpu/Makefile
index f7a83d0..1feffc6 100644
--- a/arch/microblaze/cpu/Makefile
+++ b/arch/microblaze/cpu/Makefile
@@ -6,4 +6,5 @@
 extra-y	= start.o
 obj-y	= irq.o
 obj-y	+= interrupts.o cache.o exception.o timer.o
+obj-$(CONFIG_STATIC_RELA)	+= relocate.o
 obj-$(CONFIG_SPL_BUILD)	+= spl.o
diff --git a/arch/microblaze/cpu/relocate.c b/arch/microblaze/cpu/relocate.c
new file mode 100644
index 0000000..b00d02b
--- /dev/null
+++ b/arch/microblaze/cpu/relocate.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) Copyright 2022 Advanced Micro Devices, Inc
+ * Michal Simek <michal.simek@amd.com>
+ */
+
+#include <common.h>
+#include <elf.h>
+
+#define R_MICROBLAZE_NONE	0
+#define R_MICROBLAZE_32		1
+#define R_MICROBLAZE_REL	16
+#define R_MICROBLAZE_GLOB_DAT	18
+
+/**
+ * mb_fix_rela - update relocation to new address
+ * @reloc_addr: new relocation address
+ * @verbose: enable version messages
+ * @rela_start: rela section start
+ * @rela_end: rela section end
+ * @dyn_start: dynamic section start
+ * @origin_addr: address where u-boot starts(doesn't need to be CONFIG_SYS_TEXT_BASE)
+ */
+void mb_fix_rela(u32 reloc_addr, u32 verbose, u32 rela_start,
+		 u32 rela_end, u32 dyn_start, u32 origin_addr)
+{
+	u32 num, type, mask, i, reloc_off;
+
+	/*
+	 * Return in case u-boot.elf is used directly.
+	 * Skip it when u-boot.bin is loaded to different address than
+	 * CONFIG_SYS_TEXT_BASE. In this case relocation is necessary to run.
+	 */
+	if (reloc_addr == CONFIG_SYS_TEXT_BASE) {
+		debug_cond(verbose,
+			   "Relocation address is the same - skip relocation\n");
+		return;
+	}
+
+	reloc_off = reloc_addr - origin_addr;
+
+	debug_cond(verbose, "Relocation address:\t0x%08x\n", reloc_addr);
+	debug_cond(verbose, "Relocation offset:\t0x%08x\n", reloc_off);
+	debug_cond(verbose, "Origin address:\t0x%08x\n", origin_addr);
+	debug_cond(verbose, "Rela start:\t0x%08x\n", rela_start);
+	debug_cond(verbose, "Rela end:\t0x%08x\n", rela_end);
+	debug_cond(verbose, "Dynsym start:\t0x%08x\n", dyn_start);
+
+	num = (rela_end - rela_start) / sizeof(Elf32_Rela);
+
+	debug_cond(verbose, "Number of entries:\t%u\n", num);
+
+	for (i = 0; i < num; i++) {
+		Elf32_Rela *rela;
+		u32 temp;
+
+		rela = (Elf32_Rela *)(rela_start + sizeof(Elf32_Rela) * i);
+
+		mask = 0xffULL; /* would be different on 32-bit */
+		type = rela->r_info & mask;
+
+		debug_cond(verbose, "\nRela possition:\t%d/0x%x\n",
+			   i, (u32)rela);
+
+		switch (type) {
+		case R_MICROBLAZE_REL:
+			temp = *(u32 *)rela->r_offset;
+
+			debug_cond(verbose, "Type:\tREL\n");
+			debug_cond(verbose, "Rela r_offset:\t\t0x%x\n", rela->r_offset);
+			debug_cond(verbose, "Rela r_info:\t\t0x%x\n", rela->r_info);
+			debug_cond(verbose, "Rela r_addend:\t\t0x%x\n", rela->r_addend);
+			debug_cond(verbose, "Value at r_offset:\t0x%x\n", temp);
+
+			rela->r_offset += reloc_off;
+			rela->r_addend += reloc_off;
+
+			temp = *(u32 *)rela->r_offset;
+			temp += reloc_off;
+			*(u32 *)rela->r_offset = temp;
+
+			debug_cond(verbose, "New:Rela r_offset:\t0x%x\n", rela->r_offset);
+			debug_cond(verbose, "New:Rela r_addend:\t0x%x\n", rela->r_addend);
+			debug_cond(verbose, "New:Value at r_offset:\t0x%x\n", temp);
+			break;
+		case R_MICROBLAZE_32:
+		case R_MICROBLAZE_GLOB_DAT:
+			debug_cond(verbose, "Type:\t(32/GLOB) %u\n", type);
+			debug_cond(verbose, "Rela r_offset:\t\t0x%x\n", rela->r_offset);
+			debug_cond(verbose, "Rela r_info:\t\t0x%x\n", rela->r_info);
+			debug_cond(verbose, "Rela r_addend:\t\t0x%x\n", rela->r_addend);
+			debug_cond(verbose, "Value at r_offset:\t0x%x\n", temp);
+
+			rela->r_offset += reloc_off;
+
+			temp = *(u32 *)rela->r_offset;
+			temp += reloc_off;
+			*(u32 *)rela->r_offset = temp;
+
+			debug_cond(verbose, "New:Rela r_offset:\t0x%x\n", rela->r_offset);
+			debug_cond(verbose, "New:Value at r_offset:\t0x%x\n", temp);
+			break;
+		case R_MICROBLAZE_NONE:
+			debug_cond(verbose, "R_MICROBLAZE_NONE - skip\n");
+			break;
+		default:
+			debug_cond(verbose, "warning: unsupported relocation type %d at %x\n",
+				   type, rela->r_offset);
+		}
+	}
+}
diff --git a/arch/microblaze/cpu/start.S b/arch/microblaze/cpu/start.S
index 72b0f33..9a661e7 100644
--- a/arch/microblaze/cpu/start.S
+++ b/arch/microblaze/cpu/start.S
@@ -10,8 +10,16 @@
 #include <asm-offsets.h>
 #include <config.h>
 
+#if defined(CONFIG_STATIC_RELA)
+#define SYM_ADDR(reg, reg_add, symbol)	\
+	mfs	r20, rpc; \
+	addik	r20, r20, _GLOBAL_OFFSET_TABLE_ + 8; \
+	lwi	reg, r20, symbol@GOT; \
+	addk	reg, reg reg_add;
+#else
 #define SYM_ADDR(reg, reg_add, symbol)	\
 	addi	reg, reg_add, symbol
+#endif
 
 	.text
 	.global _start
@@ -27,6 +35,39 @@
 	addi	r1, r0, CONFIG_SPL_STACK_ADDR
 #else
 	add	r1, r0, r20
+#if defined(CONFIG_STATIC_RELA)
+	bri	1f
+
+	/* Force alignment for easier ASM code below */
+#define ALIGNMENT_ADDR	0x20
+	.align	4
+uboot_dyn_start:
+	.word	__rel_dyn_start
+
+uboot_dyn_end:
+	.word	__rel_dyn_end
+
+uboot_sym_start:
+	.word	__dyn_sym_start
+1:
+
+	addi	r5, r20, 0
+	add	r6, r0, r0
+
+	lwi	r7, r20, ALIGNMENT_ADDR
+	addi	r7, r7, -CONFIG_SYS_TEXT_BASE
+	add	r7, r7, r5
+	lwi	r8, r20, ALIGNMENT_ADDR + 0x4
+	addi	r8, r8, -CONFIG_SYS_TEXT_BASE
+	add	r8, r8, r5
+	lwi	r9, r20, ALIGNMENT_ADDR + 0x8
+	addi	r9, r9, -CONFIG_SYS_TEXT_BASE
+	add	r9, r9, r5
+	addi	r10, r0, CONFIG_SYS_TEXT_BASE
+
+	brlid	r15, mb_fix_rela
+	nop
+#endif
 #endif
 
 	addi	r1, r1, -4	/* Decrement SP to top of memory */
@@ -297,6 +338,30 @@
 	brlid	r15, __setup_exceptions
 	nop
 
+#if defined(CONFIG_STATIC_RELA)
+	/* reloc_offset is current location */
+	SYM_ADDR(r10, r0, _start)
+
+	/* r5 new address where I should copy code */
+	add	r5, r0, r7 /* Move reloc addr to r5 */
+
+	/* Verbose message */
+	addi	r6, r0, 0
+
+	SYM_ADDR(r7, r0, __rel_dyn_start)
+	rsub	r7, r10, r7
+	add	r7, r7, r5
+	SYM_ADDR(r8, r0, __rel_dyn_end)
+	rsub	r8, r10, r8
+	add	r8, r8, r5
+	SYM_ADDR(r9, r0, __dyn_sym_start)
+	rsub	r9, r10, r9
+	add	r9, r9, r5
+	brlid	r15, mb_fix_rela
+	nop
+
+	/* end of code which does relocation */
+#else
 	/* Check if GOT exist */
 	addik	r21, r23, _got_start
 	addik	r22, r23, _got_end
@@ -314,6 +379,7 @@
 	cmpu	r12, r21, r22 /* Check if this cross boundary */
 	bneid	r12, 3b
 	addik	r21. r21, 4
+#endif
 
 	/* Flush caches to ensure consistency */
 	addik	r5, r0, 0
diff --git a/arch/microblaze/cpu/u-boot.lds b/arch/microblaze/cpu/u-boot.lds
index 2b316cc..821cc55 100644
--- a/arch/microblaze/cpu/u-boot.lds
+++ b/arch/microblaze/cpu/u-boot.lds
@@ -46,6 +46,20 @@
 	}
 	__init_end = . ;
 
+	. = ALIGN(4);
+	__rel_dyn_start = .;
+	.rela.dyn : {
+		*(.rela.dyn)
+	}
+	__rel_dyn_end = .;
+
+	. = ALIGN(4);
+	__dyn_sym_start = .;
+	.dynsym : {
+		*(.dynsym)
+	}
+	__dyn_sym_end = .;
+
 	.bss ALIGN(0x4):
 	{
 		__bss_start = .;
diff --git a/common/board_f.c b/common/board_f.c
index 51d2f3c..a5666ca 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -684,6 +684,8 @@
 #ifdef CONFIG_SYS_TEXT_BASE
 #ifdef ARM
 		gd->reloc_off = gd->relocaddr - (unsigned long)__image_copy_start;
+#elif defined(CONFIG_MICROBLAZE)
+		gd->reloc_off = gd->relocaddr - (u32)_start;
 #elif defined(CONFIG_M68K)
 		/*
 		 * On all ColdFire arch cpu, monitor code starts always